diff --git a/.github/ISSUE_TEMPLATE/code-contrib-task.yml b/.github/ISSUE_TEMPLATE/code-contrib-task.yml deleted file mode 100644 index 3191e4fe48d..00000000000 --- a/.github/ISSUE_TEMPLATE/code-contrib-task.yml +++ /dev/null @@ -1,115 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# -# This is a dedicated issue template for 2023 Kyuubi Code Contribution Program, all proposed -# tasks will be listed at https://github.com/orgs/apache/projects/296 after approval -# -name: 2023 Kyuubi Code Contribution Task -title: "[TASK][] " -description: Propose a task for 2023 Kyuubi Code Contribution Program -labels: [ "hacktoberfest" ] -body: - - type: markdown - attributes: - value: | - You are very welcome to propose new task for 2023 Kyuubi Code Contribution Program. - Your brilliant ideas keep Apache Kyuubi evolving. - Please replace the placeholder `` in the issue title with one of the following options: - - TRIVIAL - it's usually for new contributors to learn the contributor process, e.g. how to cut branch, - how to use GitHub to send PR, how to response with reviewers, the contributor should not stay at this - stage too long. - - EASY - tasks like minor bugs, or simple features without requirements of knowledge for whole Kyuubi - architecture. - - MEDIUM - tasks typical requires that contributors have knowledge on one or more Kyuubi components, - normally, unit tests and integration tests is also required to verify the implementations. - - CHALLENGE - tasks requires that contributors have deep knowledge on one or more Kyuubi components, - have good logical thinking and the ability to solve complex problems, be proficient in programming - skills or algorithms - - - type: checkboxes - attributes: - label: Code of Conduct - description: The Code of Conduct helps create a safe space for everyone. We require that everyone agrees to it. - options: - - label: > - I agree to follow this project's [Code of Conduct](https://www.apache.org/foundation/policies/conduct) - required: true - - - type: checkboxes - attributes: - label: Search before creating - options: - - label: > - I have searched in the [task list](https://github.com/orgs/apache/projects/296) and found no similar - tasks. - required: true - - - type: checkboxes - attributes: - label: Mentor - description: Mentor is required for MEDIUM and CHALLENGE tasks, to guide contributors to complete the task. - options: - - label: > - I have sufficient knowledge and experience of this task, and I volunteer to be the mentor of this task - to guide contributors to complete the task. - required: false - - - type: textarea - attributes: - label: Skill requirements - description: Which stills are required for contributors who want to take this task? - placeholder: | - e.g. - - Basic knowledge on Scala Programing Language - - Familiar with Apache Maven, Docker and GitHub Action - - Basic knowledge on network programing and Apache Thrift RPC framework - - Familiar with Apache Spark - - ... - validations: - required: true - - - type: textarea - attributes: - label: Background and Goals - description: What's the current problem, and what's the final status should be after the task is completed? - placeholder: > - Please describe the background and your goal for requesting this task. - validations: - required: true - - - type: textarea - attributes: - label: Implementation steps - description: How could it be implemented? - placeholder: > - Please list the implementation steps in as much detail as possible so that contributors who meet - the skill requirements could complete the task quickly and independently. - validations: - required: true - - - type: textarea - attributes: - label: Additional context - placeholder: > - Anything else that related to this task that the contributors need to know. - validations: - required: false - - - type: markdown - attributes: - value: "Thanks for taking the time to fill out this task form!" diff --git a/.github/PULL_REQUEST_TEMPLATE b/.github/PULL_REQUEST_TEMPLATE index 3cab99d1fe8..6de5d0adccd 100644 --- a/.github/PULL_REQUEST_TEMPLATE +++ b/.github/PULL_REQUEST_TEMPLATE @@ -1,32 +1,56 @@ - + -Here are some tips for you: - 1. If this is your first time, please read our contributor guidelines: https://kyuubi.readthedocs.io/en/latest/community/CONTRIBUTING.html - 2. If the PR is related to an issue in https://github.com/apache/kyuubi/issues, add '[KYUUBI #XXXX]' in your PR title, e.g., '[KYUUBI #XXXX] Your PR title ...'. - 3. If the PR is unfinished, add '[WIP]' in your PR title, e.g., '[WIP][KYUUBI #XXXX] Your PR title ...'. ---> +This pull request fixes # -### _Why are the changes needed?_ - +## Describe Your Solution ๐Ÿ”ง +Please include a summary of the change and which issue is fixed. Please also include relevant motivation and context. List any dependencies that are required for this change. -### _How was this patch tested?_ -- [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible -- [ ] Add screenshots for manual tests if appropriate +## Types of changes :bookmark: + +- [ ] Bugfix (non-breaking change which fixes an issue) +- [ ] New feature (non-breaking change which adds functionality) +- [ ] Breaking change (fix or feature that would cause existing functionality to change) -- [ ] [Run test](https://kyuubi.readthedocs.io/en/master/contributing/code/testing.html#running-tests) locally before make a pull request +## Test Plan ๐Ÿงช +#### Behavior Without This Pull Request :coffin: -### _Was this patch authored or co-authored using generative AI tooling?_ - + +#### Behavior With This Pull Request :tada: + + +#### Related Unit Tests + + +--- + +# Checklists +## ๐Ÿ“ Author Self Checklist + + +- [ ] My code follows the [style guidelines](https://kyuubi.readthedocs.io/en/master/contributing/code/style.html) of this project +- [ ] I have performed a self-review +- [ ] I have commented my code, particularly in hard-to-understand areas +- [ ] I have made corresponding changes to the documentation +- [ ] My changes generate no new warnings +- [ ] I have added tests that prove my fix is effective or that my feature works +- [ ] New and existing unit tests pass locally with my changes +- [ ] This patch was not authored or co-authored using [Generative Tooling](https://www.apache.org/legal/generative-tooling.html) + +## ๐Ÿ“ Committer Pre-Merge Checklist + +- [ ] Pull request title is okay. +- [ ] No license issues. +- [ ] Milestone correctly set? +- [ ] Test coverage is ok +- [ ] Assignees are selected. +- [ ] Minimum number of approvals +- [ ] No changes are requested + + +**Be nice. Be informative.** diff --git a/.github/labeler.yml b/.github/labeler.yml index ecec1253274..e76dad43902 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -19,109 +19,181 @@ # Pull Request Labeler Github Action Configuration: https://github.com/marketplace/actions/labeler "kind:build": - - ".dockerignore" - - ".rat-excludes" - - ".scalafmt" - - "**/*pom.xml" - - "bin/docker-image-tool.sh" - - "build/**/*" - - "docker/**/*" - - "docs/requirements" - - "kyuubi-assembly/**/*" - - "scalastyle-config.xml" - - any: ["dev/**/*", "!dev/kyuubi-codecov/**/*", "!dev/kyuubi-tpcds/**/*"] + - changed-files: + - any-glob-to-any-file: [ + '.dockerignore', + '.rat-excludes', + '.scalafmt', + '**/*pom.xml', + 'bin/docker-image-tool.sh', + 'build/**/*', + 'docker/**/*', + 'docs/requirements', + 'kyuubi-assembly/**/*', + 'scalastyle-config.xml' + ] + - all-globs-to-any-file: [ + 'dev/**/*', + '!dev/kyuubi-codecov/**/*', + '!dev/kyuubi-tpcds/**/*' + ] "kind:deploy": - - any: ["bin/**/*", "!bin/beeline", "!bin/docker-image-tool.sh"] + - changed-files: + - all-globs-to-any-file: [ + 'bin/**/*', + '!bin/beeline', + '!bin/docker-image-tool.sh' + ] "kind:documentation": - - "*.md" - - "conf/**/*" - - "docs/**/*" - - "readthedocs.yml" + - changed-files: + - any-glob-to-any-file: [ + '*.md', + 'conf/**/*', + 'docs/**/*', + 'readthedocs.yml' + ] "kind:infra": - - ".asf.yaml" - - ".gitattributes" - - ".github/**/*" - - ".gitignore" - - "LICENSE" - - "LICENSE-binary" - - "NOTICE" - - "NOTICE-binary" - - "codecov.yml" - - "dev/kyuubi-codecov/**/*" - - "licenses-binary" + - changed-files: + - any-glob-to-any-file: [ + '.asf.yaml', + '.gitattributes', + '.github/**/*', + '.gitignore', + 'LICENSE', + 'LICENSE-binary', + 'NOTICE', + 'NOTICE-binary', + 'codecov.yml', + 'dev/kyuubi-codecov/**/*', + 'licenses-binary' + ] "module:common": - - "kyuubi-common/**/*" + - changed-files: + - any-glob-to-any-file: [ + 'kyuubi-common/**/*' + ] "module:ctl": - - "bin/beeline" - - "kyuubi-ctl/**/*" - - "kyuubi-hive-beeline/**/*" - - "kyuubi-hive-jdbc/**/*" - - "kyuubi-hive-jdbc-shaded/**/*" + - changed-files: + - any-glob-to-any-file: [ + 'bin/beeline', + 'kyuubi-ctl/**/*', + 'kyuubi-hive-beeline/**/*', + 'kyuubi-hive-jdbc/**/*', + 'kyuubi-hive-jdbc-shaded/**/*' + ] "module:events": - - "kyuubi-events/**/*" + - changed-files: + - any-glob-to-any-file: [ + 'kyuubi-events/**/*' + ] "module:flink": - - "externals/kyuubi-flink-sql-engine/**/*" - - "integration-tests/kyuubi-flink-it/**/*" + - changed-files: + - any-glob-to-any-file: [ + 'externals/kyuubi-flink-sql-engine/**/*', + 'integration-tests/kyuubi-flink-it/**/*' + ] "module:ha": - - "kyuubi-ha/**/*" - - "kyuubi-zookeeper/**/*" + - changed-files: + - any-glob-to-any-file: [ + 'kyuubi-ha/**/*', + 'kyuubi-zookeeper/**/*' + ] "module:hive": - - "bin/beeline" - - "externals/kyuubi-hive-sql-engine/**/*" - - "kyuubi-hive-beeline/**/*" - - "kyuubi-hive-jdbc/**/*" - - "kyuubi-hive-jdbc-shaded/**/*" + - changed-files: + - any-glob-to-any-file: [ + 'bin/beeline', + 'externals/kyuubi-hive-sql-engine/**/*', + 'kyuubi-hive-beeline/**/*', + 'kyuubi-hive-jdbc/**/*', + 'kyuubi-hive-jdbc-shaded/**/*' + ] "module:jdbc": - - "externals/kyuubi-jdbc-engine/**/*" + - changed-files: + - any-glob-to-any-file: [ + 'externals/kyuubi-jdbc-engine/**/*' + ] "module:kubernetes": - - ".dockerignore" - - "bin/docker-image-tool.sh" - - "docker/**/*" - - "integration-tests/kyuubi-kubernetes-it/**/*" - - "tools/spark-block-cleaner/**/*" + - changed-files: + - any-glob-to-any-file: [ + '.dockerignore', + 'bin/docker-image-tool.sh', + 'docker/**/*', + 'integration-tests/kyuubi-kubernetes-it/**/*', + 'tools/spark-block-cleaner/**/*' + ] "module:metrics": - - "kyuubi-metrics/**/*" + - changed-files: + - any-glob-to-any-file: [ + 'kyuubi-metrics/**/*' + ] "module:trino": - - "externals/kyuubi-trino-engine/**/*" + - changed-files: + - any-glob-to-any-file: [ + 'externals/kyuubi-trino-engine/**/*' + ] "module:tpcds": - - "dev/kyuubi-tpcds/**/*" + - changed-files: + - any-glob-to-any-file: [ + 'dev/kyuubi-tpcds/**/*' + ] "module:server": - - "bin/kyuubi" - - "kyuubi-server/src/**/*" - - "kyuubi-server/pom.xml" - - "extension/server/kyuubi-server-plugin/**/*" + - changed-files: + - any-glob-to-any-file: [ + 'bin/kyuubi', + 'kyuubi-server/src/**/*', + 'kyuubi-server/pom.xml', + 'extension/server/kyuubi-server-plugin/**/*' + ] "module:spark": - - "externals/kyuubi-spark-sql-engine/**/*" - - "extensions/spark/**/*" - - "tools/spark-block-cleaner/**/*" + - changed-files: + - any-glob-to-any-file: [ + 'externals/kyuubi-spark-sql-engine/**/*', + 'extensions/spark/**/*', + 'tools/spark-block-cleaner/**/*' + ] "module:extensions": - - "extensions/**/*" + - changed-files: + - any-glob-to-any-file: [ + 'extensions/**/*' + ] "module:rest-client": - - "kyuubi-rest-client/**/*" + - changed-files: + - any-glob-to-any-file: [ + 'kyuubi-rest-client/**/*' + ] "module:integration-tests": - - "integration-tests/**/*" + - changed-files: + - any-glob-to-any-file: [ + 'integration-tests/**/*' + ] "module:authz": - - "extensions/spark/kyuubi-spark-authz/**/*" + - changed-files: + - any-glob-to-any-file: [ + 'extensions/spark/kyuubi-spark-authz/**/*' + ] "module:ui": - - "kyuubi-server/web-ui/**/*" + - changed-files: + - any-glob-to-any-file: [ + 'kyuubi-server/web-ui/**/*' + ] diff --git a/.github/workflows/dep.yml b/.github/workflows/dep.yml index f39e5e6a212..96f49d8d9b8 100644 --- a/.github/workflows/dep.yml +++ b/.github/workflows/dep.yml @@ -26,6 +26,8 @@ on: # when pom or dependency workflow changes - '**/pom.xml' - '.github/workflows/dep.yml' + - 'build/dependency.sh' + - 'dev/dependencyList' concurrency: group: dep-${{ github.head_ref || github.run_id }} @@ -36,9 +38,9 @@ jobs: name: Dependency check runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: setup java - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: distribution: temurin java-version: 8 diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 55cb6b8b16b..3b2ea90d660 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -31,8 +31,8 @@ jobs: name: sphinx-build runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: python-version: '3.9' cache: 'pip' diff --git a/.github/workflows/gluten.yml b/.github/workflows/gluten.yml new file mode 100644 index 00000000000..23b4f0d3bbc --- /dev/null +++ b/.github/workflows/gluten.yml @@ -0,0 +1,128 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +name: Gluten CI + +on: + schedule: + - cron: 0 4 * * * + +env: + MVN_OPT: -Dmaven.javadoc.skip=true -Drat.skip=true -Dscalastyle.skip=true -Dspotless.check.skip -Dorg.slf4j.simpleLogger.defaultLogLevel=warn -Pjdbc-shaded,gen-policy -Dmaven.plugin.download.cache.path=/tmp/engine-archives + +jobs: + gluten-build: + name: Build Gluten + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v4 + - name: Tune Runner VM + uses: ./.github/actions/tune-runner-vm + - name: Update and Upgrade + run: sudo apt-get update && sudo apt-get upgrade -y + - name: Install dependencies + run: | + sudo apt-get install -y software-properties-common + sudo apt-get install -y libunwind-dev build-essential cmake libssl-dev libre2-dev libcurl4-openssl-dev clang lldb lld libz-dev git ninja-build uuid-dev + - name: Setup JDK 8 + uses: actions/setup-java@v4 + with: + distribution: temurin + java-version: 8 + cache: 'maven' + check-latest: false + - name: Setup Maven + uses: ./.github/actions/setup-maven + - name: Get gluten cache date + id: date + run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT + - name: Check gluten cache + id: gluten-cache + uses: actions/cache@v3 + with: + path: gluten/package/target/ + key: gluten_package_${{ steps.date.outputs.date }} + - name: Build gluten project + run: | + if [[ "${{ steps.gluten-cache.outputs.cache-hit }}" != 'true' ]]; then + git clone https://github.com/oap-project/gluten.git + cd gluten + ./dev/buildbundle-veloxbe.sh + fi + - uses: actions/cache@v3 + if: steps.gluten-cache.outputs.cache-hit != 'true' + with: + path: gluten/package/target/ + key: gluten_package_${{ steps.date.outputs.date }} + + gluten-it: + name: Gluten Integration TPC-H/DS Test + needs: gluten-build + runs-on: ubuntu-22.04 + strategy: + fail-fast: false + matrix: + spark: [ '3.4', '3.3' ] + steps: + - uses: actions/checkout@v4 + - name: Tune Runner VM + uses: ./.github/actions/tune-runner-vm + - name: Update and Upgrade + run: sudo apt-get update && sudo apt-get upgrade -y + - name: Install dependencies + run: | + sudo apt-get install -y software-properties-common + sudo apt-get install -y libunwind-dev build-essential cmake libssl-dev libre2-dev libcurl4-openssl-dev clang lldb lld libz-dev git ninja-build uuid-dev + sudo apt-get install -y libsnappy-dev libthrift-dev libboost-all-dev libgflags-dev libgoogle-glog-dev + - name: Cache Engine Archives + uses: ./.github/actions/cache-engine-archives + - name: Get gluten cache date + id: date + run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT + - name: Check gluten cache + id: gluten-cache + uses: actions/cache@v3 + with: + path: gluten/package/target/ + key: gluten_package_${{ steps.date.outputs.date }} + - name: Cache Gluten Package + uses: actions/cache@v3 + with: + path: gluten/package/target/ + key: gluten_package + - name: Setup JDK 8 + uses: actions/setup-java@v4 + with: + distribution: temurin + java-version: 8 + cache: 'maven' + check-latest: false + - name: Setup Maven + uses: ./.github/actions/setup-maven + - name: Run Gluten Integration TPC-H/DS Test + run: | + TEST_MODULES="integration-tests/kyuubi-gluten-it" + ./build/mvn ${MVN_OPT} -pl ${TEST_MODULES} -am clean install -DskipTests -Pgluten-spark-${{ matrix.spark }} + ./build/mvn ${MVN_OPT} -pl ${TEST_MODULES} test -Pgluten-spark-${{ matrix.spark }} \ + -Dmaven.plugin.scalatest.exclude.tags='' -Dtest=none -Dmaven.plugin.scalatest.include.tags='org.apache.kyuubi.tags.GlutenTest' + - name: Upload test logs + if: failure() + uses: actions/upload-artifact@v3 + with: + name: unit-tests-log-spark-${{ matrix.spark }}-gluten + path: | + **/target/unit-tests.log diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml index c4cad7aef2d..7d6cd5bd217 100644 --- a/.github/workflows/labeler.yml +++ b/.github/workflows/labeler.yml @@ -28,7 +28,7 @@ jobs: triage: runs-on: ubuntu-22.04 steps: - - uses: actions/labeler@v4 + - uses: actions/labeler@v5 with: repo-token: "${{ secrets.GITHUB_TOKEN }}" sync-labels: true diff --git a/.github/workflows/license.yml b/.github/workflows/license.yml index 55ef485f8fe..cc1ab623630 100644 --- a/.github/workflows/license.yml +++ b/.github/workflows/license.yml @@ -34,9 +34,9 @@ jobs: name: License runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup JDK 8 - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: distribution: temurin java-version: 8 diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 1819c4850af..289e32c14b0 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -86,11 +86,11 @@ jobs: env: SPARK_LOCAL_IP: localhost steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Tune Runner VM uses: ./.github/actions/tune-runner-vm - name: Setup JDK ${{ matrix.java }} - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: distribution: temurin java-version: ${{ matrix.java }} @@ -101,14 +101,17 @@ jobs: - name: Setup Maven uses: ./.github/actions/setup-maven - name: Setup Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.9' - name: Build and test Kyuubi and Spark with maven w/o linters run: | + if [[ "${{ matrix.java }}" == "8" && "${{ matrix.spark }}" == "3.4" && "${{ matrix.spark-archive }}" == "" ]]; then + MVN_OPT="${MVN_OPT} -Pcodecov" + fi TEST_MODULES="dev/kyuubi-codecov" ./build/mvn clean install ${MVN_OPT} -pl ${TEST_MODULES} -am \ - -Pspark-${{ matrix.spark }} -Pspark-authz-hudi-test ${{ matrix.spark-archive }} ${{ matrix.exclude-tags }} + -Pjava-${{ matrix.java }} -Pspark-${{ matrix.spark }} -Pspark-authz-hudi-test ${{ matrix.spark-archive }} ${{ matrix.exclude-tags }} - name: Code coverage if: | matrix.java == 8 && @@ -140,11 +143,11 @@ jobs: spark: - '3.4' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Tune Runner VM uses: ./.github/actions/tune-runner-vm - name: Setup JDK ${{ matrix.java }} - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: distribution: temurin java-version: ${{ matrix.java }} @@ -156,7 +159,7 @@ jobs: uses: ./.github/actions/cache-engine-archives - name: Build on Scala ${{ matrix.scala }} run: | - TEST_MODULES="!externals/kyuubi-flink-sql-engine,!integration-tests/kyuubi-flink-it" + TEST_MODULES="!externals/kyuubi-flink-sql-engine,!integration-tests/kyuubi-flink-it,!integration-tests/kyuubi-gluten-it" ./build/mvn clean install ${MVN_OPT} -pl ${TEST_MODULES} -am \ -Pscala-${{ matrix.scala }} -Pjava-${{ matrix.java }} -Pspark-${{ matrix.spark }} - name: Upload test logs @@ -196,11 +199,11 @@ jobs: flink-archive: '-Dflink.archive.mirror=https://archive.apache.org/dist/flink/flink-1.18.0 -Dflink.archive.name=flink-1.18.0-bin-scala_2.12.tgz' comment: 'verify-on-flink-1.18-binary' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Tune Runner VM uses: ./.github/actions/tune-runner-vm - name: Setup JDK ${{ matrix.java }} - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: distribution: temurin java-version: ${{ matrix.java }} @@ -242,13 +245,21 @@ jobs: matrix: java: - 8 + hive-archive: [ "" ] comment: [ "normal" ] + include: + - java: 8 + hive-archive: '-Dhive.archive.mirror=https://archive.apache.org/dist/hive/hive-2.3.9 -Dhive.archive.name=apache-hive-2.3.9-bin.tar.gz' + comment: 'verify-on-hive-2.3-binary' + - java: 8 + hive-archive: '-Dhive.archive.mirror=https://github.com/pan3793/cdh-hive/releases/download/cdh6.3.2-release -Dhive.archive.name=apache-hive-2.1.1-cdh6.3.2-bin.tar.gz' + comment: 'verify-on-hive-2.1-cdh6-binary' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Tune Runner VM uses: ./.github/actions/tune-runner-vm - name: Setup JDK ${{ matrix.java }} - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: distribution: temurin java-version: ${{ matrix.java }} @@ -261,8 +272,15 @@ jobs: - name: Build and test Hive with maven w/o linters run: | TEST_MODULES="externals/kyuubi-hive-sql-engine,integration-tests/kyuubi-hive-it" - ./build/mvn ${MVN_OPT} -pl ${TEST_MODULES} -am clean install -DskipTests - ./build/mvn ${MVN_OPT} -pl ${TEST_MODULES} test + ./build/mvn ${MVN_OPT} ${{ matrix.hive-archive }} -pl ${TEST_MODULES} -am clean install -DskipTests + # Hive 2.3.9 ships Derby 10.10.2.0, which may fail to boostrap on latest JDK 8 + # https://github.com/apache/hive/pull/4895 + if [[ "${{ matrix.hive-archive }}" == *apache-hive-2.3.9-bin.tar.gz* ]]; then + HIVE_239_LIB="$PWD/externals/kyuubi-download/target/apache-hive-2.3.9-bin/lib" + rm $HIVE_239_LIB/derby-* + wget https://repo1.maven.org/maven2/org/apache/derby/derby/10.14.2.0/derby-10.14.2.0.jar -P $HIVE_239_LIB + fi + ./build/mvn ${MVN_OPT} ${{ matrix.hive-archive }} -pl ${TEST_MODULES} test - name: Upload test logs if: failure() uses: actions/upload-artifact@v3 @@ -283,11 +301,11 @@ jobs: - 11 comment: [ "normal" ] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Tune Runner VM uses: ./.github/actions/tune-runner-vm - name: Setup JDK ${{ matrix.java }} - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: distribution: temurin java-version: ${{ matrix.java }} @@ -322,11 +340,11 @@ jobs: - 11 comment: [ "normal" ] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Tune Runner VM uses: ./.github/actions/tune-runner-vm - name: Setup JDK ${{ matrix.java }} - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: distribution: temurin java-version: ${{ matrix.java }} @@ -356,11 +374,11 @@ jobs: env: SPARK_LOCAL_IP: localhost steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Tune Runner VM uses: ./.github/actions/tune-runner-vm - name: Setup JDK 8 - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: distribution: temurin java-version: 8 @@ -383,16 +401,16 @@ jobs: runs-on: ubuntu-22.04 steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 # https://github.com/docker/build-push-action - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 + uses: docker/setup-buildx-action@v3 - name: Build Kyuubi Docker Image - uses: docker/build-push-action@v3 + uses: docker/build-push-action@v5 with: # passthrough CI into build container build-args: | - CI=${CI} + CI=${CI} MVN_ARG=--flink-provided --hive-provided -Dmaven.javadoc.skip=true -Drat.skip=true -Dscalastyle.skip=true -Dspotless.check.skip -DskipTests context: . file: build/Dockerfile @@ -409,8 +427,8 @@ jobs: # https://minikube.sigs.k8s.io/docs/handbook/pushing/#7-loading-directly-to-in-cluster-container-runtime minikube image load apache/kyuubi:latest # pre-install spark into minikube - docker pull apache/spark:3.4.1 - minikube image load apache/spark:3.4.1 + docker pull apache/spark:3.4.2 + minikube image load apache/spark:3.4.2 - name: kubectl pre-check run: | kubectl get nodes @@ -455,7 +473,7 @@ jobs: runs-on: ubuntu-22.04 steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Cache Engine Archives uses: ./.github/actions/cache-engine-archives - name: Setup Minikube @@ -502,11 +520,11 @@ jobs: zookeeper: ["3.4", "3.5", "3.6", "3.7" ] comment: [ "normal" ] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Tune Runner VM uses: ./.github/actions/tune-runner-vm - name: Setup JDK ${{ matrix.java }} - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: distribution: temurin java-version: ${{ matrix.java }} diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 5ff634da6d8..1ba696bbe6f 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -33,11 +33,11 @@ jobs: env: SPARK_LOCAL_IP: localhost steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Tune Runner VM uses: ./.github/actions/tune-runner-vm - name: Setup JDK 8 - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: distribution: temurin java-version: 8 diff --git a/.github/workflows/publish-snapshot-docker.yml b/.github/workflows/publish-snapshot-docker.yml index 3afccee7aa8..0a73dcc2da5 100644 --- a/.github/workflows/publish-snapshot-docker.yml +++ b/.github/workflows/publish-snapshot-docker.yml @@ -28,18 +28,18 @@ jobs: runs-on: ubuntu-22.04 steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up QEMU - uses: docker/setup-qemu-action@v2 + uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 + uses: docker/setup-buildx-action@v3 - name: Login to Docker Hub - uses: docker/login-action@v2 + uses: docker/login-action@v3 with: username: ${{ secrets.DOCKERHUB_USER }} password: ${{ secrets.DOCKERHUB_TOKEN }} - name: Build and Push Kyuubi Docker Image - uses: docker/build-push-action@v4 + uses: docker/build-push-action@v5 with: # build cache on Github Actions, See: https://docs.docker.com/build/cache/backends/gha/#using-dockerbuild-push-action cache-from: type=gha diff --git a/.github/workflows/publish-snapshot-nexus.yml b/.github/workflows/publish-snapshot-nexus.yml index b4191396b1f..64dd1a690b1 100644 --- a/.github/workflows/publish-snapshot-nexus.yml +++ b/.github/workflows/publish-snapshot-nexus.yml @@ -43,11 +43,11 @@ jobs: profiles: -Pflink-provided,spark-provided,hive-provided,spark-3.4 steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: ref: ${{ matrix.branch }} - name: Setup JDK 8 - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: distribution: temurin java-version: 8 diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index d189cd205db..38bb12a4f9a 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -27,7 +27,7 @@ jobs: permissions: pull-requests: write steps: - - uses: actions/stale@v7 + - uses: actions/stale@v9 with: repo-token: ${{ secrets.GITHUB_TOKEN }} stale-pr-message: > diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml index 87823ddbd20..5b8b6a7048d 100644 --- a/.github/workflows/style.yml +++ b/.github/workflows/style.yml @@ -37,11 +37,11 @@ jobs: - '-Pflink-provided,hive-provided,spark-provided,spark-block-cleaner,spark-3.5,spark-3.4,spark-3.3,spark-3.2,tpcds,kubernetes-it' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 - name: Setup JDK 8 - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: distribution: temurin java-version: 8 @@ -50,7 +50,7 @@ jobs: - name: Setup Maven uses: ./.github/actions/setup-maven - name: Setup Python 3 - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.9' cache: 'pip' @@ -92,7 +92,7 @@ jobs: pip install black==$SPOTLESS_BLACK_VERSION build/mvn spotless:check ${{ matrix.profiles }} -Pspotless-python,spark-3.1 - name: setup npm - uses: actions/setup-node@v3 + uses: actions/setup-node@v4 with: node-version: 18 - name: Web UI Style with node @@ -114,7 +114,7 @@ jobs: name: Super Linter and Shellcheck runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Super Linter Checks uses: github/super-linter/slim@v5 env: diff --git a/.github/workflows/web-ui.yml b/.github/workflows/web-ui.yml index 9de7a599d45..ec0a88575b2 100644 --- a/.github/workflows/web-ui.yml +++ b/.github/workflows/web-ui.yml @@ -20,9 +20,9 @@ jobs: runs-on: ubuntu-22.04 steps: - name: checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup JDK 8 - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: distribution: temurin java-version: 8 @@ -35,7 +35,7 @@ jobs: echo "NODEJS_VERSION=${NODEJS_VERSION}" >> "$GITHUB_ENV" echo "PNPM_VERSION=${PNPM_VERSION}" >> "$GITHUB_ENV" - name: Setup Nodejs and NPM - uses: actions/setup-node@v3 + uses: actions/setup-node@v4 with: node-version: ${{env.NODEJS_VERSION}} cache: npm diff --git a/.gitignore b/.gitignore index a2f6fb1efe4..dcf808e6752 100644 --- a/.gitignore +++ b/.gitignore @@ -42,6 +42,7 @@ .settings build/apache-maven* build/release/tmp +build/release/*.txt build/scala* build/test target/ diff --git a/LICENSE-binary b/LICENSE-binary index 748842a6191..b225b2c6288 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -506,6 +506,8 @@ is auto-generated by `pnpm licenses list --prod`. โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค โ”‚ postcss โ”‚ MIT โ”‚ โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค +โ”‚ proxy-from-env โ”‚ MIT โ”‚ +โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค โ”‚ randexp โ”‚ MIT โ”‚ โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค โ”‚ ret โ”‚ MIT โ”‚ diff --git a/build/dist b/build/dist index df9498008cb..2ea702b61af 100755 --- a/build/dist +++ b/build/dist @@ -249,6 +249,7 @@ mkdir -p "$DISTDIR/pid" mkdir -p "$DISTDIR/logs" mkdir -p "$DISTDIR/work" mkdir -p "$DISTDIR/jars" +mkdir -p "$DISTDIR/db-scripts" mkdir -p "$DISTDIR/beeline-jars" mkdir -p "$DISTDIR/web-ui" mkdir -p "$DISTDIR/externals/engines/flink" @@ -270,6 +271,9 @@ echo "Build flags: $@" >> "$DISTDIR/RELEASE" # Copy kyuubi server jars cp -r "$KYUUBI_HOME"/kyuubi-assembly/target/scala-$SCALA_VERSION/jars/*.jar "$DISTDIR/jars/" +# Copy kyuubi database scripts +cp -r "$KYUUBI_HOME"/kyuubi-server/src/main/resources/sql/* "$DISTDIR/db-scripts/" + # Copy kyuubi beeline jars cp "$KYUUBI_HOME"/kyuubi-hive-beeline/target/*.jar "$DISTDIR/beeline-jars/" diff --git a/build/release/known_translations b/build/release/known_translations new file mode 100644 index 00000000000..73fd1ad55e3 --- /dev/null +++ b/build/release/known_translations @@ -0,0 +1,60 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This is a mapping of names to be translated. +# The format expected on each line should be: - +AngersZhuuuu - Yi Zhu +ASiegeLion - Peiyue Liu +bowenliang123 - Bowen Liang +BruceWong96 - Bruce Wong +CavemanIV - Liang Zhang +cxzl25 - Shaoyun Chen +davidyuan1223 - David Yuan +dependabot[bot] - GitHub Bot +dev-lpq - Pengqi Li +dnskr - Denis Krivenko +edddddy - Yang Du +hadoopkandy - Kang Wang +HaoYang670 - Remzi Yang +huage1994 - Guanhua Li +iodone - Yaodong Zhang +ITzhangqiang - Qiang Zhang +Kiss736921 - Alex Zou +labbomb - Junjie Xu +lightning_L - Tianlin Liao +liunaijie - Naijie Liu +lsm1 - Senmiao Liu +mattshma - Ming Ma +merrily01 - Ruilei Ma +minyk - Drake Youngkun Min +packyan - Deng An +QianyongY - Yong Qian +thomasg19930417 - Xu Guo +turboFei - Fei Wang +ulysses-you - Xiduo You +wangmiao1002 - Miao Wang +wForget - Zhen Wang +Xieming LI - Xieming Li +XorSum - Baokun Han +yabola - Chenliang Lu +Yikf - Kaifei Yi +ymZhao1001 - Yangming Zhao +zhaohehuhu - He Zhao +zhaomin1423 - Min Zhao +zhouyifan279 - Yifan Zhou +zhuyaogai - Yaogai Zhu +zwangsheng - Binjie Yang diff --git a/build/release/release.sh b/build/release/release.sh index 047513dc5e3..49fef9f8b24 100755 --- a/build/release/release.sh +++ b/build/release/release.sh @@ -124,12 +124,17 @@ upload_nexus_staging() { -s "${KYUUBI_DIR}/build/release/asf-settings.xml" \ -pl extensions/spark/kyuubi-extension-spark-3-3 -am - # Spark TPC-DS/TPC-H Connector build with default Spark version (3.4) and Scala 2.13 - ${KYUUBI_DIR}/build/mvn clean deploy -DskipTests -Papache-release,flink-provided,spark-provided,hive-provided,spark-3.4 \ + # Spark Extension Plugin for Spark 3.5 + ${KYUUBI_DIR}/build/mvn clean deploy -DskipTests -Papache-release,flink-provided,spark-provided,hive-provided,spark-3.5 \ + -s "${KYUUBI_DIR}/build/release/asf-settings.xml" \ + -pl extensions/spark/kyuubi-extension-spark-3-5 -am + + # Spark TPC-DS/TPC-H Connector built with default Spark version (3.4) and Scala 2.13 + ${KYUUBI_DIR}/build/mvn clean deploy -DskipTests -Papache-release,flink-provided,spark-provided,hive-provided,spark-3.4,scala-2.13 \ -s "${KYUUBI_DIR}/build/release/asf-settings.xml" \ - -pl extensions/spark/kyuubi-connector-tpcds,extensions/spark/kyuubi-connector-tpch + -pl extensions/spark/kyuubi-spark-connector-tpcds,extensions/spark/kyuubi-spark-connector-tpch -am - # All modules including Spark Extension Plugin and Connectors build with default Spark version (3.4) and default Scala version (2.12) + # All modules including Spark Extension Plugin and Connectors built with default Spark version (3.4) and default Scala version (2.12) ${KYUUBI_DIR}/build/mvn clean deploy -DskipTests -Papache-release,flink-provided,spark-provided,hive-provided,spark-3.4 \ -s "${KYUUBI_DIR}/build/release/asf-settings.xml" } diff --git a/charts/kyuubi/Chart.yaml b/charts/kyuubi/Chart.yaml index 56abc9edc88..2fefab2886d 100644 --- a/charts/kyuubi/Chart.yaml +++ b/charts/kyuubi/Chart.yaml @@ -20,7 +20,7 @@ name: kyuubi description: A Helm chart for Kyuubi server type: application version: 0.1.0 -appVersion: 1.7.3 +appVersion: 1.8.0 home: https://kyuubi.apache.org icon: https://raw.githubusercontent.com/apache/kyuubi/master/docs/imgs/logo.png sources: diff --git a/charts/kyuubi/templates/kyuubi-service.yaml b/charts/kyuubi/templates/kyuubi-service.yaml index 64c8b06ac20..9d9362e86d6 100644 --- a/charts/kyuubi/templates/kyuubi-service.yaml +++ b/charts/kyuubi/templates/kyuubi-service.yaml @@ -37,6 +37,12 @@ spec: {{- end }} selector: {{- include "kyuubi.selectorLabels" $ | nindent 4 }} + {{- if ($frontend.service.sessionAffinity) }} + sessionAffinity: {{ $frontend.service.sessionAffinity }} + {{- end }} + {{- with $frontend.service.sessionAffinityConfig }} + sessionAffinityConfig: {{- toYaml . | nindent 4 }} + {{- end }} --- {{- end }} {{- end }} diff --git a/charts/kyuubi/values.yaml b/charts/kyuubi/values.yaml index faa854b1017..044668040f3 100644 --- a/charts/kyuubi/values.yaml +++ b/charts/kyuubi/values.yaml @@ -85,6 +85,13 @@ server: port: "{{ .Values.server.thriftBinary.port }}" nodePort: ~ annotations: {} + # candidates are ClientIP or None + # https://kubernetes.io/docs/reference/kubernetes-api/service-resources/service-v1/ + sessionAffinity: ~ + sessionAffinityConfig: {} + # sessionAffinityConfig: + # clientIP: + # timeoutSeconds: 10800 # Thrift HTTP protocol (HiveServer2 compatible) thriftHttp: @@ -95,6 +102,13 @@ server: port: "{{ .Values.server.thriftHttp.port }}" nodePort: ~ annotations: {} + # candidates are ClientIP or None + # https://kubernetes.io/docs/reference/kubernetes-api/service-resources/service-v1/ + sessionAffinity: ~ + sessionAffinityConfig: {} + # sessionAffinityConfig: + # clientIP: + # timeoutSeconds: 10800 # REST API protocol (experimental) rest: @@ -105,6 +119,13 @@ server: port: "{{ .Values.server.rest.port }}" nodePort: ~ annotations: {} + # candidates are ClientIP or None + # https://kubernetes.io/docs/reference/kubernetes-api/service-resources/service-v1/ + sessionAffinity: ~ + sessionAffinityConfig: {} + # sessionAffinityConfig: + # clientIP: + # timeoutSeconds: 10800 # MySQL compatible text protocol (experimental) mysql: @@ -115,6 +136,13 @@ server: port: "{{ .Values.server.mysql.port }}" nodePort: ~ annotations: {} + # candidates are ClientIP or None + # https://kubernetes.io/docs/reference/kubernetes-api/service-resources/service-v1/ + sessionAffinity: ~ + sessionAffinityConfig: {} + # sessionAffinityConfig: + # clientIP: + # timeoutSeconds: 10800 monitoring: # Exposes metrics in Prometheus format diff --git a/codecov.yml b/codecov.yml index 6267ea38074..1be776f5831 100644 --- a/codecov.yml +++ b/codecov.yml @@ -16,4 +16,11 @@ # codecov: - token: b624e642-b0c8-4d45-94a1-a370888435bb + token: 5115fd3e-2ef2-40ed-b012-376a2afdc382 + +coverage: + status: + project: + default: + target: auto # auto compares coverage to the previous base commit + threshold: 2% #this allows a 2% drop from the previous base commit coverage diff --git a/conf/kyuubi-env.sh.template b/conf/kyuubi-env.sh.template index 2b7be6fc89a..2d89d3a5452 100755 --- a/conf/kyuubi-env.sh.template +++ b/conf/kyuubi-env.sh.template @@ -64,5 +64,5 @@ # export HIVE_HADOOP_CLASSPATH=${HADOOP_HOME}/share/hadoop/common/lib/commons-collections-3.2.2.jar:${HADOOP_HOME}/share/hadoop/client/hadoop-client-runtime-3.1.0.jar:${HADOOP_HOME}/share/hadoop/client/hadoop-client-api-3.1.0.jar:${HADOOP_HOME}/share/hadoop/common/lib/htrace-core4-4.1.0-incubating.jar # export HADOOP_CONF_DIR=/usr/ndp/current/mapreduce_client/conf # export YARN_CONF_DIR=/usr/ndp/current/yarn/conf -# export KYUUBI_JAVA_OPTS="-Xmx10g -XX:+UnlockDiagnosticVMOptions -XX:ParGCCardsPerStrideChunk=4096 -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSConcurrentMTEnabled -XX:CMSInitiatingOccupancyFraction=70 -XX:+UseCMSInitiatingOccupancyOnly -XX:+CMSClassUnloadingEnabled -XX:+CMSParallelRemarkEnabled -XX:+UseCondCardMark -XX:MaxDirectMemorySize=1024m -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=./logs -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:+PrintTenuringDistribution -Xloggc:./logs/kyuubi-server-gc-%t.log -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=5M -XX:NewRatio=3 -XX:MetaspaceSize=512m" -# export KYUUBI_BEELINE_OPTS="-Xmx2g -XX:+UnlockDiagnosticVMOptions -XX:ParGCCardsPerStrideChunk=4096 -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSConcurrentMTEnabled -XX:CMSInitiatingOccupancyFraction=70 -XX:+UseCMSInitiatingOccupancyOnly -XX:+CMSClassUnloadingEnabled -XX:+CMSParallelRemarkEnabled -XX:+UseCondCardMark" +# export KYUUBI_JAVA_OPTS="-Xmx10g -XX:MaxMetaspaceSize=512m -XX:MaxDirectMemorySize=1024m -XX:+UseG1GC -XX:+UseStringDeduplication -XX:+UnlockDiagnosticVMOptions -XX:+UseCondCardMark -XX:+UseGCOverheadLimit -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=./logs -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:+PrintTenuringDistribution -verbose:gc -Xloggc:./logs/kyuubi-server-gc-%t.log -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=20M" +# export KYUUBI_BEELINE_OPTS="-Xmx2g -XX:+UseG1GC -XX:+UnlockDiagnosticVMOptions -XX:+UseCondCardMark" diff --git a/conf/log4j2.xml.template b/conf/log4j2.xml.template index 2601690eb90..215fddf47f4 100644 --- a/conf/log4j2.xml.template +++ b/conf/log4j2.xml.template @@ -21,10 +21,9 @@ Set to debug or trace if log4j initialization is failing. --> + ${env:KYUUBI_LOG_DIR} rest-audit.log rest-audit-%d{yyyy-MM-dd}-%i.log - - k8s-audit.log k8s-audit-%d{yyyy-MM-dd}-%i.log @@ -35,7 +34,7 @@ - @@ -43,7 +42,7 @@ - diff --git a/dev/dependencyList b/dev/dependencyList index ede67c96173..4089d963d7a 100644 --- a/dev/dependencyList +++ b/dev/dependencyList @@ -140,28 +140,28 @@ log4j-core/2.20.0//log4j-core-2.20.0.jar log4j-slf4j-impl/2.20.0//log4j-slf4j-impl-2.20.0.jar logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar lz4-java/1.8.0//lz4-java-1.8.0.jar -metrics-core/4.2.8//metrics-core-4.2.8.jar -metrics-jmx/4.2.8//metrics-jmx-4.2.8.jar -metrics-json/4.2.8//metrics-json-4.2.8.jar -metrics-jvm/4.2.8//metrics-jvm-4.2.8.jar +metrics-core/4.2.23//metrics-core-4.2.23.jar +metrics-jmx/4.2.23//metrics-jmx-4.2.23.jar +metrics-json/4.2.23//metrics-json-4.2.23.jar +metrics-jvm/4.2.23//metrics-jvm-4.2.23.jar mimepull/1.9.15//mimepull-1.9.15.jar -netty-all/4.1.93.Final//netty-all-4.1.93.Final.jar -netty-buffer/4.1.93.Final//netty-buffer-4.1.93.Final.jar -netty-codec-dns/4.1.93.Final//netty-codec-dns-4.1.93.Final.jar -netty-codec-http/4.1.93.Final//netty-codec-http-4.1.93.Final.jar -netty-codec-http2/4.1.93.Final//netty-codec-http2-4.1.93.Final.jar -netty-codec-socks/4.1.93.Final//netty-codec-socks-4.1.93.Final.jar -netty-codec/4.1.93.Final//netty-codec-4.1.93.Final.jar -netty-common/4.1.93.Final//netty-common-4.1.93.Final.jar -netty-handler-proxy/4.1.93.Final//netty-handler-proxy-4.1.93.Final.jar -netty-handler/4.1.93.Final//netty-handler-4.1.93.Final.jar -netty-resolver-dns/4.1.93.Final//netty-resolver-dns-4.1.93.Final.jar -netty-resolver/4.1.93.Final//netty-resolver-4.1.93.Final.jar -netty-transport-classes-epoll/4.1.93.Final//netty-transport-classes-epoll-4.1.93.Final.jar -netty-transport-native-epoll/4.1.93.Final/linux-aarch_64/netty-transport-native-epoll-4.1.93.Final-linux-aarch_64.jar -netty-transport-native-epoll/4.1.93.Final/linux-x86_64/netty-transport-native-epoll-4.1.93.Final-linux-x86_64.jar -netty-transport-native-unix-common/4.1.93.Final//netty-transport-native-unix-common-4.1.93.Final.jar -netty-transport/4.1.93.Final//netty-transport-4.1.93.Final.jar +netty-all/4.1.100.Final//netty-all-4.1.100.Final.jar +netty-buffer/4.1.100.Final//netty-buffer-4.1.100.Final.jar +netty-codec-dns/4.1.100.Final//netty-codec-dns-4.1.100.Final.jar +netty-codec-http/4.1.100.Final//netty-codec-http-4.1.100.Final.jar +netty-codec-http2/4.1.100.Final//netty-codec-http2-4.1.100.Final.jar +netty-codec-socks/4.1.100.Final//netty-codec-socks-4.1.100.Final.jar +netty-codec/4.1.100.Final//netty-codec-4.1.100.Final.jar +netty-common/4.1.100.Final//netty-common-4.1.100.Final.jar +netty-handler-proxy/4.1.100.Final//netty-handler-proxy-4.1.100.Final.jar +netty-handler/4.1.100.Final//netty-handler-4.1.100.Final.jar +netty-resolver-dns/4.1.100.Final//netty-resolver-dns-4.1.100.Final.jar +netty-resolver/4.1.100.Final//netty-resolver-4.1.100.Final.jar +netty-transport-classes-epoll/4.1.100.Final//netty-transport-classes-epoll-4.1.100.Final.jar +netty-transport-native-epoll/4.1.100.Final/linux-aarch_64/netty-transport-native-epoll-4.1.100.Final-linux-aarch_64.jar +netty-transport-native-epoll/4.1.100.Final/linux-x86_64/netty-transport-native-epoll-4.1.100.Final-linux-x86_64.jar +netty-transport-native-unix-common/4.1.100.Final//netty-transport-native-unix-common-4.1.100.Final.jar +netty-transport/4.1.100.Final//netty-transport-4.1.100.Final.jar okhttp-urlconnection/3.14.9//okhttp-urlconnection-3.14.9.jar okhttp/3.12.12//okhttp-3.12.12.jar okio/1.15.0//okio-1.15.0.jar diff --git a/dev/kyuubi-codecov/pom.xml b/dev/kyuubi-codecov/pom.xml index a5ec582f961..cdf79827359 100644 --- a/dev/kyuubi-codecov/pom.xml +++ b/dev/kyuubi-codecov/pom.xml @@ -31,6 +31,18 @@ https://kyuubi.apache.org/ + + org.apache.kyuubi + kyuubi-util + ${project.version} + + + + org.apache.kyuubi + kyuubi-util-scala_${scala.binary.version} + ${project.version} + + org.apache.kyuubi kyuubi-common_${scala.binary.version} @@ -130,26 +142,6 @@ - - org.jacoco - jacoco-maven-plugin - - - report-agg - - report-aggregate - - verify - - - **/jacoco*.exec - - ${project.reporting.outputDirectory}/jacoco-aggregate-all - - - - - org.apache.maven.plugins maven-dependency-plugin @@ -229,5 +221,31 @@ + + codecov + + + + org.jacoco + jacoco-maven-plugin + + + report-agg + + report-aggregate + + verify + + + **/jacoco*.exec + + ${project.reporting.outputDirectory}/jacoco-aggregate-all + + + + + + + diff --git a/docker/playground/.env b/docker/playground/.env index 24284bd39fa..e8446fd56c9 100644 --- a/docker/playground/.env +++ b/docker/playground/.env @@ -18,13 +18,13 @@ AWS_JAVA_SDK_VERSION=1.12.367 HADOOP_VERSION=3.3.6 HIVE_VERSION=2.3.9 -ICEBERG_VERSION=1.3.1 -KYUUBI_VERSION=1.7.3 -KYUUBI_HADOOP_VERSION=3.3.5 +ICEBERG_VERSION=1.4.2 +KYUUBI_VERSION=1.8.0 +KYUUBI_HADOOP_VERSION=3.3.6 POSTGRES_VERSION=12 POSTGRES_JDBC_VERSION=42.3.4 SCALA_BINARY_VERSION=2.12 -SPARK_VERSION=3.3.3 -SPARK_BINARY_VERSION=3.3 -SPARK_HADOOP_VERSION=3.3.2 +SPARK_VERSION=3.4.2 +SPARK_BINARY_VERSION=3.4 +SPARK_HADOOP_VERSION=3.3.4 ZOOKEEPER_VERSION=3.6.3 diff --git a/docs/_static/css/custom.css b/docs/_static/css/custom.css deleted file mode 100644 index 9352af86567..00000000000 --- a/docs/_static/css/custom.css +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -table.docutils { - width: 100%; - margin-top: 10px; - margin-bottom: 10px; - border: 0; - border-collapse: collapse; - table-layout: auto; -} -table.docutils th { - padding: 1px 8px 1px 5px; - border-top: 1px; - border-left: 1px; - border-right: 1px; - border-bottom: 1px solid #aaa; -} -table.docutils td { - word-break: break-word; - min-width: 10%; -} -table.docutils tr:hover { - background: #efefef; -} -table.docutils tbody tr:nth-child(2n) { - background: #9EBCE21E; -} -table.docutils td:nth-child(1) { - width: 25%; - word-break: break-all; - font-weight: 500; -} diff --git a/docs/client/jdbc/kyuubi_jdbc.rst b/docs/client/jdbc/kyuubi_jdbc.rst index d4270ea8ac6..a3c56b41813 100644 --- a/docs/client/jdbc/kyuubi_jdbc.rst +++ b/docs/client/jdbc/kyuubi_jdbc.rst @@ -147,6 +147,28 @@ Connection URL over Service Discovery - zookeeper quorum is the corresponding zookeeper cluster configured by `kyuubi.ha.addresses` at the server side. - zooKeeperNamespace is the corresponding namespace configured by `kyuubi.ha.namespace` at the server side. +HiveServer2 Compatibility +************************* + +.. versionadded:: 1.8.0 + +JDBC Drivers need to negotiate a protocol version with Kyuubi Server/HiveServer2 when connecting. + +Kyuubi Hive JDBC Driver offers protocol version v10 (`clientProtocolVersion=9`, supported since Hive 2.3.0) +to server by default. + +If you need to connect to HiveServer2 before 2.3.0, +please set client property `clientProtocolVersion` to a lower number. + +.. code-block:: jdbc + + jdbc:subprotocol://host:port[/catalog]/[schema];clientProtocolVersion=9; + + +.. tip:: + All supported protocol versions and corresponding Hive versions can be found in `TProtocolVersion.java`_ + and its git commits. + Kerberos Authentication ----------------------- Since 1.6.0, Kyuubi JDBC driver implements the Kerberos authentication based on JAAS framework instead of `Hadoop UserGroupInformation`_, @@ -172,6 +194,7 @@ It's straightforward to use principal and keytab for Kerberos authentication, ju - kyuubiClientPrincipal: Kerberos ``principal`` for client authentication - kyuubiClientKeytab: path of Kerberos ``keytab`` file for client authentication +- kyuubiClientTicketCache: path of Kerberos ``ticketCache`` file for client authentication, available since 1.8.0. - kyuubiServerPrincipal: Kerberos ``principal`` configured by `kyuubi.kinit.principal` at the server side. ``kyuubiServerPrincipal`` is available as an alias of ``principal`` since 1.7.0, use ``principal`` for previous versions. @@ -218,4 +241,5 @@ Authentication by Subject (programing only) .. _JDBC Applications: ../bi_tools/index.html .. _java.sql.DriverManager: https://docs.oracle.com/javase/8/docs/api/java/sql/DriverManager.html .. _Hadoop UserGroupInformation: https://hadoop.apache.org/docs/stable/api/org/apache/hadoop/security/UserGroupInformation.html -.. _krb5.conf instruction: https://docs.oracle.com/javase/8/docs/technotes/guides/security/jgss/tutorials/KerberosReq.html \ No newline at end of file +.. _krb5.conf instruction: https://docs.oracle.com/javase/8/docs/technotes/guides/security/jgss/tutorials/KerberosReq.html +.. _TProtocolVersion.java: https://github.com/apache/hive/blob/master/service-rpc/src/gen/thrift/gen-javabean/org/apache/hive/service/rpc/thrift/TProtocolVersion.java \ No newline at end of file diff --git a/docs/client/rest/rest_api.md b/docs/client/rest/rest_api.md index fc04857d020..4f28dec05ac 100644 --- a/docs/client/rest/rest_api.md +++ b/docs/client/rest/rest_api.md @@ -297,7 +297,7 @@ Get a list of operation log lines of the running operation by the specified oper | Name | Description | Type | |:--------|:--------------------------------------|:-----| -| maxRows | The max row that are pulled each time | Int | +| maxrows | The max row that are pulled each time | Int | #### Response Body @@ -410,12 +410,6 @@ The [Batch](#batch). Kill the batch if it is still running. -#### Request Parameters - -| Name | Description | Type | -|:------------------------|:------------------------------|:-----------------| -| hive.server2.proxy.user | the proxy user to impersonate | String(optional) | - #### Response Body | Name | Description | Type | @@ -468,8 +462,12 @@ Delete the specified engine. | type | the engine type | String(optional) | | sharelevel | the engine share level | String(optional) | | subdomain | the engine subdomain | String(optional) | +| proxyUser | the proxy user to impersonate | String(optional) | | hive.server2.proxy.user | the proxy user to impersonate | String(optional) | +`proxyUser` is an alternative to `hive.server2.proxy.user`, and the current behavior is consistent with +`hive.server2.proxy.user`. When both parameters are set, `proxyUser` takes precedence. + ### GET /admin/engine Get a list of satisfied engines. @@ -481,8 +479,12 @@ Get a list of satisfied engines. | type | the engine type | String(optional) | | sharelevel | the engine share level | String(optional) | | subdomain | the engine subdomain | String(optional) | +| proxyUser | the proxy user to impersonate | String(optional) | | hive.server2.proxy.user | the proxy user to impersonate | String(optional) | +`proxyUser` is an alternative to hive.server2.proxy.user, and the current behavior is consistent with +hive.server2.proxy.user. When both parameters are set, proxyUser takes precedence. + #### Response Body The [Engine](#engine) List. diff --git a/docs/client/ui/engine_ui.md b/docs/client/ui/engine_ui.md new file mode 100644 index 00000000000..312606eca5e --- /dev/null +++ b/docs/client/ui/engine_ui.md @@ -0,0 +1,39 @@ + + +# Engine UI + +This engine UI is able to help you understand status of the engine behind Kyuubi servers. + +## Engine Management Details + +The Engine UI offers an Engine Management feature on the left side of UI page. This allows users to access detailed information about the engines. +However, not all available engines are displayed by default. Thus, users have to add correct filter conditions to get engines they prefer. After setting the right conditions, please click on 'search' button. +The engines that meet your specified requirements should be listed on the page as the below picture shown. + +![workspace](../../imgs/ui/engine_ui.png) + +| Name | Description | +|:---------------|| +| Engine address | The engine IP address | +| Engine ID | The unique identifier of engine | +| Engine Type | The engine types(only SPARK-SQL engine can be shown in this page now) | +| Share Level | The share level of engine, such as user, connection, group and server | +| User | The user created the engine | +| Version | The version of the Kyuubi server associated with this engine | +| Operation | Extra operations that users can do further.
1. View native engine UI
find and select the engine you wish to view its native UI.
clink on the view button, you should be redirected to the native engine UI powered by Kyuubi proxy.
2. Delete the specified engine gracefully
select the specific engine you would like to delete from the Engine Management page.
click on delete button and confirm your choice, then the engine will be remove from service discovery like Zookeeper, ETCD and etc.
The engine will eventually be shut down once all connected session closed. | + diff --git a/docs/client/ui/index.rst b/docs/client/ui/index.rst index 63a02cbd484..7ac3b9d2881 100644 --- a/docs/client/ui/index.rst +++ b/docs/client/ui/index.rst @@ -20,5 +20,5 @@ Web UI .. toctree:: :maxdepth: 2 - hive_beeline + engine_ui diff --git a/docs/conf.py b/docs/conf.py index eaac1acedef..d75f819b3c2 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -126,8 +126,7 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] -html_css_files = ["css/custom.css"] +html_static_path = [] htmlhelp_basename = 'Recommonmarkdoc' github_doc_root = 'https://github.com/apache/kyuubi/tree/master/docs/' diff --git a/docs/configuration/settings.md b/docs/configuration/settings.md index 66fa4557893..20a1bf8d93f 100644 --- a/docs/configuration/settings.md +++ b/docs/configuration/settings.md @@ -120,82 +120,91 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co ### Engine -| Key | Default | Meaning | Type | Since | -|----------------------------------------------------------|---------------------------||----------|-------| -| kyuubi.engine.chat.extra.classpath | <undefined> | The extra classpath for the Chat engine, for configuring the location of the SDK and etc. | string | 1.8.0 | -| kyuubi.engine.chat.gpt.apiKey | <undefined> | The key to access OpenAI open API, which could be got at https://platform.openai.com/account/api-keys | string | 1.8.0 | -| kyuubi.engine.chat.gpt.http.connect.timeout | PT2M | The timeout[ms] for establishing the connection with the Chat GPT server. A timeout value of zero is interpreted as an infinite timeout. | duration | 1.8.0 | -| kyuubi.engine.chat.gpt.http.proxy | <undefined> | HTTP proxy url for API calling in Chat GPT engine. e.g. http://127.0.0.1:1087 | string | 1.8.0 | -| kyuubi.engine.chat.gpt.http.socket.timeout | PT2M | The timeout[ms] for waiting for data packets after Chat GPT server connection is established. A timeout value of zero is interpreted as an infinite timeout. | duration | 1.8.0 | -| kyuubi.engine.chat.gpt.model | gpt-3.5-turbo | ID of the model used in ChatGPT. Available models refer to OpenAI's [Model overview](https://platform.openai.com/docs/models/overview). | string | 1.8.0 | -| kyuubi.engine.chat.java.options | <undefined> | The extra Java options for the Chat engine | string | 1.8.0 | -| kyuubi.engine.chat.memory | 1g | The heap memory for the Chat engine | string | 1.8.0 | -| kyuubi.engine.chat.provider | ECHO | The provider for the Chat engine. Candidates:
  • ECHO: simply replies a welcome message.
  • GPT: a.k.a ChatGPT, powered by OpenAI.
| string | 1.8.0 | -| kyuubi.engine.connection.url.use.hostname | true | (deprecated) When true, the engine registers with hostname to zookeeper. When Spark runs on K8s with cluster mode, set to false to ensure that server can connect to engine | boolean | 1.3.0 | -| kyuubi.engine.deregister.exception.classes || A comma-separated list of exception classes. If there is any exception thrown, whose class matches the specified classes, the engine would deregister itself. | set | 1.2.0 | -| kyuubi.engine.deregister.exception.messages || A comma-separated list of exception messages. If there is any exception thrown, whose message or stacktrace matches the specified message list, the engine would deregister itself. | set | 1.2.0 | -| kyuubi.engine.deregister.exception.ttl | PT30M | Time to live(TTL) for exceptions pattern specified in kyuubi.engine.deregister.exception.classes and kyuubi.engine.deregister.exception.messages to deregister engines. Once the total error count hits the kyuubi.engine.deregister.job.max.failures within the TTL, an engine will deregister itself and wait for self-terminated. Otherwise, we suppose that the engine has recovered from temporary failures. | duration | 1.2.0 | -| kyuubi.engine.deregister.job.max.failures | 4 | Number of failures of job before deregistering the engine. | int | 1.2.0 | -| kyuubi.engine.event.json.log.path | file:///tmp/kyuubi/events | The location where all the engine events go for the built-in JSON logger.
  • Local Path: start with 'file://'
  • HDFS Path: start with 'hdfs://'
| string | 1.3.0 | -| kyuubi.engine.event.loggers | SPARK | A comma-separated list of engine history loggers, where engine/session/operation etc events go.
  • SPARK: the events will be written to the Spark listener bus.
  • JSON: the events will be written to the location of kyuubi.engine.event.json.log.path
  • JDBC: to be done
  • CUSTOM: User-defined event handlers.
Note that: Kyuubi supports custom event handlers with the Java SPI. To register a custom event handler, the user needs to implement a subclass of `org.apache.kyuubi.events.handler.CustomEventHandlerProvider` which has a zero-arg constructor. | seq | 1.3.0 | -| kyuubi.engine.flink.application.jars | <undefined> | A comma-separated list of the local jars to be shipped with the job to the cluster. For example, SQL UDF jars. Only effective in yarn application mode. | string | 1.8.0 | -| kyuubi.engine.flink.extra.classpath | <undefined> | The extra classpath for the Flink SQL engine, for configuring the location of hadoop client jars, etc. Only effective in yarn session mode. | string | 1.6.0 | -| kyuubi.engine.flink.java.options | <undefined> | The extra Java options for the Flink SQL engine. Only effective in yarn session mode. | string | 1.6.0 | -| kyuubi.engine.flink.memory | 1g | The heap memory for the Flink SQL engine. Only effective in yarn session mode. | string | 1.6.0 | -| kyuubi.engine.hive.event.loggers | JSON | A comma-separated list of engine history loggers, where engine/session/operation etc events go.
  • JSON: the events will be written to the location of kyuubi.engine.event.json.log.path
  • JDBC: to be done
  • CUSTOM: to be done.
| seq | 1.7.0 | -| kyuubi.engine.hive.extra.classpath | <undefined> | The extra classpath for the Hive query engine, for configuring location of the hadoop client jars and etc. | string | 1.6.0 | -| kyuubi.engine.hive.java.options | <undefined> | The extra Java options for the Hive query engine | string | 1.6.0 | -| kyuubi.engine.hive.memory | 1g | The heap memory for the Hive query engine | string | 1.6.0 | -| kyuubi.engine.initialize.sql | SHOW DATABASES | SemiColon-separated list of SQL statements to be initialized in the newly created engine before queries. i.e. use `SHOW DATABASES` to eagerly active HiveClient. This configuration can not be used in JDBC url due to the limitation of Beeline/JDBC driver. | seq | 1.2.0 | -| kyuubi.engine.jdbc.connection.password | <undefined> | The password is used for connecting to server | string | 1.6.0 | -| kyuubi.engine.jdbc.connection.propagateCredential | false | Whether to use the session's user and password to connect to database | boolean | 1.8.0 | -| kyuubi.engine.jdbc.connection.properties || The additional properties are used for connecting to server | seq | 1.6.0 | -| kyuubi.engine.jdbc.connection.provider | <undefined> | The connection provider is used for getting a connection from the server | string | 1.6.0 | -| kyuubi.engine.jdbc.connection.url | <undefined> | The server url that engine will connect to | string | 1.6.0 | -| kyuubi.engine.jdbc.connection.user | <undefined> | The user is used for connecting to server | string | 1.6.0 | -| kyuubi.engine.jdbc.driver.class | <undefined> | The driver class for JDBC engine connection | string | 1.6.0 | -| kyuubi.engine.jdbc.extra.classpath | <undefined> | The extra classpath for the JDBC query engine, for configuring the location of the JDBC driver and etc. | string | 1.6.0 | -| kyuubi.engine.jdbc.initialize.sql | SELECT 1 | SemiColon-separated list of SQL statements to be initialized in the newly created engine before queries. i.e. use `SELECT 1` to eagerly active JDBCClient. | seq | 1.8.0 | -| kyuubi.engine.jdbc.java.options | <undefined> | The extra Java options for the JDBC query engine | string | 1.6.0 | -| kyuubi.engine.jdbc.memory | 1g | The heap memory for the JDBC query engine | string | 1.6.0 | -| kyuubi.engine.jdbc.session.initialize.sql || SemiColon-separated list of SQL statements to be initialized in the newly created engine session before queries. | seq | 1.8.0 | -| kyuubi.engine.jdbc.type | <undefined> | The short name of JDBC type | string | 1.6.0 | -| kyuubi.engine.kubernetes.submit.timeout | PT30S | The engine submit timeout for Kubernetes application. | duration | 1.7.2 | -| kyuubi.engine.operation.convert.catalog.database.enabled | true | When set to true, The engine converts the JDBC methods of set/get Catalog and set/get Schema to the implementation of different engines | boolean | 1.6.0 | -| kyuubi.engine.operation.log.dir.root | engine_operation_logs | Root directory for query operation log at engine-side. | string | 1.4.0 | -| kyuubi.engine.pool.name | engine-pool | The name of the engine pool. | string | 1.5.0 | -| kyuubi.engine.pool.selectPolicy | RANDOM | The select policy of an engine from the corresponding engine pool engine for a session.
  • RANDOM - Randomly use the engine in the pool
  • POLLING - Polling use the engine in the pool
| string | 1.7.0 | -| kyuubi.engine.pool.size | -1 | The size of the engine pool. Note that, if the size is less than 1, the engine pool will not be enabled; otherwise, the size of the engine pool will be min(this, kyuubi.engine.pool.size.threshold). | int | 1.4.0 | -| kyuubi.engine.pool.size.threshold | 9 | This parameter is introduced as a server-side parameter controlling the upper limit of the engine pool. | int | 1.4.0 | -| kyuubi.engine.session.initialize.sql || SemiColon-separated list of SQL statements to be initialized in the newly created engine session before queries. This configuration can not be used in JDBC url due to the limitation of Beeline/JDBC driver. | seq | 1.3.0 | -| kyuubi.engine.share.level | USER | Engines will be shared in different levels, available configs are:
  • CONNECTION: engine will not be shared but only used by the current client connection
  • USER: engine will be shared by all sessions created by a unique username, see also kyuubi.engine.share.level.subdomain
  • GROUP: the engine will be shared by all sessions created by all users belong to the same primary group name. The engine will be launched by the group name as the effective username, so here the group name is in value of special user who is able to visit the computing resources/data of the team. It follows the [Hadoop GroupsMapping](https://reurl.cc/xE61Y5) to map user to a primary group. If the primary group is not found, it fallback to the USER level.
  • SERVER: the App will be shared by Kyuubi servers
| string | 1.2.0 | -| kyuubi.engine.share.level.sub.domain | <undefined> | (deprecated) - Using kyuubi.engine.share.level.subdomain instead | string | 1.2.0 | -| kyuubi.engine.share.level.subdomain | <undefined> | Allow end-users to create a subdomain for the share level of an engine. A subdomain is a case-insensitive string values that must be a valid zookeeper subpath. For example, for the `USER` share level, an end-user can share a certain engine within a subdomain, not for all of its clients. End-users are free to create multiple engines in the `USER` share level. When disable engine pool, use 'default' if absent. | string | 1.4.0 | -| kyuubi.engine.single.spark.session | false | When set to true, this engine is running in a single session mode. All the JDBC/ODBC connections share the temporary views, function registries, SQL configuration and the current database. | boolean | 1.3.0 | -| kyuubi.engine.spark.event.loggers | SPARK | A comma-separated list of engine loggers, where engine/session/operation etc events go.
  • SPARK: the events will be written to the Spark listener bus.
  • JSON: the events will be written to the location of kyuubi.engine.event.json.log.path
  • JDBC: to be done
  • CUSTOM: to be done.
| seq | 1.7.0 | -| kyuubi.engine.spark.python.env.archive | <undefined> | Portable Python env archive used for Spark engine Python language mode. | string | 1.7.0 | -| kyuubi.engine.spark.python.env.archive.exec.path | bin/python | The Python exec path under the Python env archive. | string | 1.7.0 | -| kyuubi.engine.spark.python.home.archive | <undefined> | Spark archive containing $SPARK_HOME/python directory, which is used to init session Python worker for Python language mode. | string | 1.7.0 | -| kyuubi.engine.submit.timeout | PT30S | Period to tolerant Driver Pod ephemerally invisible after submitting. In some Resource Managers, e.g. K8s, the Driver Pod is not visible immediately after `spark-submit` is returned. | duration | 1.7.1 | -| kyuubi.engine.trino.connection.keystore.password | <undefined> | The keystore password used for connecting to trino cluster | string | 1.8.0 | -| kyuubi.engine.trino.connection.keystore.path | <undefined> | The keystore path used for connecting to trino cluster | string | 1.8.0 | -| kyuubi.engine.trino.connection.keystore.type | <undefined> | The keystore type used for connecting to trino cluster | string | 1.8.0 | -| kyuubi.engine.trino.connection.password | <undefined> | The password used for connecting to trino cluster | string | 1.8.0 | -| kyuubi.engine.trino.connection.truststore.password | <undefined> | The truststore password used for connecting to trino cluster | string | 1.8.0 | -| kyuubi.engine.trino.connection.truststore.path | <undefined> | The truststore path used for connecting to trino cluster | string | 1.8.0 | -| kyuubi.engine.trino.connection.truststore.type | <undefined> | The truststore type used for connecting to trino cluster | string | 1.8.0 | -| kyuubi.engine.trino.event.loggers | JSON | A comma-separated list of engine history loggers, where engine/session/operation etc events go.
  • JSON: the events will be written to the location of kyuubi.engine.event.json.log.path
  • JDBC: to be done
  • CUSTOM: to be done.
| seq | 1.7.0 | -| kyuubi.engine.trino.extra.classpath | <undefined> | The extra classpath for the Trino query engine, for configuring other libs which may need by the Trino engine | string | 1.6.0 | -| kyuubi.engine.trino.java.options | <undefined> | The extra Java options for the Trino query engine | string | 1.6.0 | -| kyuubi.engine.trino.memory | 1g | The heap memory for the Trino query engine | string | 1.6.0 | -| kyuubi.engine.type | SPARK_SQL | Specify the detailed engine supported by Kyuubi. The engine type bindings to SESSION scope. This configuration is experimental. Currently, available configs are:
  • SPARK_SQL: specify this engine type will launch a Spark engine which can provide all the capacity of the Apache Spark. Note, it's a default engine type.
  • FLINK_SQL: specify this engine type will launch a Flink engine which can provide all the capacity of the Apache Flink.
  • TRINO: specify this engine type will launch a Trino engine which can provide all the capacity of the Trino.
  • HIVE_SQL: specify this engine type will launch a Hive engine which can provide all the capacity of the Hive Server2.
  • JDBC: specify this engine type will launch a JDBC engine which can forward queries to the database system through the certain JDBC driver, for now, it supports Doris and Phoenix.
  • CHAT: specify this engine type will launch a Chat engine.
| string | 1.4.0 | -| kyuubi.engine.ui.retainedSessions | 200 | The number of SQL client sessions kept in the Kyuubi Query Engine web UI. | int | 1.4.0 | -| kyuubi.engine.ui.retainedStatements | 200 | The number of statements kept in the Kyuubi Query Engine web UI. | int | 1.4.0 | -| kyuubi.engine.ui.stop.enabled | true | When true, allows Kyuubi engine to be killed from the Spark Web UI. | boolean | 1.3.0 | -| kyuubi.engine.user.isolated.spark.session | true | When set to false, if the engine is running in a group or server share level, all the JDBC/ODBC connections will be isolated against the user. Including the temporary views, function registries, SQL configuration, and the current database. Note that, it does not affect if the share level is connection or user. | boolean | 1.6.0 | -| kyuubi.engine.user.isolated.spark.session.idle.interval | PT1M | The interval to check if the user-isolated Spark session is timeout. | duration | 1.6.0 | -| kyuubi.engine.user.isolated.spark.session.idle.timeout | PT6H | If kyuubi.engine.user.isolated.spark.session is false, we will release the Spark session if its corresponding user is inactive after this configured timeout. | duration | 1.6.0 | -| kyuubi.engine.yarn.submit.timeout | PT30S | The engine submit timeout for YARN application. | duration | 1.7.2 | +| Key | Default | Meaning | Type | Since | +|----------------------------------------------------------|---------------------------||----------|-------| +| kyuubi.engine.chat.ernie.http.connect.timeout | PT2M | The timeout[ms] for establishing the connection with the ernie bot server. A timeout value of zero is interpreted as an infinite timeout. | duration | 1.9.0 | +| kyuubi.engine.chat.ernie.http.proxy | <undefined> | HTTP proxy url for API calling in ernie bot engine. e.g. http://127.0.0.1:1088 | string | 1.9.0 | +| kyuubi.engine.chat.ernie.http.socket.timeout | PT2M | The timeout[ms] for waiting for data packets after ernie bot server connection is established. A timeout value of zero is interpreted as an infinite timeout. | duration | 1.9.0 | +| kyuubi.engine.chat.ernie.model | completions | ID of the model used in ernie bot. Available models are completions_pro, ernie_bot_8k, completions and eb-instant[Model overview](https://cloud.baidu.com/doc/WENXINWORKSHOP/s/6lp69is2a). | string | 1.9.0 | +| kyuubi.engine.chat.ernie.token | <undefined> | The token to access ernie bot open API, which could be got at https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Ilkkrb0i5 | string | 1.9.0 | +| kyuubi.engine.chat.extra.classpath | <undefined> | The extra classpath for the Chat engine, for configuring the location of the SDK and etc. | string | 1.8.0 | +| kyuubi.engine.chat.gpt.apiKey | <undefined> | The key to access OpenAI open API, which could be got at https://platform.openai.com/account/api-keys | string | 1.8.0 | +| kyuubi.engine.chat.gpt.http.connect.timeout | PT2M | The timeout[ms] for establishing the connection with the Chat GPT server. A timeout value of zero is interpreted as an infinite timeout. | duration | 1.8.0 | +| kyuubi.engine.chat.gpt.http.proxy | <undefined> | HTTP proxy url for API calling in Chat GPT engine. e.g. http://127.0.0.1:1087 | string | 1.8.0 | +| kyuubi.engine.chat.gpt.http.socket.timeout | PT2M | The timeout[ms] for waiting for data packets after Chat GPT server connection is established. A timeout value of zero is interpreted as an infinite timeout. | duration | 1.8.0 | +| kyuubi.engine.chat.gpt.model | gpt-3.5-turbo | ID of the model used in ChatGPT. Available models refer to OpenAI's [Model overview](https://platform.openai.com/docs/models/overview). | string | 1.8.0 | +| kyuubi.engine.chat.java.options | <undefined> | The extra Java options for the Chat engine | string | 1.8.0 | +| kyuubi.engine.chat.memory | 1g | The heap memory for the Chat engine | string | 1.8.0 | +| kyuubi.engine.chat.provider | ECHO | The provider for the Chat engine. Candidates:
  • ECHO: simply replies a welcome message.
  • GPT: a.k.a ChatGPT, powered by OpenAI.
  • ERNIE: ErnieBot, powered by Baidu.
| string | 1.8.0 | +| kyuubi.engine.connection.url.use.hostname | true | (deprecated) When true, the engine registers with hostname to zookeeper. When Spark runs on K8s with cluster mode, set to false to ensure that server can connect to engine | boolean | 1.3.0 | +| kyuubi.engine.deregister.exception.classes || A comma-separated list of exception classes. If there is any exception thrown, whose class matches the specified classes, the engine would deregister itself. | set | 1.2.0 | +| kyuubi.engine.deregister.exception.messages || A comma-separated list of exception messages. If there is any exception thrown, whose message or stacktrace matches the specified message list, the engine would deregister itself. | set | 1.2.0 | +| kyuubi.engine.deregister.exception.ttl | PT30M | Time to live(TTL) for exceptions pattern specified in kyuubi.engine.deregister.exception.classes and kyuubi.engine.deregister.exception.messages to deregister engines. Once the total error count hits the kyuubi.engine.deregister.job.max.failures within the TTL, an engine will deregister itself and wait for self-terminated. Otherwise, we suppose that the engine has recovered from temporary failures. | duration | 1.2.0 | +| kyuubi.engine.deregister.job.max.failures | 4 | Number of failures of job before deregistering the engine. | int | 1.2.0 | +| kyuubi.engine.event.json.log.path | file:///tmp/kyuubi/events | The location where all the engine events go for the built-in JSON logger.
  • Local Path: start with 'file://'
  • HDFS Path: start with 'hdfs://'
| string | 1.3.0 | +| kyuubi.engine.event.loggers | SPARK | A comma-separated list of engine history loggers, where engine/session/operation etc events go.
  • SPARK: the events will be written to the Spark listener bus.
  • JSON: the events will be written to the location of kyuubi.engine.event.json.log.path
  • JDBC: to be done
  • CUSTOM: User-defined event handlers.
Note that: Kyuubi supports custom event handlers with the Java SPI. To register a custom event handler, the user needs to implement a subclass of `org.apache.kyuubi.events.handler.CustomEventHandlerProvider` which has a zero-arg constructor. | seq | 1.3.0 | +| kyuubi.engine.flink.application.jars | <undefined> | A comma-separated list of the local jars to be shipped with the job to the cluster. For example, SQL UDF jars. Only effective in yarn application mode. | string | 1.8.0 | +| kyuubi.engine.flink.extra.classpath | <undefined> | The extra classpath for the Flink SQL engine, for configuring the location of hadoop client jars, etc. Only effective in yarn session mode. | string | 1.6.0 | +| kyuubi.engine.flink.initialize.sql | SHOW DATABASES | The initialize sql for Flink engine. It fallback to `kyuubi.engine.initialize.sql`. | seq | 1.8.1 | +| kyuubi.engine.flink.java.options | <undefined> | The extra Java options for the Flink SQL engine. Only effective in yarn session mode. | string | 1.6.0 | +| kyuubi.engine.flink.memory | 1g | The heap memory for the Flink SQL engine. Only effective in yarn session mode. | string | 1.6.0 | +| kyuubi.engine.hive.event.loggers | JSON | A comma-separated list of engine history loggers, where engine/session/operation etc events go.
  • JSON: the events will be written to the location of kyuubi.engine.event.json.log.path
  • JDBC: to be done
  • CUSTOM: to be done.
| seq | 1.7.0 | +| kyuubi.engine.hive.extra.classpath | <undefined> | The extra classpath for the Hive query engine, for configuring location of the hadoop client jars and etc. | string | 1.6.0 | +| kyuubi.engine.hive.java.options | <undefined> | The extra Java options for the Hive query engine | string | 1.6.0 | +| kyuubi.engine.hive.memory | 1g | The heap memory for the Hive query engine | string | 1.6.0 | +| kyuubi.engine.initialize.sql | SHOW DATABASES | SemiColon-separated list of SQL statements to be initialized in the newly created engine before queries. i.e. use `SHOW DATABASES` to eagerly active HiveClient. This configuration can not be used in JDBC url due to the limitation of Beeline/JDBC driver. | seq | 1.2.0 | +| kyuubi.engine.jdbc.connection.password | <undefined> | The password is used for connecting to server | string | 1.6.0 | +| kyuubi.engine.jdbc.connection.propagateCredential | false | Whether to use the session's user and password to connect to database | boolean | 1.8.0 | +| kyuubi.engine.jdbc.connection.properties || The additional properties are used for connecting to server | seq | 1.6.0 | +| kyuubi.engine.jdbc.connection.provider | <undefined> | A JDBC connection provider plugin for the Kyuubi Server to establish a connection to the JDBC URL. The configuration value should be a subclass of `org.apache.kyuubi.engine.jdbc.connection.JdbcConnectionProvider`. Kyuubi provides the following built-in implementations:
  • doris: For establishing Doris connections.
  • mysql: For establishing MySQL connections.
  • phoenix: For establishing Phoenix connections.
  • postgresql: For establishing PostgreSQL connections.
  • starrocks: For establishing StarRocks connections.
  • | string | 1.6.0 | +| kyuubi.engine.jdbc.connection.url | <undefined> | The server url that engine will connect to | string | 1.6.0 | +| kyuubi.engine.jdbc.connection.user | <undefined> | The user is used for connecting to server | string | 1.6.0 | +| kyuubi.engine.jdbc.driver.class | <undefined> | The driver class for JDBC engine connection | string | 1.6.0 | +| kyuubi.engine.jdbc.extra.classpath | <undefined> | The extra classpath for the JDBC query engine, for configuring the location of the JDBC driver and etc. | string | 1.6.0 | +| kyuubi.engine.jdbc.fetch.size | 1000 | The fetch size of JDBC engine | int | 1.9.0 | +| kyuubi.engine.jdbc.initialize.sql | SELECT 1 | SemiColon-separated list of SQL statements to be initialized in the newly created engine before queries. i.e. use `SELECT 1` to eagerly active JDBCClient. | seq | 1.8.0 | +| kyuubi.engine.jdbc.java.options | <undefined> | The extra Java options for the JDBC query engine | string | 1.6.0 | +| kyuubi.engine.jdbc.memory | 1g | The heap memory for the JDBC query engine | string | 1.6.0 | +| kyuubi.engine.jdbc.session.initialize.sql || SemiColon-separated list of SQL statements to be initialized in the newly created engine session before queries. | seq | 1.8.0 | +| kyuubi.engine.jdbc.type | <undefined> | The short name of JDBC type | string | 1.6.0 | +| kyuubi.engine.kubernetes.submit.timeout | PT30S | The engine submit timeout for Kubernetes application. | duration | 1.7.2 | +| kyuubi.engine.operation.convert.catalog.database.enabled | true | When set to true, The engine converts the JDBC methods of set/get Catalog and set/get Schema to the implementation of different engines | boolean | 1.6.0 | +| kyuubi.engine.operation.log.dir.root | engine_operation_logs | Root directory for query operation log at engine-side. | string | 1.4.0 | +| kyuubi.engine.pool.name | engine-pool | The name of the engine pool. | string | 1.5.0 | +| kyuubi.engine.pool.selectPolicy | RANDOM | The select policy of an engine from the corresponding engine pool engine for a session.
    • RANDOM - Randomly use the engine in the pool
    • POLLING - Polling use the engine in the pool
    | string | 1.7.0 | +| kyuubi.engine.pool.size | -1 | The size of the engine pool. Note that, if the size is less than 1, the engine pool will not be enabled; otherwise, the size of the engine pool will be min(this, kyuubi.engine.pool.size.threshold). | int | 1.4.0 | +| kyuubi.engine.pool.size.threshold | 9 | This parameter is introduced as a server-side parameter controlling the upper limit of the engine pool. | int | 1.4.0 | +| kyuubi.engine.session.initialize.sql || SemiColon-separated list of SQL statements to be initialized in the newly created engine session before queries. This configuration can not be used in JDBC url due to the limitation of Beeline/JDBC driver. | seq | 1.3.0 | +| kyuubi.engine.share.level | USER | Engines will be shared in different levels, available configs are:
    • CONNECTION: engine will not be shared but only used by the current client connection
    • USER: engine will be shared by all sessions created by a unique username, see also kyuubi.engine.share.level.subdomain
    • GROUP: the engine will be shared by all sessions created by all users belong to the same primary group name. The engine will be launched by the group name as the effective username, so here the group name is in value of special user who is able to visit the computing resources/data of the team. It follows the [Hadoop GroupsMapping](https://reurl.cc/xE61Y5) to map user to a primary group. If the primary group is not found, it fallback to the USER level.
    • SERVER: the App will be shared by Kyuubi servers
    | string | 1.2.0 | +| kyuubi.engine.share.level.sub.domain | <undefined> | (deprecated) - Using kyuubi.engine.share.level.subdomain instead | string | 1.2.0 | +| kyuubi.engine.share.level.subdomain | <undefined> | Allow end-users to create a subdomain for the share level of an engine. A subdomain is a case-insensitive string values that must be a valid zookeeper subpath. For example, for the `USER` share level, an end-user can share a certain engine within a subdomain, not for all of its clients. End-users are free to create multiple engines in the `USER` share level. When disable engine pool, use 'default' if absent. | string | 1.4.0 | +| kyuubi.engine.single.spark.session | false | When set to true, this engine is running in a single session mode. All the JDBC/ODBC connections share the temporary views, function registries, SQL configuration and the current database. | boolean | 1.3.0 | +| kyuubi.engine.spark.event.loggers | SPARK | A comma-separated list of engine loggers, where engine/session/operation etc events go.
    • SPARK: the events will be written to the Spark listener bus.
    • JSON: the events will be written to the location of kyuubi.engine.event.json.log.path
    • JDBC: to be done
    • CUSTOM: to be done.
    | seq | 1.7.0 | +| kyuubi.engine.spark.initialize.sql | SHOW DATABASES | The initialize sql for Spark engine. It fallback to `kyuubi.engine.initialize.sql`. | seq | 1.8.1 | +| kyuubi.engine.spark.output.mode | AUTO | The output mode of Spark engine:
    • AUTO: For PySpark, the extracted `text/plain` from python response as output.
    • NOTEBOOK: For PySpark, the original python response as output.
    | string | 1.9.0 | +| kyuubi.engine.spark.python.env.archive | <undefined> | Portable Python env archive used for Spark engine Python language mode. | string | 1.7.0 | +| kyuubi.engine.spark.python.env.archive.exec.path | bin/python | The Python exec path under the Python env archive. | string | 1.7.0 | +| kyuubi.engine.spark.python.home.archive | <undefined> | Spark archive containing $SPARK_HOME/python directory, which is used to init session Python worker for Python language mode. | string | 1.7.0 | +| kyuubi.engine.submit.timeout | PT30S | Period to tolerant Driver Pod ephemerally invisible after submitting. In some Resource Managers, e.g. K8s, the Driver Pod is not visible immediately after `spark-submit` is returned. | duration | 1.7.1 | +| kyuubi.engine.trino.connection.keystore.password | <undefined> | The keystore password used for connecting to trino cluster | string | 1.8.0 | +| kyuubi.engine.trino.connection.keystore.path | <undefined> | The keystore path used for connecting to trino cluster | string | 1.8.0 | +| kyuubi.engine.trino.connection.keystore.type | <undefined> | The keystore type used for connecting to trino cluster | string | 1.8.0 | +| kyuubi.engine.trino.connection.password | <undefined> | The password used for connecting to trino cluster | string | 1.8.0 | +| kyuubi.engine.trino.connection.truststore.password | <undefined> | The truststore password used for connecting to trino cluster | string | 1.8.0 | +| kyuubi.engine.trino.connection.truststore.path | <undefined> | The truststore path used for connecting to trino cluster | string | 1.8.0 | +| kyuubi.engine.trino.connection.truststore.type | <undefined> | The truststore type used for connecting to trino cluster | string | 1.8.0 | +| kyuubi.engine.trino.event.loggers | JSON | A comma-separated list of engine history loggers, where engine/session/operation etc events go.
    • JSON: the events will be written to the location of kyuubi.engine.event.json.log.path
    • JDBC: to be done
    • CUSTOM: to be done.
    | seq | 1.7.0 | +| kyuubi.engine.trino.extra.classpath | <undefined> | The extra classpath for the Trino query engine, for configuring other libs which may need by the Trino engine | string | 1.6.0 | +| kyuubi.engine.trino.java.options | <undefined> | The extra Java options for the Trino query engine | string | 1.6.0 | +| kyuubi.engine.trino.memory | 1g | The heap memory for the Trino query engine | string | 1.6.0 | +| kyuubi.engine.type | SPARK_SQL | Specify the detailed engine supported by Kyuubi. The engine type bindings to SESSION scope. This configuration is experimental. Currently, available configs are:
    • SPARK_SQL: specify this engine type will launch a Spark engine which can provide all the capacity of the Apache Spark. Note, it's a default engine type.
    • FLINK_SQL: specify this engine type will launch a Flink engine which can provide all the capacity of the Apache Flink.
    • TRINO: specify this engine type will launch a Trino engine which can provide all the capacity of the Trino.
    • HIVE_SQL: specify this engine type will launch a Hive engine which can provide all the capacity of the Hive Server2.
    • JDBC: specify this engine type will launch a JDBC engine which can forward queries to the database system through the certain JDBC driver, for now, it supports Doris, MySQL, Phoenix, PostgreSQL and StarRocks.
    • CHAT: specify this engine type will launch a Chat engine.
    | string | 1.4.0 | +| kyuubi.engine.ui.retainedSessions | 200 | The number of SQL client sessions kept in the Kyuubi Query Engine web UI. | int | 1.4.0 | +| kyuubi.engine.ui.retainedStatements | 200 | The number of statements kept in the Kyuubi Query Engine web UI. | int | 1.4.0 | +| kyuubi.engine.ui.stop.enabled | true | When true, allows Kyuubi engine to be killed from the Spark Web UI. | boolean | 1.3.0 | +| kyuubi.engine.user.isolated.spark.session | true | When set to false, if the engine is running in a group or server share level, all the JDBC/ODBC connections will be isolated against the user. Including the temporary views, function registries, SQL configuration, and the current database. Note that, it does not affect if the share level is connection or user. | boolean | 1.6.0 | +| kyuubi.engine.user.isolated.spark.session.idle.interval | PT1M | The interval to check if the user-isolated Spark session is timeout. | duration | 1.6.0 | +| kyuubi.engine.user.isolated.spark.session.idle.timeout | PT6H | If kyuubi.engine.user.isolated.spark.session is false, we will release the Spark session if its corresponding user is inactive after this configured timeout. | duration | 1.6.0 | +| kyuubi.engine.yarn.submit.timeout | PT30S | The engine submit timeout for YARN application. | duration | 1.7.2 | ### Event @@ -309,20 +318,26 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co ### Kubernetes -| Key | Default | Meaning | Type | Since | -|-----------------------------------------------------|-------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|-------| -| kyuubi.kubernetes.authenticate.caCertFile | <undefined> | Path to the CA cert file for connecting to the Kubernetes API server over TLS from the kyuubi. Specify this as a path as opposed to a URI (i.e. do not provide a scheme) | string | 1.7.0 | -| kyuubi.kubernetes.authenticate.clientCertFile | <undefined> | Path to the client cert file for connecting to the Kubernetes API server over TLS from the kyuubi. Specify this as a path as opposed to a URI (i.e. do not provide a scheme) | string | 1.7.0 | -| kyuubi.kubernetes.authenticate.clientKeyFile | <undefined> | Path to the client key file for connecting to the Kubernetes API server over TLS from the kyuubi. Specify this as a path as opposed to a URI (i.e. do not provide a scheme) | string | 1.7.0 | -| kyuubi.kubernetes.authenticate.oauthToken | <undefined> | The OAuth token to use when authenticating against the Kubernetes API server. Note that unlike, the other authentication options, this must be the exact string value of the token to use for the authentication. | string | 1.7.0 | -| kyuubi.kubernetes.authenticate.oauthTokenFile | <undefined> | Path to the file containing the OAuth token to use when authenticating against the Kubernetes API server. Specify this as a path as opposed to a URI (i.e. do not provide a scheme) | string | 1.7.0 | -| kyuubi.kubernetes.context | <undefined> | The desired context from your kubernetes config file used to configure the K8s client for interacting with the cluster. | string | 1.6.0 | -| kyuubi.kubernetes.context.allow.list || The allowed kubernetes context list, if it is empty, there is no kubernetes context limitation. | set | 1.8.0 | -| kyuubi.kubernetes.master.address | <undefined> | The internal Kubernetes master (API server) address to be used for kyuubi. | string | 1.7.0 | -| kyuubi.kubernetes.namespace | default | The namespace that will be used for running the kyuubi pods and find engines. | string | 1.7.0 | -| kyuubi.kubernetes.namespace.allow.list || The allowed kubernetes namespace list, if it is empty, there is no kubernetes namespace limitation. | set | 1.8.0 | -| kyuubi.kubernetes.terminatedApplicationRetainPeriod | PT5M | The period for which the Kyuubi server retains application information after the application terminates. | duration | 1.7.1 | -| kyuubi.kubernetes.trust.certificates | false | If set to true then client can submit to kubernetes cluster only with token | boolean | 1.7.0 | +| Key | Default | Meaning | Type | Since | +|----------------------------------------------------------------------|-------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|-------| +| kyuubi.kubernetes.application.state.container | spark-kubernetes-driver | The container name to retrieve the application state from. | string | 1.8.1 | +| kyuubi.kubernetes.application.state.source | POD | The source to retrieve the application state from. The valid values are pod and container. If the source is container and there is container inside the pod with the name of kyuubi.kubernetes.application.state.container, the application state will be from the matched container state. Otherwise, the application state will be from the pod state. | string | 1.8.1 | +| kyuubi.kubernetes.authenticate.caCertFile | <undefined> | Path to the CA cert file for connecting to the Kubernetes API server over TLS from the kyuubi. Specify this as a path as opposed to a URI (i.e. do not provide a scheme) | string | 1.7.0 | +| kyuubi.kubernetes.authenticate.clientCertFile | <undefined> | Path to the client cert file for connecting to the Kubernetes API server over TLS from the kyuubi. Specify this as a path as opposed to a URI (i.e. do not provide a scheme) | string | 1.7.0 | +| kyuubi.kubernetes.authenticate.clientKeyFile | <undefined> | Path to the client key file for connecting to the Kubernetes API server over TLS from the kyuubi. Specify this as a path as opposed to a URI (i.e. do not provide a scheme) | string | 1.7.0 | +| kyuubi.kubernetes.authenticate.oauthToken | <undefined> | The OAuth token to use when authenticating against the Kubernetes API server. Note that unlike, the other authentication options, this must be the exact string value of the token to use for the authentication. | string | 1.7.0 | +| kyuubi.kubernetes.authenticate.oauthTokenFile | <undefined> | Path to the file containing the OAuth token to use when authenticating against the Kubernetes API server. Specify this as a path as opposed to a URI (i.e. do not provide a scheme) | string | 1.7.0 | +| kyuubi.kubernetes.context | <undefined> | The desired context from your kubernetes config file used to configure the K8s client for interacting with the cluster. | string | 1.6.0 | +| kyuubi.kubernetes.context.allow.list || The allowed kubernetes context list, if it is empty, there is no kubernetes context limitation. | set | 1.8.0 | +| kyuubi.kubernetes.master.address | <undefined> | The internal Kubernetes master (API server) address to be used for kyuubi. | string | 1.7.0 | +| kyuubi.kubernetes.namespace | default | The namespace that will be used for running the kyuubi pods and find engines. | string | 1.7.0 | +| kyuubi.kubernetes.namespace.allow.list || The allowed kubernetes namespace list, if it is empty, there is no kubernetes namespace limitation. | set | 1.8.0 | +| kyuubi.kubernetes.spark.cleanupTerminatedDriverPod.checkInterval | PT1M | Kyuubi server use guava cache as the cleanup trigger with time-based eviction, but the eviction would not happened until any get/put operation happened. This option schedule a daemon thread evict cache periodically. | duration | 1.8.1 | +| kyuubi.kubernetes.spark.cleanupTerminatedDriverPod.kind | NONE | Kyuubi server will delete the spark driver pod after the application terminates for kyuubi.kubernetes.terminatedApplicationRetainPeriod. Available options are NONE, ALL, COMPLETED and default value is None which means none of the pod will be deleted | string | 1.8.1 | +| kyuubi.kubernetes.spark.forciblyRewriteDriverPodName.enabled | false | Whether to forcibly rewrite Spark driver pod name with 'kyuubi--driver'. If disabled, Kyuubi will try to preserve the application name while satisfying K8s' pod name policy, but some vendors may have stricter pod name policies, thus the generated name may become illegal. | boolean | 1.8.1 | +| kyuubi.kubernetes.spark.forciblyRewriteExecutorPodNamePrefix.enabled | false | Whether to forcibly rewrite Spark executor pod name prefix with 'kyuubi-'. If disabled, Kyuubi will try to preserve the application name while satisfying K8s' pod name policy, but some vendors may have stricter Pod name policies, thus the generated name may become illegal. | boolean | 1.8.1 | +| kyuubi.kubernetes.terminatedApplicationRetainPeriod | PT5M | The period for which the Kyuubi server retains application information after the application terminates. | duration | 1.7.1 | +| kyuubi.kubernetes.trust.certificates | false | If set to true then client can submit to kubernetes cluster only with token | boolean | 1.7.0 | ### Lineage @@ -348,7 +363,7 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co | kyuubi.metadata.store.jdbc.driver | <undefined> | JDBC driver class name for server jdbc metadata store. | string | 1.6.0 | | kyuubi.metadata.store.jdbc.password || The password for server JDBC metadata store. | string | 1.6.0 | | kyuubi.metadata.store.jdbc.priority.enabled | false | Whether to enable the priority scheduling for batch impl v2. When false, ignore kyuubi.batch.priority and use the FIFO ordering strategy for batch job scheduling. Note: this feature may cause significant performance issues when using MySQL 5.7 as the metastore backend due to the lack of support for mixed order index. See more details at KYUUBI #5329. | boolean | 1.8.0 | -| kyuubi.metadata.store.jdbc.url | jdbc:sqlite:kyuubi_state_store.db | The JDBC url for server JDBC metadata store. By default, it is a SQLite database url, and the state information is not shared across kyuubi instances. To enable high availability for multiple kyuubi instances, please specify a production JDBC url. | string | 1.6.0 | +| kyuubi.metadata.store.jdbc.url | jdbc:sqlite:<KYUUBI_HOME>/kyuubi_state_store.db | The JDBC url for server JDBC metadata store. By default, it is a SQLite database url, and the state information is not shared across Kyuubi instances. To enable high availability for multiple kyuubi instances, please specify a production JDBC url. Note: this value support the variables substitution: ``. | string | 1.6.0 | | kyuubi.metadata.store.jdbc.user || The username for server JDBC metadata store. | string | 1.6.0 | ### Metrics @@ -381,28 +396,31 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co | kyuubi.operation.result.arrow.timestampAsString | false | When true, arrow-based rowsets will convert columns of type timestamp to strings for transmission. | boolean | 1.7.0 | | kyuubi.operation.result.format | thrift | Specify the result format, available configs are:
    • THRIFT: the result will convert to TRow at the engine driver side.
    • ARROW: the result will be encoded as Arrow at the executor side before collecting by the driver, and deserialized at the client side. note that it only takes effect for kyuubi-hive-jdbc clients now.
    | string | 1.7.0 | | kyuubi.operation.result.max.rows | 0 | Max rows of Spark query results. Rows exceeding the limit would be ignored. By setting this value to 0 to disable the max rows limit. | int | 1.6.0 | +| kyuubi.operation.result.saveToFile.dir | /tmp/kyuubi/tmp_kyuubi_result | The Spark query result save dir, it should be a public accessible to every engine. Results are saved in ORC format, and the directory structure is `/OPERATION_RESULT_SAVE_TO_FILE_DIR/engineId/sessionId/statementId`. Each query result will delete when query finished. | string | 1.9.0 | +| kyuubi.operation.result.saveToFile.enabled | false | The switch for Spark query result save to file. | boolean | 1.9.0 | +| kyuubi.operation.result.saveToFile.minSize | 209715200 | The minSize of Spark result save to file, default value is 200 MB.we use spark's `EstimationUtils#getSizePerRowestimate` to estimate the output size of the execution plan. | long | 1.9.0 | | kyuubi.operation.scheduler.pool | <undefined> | The scheduler pool of job. Note that, this config should be used after changing Spark config spark.scheduler.mode=FAIR. | string | 1.1.1 | | kyuubi.operation.spark.listener.enabled | true | When set to true, Spark engine registers an SQLOperationListener before executing the statement, logging a few summary statistics when each stage completes. | boolean | 1.6.0 | | kyuubi.operation.status.polling.timeout | PT5S | Timeout(ms) for long polling asynchronous running sql query's status | duration | 1.0.0 | ### Server -| Key | Default | Meaning | Type | Since | -|----------------------------------------------------------|-------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|-------| -| kyuubi.server.administrators || Comma-separated list of Kyuubi service administrators. We use this config to grant admin permission to any service accounts. | set | 1.8.0 | -| kyuubi.server.info.provider | ENGINE | The server information provider name, some clients may rely on this information to check the server compatibilities and functionalities.
  • SERVER: Return Kyuubi server information.
  • ENGINE: Return Kyuubi engine information.
  • | string | 1.6.1 | -| kyuubi.server.limit.batch.connections.per.ipaddress | <undefined> | Maximum kyuubi server batch connections per ipaddress. Any user exceeding this limit will not be allowed to connect. | int | 1.7.0 | -| kyuubi.server.limit.batch.connections.per.user | <undefined> | Maximum kyuubi server batch connections per user. Any user exceeding this limit will not be allowed to connect. | int | 1.7.0 | -| kyuubi.server.limit.batch.connections.per.user.ipaddress | <undefined> | Maximum kyuubi server batch connections per user:ipaddress combination. Any user-ipaddress exceeding this limit will not be allowed to connect. | int | 1.7.0 | -| kyuubi.server.limit.client.fetch.max.rows | <undefined> | Max rows limit for getting result row set operation. If the max rows specified by client-side is larger than the limit, request will fail directly. | int | 1.8.0 | -| kyuubi.server.limit.connections.per.ipaddress | <undefined> | Maximum kyuubi server connections per ipaddress. Any user exceeding this limit will not be allowed to connect. | int | 1.6.0 | -| kyuubi.server.limit.connections.per.user | <undefined> | Maximum kyuubi server connections per user. Any user exceeding this limit will not be allowed to connect. | int | 1.6.0 | -| kyuubi.server.limit.connections.per.user.ipaddress | <undefined> | Maximum kyuubi server connections per user:ipaddress combination. Any user-ipaddress exceeding this limit will not be allowed to connect. | int | 1.6.0 | -| kyuubi.server.limit.connections.user.deny.list || The user in the deny list will be denied to connect to kyuubi server, if the user has configured both user.unlimited.list and user.deny.list, the priority of the latter is higher. | set | 1.8.0 | -| kyuubi.server.limit.connections.user.unlimited.list || The maximum connections of the user in the white list will not be limited. | set | 1.7.0 | -| kyuubi.server.name | <undefined> | The name of Kyuubi Server. | string | 1.5.0 | -| kyuubi.server.periodicGC.interval | PT30M | How often to trigger a garbage collection. | duration | 1.7.0 | -| kyuubi.server.redaction.regex | <undefined> | Regex to decide which Kyuubi contain sensitive information. When this regex matches a property key or value, the value is redacted from the various logs. || 1.6.0 | +| Key | Default | Meaning | Type | Since | +|----------------------------------------------------------|-------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|-------| +| kyuubi.server.administrators || Comma-separated list of Kyuubi service administrators. We use this config to grant admin permission to any service accounts when security mechanism is enabled. Note, when kyuubi.authentication is configured to NOSASL or NONE, everyone is treated as administrator. | set | 1.8.0 | +| kyuubi.server.info.provider | ENGINE | The server information provider name, some clients may rely on this information to check the server compatibilities and functionalities.
  • SERVER: Return Kyuubi server information.
  • ENGINE: Return Kyuubi engine information.
  • | string | 1.6.1 | +| kyuubi.server.limit.batch.connections.per.ipaddress | <undefined> | Maximum kyuubi server batch connections per ipaddress. Any user exceeding this limit will not be allowed to connect. | int | 1.7.0 | +| kyuubi.server.limit.batch.connections.per.user | <undefined> | Maximum kyuubi server batch connections per user. Any user exceeding this limit will not be allowed to connect. | int | 1.7.0 | +| kyuubi.server.limit.batch.connections.per.user.ipaddress | <undefined> | Maximum kyuubi server batch connections per user:ipaddress combination. Any user-ipaddress exceeding this limit will not be allowed to connect. | int | 1.7.0 | +| kyuubi.server.limit.client.fetch.max.rows | <undefined> | Max rows limit for getting result row set operation. If the max rows specified by client-side is larger than the limit, request will fail directly. | int | 1.8.0 | +| kyuubi.server.limit.connections.per.ipaddress | <undefined> | Maximum kyuubi server connections per ipaddress. Any user exceeding this limit will not be allowed to connect. | int | 1.6.0 | +| kyuubi.server.limit.connections.per.user | <undefined> | Maximum kyuubi server connections per user. Any user exceeding this limit will not be allowed to connect. | int | 1.6.0 | +| kyuubi.server.limit.connections.per.user.ipaddress | <undefined> | Maximum kyuubi server connections per user:ipaddress combination. Any user-ipaddress exceeding this limit will not be allowed to connect. | int | 1.6.0 | +| kyuubi.server.limit.connections.user.deny.list || The user in the deny list will be denied to connect to kyuubi server, if the user has configured both user.unlimited.list and user.deny.list, the priority of the latter is higher. | set | 1.8.0 | +| kyuubi.server.limit.connections.user.unlimited.list || The maximum connections of the user in the white list will not be limited. | set | 1.7.0 | +| kyuubi.server.name | <undefined> | The name of Kyuubi Server. | string | 1.5.0 | +| kyuubi.server.periodicGC.interval | PT30M | How often to trigger a garbage collection. | duration | 1.7.0 | +| kyuubi.server.redaction.regex | <undefined> | Regex to decide which Kyuubi contain sensitive information. When this regex matches a property key or value, the value is redacted from the various logs. || 1.6.0 | ### Session @@ -410,17 +428,18 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co |------------------------------------------------------|-------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|-------| | kyuubi.session.check.interval | PT5M | The check interval for session timeout. | duration | 1.0.0 | | kyuubi.session.close.on.disconnect | true | Session will be closed when client disconnects from kyuubi gateway. Set this to false to have session outlive its parent connection. | boolean | 1.8.0 | -| kyuubi.session.conf.advisor | <undefined> | A config advisor plugin for Kyuubi Server. This plugin can provide some custom configs for different users or session configs and overwrite the session configs before opening a new session. This config value should be a subclass of `org.apache.kyuubi.plugin.SessionConfAdvisor` which has a zero-arg constructor. | string | 1.5.0 | +| kyuubi.session.conf.advisor | <undefined> | A config advisor plugin for Kyuubi Server. This plugin can provide a list of custom configs for different users or session configs and overwrite the session configs before opening a new session. This config value should be a subclass of `org.apache.kyuubi.plugin.SessionConfAdvisor` which has a zero-arg constructor. | seq | 1.5.0 | | kyuubi.session.conf.file.reload.interval | PT10M | When `FileSessionConfAdvisor` is used, this configuration defines the expired time of `$KYUUBI_CONF_DIR/kyuubi-session-.conf` in the cache. After exceeding this value, the file will be reloaded. | duration | 1.7.0 | | kyuubi.session.conf.ignore.list || A comma-separated list of ignored keys. If the client connection contains any of them, the key and the corresponding value will be removed silently during engine bootstrap and connection setup. Note that this rule is for server-side protection defined via administrators to prevent some essential configs from tampering but will not forbid users to set dynamic configurations via SET syntax. | set | 1.2.0 | | kyuubi.session.conf.profile | <undefined> | Specify a profile to load session-level configurations from `$KYUUBI_CONF_DIR/kyuubi-session-.conf`. This configuration will be ignored if the file does not exist. This configuration only takes effect when `kyuubi.session.conf.advisor` is set as `org.apache.kyuubi.session.FileSessionConfAdvisor`. | string | 1.7.0 | | kyuubi.session.conf.restrict.list || A comma-separated list of restricted keys. If the client connection contains any of them, the connection will be rejected explicitly during engine bootstrap and connection setup. Note that this rule is for server-side protection defined via administrators to prevent some essential configs from tampering but will not forbid users to set dynamic configurations via SET syntax. | set | 1.2.0 | -| kyuubi.session.engine.alive.max.failures | 3 | The maximum number of failures allowed for the engine. | int | 1.8.0 | +| kyuubi.session.engine.alive.max.failures | 3 | The maximum number of failures allowed for the engine. | int | 1.8.1 | | kyuubi.session.engine.alive.probe.enabled | false | Whether to enable the engine alive probe, it true, we will create a companion thrift client that keeps sending simple requests to check whether the engine is alive. | boolean | 1.6.0 | | kyuubi.session.engine.alive.probe.interval | PT10S | The interval for engine alive probe. | duration | 1.6.0 | | kyuubi.session.engine.alive.timeout | PT2M | The timeout for engine alive. If there is no alive probe success in the last timeout window, the engine will be marked as no-alive. | duration | 1.6.0 | | kyuubi.session.engine.check.interval | PT1M | The check interval for engine timeout | duration | 1.0.0 | | kyuubi.session.engine.flink.fetch.timeout | <undefined> | Result fetch timeout for Flink engine. If the timeout is reached, the result fetch would be stopped and the current fetched would be returned. If no data are fetched, a TimeoutException would be thrown. | duration | 1.8.0 | +| kyuubi.session.engine.flink.initialize.sql || The initialize sql for Flink session. It fallback to `kyuubi.engine.session.initialize.sql` | seq | 1.8.1 | | kyuubi.session.engine.flink.main.resource | <undefined> | The package used to create Flink SQL engine remote job. If it is undefined, Kyuubi will use the default | string | 1.4.0 | | kyuubi.session.engine.flink.max.rows | 1000000 | Max rows of Flink query results. For batch queries, rows exceeding the limit would be ignored. For streaming queries, the query would be canceled if the limit is reached. | int | 1.5.0 | | kyuubi.session.engine.hive.main.resource | <undefined> | The package used to create Hive engine remote job. If it is undefined, Kyuubi will use the default | string | 1.6.0 | @@ -430,8 +449,10 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co | kyuubi.session.engine.log.timeout | PT24H | If we use Spark as the engine then the session submit log is the console output of spark-submit. We will retain the session submit log until over the config value. | duration | 1.1.0 | | kyuubi.session.engine.login.timeout | PT15S | The timeout of creating the connection to remote sql query engine | duration | 1.0.0 | | kyuubi.session.engine.open.max.attempts | 9 | The number of times an open engine will retry when encountering a special error. | int | 1.7.0 | +| kyuubi.session.engine.open.onFailure | RETRY | The behavior when opening engine failed:
    • RETRY: retry to open engine for kyuubi.session.engine.open.max.attempts times.
    • DEREGISTER_IMMEDIATELY: deregister the engine immediately.
    • DEREGISTER_AFTER_RETRY: deregister the engine after retry to open engine for kyuubi.session.engine.open.max.attempts times.
    | string | 1.8.1 | | kyuubi.session.engine.open.retry.wait | PT10S | How long to wait before retrying to open the engine after failure. | duration | 1.7.0 | | kyuubi.session.engine.share.level | USER | (deprecated) - Using kyuubi.engine.share.level instead | string | 1.0.0 | +| kyuubi.session.engine.spark.initialize.sql || The initialize sql for Spark session. It fallback to `kyuubi.engine.session.initialize.sql` | seq | 1.8.1 | | kyuubi.session.engine.spark.main.resource | <undefined> | The package used to create Spark SQL engine remote application. If it is undefined, Kyuubi will use the default | string | 1.0.0 | | kyuubi.session.engine.spark.max.initial.wait | PT1M | Max wait time for the initial connection to Spark engine. The engine will self-terminate no new incoming connection is established within this time. This setting only applies at the CONNECTION share level. 0 or negative means not to self-terminate. | duration | 1.8.0 | | kyuubi.session.engine.spark.max.lifetime | PT0S | Max lifetime for Spark engine, the engine will self-terminate when it reaches the end of life. 0 or negative means not to self-terminate. | duration | 1.6.0 | @@ -451,6 +472,7 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co | kyuubi.session.idle.timeout | PT6H | session idle timeout, it will be closed when it's not accessed for this duration | duration | 1.2.0 | | kyuubi.session.local.dir.allow.list || The local dir list that are allowed to access by the kyuubi session application. End-users might set some parameters such as `spark.files` and it will upload some local files when launching the kyuubi engine, if the local dir allow list is defined, kyuubi will check whether the path to upload is in the allow list. Note that, if it is empty, there is no limitation for that. And please use absolute paths. | set | 1.6.0 | | kyuubi.session.name | <undefined> | A human readable name of the session and we use empty string by default. This name will be recorded in the event. Note that, we only apply this value from session conf. | string | 1.4.0 | +| kyuubi.session.proxy.user | <undefined> | An alternative to hive.server2.proxy.user. The current behavior is consistent with hive.server2.proxy.user and now only takes effect in RESTFul API. When both parameters are set, kyuubi.session.proxy.user takes precedence. | string | 1.9.0 | | kyuubi.session.timeout | PT6H | (deprecated)session timeout, it will be closed when it's not accessed for this duration | duration | 1.0.0 | | kyuubi.session.user.sign.enabled | false | Whether to verify the integrity of session user name on the engine side, e.g. Authz plugin in Spark. | boolean | 1.7.0 | @@ -470,19 +492,19 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co ### Zookeeper -| Key | Default | Meaning | Type | Since | -|--------------------------------------------------|--------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------|-------| -| kyuubi.zookeeper.embedded.client.port | 2181 | clientPort for the embedded ZooKeeper server to listen for client connections, a client here could be Kyuubi server, engine, and JDBC client | int | 1.2.0 | -| kyuubi.zookeeper.embedded.client.port.address | <undefined> | clientPortAddress for the embedded ZooKeeper server to | string | 1.2.0 | -| kyuubi.zookeeper.embedded.client.use.hostname | false | When true, embedded Zookeeper prefer to bind hostname, otherwise, ip address. | boolean | 1.7.2 | -| kyuubi.zookeeper.embedded.data.dir | embedded_zookeeper | dataDir for the embedded zookeeper server where stores the in-memory database snapshots and, unless specified otherwise, the transaction log of updates to the database. | string | 1.2.0 | -| kyuubi.zookeeper.embedded.data.log.dir | embedded_zookeeper | dataLogDir for the embedded ZooKeeper server where writes the transaction log . | string | 1.2.0 | -| kyuubi.zookeeper.embedded.directory | embedded_zookeeper | The temporary directory for the embedded ZooKeeper server | string | 1.0.0 | -| kyuubi.zookeeper.embedded.max.client.connections | 120 | maxClientCnxns for the embedded ZooKeeper server to limit the number of concurrent connections of a single client identified by IP address | int | 1.2.0 | -| kyuubi.zookeeper.embedded.max.session.timeout | 60000 | maxSessionTimeout in milliseconds for the embedded ZooKeeper server will allow the client to negotiate. Defaults to 20 times the tickTime | int | 1.2.0 | -| kyuubi.zookeeper.embedded.min.session.timeout | 6000 | minSessionTimeout in milliseconds for the embedded ZooKeeper server will allow the client to negotiate. Defaults to 2 times the tickTime | int | 1.2.0 | -| kyuubi.zookeeper.embedded.port | 2181 | The port of the embedded ZooKeeper server | int | 1.0.0 | -| kyuubi.zookeeper.embedded.tick.time | 3000 | tickTime in milliseconds for the embedded ZooKeeper server | int | 1.2.0 | +| Key | Default | Meaning | Type | Since | +|--------------------------------------------------|--------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------|-------| +| kyuubi.zookeeper.embedded.client.port | 2181 | clientPort for the embedded ZooKeeper server to listen for client connections, a client here could be Kyuubi server, engine, and JDBC client | int | 1.2.0 | +| kyuubi.zookeeper.embedded.client.port.address | <undefined> | clientPortAddress for the embedded ZooKeeper server to | string | 1.2.0 | +| kyuubi.zookeeper.embedded.client.use.hostname | false | When true, embedded Zookeeper prefer to bind hostname, otherwise, ip address. | boolean | 1.7.2 | +| kyuubi.zookeeper.embedded.data.dir | embedded_zookeeper | dataDir for the embedded zookeeper server where stores the in-memory database snapshots and, unless specified otherwise, the transaction log of updates to the database. If it is a relative path, it is resolved relative to KYUUBI_HOME. | string | 1.2.0 | +| kyuubi.zookeeper.embedded.data.log.dir | embedded_zookeeper | dataLogDir for the embedded ZooKeeper server where writes the transaction log. If it is a relative path, it is resolved relative to KYUUBI_HOME. | string | 1.2.0 | +| kyuubi.zookeeper.embedded.directory | embedded_zookeeper | The temporary directory for the embedded ZooKeeper server. If it is a relative path, it is resolved relative to KYUUBI_HOME. | string | 1.0.0 | +| kyuubi.zookeeper.embedded.max.client.connections | 120 | maxClientCnxns for the embedded ZooKeeper server to limit the number of concurrent connections of a single client identified by IP address | int | 1.2.0 | +| kyuubi.zookeeper.embedded.max.session.timeout | 60000 | maxSessionTimeout in milliseconds for the embedded ZooKeeper server will allow the client to negotiate. Defaults to 20 times the tickTime | int | 1.2.0 | +| kyuubi.zookeeper.embedded.min.session.timeout | 6000 | minSessionTimeout in milliseconds for the embedded ZooKeeper server will allow the client to negotiate. Defaults to 2 times the tickTime | int | 1.2.0 | +| kyuubi.zookeeper.embedded.port | 2181 | The port of the embedded ZooKeeper server | int | 1.0.0 | +| kyuubi.zookeeper.embedded.tick.time | 3000 | tickTime in milliseconds for the embedded ZooKeeper server | int | 1.2.0 | ## Spark Configurations @@ -496,7 +518,11 @@ Setting them in `$KYUUBI_HOME/conf/kyuubi-defaults.conf` supplies with default v ### Via JDBC Connection URL -Setting them in the JDBC Connection URL supplies session-specific for each SQL engine. For example: ```jdbc:hive2://localhost:10009/default;#spark.sql.shuffle.partitions=2;spark.executor.memory=5g``` +Setting them in the JDBC Connection URL supplies session-specific for each SQL engine. For example: + +``` +jdbc:hive2://localhost:10009/default;#spark.sql.shuffle.partitions=2;spark.executor.memory=5g +``` - **Runtime SQL Configuration** - For [Runtime SQL Configurations](https://spark.apache.org/docs/latest/configuration.html#runtime-sql-configuration), they will take affect every time @@ -528,7 +554,11 @@ The below options in `kyuubi-defaults.conf` will set `parallelism.default: 2` an ### Via JDBC Connection URL -Setting them in the JDBC Connection URL supplies session-specific for each SQL engine. For example: ```jdbc:hive2://localhost:10009/default;#parallelism.default=2;taskmanager.memory.process.size=5g``` +Setting them in the JDBC Connection URL supplies session-specific for each SQL engine. For example: + +``` +jdbc:hive2://localhost:10009/default;#flink.parallelism.default=2;flink.taskmanager.memory.process.size=5g +``` ### Via SET Statements @@ -555,7 +585,11 @@ The below options in `kyuubi-defaults.conf` will set `query_max_stage_count: 500 ### Via JDBC Connection URL -Setting them in the JDBC Connection URL supplies session-specific for each SQL engine. For example: ```jdbc:hive2://localhost:10009/default;#trino.query_max_stage_count=500;trino.parse_decimal_literals_as_double=true``` +Setting them in the JDBC Connection URL supplies session-specific for each SQL engine. For example: + +``` +jdbc:hive2://localhost:10009/default;#trino.query_max_stage_count=500;trino.parse_decimal_literals_as_double=true +``` ### Via SET Statements diff --git a/docs/connector/spark/delta_lake_with_azure_blob.rst b/docs/connector/spark/delta_lake_with_azure_blob.rst index 1d7cab048b6..bdca3f5dfee 100644 --- a/docs/connector/spark/delta_lake_with_azure_blob.rst +++ b/docs/connector/spark/delta_lake_with_azure_blob.rst @@ -171,6 +171,7 @@ Enter the ./kyuubi/conf directory Add the following content: .. code-block:: properties + spark.master spark://:7077 kyuubi.authentication NONE kyuubi.frontend.bind.host @@ -220,6 +221,7 @@ Create Table ************ .. code-block:: sql + -- Create or replace table with path CREATE OR REPLACE TABLE delta.`wasbs://1000@azure_account.blob.core.windows.net/alexDemo20211129` ( date DATE, @@ -276,6 +278,7 @@ Overwrite Mode Result: .. code-block:: text + +-------------+----------+------------+---------------+ | date | eventId | eventType | data | +-------------+----------+------------+---------------+ @@ -287,6 +290,7 @@ Delete Table Data ***************** .. code-block:: sql + DELETE FROM delta.`wasbs://1000@azure_account.blob.core.windows.net/alexDemo20211129` WHERE eventId = 002; diff --git a/docs/connector/spark/hudi.rst b/docs/connector/spark/hudi.rst index 045e75146f0..3ccd1f93b18 100644 --- a/docs/connector/spark/hudi.rst +++ b/docs/connector/spark/hudi.rst @@ -60,6 +60,7 @@ Configurations To activate functionality of Hudi, we can set the following configurations: .. code-block:: properties + # Spark 3.2 spark.serializer=org.apache.spark.serializer.KryoSerializer spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension diff --git a/docs/contributing/code/building.md b/docs/contributing/code/building.md index 8c5c5aeec60..bfa6a46caed 100644 --- a/docs/contributing/code/building.md +++ b/docs/contributing/code/building.md @@ -63,9 +63,24 @@ Since v1.1.0, Kyuubi support building with different Spark profiles, | Profile | Default | Since | |-------------|---------|-------| -| -Pspark-3.1 | No | 1.1.0 | -| -Pspark-3.2 | No | 1.4.0 | -| -Pspark-3.3 | Yes | 1.6.0 | +| -Pspark-3.1 | | 1.1.0 | +| -Pspark-3.2 | | 1.4.0 | +| -Pspark-3.3 | | 1.6.0 | +| -Pspark-3.4 | โœ“ | 1.8.0 | +| -Pspark-3.5 | | 1.8.0 | + +## Building Kyuubi Against Different Scala Versions + +Since v1.8.0, Kyuubi support building with different Scala profile. Currently, Kyuubi supports building with Scala 2.12 and 2.13, while Scala 2.12 by default. + +| Profile | Default | Since | +|--------------|---------|-------| +| (Scala 2.12) | โœ“ | - | +| -Pscala-2.13 | | 1.8.0 | + +Please activate `scala-2.13` profile when Scala 2.13 support is needed. The GA tests have covered integration test with the Kyuubi server, engines and related plugins, while the Flink engine and it's integration tests are not included for the reason that Flink does not support Scala 2.13 yet and will pull out client support for Scala. + +For the Scala version for Spark engines, the server will look up the `SPARK_SCALA_VERSION` system environment variable first, and then the Scala version of the server compiled with if the former one not set. For the Scala version for other engines, the server will use the Scala version of the server compiled with. ## Building With Apache dlcdn Site diff --git a/docs/contributing/code/style.rst b/docs/contributing/code/style.rst index d967e895971..fced388a3ed 100644 --- a/docs/contributing/code/style.rst +++ b/docs/contributing/code/style.rst @@ -35,5 +35,11 @@ Java Coding Style Guide Kyuubi adopts the `Google Java style`_ for java codes. +Documentation Style Guide +------------------------- + +Kyuubi adopts the `Documentation Style Guide`_ for documentation. + .. _Databricks Scala Coding Style Guide: https://github.com/databricks/scala-style-guide -.. _Google Java style: https://google.github.io/styleguide/javaguide.html \ No newline at end of file +.. _Google Java style: https://google.github.io/styleguide/javaguide.html +.. _Documentation Style Guide: ../doc/style.html \ No newline at end of file diff --git a/docs/deployment/migration-guide.md b/docs/deployment/migration-guide.md index 58df0fcc629..9a099d58508 100644 --- a/docs/deployment/migration-guide.md +++ b/docs/deployment/migration-guide.md @@ -17,13 +17,24 @@ # Kyuubi Migration Guide +## Upgrading from Kyuubi 1.8 to 1.9 + +* Since Kyuubi 1.9.0, `kyuubi.session.conf.advisor` can be set as a sequence, Kyuubi supported chaining SessionConfAdvisors. + +## Upgrading from Kyuubi 1.8.0 to 1.8.1 + +* Since Kyuubi 1.8.1, for `DELETE /batches/${batchId}`, `hive.server2.proxy.user` is not needed in the request parameters. +* Since Kyuubi 1.8.1, the default SQLite file `kyuubi_state_store.db` for Metadata store is located under `$KYUUBI_HOME` instead of `$PWD`. To restore previous behavior, set `kyuubi.metadata.store.jdbc.url` to `jdbc:sqlite:kyuubi_state_store.db`. + ## Upgrading from Kyuubi 1.7 to 1.8 * Since Kyuubi 1.8, SQLite is added and becomes the default database type of Kyuubi metastore, as Derby has been deprecated. Both Derby and SQLite are mainly for testing purposes, and they're not supposed to be used in production. To restore previous behavior, set `kyuubi.metadata.store.jdbc.database.type=DERBY` and `kyuubi.metadata.store.jdbc.url=jdbc:derby:memory:kyuubi_state_store_db;create=true`. - +* Since Kyuubi 1.8, if the directory of the embedded zookeeper configuration (`kyuubi.zookeeper.embedded.directory` + & `kyuubi.zookeeper.embedded.data.dir` & `kyuubi.zookeeper.embedded.data.log.dir`) is a relative path, it is resolved + relative to `$KYUUBI_HOME` instead of `$PWD`. * Since Kyuubi 1.8, PROMETHEUS is changed as the default metrics reporter. To restore previous behavior, set `kyuubi.metrics.reporters=JSON`. diff --git a/docs/deployment/spark/gluten.md b/docs/deployment/spark/gluten.md new file mode 100644 index 00000000000..8f6bcdef7af --- /dev/null +++ b/docs/deployment/spark/gluten.md @@ -0,0 +1,55 @@ + + + +# Gluten + +Gluten is a Spark plugin developed by Intel, designed to accelerate Apache Spark with native libraries. Currently, only CentOS 7/8 and Ubuntu 20.04/22.04, along with Spark 3.2/3.3/3.4, are supported. Users can employ the following methods to utilize the Gluten with Velox native libraries. + +## Building(with velox Backend) + +### Build gluten velox backend package + +Git clone gluten project, use gluten build script `buildbundle-veloxbe.sh`, and target package is in `/path/to/gluten/package/target/` + +```bash +git clone https://github.com/oap-project/gluten.git +cd /path/to/gluten + +## The script builds two jars for spark 3.2.x, 3.3.x, and 3.4.x. +./dev/buildbundle-veloxbe.sh +``` + +## Usage + +You can use Gluten to accelerate Spark by following steps. + +### Installing + +add gluten jar: `copy /path/to/gluten/package/target/gluten-velox-bundle-spark3.x_2.12-*.jar $SPARK_HOME/jars/` or specified to `spark.jars` configuration + +### Configure + +add config into `spark-defaults.conf`: + +```properties +spark.plugins=io.glutenproject.GlutenPlugin +spark.memory.offHeap.size=20g +spark.memory.offHeap.enabled=true +spark.shuffle.manager=org.apache.spark.shuffle.sort.ColumnarShuffleManager +``` + diff --git a/docs/deployment/spark/index.rst b/docs/deployment/spark/index.rst index 0d75c506390..acaaa6ec511 100644 --- a/docs/deployment/spark/index.rst +++ b/docs/deployment/spark/index.rst @@ -30,3 +30,4 @@ Even if you don't use Kyuubi, as a simple Spark user, I'm sure you'll find the n dynamic_allocation aqe incremental_collection + gluten diff --git a/docs/extensions/engines/spark/rules.md b/docs/extensions/engines/spark/rules.md index 4614f52440a..55357e46d26 100644 --- a/docs/extensions/engines/spark/rules.md +++ b/docs/extensions/engines/spark/rules.md @@ -92,4 +92,5 @@ Kyuubi provides some configs to make these feature easy to use. | spark.sql.finalWriteStageExecutorMemory | fallback spark.executor.memory | Specify the executor on heap memory request for final write stage. It would be passed to the RDD resource profile. | 1.8.0 | | spark.sql.finalWriteStageExecutorMemoryOverhead | fallback spark.executor.memoryOverhead | Specify the executor memory overhead request for final write stage. It would be passed to the RDD resource profile. | 1.8.0 | | spark.sql.finalWriteStageExecutorOffHeapMemory | NONE | Specify the executor off heap memory request for final write stage. It would be passed to the RDD resource profile. | 1.8.0 | +| spark.sql.execution.scriptTransformation.enabled | true | When false, script transformation is not allowed. | 1.9.0 | diff --git a/docs/imgs/kyuubi_layers.drawio b/docs/imgs/kyuubi_layers.drawio index 2c3348ed4b8..95dc80ef9a2 100644 --- a/docs/imgs/kyuubi_layers.drawio +++ b/docs/imgs/kyuubi_layers.drawio @@ -1 +1,157 @@  \ No newline at end of file + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/imgs/kyuubi_layers.drawio.png b/docs/imgs/kyuubi_layers.drawio.png index ebc6b74a323..f3a8c059989 100644 Binary files a/docs/imgs/kyuubi_layers.drawio.png and b/docs/imgs/kyuubi_layers.drawio.png differ diff --git a/docs/imgs/ui/engine_ui.png b/docs/imgs/ui/engine_ui.png new file mode 100644 index 00000000000..a0044b4dc1d Binary files /dev/null and b/docs/imgs/ui/engine_ui.png differ diff --git a/docs/index.rst b/docs/index.rst index e86041ffc0d..ab9cf271fc2 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -30,7 +30,8 @@ Apache `Spark `_, `Flink `_, `Doris `_, `Hive `_, -and `Trino `_, etc, to query massive datasets distributed +`Trino `_, +and `StarRocks `_, etc., to query massive datasets distributed over fleets of machines from heterogeneous data sources. The Kyuubi Server lane of the below swimlane divides our prospective users into @@ -138,7 +139,7 @@ by professionals on the Kyuubi server side. It is suitable for the following sce in your data lake in cloud storage or an on-prem HDFS cluster. - Lakehouse formation and analytics - - Easily build an ACID table storage layer via Hudi, Iceberg, or/and Delta Lake. + - Easily build an ACID table storage layer via Hudi, Iceberg, Delta Lake or/and Paimon. - Logical data warehouse - Provide a relational abstraction on top of disparate data without ETL jobs, diff --git a/docs/quick_start/quick_start.rst b/docs/quick_start/quick_start.rst index 9d0a7d30ccf..85a215aad8e 100644 --- a/docs/quick_start/quick_start.rst +++ b/docs/quick_start/quick_start.rst @@ -36,23 +36,23 @@ For quick start deployment, we need to prepare the following stuffs: These essential components are JVM-based applications. So, the JRE needs to be pre-installed and the ``JAVA_HOME`` is correctly set to each component. - ================ ============ =============== =========================================== - Component Role Version Remarks - ================ ============ =============== =========================================== - **Java** JRE 8/11/17 Officially released against JDK8 - **Kyuubi** Gateway \ |release| \ - Kyuubi Server - Engine lib - Kyuubi Engine - Beeline - Kyuubi Hive Beeline - **Spark** Engine >=3.1 A Spark distribution - **Flink** Engine 1.16/1.17/1.18 A Flink distribution - **Trino** Engine >=363 A Trino cluster - **Doris** Engine N/A A Doris cluster - **Hive** Engine - 3.1.x - A Hive distribution - Metastore - N/A - An optional and external metadata store, - whose version is decided by engines + ================ ============ ==================== =========================================== + Component Role Version Remarks + ================ ============ ==================== =========================================== + **Java** JRE 8/11/17 Officially released against JDK8 + **Kyuubi** Gateway \ |release| \ - Kyuubi Server + Engine lib - Kyuubi Engine + Beeline - Kyuubi Hive Beeline + **Spark** Engine 3.1 to 3.5 A Spark distribution + **Flink** Engine 1.16/1.17/1.18 A Flink distribution + **Trino** Engine >=363 A Trino cluster + **Doris** Engine N/A A Doris cluster + **Hive** Engine - 2.1-cdh6/2.3/3.1 - A Hive distribution + Metastore - N/A - An optional and external metadata store, + whose version is decided by engines **Zookeeper** HA >=3.4.x - **Disk** Storage N/A N/A - ================ ============ =============== =========================================== + **Disk** Storage N/A N/A + ================ ============ ==================== =========================================== The other internal or external parts listed in the above sheet can be used individually or all together. For example, you can use Kyuubi, Spark and Flink to build a streaming diff --git a/docs/requirements.txt b/docs/requirements.txt index 8e1f5c47119..b2f9efc4a4e 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -17,12 +17,12 @@ # under the License. # -markdown==3.4.1 +markdown==3.5.1 recommonmark==0.7.1 -sphinx==4.5.0 -sphinx-book-theme==0.3.3 +sphinx==7.2.6 +sphinx-book-theme==1.1.0 sphinx-markdown-tables==0.0.17 -sphinx-notfound-page==0.8.3 +sphinx-notfound-page==1.0.0 sphinx-togglebutton===0.3.2 sphinxemoji===0.2.0 sphinx-copybutton===0.5.2 diff --git a/docs/tools/kyuubi-admin.rst b/docs/tools/kyuubi-admin.rst index bd37f7e684f..29149e92f5f 100644 --- a/docs/tools/kyuubi-admin.rst +++ b/docs/tools/kyuubi-admin.rst @@ -99,8 +99,6 @@ Usage: ``bin/kyuubi-admin list engine [options]`` - The subdomain for the share level of an engine. If not specified, it will read the configuration item kyuubi.engine.share.level.subdomain from kyuubi-defaults.conf. * - --hs2ProxyUser - The proxy user to impersonate. When specified, it will list engines for the hs2ProxyUser. - * - -a --all - - All the engine. .. _list_server: diff --git a/extensions/spark/kyuubi-extension-spark-3-1/src/main/scala/org/apache/kyuubi/sql/KyuubiSparkSQLCommonExtension.scala b/extensions/spark/kyuubi-extension-spark-3-1/src/main/scala/org/apache/kyuubi/sql/KyuubiSparkSQLCommonExtension.scala index 62aa88b9861..3dda669a8a3 100644 --- a/extensions/spark/kyuubi-extension-spark-3-1/src/main/scala/org/apache/kyuubi/sql/KyuubiSparkSQLCommonExtension.scala +++ b/extensions/spark/kyuubi-extension-spark-3-1/src/main/scala/org/apache/kyuubi/sql/KyuubiSparkSQLCommonExtension.scala @@ -44,7 +44,7 @@ object KyuubiSparkSQLCommonExtension { extensions.injectQueryStagePrepRule(_ => InsertShuffleNodeBeforeJoin) - extensions.injectPostHocResolutionRule(session => MarkNumOutputColumnsRule(session)) + extensions.injectPostHocResolutionRule(MarkNumOutputColumnsRule(_)) extensions.injectQueryStagePrepRule(FinalStageConfigIsolation(_)) } } diff --git a/extensions/spark/kyuubi-extension-spark-3-1/src/main/scala/org/apache/kyuubi/sql/KyuubiSparkSQLExtension.scala b/extensions/spark/kyuubi-extension-spark-3-1/src/main/scala/org/apache/kyuubi/sql/KyuubiSparkSQLExtension.scala index f952b56f387..f61eb731e58 100644 --- a/extensions/spark/kyuubi-extension-spark-3-1/src/main/scala/org/apache/kyuubi/sql/KyuubiSparkSQLExtension.scala +++ b/extensions/spark/kyuubi-extension-spark-3-1/src/main/scala/org/apache/kyuubi/sql/KyuubiSparkSQLExtension.scala @@ -20,7 +20,7 @@ package org.apache.kyuubi.sql import org.apache.spark.sql.SparkSessionExtensions import org.apache.kyuubi.sql.sqlclassification.KyuubiSqlClassification -import org.apache.kyuubi.sql.watchdog.{ForcedMaxOutputRowsRule, MaxScanStrategy} +import org.apache.kyuubi.sql.watchdog.{ForcedMaxOutputRowsRule, KyuubiUnsupportedOperationsCheck, MaxScanStrategy} // scalastyle:off line.size.limit /** @@ -39,6 +39,7 @@ class KyuubiSparkSQLExtension extends (SparkSessionExtensions => Unit) { extensions.injectPostHocResolutionRule(DropIgnoreNonexistent) // watchdog extension + extensions.injectCheckRule(_ => KyuubiUnsupportedOperationsCheck) extensions.injectOptimizerRule(ForcedMaxOutputRowsRule) extensions.injectPlannerStrategy(MaxScanStrategy) } diff --git a/extensions/spark/kyuubi-extension-spark-3-2/src/main/scala/org/apache/kyuubi/sql/KyuubiSparkSQLExtension.scala b/extensions/spark/kyuubi-extension-spark-3-2/src/main/scala/org/apache/kyuubi/sql/KyuubiSparkSQLExtension.scala index 97e77704293..9a0f5b1bb6b 100644 --- a/extensions/spark/kyuubi-extension-spark-3-2/src/main/scala/org/apache/kyuubi/sql/KyuubiSparkSQLExtension.scala +++ b/extensions/spark/kyuubi-extension-spark-3-2/src/main/scala/org/apache/kyuubi/sql/KyuubiSparkSQLExtension.scala @@ -19,7 +19,7 @@ package org.apache.kyuubi.sql import org.apache.spark.sql.SparkSessionExtensions -import org.apache.kyuubi.sql.watchdog.{ForcedMaxOutputRowsRule, MaxScanStrategy} +import org.apache.kyuubi.sql.watchdog.{ForcedMaxOutputRowsRule, KyuubiUnsupportedOperationsCheck, MaxScanStrategy} // scalastyle:off line.size.limit /** @@ -37,6 +37,7 @@ class KyuubiSparkSQLExtension extends (SparkSessionExtensions => Unit) { extensions.injectPostHocResolutionRule(DropIgnoreNonexistent) // watchdog extension + extensions.injectCheckRule(_ => KyuubiUnsupportedOperationsCheck) extensions.injectOptimizerRule(ForcedMaxOutputRowsRule) extensions.injectPlannerStrategy(MaxScanStrategy) } diff --git a/extensions/spark/kyuubi-extension-spark-3-3/src/main/scala/org/apache/kyuubi/sql/KyuubiSparkSQLCommonExtension.scala b/extensions/spark/kyuubi-extension-spark-3-3/src/main/scala/org/apache/kyuubi/sql/KyuubiSparkSQLCommonExtension.scala index 170b5a16509..c001ffc6c3b 100644 --- a/extensions/spark/kyuubi-extension-spark-3-3/src/main/scala/org/apache/kyuubi/sql/KyuubiSparkSQLCommonExtension.scala +++ b/extensions/spark/kyuubi-extension-spark-3-3/src/main/scala/org/apache/kyuubi/sql/KyuubiSparkSQLCommonExtension.scala @@ -44,7 +44,7 @@ object KyuubiSparkSQLCommonExtension { extensions.injectQueryStagePrepRule(_ => InsertShuffleNodeBeforeJoin) - extensions.injectPostHocResolutionRule(session => MarkNumOutputColumnsRule(session)) + extensions.injectPostHocResolutionRule(MarkNumOutputColumnsRule(_)) extensions.injectQueryStagePrepRule(FinalStageConfigIsolation(_)) } } diff --git a/extensions/spark/kyuubi-extension-spark-3-3/src/main/scala/org/apache/kyuubi/sql/KyuubiSparkSQLExtension.scala b/extensions/spark/kyuubi-extension-spark-3-3/src/main/scala/org/apache/kyuubi/sql/KyuubiSparkSQLExtension.scala index 792315d897a..fd11fb5f579 100644 --- a/extensions/spark/kyuubi-extension-spark-3-3/src/main/scala/org/apache/kyuubi/sql/KyuubiSparkSQLExtension.scala +++ b/extensions/spark/kyuubi-extension-spark-3-3/src/main/scala/org/apache/kyuubi/sql/KyuubiSparkSQLExtension.scala @@ -19,7 +19,7 @@ package org.apache.kyuubi.sql import org.apache.spark.sql.{FinalStageResourceManager, InjectCustomResourceProfile, SparkSessionExtensions} -import org.apache.kyuubi.sql.watchdog.{ForcedMaxOutputRowsRule, MaxScanStrategy} +import org.apache.kyuubi.sql.watchdog.{ForcedMaxOutputRowsRule, KyuubiUnsupportedOperationsCheck, MaxScanStrategy} // scalastyle:off line.size.limit /** @@ -37,6 +37,7 @@ class KyuubiSparkSQLExtension extends (SparkSessionExtensions => Unit) { extensions.injectPostHocResolutionRule(DropIgnoreNonexistent) // watchdog extension + extensions.injectCheckRule(_ => KyuubiUnsupportedOperationsCheck) extensions.injectOptimizerRule(ForcedMaxOutputRowsRule) extensions.injectPlannerStrategy(MaxScanStrategy) diff --git a/extensions/spark/kyuubi-extension-spark-3-4/src/main/scala/org/apache/kyuubi/sql/KyuubiSQLConf.scala b/extensions/spark/kyuubi-extension-spark-3-4/src/main/scala/org/apache/kyuubi/sql/KyuubiSQLConf.scala index 6f45dae126e..4b16d3e1681 100644 --- a/extensions/spark/kyuubi-extension-spark-3-4/src/main/scala/org/apache/kyuubi/sql/KyuubiSQLConf.scala +++ b/extensions/spark/kyuubi-extension-spark-3-4/src/main/scala/org/apache/kyuubi/sql/KyuubiSQLConf.scala @@ -273,4 +273,11 @@ object KyuubiSQLConf { .version("1.8.0") .stringConf .createOptional + + val SCRIPT_TRANSFORMATION_ENABLED = + buildConf("spark.sql.execution.scriptTransformation.enabled") + .doc("When false, script transformation is not allowed.") + .version("1.9.0") + .booleanConf + .createWithDefault(true) } diff --git a/extensions/spark/kyuubi-extension-spark-3-4/src/main/scala/org/apache/kyuubi/sql/KyuubiSparkSQLExtension.scala b/extensions/spark/kyuubi-extension-spark-3-4/src/main/scala/org/apache/kyuubi/sql/KyuubiSparkSQLExtension.scala index 792315d897a..fd11fb5f579 100644 --- a/extensions/spark/kyuubi-extension-spark-3-4/src/main/scala/org/apache/kyuubi/sql/KyuubiSparkSQLExtension.scala +++ b/extensions/spark/kyuubi-extension-spark-3-4/src/main/scala/org/apache/kyuubi/sql/KyuubiSparkSQLExtension.scala @@ -19,7 +19,7 @@ package org.apache.kyuubi.sql import org.apache.spark.sql.{FinalStageResourceManager, InjectCustomResourceProfile, SparkSessionExtensions} -import org.apache.kyuubi.sql.watchdog.{ForcedMaxOutputRowsRule, MaxScanStrategy} +import org.apache.kyuubi.sql.watchdog.{ForcedMaxOutputRowsRule, KyuubiUnsupportedOperationsCheck, MaxScanStrategy} // scalastyle:off line.size.limit /** @@ -37,6 +37,7 @@ class KyuubiSparkSQLExtension extends (SparkSessionExtensions => Unit) { extensions.injectPostHocResolutionRule(DropIgnoreNonexistent) // watchdog extension + extensions.injectCheckRule(_ => KyuubiUnsupportedOperationsCheck) extensions.injectOptimizerRule(ForcedMaxOutputRowsRule) extensions.injectPlannerStrategy(MaxScanStrategy) diff --git a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/doris/DorisRowSetHelper.scala b/extensions/spark/kyuubi-extension-spark-3-4/src/main/scala/org/apache/kyuubi/sql/watchdog/KyuubiUnsupportedOperationsCheck.scala similarity index 54% rename from externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/doris/DorisRowSetHelper.scala rename to extensions/spark/kyuubi-extension-spark-3-4/src/main/scala/org/apache/kyuubi/sql/watchdog/KyuubiUnsupportedOperationsCheck.scala index a92942cecdf..2b4d3940ada 100644 --- a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/doris/DorisRowSetHelper.scala +++ b/extensions/spark/kyuubi-extension-spark-3-4/src/main/scala/org/apache/kyuubi/sql/watchdog/KyuubiUnsupportedOperationsCheck.scala @@ -14,23 +14,22 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.kyuubi.engine.jdbc.doris -import org.apache.hive.service.rpc.thrift._ +package org.apache.kyuubi.sql.watchdog -import org.apache.kyuubi.engine.jdbc.schema.RowSetHelper +import org.apache.spark.sql.catalyst.SQLConfHelper +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, ScriptTransformation} -class DorisRowSetHelper extends RowSetHelper { +import org.apache.kyuubi.sql.{KyuubiSQLConf, KyuubiSQLExtensionException} - override def toTinyIntTColumn(rows: Seq[Seq[Any]], ordinal: Int): TColumn = - toIntegerTColumn(rows, ordinal) - - override def toSmallIntTColumn(rows: Seq[Seq[Any]], ordinal: Int): TColumn = - toIntegerTColumn(rows, ordinal) - - override def toTinyIntTColumnValue(row: List[Any], ordinal: Int): TColumnValue = - toIntegerTColumnValue(row, ordinal) - - override def toSmallIntTColumnValue(row: List[Any], ordinal: Int): TColumnValue = - toIntegerTColumnValue(row, ordinal) +object KyuubiUnsupportedOperationsCheck extends (LogicalPlan => Unit) with SQLConfHelper { + override def apply(plan: LogicalPlan): Unit = + conf.getConf(KyuubiSQLConf.SCRIPT_TRANSFORMATION_ENABLED) match { + case false => plan foreach { + case _: ScriptTransformation => + throw new KyuubiSQLExtensionException("Script transformation is not allowed") + case _ => + } + case true => + } } diff --git a/extensions/spark/kyuubi-extension-spark-3-4/src/test/scala/org/apache/spark/sql/WatchDogSuiteBase.scala b/extensions/spark/kyuubi-extension-spark-3-4/src/test/scala/org/apache/spark/sql/WatchDogSuiteBase.scala index a202e813c5e..139efd9ca06 100644 --- a/extensions/spark/kyuubi-extension-spark-3-4/src/test/scala/org/apache/spark/sql/WatchDogSuiteBase.scala +++ b/extensions/spark/kyuubi-extension-spark-3-4/src/test/scala/org/apache/spark/sql/WatchDogSuiteBase.scala @@ -24,7 +24,7 @@ import scala.collection.JavaConverters._ import org.apache.commons.io.FileUtils import org.apache.spark.sql.catalyst.plans.logical.{GlobalLimit, LogicalPlan} -import org.apache.kyuubi.sql.KyuubiSQLConf +import org.apache.kyuubi.sql.{KyuubiSQLConf, KyuubiSQLExtensionException} import org.apache.kyuubi.sql.watchdog.{MaxFileSizeExceedException, MaxPartitionExceedException} trait WatchDogSuiteBase extends KyuubiSparkSQLExtensionTest { @@ -598,4 +598,13 @@ trait WatchDogSuiteBase extends KyuubiSparkSQLExtensionTest { } } } + + test("disable script transformation") { + withSQLConf(KyuubiSQLConf.SCRIPT_TRANSFORMATION_ENABLED.key -> "false") { + val e = intercept[KyuubiSQLExtensionException] { + sql("SELECT TRANSFORM('') USING 'ls /'") + } + assert(e.getMessage == "Script transformation is not allowed") + } + } } diff --git a/extensions/spark/kyuubi-extension-spark-3-5/src/main/scala/org/apache/kyuubi/sql/DynamicShufflePartitions.scala b/extensions/spark/kyuubi-extension-spark-3-5/src/main/scala/org/apache/kyuubi/sql/DynamicShufflePartitions.scala new file mode 100644 index 00000000000..03d93d07680 --- /dev/null +++ b/extensions/spark/kyuubi-extension-spark-3-5/src/main/scala/org/apache/kyuubi/sql/DynamicShufflePartitions.scala @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.sql + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, RangePartitioning, RoundRobinPartitioning} +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.execution.{FileSourceScanExec, SparkPlan} +import org.apache.spark.sql.execution.adaptive.ShuffleQueryStageExec +import org.apache.spark.sql.execution.exchange.{REPARTITION_BY_NUM, ShuffleExchangeExec, ValidateRequirements} +import org.apache.spark.sql.hive.HiveSparkPlanHelper.HiveTableScanExec +import org.apache.spark.sql.internal.SQLConf._ + +import org.apache.kyuubi.sql.KyuubiSQLConf.{DYNAMIC_SHUFFLE_PARTITIONS, DYNAMIC_SHUFFLE_PARTITIONS_MAX_NUM} + +/** + * Dynamically adjust the number of shuffle partitions according to the input data size + */ +case class DynamicShufflePartitions(spark: SparkSession) extends Rule[SparkPlan] { + + override def apply(plan: SparkPlan): SparkPlan = { + if (!conf.getConf(DYNAMIC_SHUFFLE_PARTITIONS) || !conf.getConf(ADAPTIVE_EXECUTION_ENABLED)) { + plan + } else { + val maxDynamicShufflePartitions = conf.getConf(DYNAMIC_SHUFFLE_PARTITIONS_MAX_NUM) + + def collectScanSizes(plan: SparkPlan): Seq[Long] = plan match { + case FileSourceScanExec(relation, _, _, _, _, _, _, _, _) => + Seq(relation.location.sizeInBytes) + case t: HiveTableScanExec => + t.relation.prunedPartitions match { + case Some(partitions) => Seq(partitions.flatMap(_.stats).map(_.sizeInBytes.toLong).sum) + case None => Seq(t.relation.computeStats().sizeInBytes.toLong) + .filter(_ != conf.defaultSizeInBytes) + } + case stage: ShuffleQueryStageExec if stage.isMaterialized && stage.mapStats.isDefined => + Seq(stage.mapStats.get.bytesByPartitionId.sum) + case p => + p.children.flatMap(collectScanSizes) + } + + val scanSizes = collectScanSizes(plan) + if (scanSizes.isEmpty) { + return plan + } + + val targetSize = conf.getConf(ADVISORY_PARTITION_SIZE_IN_BYTES) + val targetShufflePartitions = Math.min( + Math.max(scanSizes.sum / targetSize + 1, conf.numShufflePartitions).toInt, + maxDynamicShufflePartitions) + + val newPlan = plan transformUp { + case exchange @ ShuffleExchangeExec(outputPartitioning, _, shuffleOrigin, _) + if shuffleOrigin != REPARTITION_BY_NUM => + val newOutPartitioning = outputPartitioning match { + case RoundRobinPartitioning(numPartitions) + if targetShufflePartitions != numPartitions => + Some(RoundRobinPartitioning(targetShufflePartitions)) + case HashPartitioning(expressions, numPartitions) + if targetShufflePartitions != numPartitions => + Some(HashPartitioning(expressions, targetShufflePartitions)) + case RangePartitioning(ordering, numPartitions) + if targetShufflePartitions != numPartitions => + Some(RangePartitioning(ordering, targetShufflePartitions)) + case _ => None + } + if (newOutPartitioning.isDefined) { + exchange.copy(outputPartitioning = newOutPartitioning.get) + } else { + exchange + } + } + + if (ValidateRequirements.validate(newPlan)) { + newPlan + } else { + logInfo("DynamicShufflePartitions rule generated an invalid plan. " + + "Falling back to the original plan.") + plan + } + } + } + +} diff --git a/extensions/spark/kyuubi-extension-spark-3-5/src/main/scala/org/apache/kyuubi/sql/InferRebalanceAndSortOrders.scala b/extensions/spark/kyuubi-extension-spark-3-5/src/main/scala/org/apache/kyuubi/sql/InferRebalanceAndSortOrders.scala index fcbf5c0a122..3b840f2a014 100644 --- a/extensions/spark/kyuubi-extension-spark-3-5/src/main/scala/org/apache/kyuubi/sql/InferRebalanceAndSortOrders.scala +++ b/extensions/spark/kyuubi-extension-spark-3-5/src/main/scala/org/apache/kyuubi/sql/InferRebalanceAndSortOrders.scala @@ -22,7 +22,7 @@ import scala.annotation.tailrec import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeSet, Expression, NamedExpression, UnaryExpression} import org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys import org.apache.spark.sql.catalyst.plans.{FullOuter, Inner, LeftAnti, LeftOuter, LeftSemi, RightOuter} -import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Filter, LogicalPlan, Project, Sort, SubqueryAlias, View} +import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Filter, Generate, LogicalPlan, Project, Sort, SubqueryAlias, View, Window} /** * Infer the columns for Rebalance and Sort to improve the compression ratio. @@ -96,6 +96,12 @@ object InferRebalanceAndSortOrders { case f: Filter => candidateKeys(f.child, output) case s: SubqueryAlias => candidateKeys(s.child, output) case v: View => candidateKeys(v.child, output) + case g: Generate => candidateKeys(g.child, AttributeSet(g.requiredChildOutput)) + case w: Window => + val aliasMap = getAliasMap(w.windowExpressions) + Some(( + w.partitionSpec.map(p => aliasMap.getOrElse(p.canonicalized, p)), + w.orderSpec.map(_.child).map(o => aliasMap.getOrElse(o.canonicalized, o)))) case _ => None } diff --git a/extensions/spark/kyuubi-extension-spark-3-5/src/main/scala/org/apache/kyuubi/sql/KyuubiSQLConf.scala b/extensions/spark/kyuubi-extension-spark-3-5/src/main/scala/org/apache/kyuubi/sql/KyuubiSQLConf.scala index 6f45dae126e..7c4e8d631ef 100644 --- a/extensions/spark/kyuubi-extension-spark-3-5/src/main/scala/org/apache/kyuubi/sql/KyuubiSQLConf.scala +++ b/extensions/spark/kyuubi-extension-spark-3-5/src/main/scala/org/apache/kyuubi/sql/KyuubiSQLConf.scala @@ -273,4 +273,27 @@ object KyuubiSQLConf { .version("1.8.0") .stringConf .createOptional + + val DYNAMIC_SHUFFLE_PARTITIONS = + buildConf("spark.sql.optimizer.dynamicShufflePartitions") + .doc("If true, adjust the number of shuffle partitions dynamically based on the job" + + " input size. The new number of partitions is the maximum input size" + + " divided by `spark.sql.adaptive.advisoryPartitionSizeInBytes`.") + .version("1.9.0") + .booleanConf + .createWithDefault(false) + + val DYNAMIC_SHUFFLE_PARTITIONS_MAX_NUM = + buildConf("spark.sql.optimizer.dynamicShufflePartitions.maxNum") + .doc("The maximum partition number of DynamicShufflePartitions.") + .version("1.9.0") + .intConf + .createWithDefault(2000) + + val SCRIPT_TRANSFORMATION_ENABLED = + buildConf("spark.sql.execution.scriptTransformation.enabled") + .doc("When false, script transformation is not allowed.") + .version("1.9.0") + .booleanConf + .createWithDefault(true) } diff --git a/extensions/spark/kyuubi-extension-spark-3-5/src/main/scala/org/apache/kyuubi/sql/KyuubiSparkSQLCommonExtension.scala b/extensions/spark/kyuubi-extension-spark-3-5/src/main/scala/org/apache/kyuubi/sql/KyuubiSparkSQLCommonExtension.scala index f39ad3cc390..ad95ac4295e 100644 --- a/extensions/spark/kyuubi-extension-spark-3-5/src/main/scala/org/apache/kyuubi/sql/KyuubiSparkSQLCommonExtension.scala +++ b/extensions/spark/kyuubi-extension-spark-3-5/src/main/scala/org/apache/kyuubi/sql/KyuubiSparkSQLCommonExtension.scala @@ -43,6 +43,7 @@ object KyuubiSparkSQLCommonExtension { extensions.injectPostHocResolutionRule(FinalStageConfigIsolationCleanRule) extensions.injectQueryStagePrepRule(_ => InsertShuffleNodeBeforeJoin) + extensions.injectQueryStagePrepRule(DynamicShufflePartitions) extensions.injectQueryStagePrepRule(FinalStageConfigIsolation(_)) } diff --git a/extensions/spark/kyuubi-extension-spark-3-5/src/main/scala/org/apache/kyuubi/sql/KyuubiSparkSQLExtension.scala b/extensions/spark/kyuubi-extension-spark-3-5/src/main/scala/org/apache/kyuubi/sql/KyuubiSparkSQLExtension.scala index 792315d897a..fd11fb5f579 100644 --- a/extensions/spark/kyuubi-extension-spark-3-5/src/main/scala/org/apache/kyuubi/sql/KyuubiSparkSQLExtension.scala +++ b/extensions/spark/kyuubi-extension-spark-3-5/src/main/scala/org/apache/kyuubi/sql/KyuubiSparkSQLExtension.scala @@ -19,7 +19,7 @@ package org.apache.kyuubi.sql import org.apache.spark.sql.{FinalStageResourceManager, InjectCustomResourceProfile, SparkSessionExtensions} -import org.apache.kyuubi.sql.watchdog.{ForcedMaxOutputRowsRule, MaxScanStrategy} +import org.apache.kyuubi.sql.watchdog.{ForcedMaxOutputRowsRule, KyuubiUnsupportedOperationsCheck, MaxScanStrategy} // scalastyle:off line.size.limit /** @@ -37,6 +37,7 @@ class KyuubiSparkSQLExtension extends (SparkSessionExtensions => Unit) { extensions.injectPostHocResolutionRule(DropIgnoreNonexistent) // watchdog extension + extensions.injectCheckRule(_ => KyuubiUnsupportedOperationsCheck) extensions.injectOptimizerRule(ForcedMaxOutputRowsRule) extensions.injectPlannerStrategy(MaxScanStrategy) diff --git a/extensions/spark/kyuubi-extension-spark-3-5/src/main/scala/org/apache/kyuubi/sql/watchdog/KyuubiUnsupportedOperationsCheck.scala b/extensions/spark/kyuubi-extension-spark-3-5/src/main/scala/org/apache/kyuubi/sql/watchdog/KyuubiUnsupportedOperationsCheck.scala new file mode 100644 index 00000000000..2b4d3940ada --- /dev/null +++ b/extensions/spark/kyuubi-extension-spark-3-5/src/main/scala/org/apache/kyuubi/sql/watchdog/KyuubiUnsupportedOperationsCheck.scala @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.sql.watchdog + +import org.apache.spark.sql.catalyst.SQLConfHelper +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, ScriptTransformation} + +import org.apache.kyuubi.sql.{KyuubiSQLConf, KyuubiSQLExtensionException} + +object KyuubiUnsupportedOperationsCheck extends (LogicalPlan => Unit) with SQLConfHelper { + override def apply(plan: LogicalPlan): Unit = + conf.getConf(KyuubiSQLConf.SCRIPT_TRANSFORMATION_ENABLED) match { + case false => plan foreach { + case _: ScriptTransformation => + throw new KyuubiSQLExtensionException("Script transformation is not allowed") + case _ => + } + case true => + } +} diff --git a/extensions/spark/kyuubi-extension-spark-3-5/src/main/scala/org/apache/spark/sql/hive/HiveSparkPlanHelper.scala b/extensions/spark/kyuubi-extension-spark-3-5/src/main/scala/org/apache/spark/sql/hive/HiveSparkPlanHelper.scala new file mode 100644 index 00000000000..aa9a0459616 --- /dev/null +++ b/extensions/spark/kyuubi-extension-spark-3-5/src/main/scala/org/apache/spark/sql/hive/HiveSparkPlanHelper.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.hive + +object HiveSparkPlanHelper { + type HiveTableScanExec = org.apache.spark.sql.hive.execution.HiveTableScanExec +} diff --git a/extensions/spark/kyuubi-extension-spark-3-5/src/test/scala/org/apache/spark/sql/DynamicShufflePartitionsSuite.scala b/extensions/spark/kyuubi-extension-spark-3-5/src/test/scala/org/apache/spark/sql/DynamicShufflePartitionsSuite.scala new file mode 100644 index 00000000000..6668675a5f5 --- /dev/null +++ b/extensions/spark/kyuubi-extension-spark-3-5/src/test/scala/org/apache/spark/sql/DynamicShufflePartitionsSuite.scala @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql + +import org.apache.spark.sql.execution.{CommandResultExec, SparkPlan} +import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanExec, ShuffleQueryStageExec} +import org.apache.spark.sql.execution.exchange.{ENSURE_REQUIREMENTS, ShuffleExchangeExec} +import org.apache.spark.sql.hive.HiveUtils.CONVERT_METASTORE_PARQUET +import org.apache.spark.sql.internal.SQLConf._ + +import org.apache.kyuubi.sql.KyuubiSQLConf.{DYNAMIC_SHUFFLE_PARTITIONS, DYNAMIC_SHUFFLE_PARTITIONS_MAX_NUM} + +class DynamicShufflePartitionsSuite extends KyuubiSparkSQLExtensionTest { + + override protected def beforeAll(): Unit = { + super.beforeAll() + setupData() + } + + test("test dynamic shuffle partitions") { + def collectExchanges(plan: SparkPlan): Seq[ShuffleExchangeExec] = { + plan match { + case p: CommandResultExec => collectExchanges(p.commandPhysicalPlan) + case p: AdaptiveSparkPlanExec => collectExchanges(p.finalPhysicalPlan) + case p: ShuffleQueryStageExec => collectExchanges(p.plan) + case p: ShuffleExchangeExec => p +: collectExchanges(p.child) + case p => p.children.flatMap(collectExchanges) + } + } + + // datasource scan + withTable("table1", "table2", "table3") { + sql("create table table1 stored as parquet as select c1, c2 from t1") + sql("create table table2 stored as parquet as select c1, c2 from t2") + sql("create table table3 (c1 int, c2 string) stored as parquet") + sql("ANALYZE TABLE table1 COMPUTE STATISTICS") + sql("ANALYZE TABLE table2 COMPUTE STATISTICS") + + val initialPartitionNum: Int = 2 + Seq(false, true).foreach { dynamicShufflePartitions => + val maxDynamicShufflePartitions = if (dynamicShufflePartitions) { + Seq(8, 2000) + } else { + Seq(2000) + } + maxDynamicShufflePartitions.foreach { maxDynamicShufflePartitionNum => + withSQLConf( + DYNAMIC_SHUFFLE_PARTITIONS.key -> dynamicShufflePartitions.toString, + DYNAMIC_SHUFFLE_PARTITIONS_MAX_NUM.key -> maxDynamicShufflePartitionNum.toString, + AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + COALESCE_PARTITIONS_INITIAL_PARTITION_NUM.key -> initialPartitionNum.toString, + ADVISORY_PARTITION_SIZE_IN_BYTES.key -> "500") { + val df = sql("insert overwrite table3 " + + " select a.c1 as c1, b.c2 as c2 from table1 a join table2 b on a.c1 = b.c1") + + val exchanges = collectExchanges(df.queryExecution.executedPlan) + val (joinExchanges, rebalanceExchanges) = exchanges + .partition(_.shuffleOrigin == ENSURE_REQUIREMENTS) + // table scan size: 7369 3287 + assert(joinExchanges.size == 2) + if (dynamicShufflePartitions) { + joinExchanges.foreach(e => + assert(e.outputPartitioning.numPartitions + == Math.min(22, maxDynamicShufflePartitionNum))) + } else { + joinExchanges.foreach(e => + assert(e.outputPartitioning.numPartitions == initialPartitionNum)) + } + + assert(rebalanceExchanges.size == 1) + if (dynamicShufflePartitions) { + if (maxDynamicShufflePartitionNum == 8) { + // shuffle query size: 1424 451 + assert(rebalanceExchanges.head.outputPartitioning.numPartitions == + Math.min(4, maxDynamicShufflePartitionNum)) + } else { + // shuffle query size: 2057 664 + assert(rebalanceExchanges.head.outputPartitioning.numPartitions == + Math.min(6, maxDynamicShufflePartitionNum)) + } + } else { + assert( + rebalanceExchanges.head.outputPartitioning.numPartitions == initialPartitionNum) + } + } + + // hive table scan + withSQLConf( + DYNAMIC_SHUFFLE_PARTITIONS.key -> dynamicShufflePartitions.toString, + DYNAMIC_SHUFFLE_PARTITIONS_MAX_NUM.key -> maxDynamicShufflePartitionNum.toString, + AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + COALESCE_PARTITIONS_INITIAL_PARTITION_NUM.key -> initialPartitionNum.toString, + ADVISORY_PARTITION_SIZE_IN_BYTES.key -> "500", + CONVERT_METASTORE_PARQUET.key -> "false") { + val df = sql("insert overwrite table3 " + + " select a.c1 as c1, b.c2 as c2 from table1 a join table2 b on a.c1 = b.c1") + + val exchanges = collectExchanges(df.queryExecution.executedPlan) + val (joinExchanges, rebalanceExchanges) = exchanges + .partition(_.shuffleOrigin == ENSURE_REQUIREMENTS) + // table scan size: 7369 3287 + assert(joinExchanges.size == 2) + if (dynamicShufflePartitions) { + joinExchanges.foreach(e => + assert(e.outputPartitioning.numPartitions == + Math.min(22, maxDynamicShufflePartitionNum))) + } else { + joinExchanges.foreach(e => + assert(e.outputPartitioning.numPartitions == initialPartitionNum)) + } + // shuffle query size: 5154 720 + assert(rebalanceExchanges.size == 1) + if (dynamicShufflePartitions) { + assert(rebalanceExchanges.head.outputPartitioning.numPartitions + == Math.min(12, maxDynamicShufflePartitionNum)) + } else { + assert(rebalanceExchanges.head.outputPartitioning.numPartitions == + initialPartitionNum) + } + } + } + } + } + } + +} diff --git a/extensions/spark/kyuubi-extension-spark-3-5/src/test/scala/org/apache/spark/sql/RebalanceBeforeWritingSuite.scala b/extensions/spark/kyuubi-extension-spark-3-5/src/test/scala/org/apache/spark/sql/RebalanceBeforeWritingSuite.scala index 1d9630f4937..64e44abc08c 100644 --- a/extensions/spark/kyuubi-extension-spark-3-5/src/test/scala/org/apache/spark/sql/RebalanceBeforeWritingSuite.scala +++ b/extensions/spark/kyuubi-extension-spark-3-5/src/test/scala/org/apache/spark/sql/RebalanceBeforeWritingSuite.scala @@ -199,9 +199,10 @@ class RebalanceBeforeWritingSuite extends KyuubiSparkSQLExtensionTest { } withView("v") { - withTable("t", "input1", "input2") { + withTable("t", "t2", "input1", "input2") { withSQLConf(KyuubiSQLConf.INFER_REBALANCE_AND_SORT_ORDERS.key -> "true") { sql(s"CREATE TABLE t (c1 int, c2 long) USING PARQUET PARTITIONED BY (p string)") + sql(s"CREATE TABLE t2 (c1 int, c2 long, c3 long) USING PARQUET PARTITIONED BY (p string)") sql(s"CREATE TABLE input1 USING PARQUET AS SELECT * FROM VALUES(1,2),(1,3)") sql(s"CREATE TABLE input2 USING PARQUET AS SELECT * FROM VALUES(1,3),(1,3)") sql(s"CREATE VIEW v as SELECT col1, count(*) as col2 FROM input1 GROUP BY col1") @@ -264,6 +265,30 @@ class RebalanceBeforeWritingSuite extends KyuubiSparkSQLExtensionTest { |SELECT * FROM v |""".stripMargin) checkShuffleAndSort(df5.queryExecution.analyzed, 1, 1) + + // generate + val df6 = sql( + s""" + |INSERT INTO TABLE t2 PARTITION(p='a') + |SELECT /*+ broadcast(input2) */ input1.col1, input2.col1, cast(cc.action1 as bigint) + |FROM input1 + |JOIN input2 + |ON input1.col1 = input2.col1 + | lateral view explode(ARRAY(input1.col1, input1.col2)) cc as action1 + |""".stripMargin) + checkShuffleAndSort(df6.queryExecution.analyzed, 1, 1) + + // window + val df7 = sql( + s""" + |INSERT INTO TABLE t2 PARTITION(p='a') + |SELECT /*+ broadcast(input2) */ input1.col1, input2.col2, + | RANK() OVER (PARTITION BY input2.col2 ORDER BY input1.col1) AS rank + |FROM input1 + |JOIN input2 + |ON input1.col1 = input2.col1 + |""".stripMargin) + checkShuffleAndSort(df7.queryExecution.analyzed, 1, 1) } } } diff --git a/extensions/spark/kyuubi-extension-spark-3-5/src/test/scala/org/apache/spark/sql/WatchDogSuiteBase.scala b/extensions/spark/kyuubi-extension-spark-3-5/src/test/scala/org/apache/spark/sql/WatchDogSuiteBase.scala index a202e813c5e..139efd9ca06 100644 --- a/extensions/spark/kyuubi-extension-spark-3-5/src/test/scala/org/apache/spark/sql/WatchDogSuiteBase.scala +++ b/extensions/spark/kyuubi-extension-spark-3-5/src/test/scala/org/apache/spark/sql/WatchDogSuiteBase.scala @@ -24,7 +24,7 @@ import scala.collection.JavaConverters._ import org.apache.commons.io.FileUtils import org.apache.spark.sql.catalyst.plans.logical.{GlobalLimit, LogicalPlan} -import org.apache.kyuubi.sql.KyuubiSQLConf +import org.apache.kyuubi.sql.{KyuubiSQLConf, KyuubiSQLExtensionException} import org.apache.kyuubi.sql.watchdog.{MaxFileSizeExceedException, MaxPartitionExceedException} trait WatchDogSuiteBase extends KyuubiSparkSQLExtensionTest { @@ -598,4 +598,13 @@ trait WatchDogSuiteBase extends KyuubiSparkSQLExtensionTest { } } } + + test("disable script transformation") { + withSQLConf(KyuubiSQLConf.SCRIPT_TRANSFORMATION_ENABLED.key -> "false") { + val e = intercept[KyuubiSQLExtensionException] { + sql("SELECT TRANSFORM('') USING 'ls /'") + } + assert(e.getMessage == "Script transformation is not allowed") + } + } } diff --git a/extensions/spark/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/KyuubiSQLConf.scala b/extensions/spark/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/KyuubiSQLConf.scala index 6f45dae126e..4b16d3e1681 100644 --- a/extensions/spark/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/KyuubiSQLConf.scala +++ b/extensions/spark/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/KyuubiSQLConf.scala @@ -273,4 +273,11 @@ object KyuubiSQLConf { .version("1.8.0") .stringConf .createOptional + + val SCRIPT_TRANSFORMATION_ENABLED = + buildConf("spark.sql.execution.scriptTransformation.enabled") + .doc("When false, script transformation is not allowed.") + .version("1.9.0") + .booleanConf + .createWithDefault(true) } diff --git a/extensions/spark/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/watchdog/KyuubiUnsupportedOperationsCheck.scala b/extensions/spark/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/watchdog/KyuubiUnsupportedOperationsCheck.scala new file mode 100644 index 00000000000..2b4d3940ada --- /dev/null +++ b/extensions/spark/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/watchdog/KyuubiUnsupportedOperationsCheck.scala @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.sql.watchdog + +import org.apache.spark.sql.catalyst.SQLConfHelper +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, ScriptTransformation} + +import org.apache.kyuubi.sql.{KyuubiSQLConf, KyuubiSQLExtensionException} + +object KyuubiUnsupportedOperationsCheck extends (LogicalPlan => Unit) with SQLConfHelper { + override def apply(plan: LogicalPlan): Unit = + conf.getConf(KyuubiSQLConf.SCRIPT_TRANSFORMATION_ENABLED) match { + case false => plan foreach { + case _: ScriptTransformation => + throw new KyuubiSQLExtensionException("Script transformation is not allowed") + case _ => + } + case true => + } +} diff --git a/extensions/spark/kyuubi-extension-spark-common/src/test/scala/org/apache/spark/sql/WatchDogSuiteBase.scala b/extensions/spark/kyuubi-extension-spark-common/src/test/scala/org/apache/spark/sql/WatchDogSuiteBase.scala index a202e813c5e..139efd9ca06 100644 --- a/extensions/spark/kyuubi-extension-spark-common/src/test/scala/org/apache/spark/sql/WatchDogSuiteBase.scala +++ b/extensions/spark/kyuubi-extension-spark-common/src/test/scala/org/apache/spark/sql/WatchDogSuiteBase.scala @@ -24,7 +24,7 @@ import scala.collection.JavaConverters._ import org.apache.commons.io.FileUtils import org.apache.spark.sql.catalyst.plans.logical.{GlobalLimit, LogicalPlan} -import org.apache.kyuubi.sql.KyuubiSQLConf +import org.apache.kyuubi.sql.{KyuubiSQLConf, KyuubiSQLExtensionException} import org.apache.kyuubi.sql.watchdog.{MaxFileSizeExceedException, MaxPartitionExceedException} trait WatchDogSuiteBase extends KyuubiSparkSQLExtensionTest { @@ -598,4 +598,13 @@ trait WatchDogSuiteBase extends KyuubiSparkSQLExtensionTest { } } } + + test("disable script transformation") { + withSQLConf(KyuubiSQLConf.SCRIPT_TRANSFORMATION_ENABLED.key -> "false") { + val e = intercept[KyuubiSQLExtensionException] { + sql("SELECT TRANSFORM('') USING 'ls /'") + } + assert(e.getMessage == "Script transformation is not allowed") + } + } } diff --git a/extensions/spark/kyuubi-spark-authz-shaded/pom.xml b/extensions/spark/kyuubi-spark-authz-shaded/pom.xml index b135a1d7c1e..10edeb1fbab 100644 --- a/extensions/spark/kyuubi-spark-authz-shaded/pom.xml +++ b/extensions/spark/kyuubi-spark-authz-shaded/pom.xml @@ -93,14 +93,6 @@ javax.ws.rs jsr311-api - - org.codehaus.jackson - jackson-core-asl - - - org.codehaus.jackson - jackson-mapper-asl - com.kstruct gethostname4j @@ -246,6 +238,8 @@ org.apache.ranger:ranger-plugins-common org.apache.ranger:ranger-plugins-audit org.codehaus.jackson:jackson-jaxrs + org.codehaus.jackson:jackson-core-asl + org.codehaus.jackson:jackson-mapper-asl com.sun.jersey:jersey-client com.sun.jersey:jersey-core com.kstruct:gethostname4j @@ -259,6 +253,9 @@ **/*.proto META-INF/*.SF + META-INF/LGPL2.1 + META-INF/AL2.0 + META-INF/ASL2.0 META-INF/*.DSA META-INF/*.RSA META-INF/DEPENDENCIES @@ -274,8 +271,8 @@ - org.codehaus.jackson.jaxrs - ${kyuubi.shade.packageName}.org.codehaus.jackson.jaxrs + org.codehaus.jackson + ${kyuubi.shade.packageName}.org.codehaus.jackson com.sun.jersey @@ -314,4 +311,27 @@ + + + + + scala-2.13 + + + + org.apache.maven.plugins + maven-shade-plugin + + + + net.java.dev.jna:jna + net.java.dev.jna:jna-platform + + + + + + + + diff --git a/extensions/spark/kyuubi-spark-authz-shaded/src/main/resources/META-INF/LICENSE b/extensions/spark/kyuubi-spark-authz-shaded/src/main/resources/META-INF/LICENSE new file mode 100644 index 00000000000..1e6d25e885e --- /dev/null +++ b/extensions/spark/kyuubi-spark-authz-shaded/src/main/resources/META-INF/LICENSE @@ -0,0 +1,225 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +------------------------------------------------------------------------------------ + +This project bundles some components that are licensed under the + +Apache License Version 2.0 +-------------------------- +org.apache.ranger:ranger-plugins-common +org.apache.ranger:ranger-plugins-audit +org.codehaus.jackson:jackson-jaxrs +org.codehaus.jackson:jackson-core-asl +org.codehaus.jackson:jackson-mapper-asl +net.java.dev.jna:jna +net.java.dev.jna:jna-platform + +Common Development and Distribution License (CDDL) 1.1 +------------------------------------------------------ +com.sun.jersey:jersey-client +com.sun.jersey:jersey-core + +MIT license +----------- +com.kstruct:gethostname4j diff --git a/extensions/spark/kyuubi-spark-authz-shaded/src/main/resources/META-INF/NOTICE b/extensions/spark/kyuubi-spark-authz-shaded/src/main/resources/META-INF/NOTICE new file mode 100644 index 00000000000..9afa0f86d1c --- /dev/null +++ b/extensions/spark/kyuubi-spark-authz-shaded/src/main/resources/META-INF/NOTICE @@ -0,0 +1,12 @@ +Apache Kyuubi +Copyright 2021-2023 The Apache Software Foundation. + +This product includes software developed at +The Apache Software Foundation (https://www.apache.org/). + +-------------------------------------------------------------------------------- + +This binary artifact contains + +Apache Ranger +Copyright 2014-2022 The Apache Software Foundation diff --git a/extensions/spark/kyuubi-spark-authz/README.md b/extensions/spark/kyuubi-spark-authz/README.md index 9657b5b7a5c..43ee45b09a8 100644 --- a/extensions/spark/kyuubi-spark-authz/README.md +++ b/extensions/spark/kyuubi-spark-authz/README.md @@ -34,6 +34,7 @@ build/mvn clean package -DskipTests -pl :kyuubi-spark-authz_2.12 -am -Dspark.ver `-Dspark.version=` - [x] master +- [x] 3.5.x - [x] 3.4.x (default) - [x] 3.3.x - [x] 3.2.x diff --git a/extensions/spark/kyuubi-spark-authz/pom.xml b/extensions/spark/kyuubi-spark-authz/pom.xml index 1b865561275..c2d9f759556 100644 --- a/extensions/spark/kyuubi-spark-authz/pom.xml +++ b/extensions/spark/kyuubi-spark-authz/pom.xml @@ -329,6 +329,12 @@ paimon-spark-${paimon.spark.binary.version} test + + + io.delta + ${delta.artifact}_${scala.binary.version} + test + diff --git a/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.ActionTypeExtractor b/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.ActionTypeExtractor index 61fa81809b1..140d113ac0d 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.ActionTypeExtractor +++ b/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.ActionTypeExtractor @@ -6,7 +6,7 @@ # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, diff --git a/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.CatalogExtractor b/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.CatalogExtractor index ae058a66f2e..7ae3aac52f3 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.CatalogExtractor +++ b/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.CatalogExtractor @@ -6,7 +6,7 @@ # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, diff --git a/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.ColumnExtractor b/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.ColumnExtractor index ed76c15d107..497c7867c59 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.ColumnExtractor +++ b/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.ColumnExtractor @@ -6,7 +6,7 @@ # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, diff --git a/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.DatabaseExtractor b/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.DatabaseExtractor index 2a269ee5067..c2b65812559 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.DatabaseExtractor +++ b/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.DatabaseExtractor @@ -6,7 +6,7 @@ # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, diff --git a/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.FunctionExtractor b/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.FunctionExtractor index 2facb004a04..745fd1bfcd6 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.FunctionExtractor +++ b/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.FunctionExtractor @@ -6,7 +6,7 @@ # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, diff --git a/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.FunctionTypeExtractor b/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.FunctionTypeExtractor index 3bb0ee6c23e..d054f346263 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.FunctionTypeExtractor +++ b/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.FunctionTypeExtractor @@ -6,7 +6,7 @@ # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, diff --git a/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.QueryExtractor b/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.QueryExtractor index 2406a40e196..10222257a5d 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.QueryExtractor +++ b/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.QueryExtractor @@ -6,7 +6,7 @@ # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, diff --git a/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.TableExtractor b/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.TableExtractor index dc35a8f5104..7010766f24b 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.TableExtractor +++ b/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.TableExtractor @@ -6,7 +6,7 @@ # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, @@ -19,15 +19,17 @@ org.apache.kyuubi.plugin.spark.authz.serde.CatalogTableOptionTableExtractor org.apache.kyuubi.plugin.spark.authz.serde.CatalogTableTableExtractor org.apache.kyuubi.plugin.spark.authz.serde.DataSourceV2RelationTableExtractor org.apache.kyuubi.plugin.spark.authz.serde.ExpressionSeqTableExtractor -org.apache.kyuubi.plugin.spark.authz.serde.HudiDataSourceV2RelationTableExtractor -org.apache.kyuubi.plugin.spark.authz.serde.HudiMergeIntoTargetTableExtractor org.apache.kyuubi.plugin.spark.authz.serde.HudiCallProcedureInputTableExtractor org.apache.kyuubi.plugin.spark.authz.serde.HudiCallProcedureOutputTableExtractor +org.apache.kyuubi.plugin.spark.authz.serde.HudiDataSourceV2RelationTableExtractor +org.apache.kyuubi.plugin.spark.authz.serde.HudiMergeIntoTargetTableExtractor org.apache.kyuubi.plugin.spark.authz.serde.IdentifierTableExtractor org.apache.kyuubi.plugin.spark.authz.serde.LogicalRelationTableExtractor org.apache.kyuubi.plugin.spark.authz.serde.ResolvedDbObjectNameTableExtractor org.apache.kyuubi.plugin.spark.authz.serde.ResolvedIdentifierTableExtractor org.apache.kyuubi.plugin.spark.authz.serde.ResolvedTableTableExtractor org.apache.kyuubi.plugin.spark.authz.serde.StringTableExtractor +org.apache.kyuubi.plugin.spark.authz.serde.SubqueryAliasTableExtractor +org.apache.kyuubi.plugin.spark.authz.serde.TableIdentifierOptionTableExtractor org.apache.kyuubi.plugin.spark.authz.serde.TableIdentifierTableExtractor org.apache.kyuubi.plugin.spark.authz.serde.TableTableExtractor diff --git a/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.TableTypeExtractor b/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.TableTypeExtractor index 251a317581f..caeeefa4196 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.TableTypeExtractor +++ b/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.TableTypeExtractor @@ -6,7 +6,7 @@ # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, diff --git a/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.URIExtractor b/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.URIExtractor new file mode 100644 index 00000000000..460dfeb01ae --- /dev/null +++ b/extensions/spark/kyuubi-spark-authz/src/main/resources/META-INF/services/org.apache.kyuubi.plugin.spark.authz.serde.URIExtractor @@ -0,0 +1,32 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +org.apache.kyuubi.plugin.spark.authz.serde.BaseRelationFileIndexURIExtractor +org.apache.kyuubi.plugin.spark.authz.serde.CatalogStorageFormatURIExtractor +org.apache.kyuubi.plugin.spark.authz.serde.CatalogTableURIExtractor +org.apache.kyuubi.plugin.spark.authz.serde.DataSourceV2RelationURIExtractor +org.apache.kyuubi.plugin.spark.authz.serde.IdentifierURIExtractor +org.apache.kyuubi.plugin.spark.authz.serde.PartitionLocsSeqURIExtractor +org.apache.kyuubi.plugin.spark.authz.serde.PropertiesLocationUriExtractor +org.apache.kyuubi.plugin.spark.authz.serde.PropertiesPathUriExtractor +org.apache.kyuubi.plugin.spark.authz.serde.ResolvedTableURIExtractor +org.apache.kyuubi.plugin.spark.authz.serde.StringSeqURIExtractor +org.apache.kyuubi.plugin.spark.authz.serde.StringURIExtractor +org.apache.kyuubi.plugin.spark.authz.serde.SubqueryAliasURIExtractor +org.apache.kyuubi.plugin.spark.authz.serde.TableIdentifierOptionURIExtractor +org.apache.kyuubi.plugin.spark.authz.serde.TableIdentifierURIExtractor +org.apache.kyuubi.plugin.spark.authz.serde.TableSpecURIExtractor diff --git a/extensions/spark/kyuubi-spark-authz/src/main/resources/database_command_spec.json b/extensions/spark/kyuubi-spark-authz/src/main/resources/database_command_spec.json index c640ed89bce..5891fb1e548 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/resources/database_command_spec.json +++ b/extensions/spark/kyuubi-spark-authz/src/main/resources/database_command_spec.json @@ -4,159 +4,215 @@ "fieldName" : "child", "fieldExtractor" : "ResolvedNamespaceDatabaseExtractor", "catalogDesc" : null, - "isInput" : false + "isInput" : false, + "comment" : "" } ], - "opType" : "ALTERDATABASE" + "opType" : "ALTERDATABASE", + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.catalyst.plans.logical.CreateNamespace", "databaseDescs" : [ { "fieldName" : "name", "fieldExtractor" : "ResolvedDBObjectNameDatabaseExtractor", "catalogDesc" : null, - "isInput" : false + "isInput" : false, + "comment" : "" }, { "fieldName" : "namespace", "fieldExtractor" : "StringSeqDatabaseExtractor", "catalogDesc" : { "fieldName" : "catalog", - "fieldExtractor" : "CatalogPluginCatalogExtractor" + "fieldExtractor" : "CatalogPluginCatalogExtractor", + "comment" : "" }, - "isInput" : false + "isInput" : false, + "comment" : "" }, { "fieldName" : "name", "fieldExtractor" : "ResolvedNamespaceDatabaseExtractor", "catalogDesc" : null, - "isInput" : false + "isInput" : false, + "comment" : "" } ], - "opType" : "CREATEDATABASE" + "opType" : "CREATEDATABASE", + "uriDescs" : [ { + "fieldName" : "properties", + "fieldExtractor" : "PropertiesLocationUriExtractor", + "isInput" : false, + "comment" : "" + } ] }, { "classname" : "org.apache.spark.sql.catalyst.plans.logical.DescribeNamespace", "databaseDescs" : [ { "fieldName" : "namespace", "fieldExtractor" : "ResolvedNamespaceDatabaseExtractor", "catalogDesc" : null, - "isInput" : true + "isInput" : true, + "comment" : "" } ], - "opType" : "DESCDATABASE" + "opType" : "DESCDATABASE", + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.catalyst.plans.logical.DropNamespace", "databaseDescs" : [ { "fieldName" : "namespace", "fieldExtractor" : "ResolvedNamespaceDatabaseExtractor", "catalogDesc" : null, - "isInput" : false + "isInput" : false, + "comment" : "" } ], - "opType" : "DROPDATABASE" + "opType" : "DROPDATABASE", + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.catalyst.plans.logical.SetCatalogAndNamespace", "databaseDescs" : [ { "fieldName" : "child", "fieldExtractor" : "ResolvedNamespaceDatabaseExtractor", "catalogDesc" : null, - "isInput" : true + "isInput" : true, + "comment" : "" }, { "fieldName" : "child", "fieldExtractor" : "ResolvedDBObjectNameDatabaseExtractor", "catalogDesc" : null, - "isInput" : true + "isInput" : true, + "comment" : "" }, { "fieldName" : "namespace", "fieldExtractor" : "StringSeqOptionDatabaseExtractor", "catalogDesc" : { "fieldName" : "catalogName", - "fieldExtractor" : "StringOptionCatalogExtractor" + "fieldExtractor" : "StringOptionCatalogExtractor", + "comment" : "" }, - "isInput" : true + "isInput" : true, + "comment" : "" } ], - "opType" : "SWITCHDATABASE" + "opType" : "SWITCHDATABASE", + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.catalyst.plans.logical.SetNamespaceLocation", "databaseDescs" : [ { "fieldName" : "namespace", "fieldExtractor" : "ResolvedNamespaceDatabaseExtractor", "catalogDesc" : null, - "isInput" : false + "isInput" : false, + "comment" : "" } ], - "opType" : "ALTERDATABASE_LOCATION" + "opType" : "ALTERDATABASE_LOCATION", + "uriDescs" : [ { + "fieldName" : "location", + "fieldExtractor" : "StringURIExtractor", + "isInput" : false, + "comment" : "" + } ] }, { "classname" : "org.apache.spark.sql.catalyst.plans.logical.SetNamespaceProperties", "databaseDescs" : [ { "fieldName" : "namespace", "fieldExtractor" : "ResolvedNamespaceDatabaseExtractor", "catalogDesc" : null, - "isInput" : false + "isInput" : false, + "comment" : "" } ], - "opType" : "ALTERDATABASE" + "opType" : "ALTERDATABASE", + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.execution.command.AlterDatabasePropertiesCommand", "databaseDescs" : [ { "fieldName" : "databaseName", "fieldExtractor" : "StringDatabaseExtractor", "catalogDesc" : null, - "isInput" : false + "isInput" : false, + "comment" : "" } ], - "opType" : "ALTERDATABASE" + "opType" : "ALTERDATABASE", + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.execution.command.AlterDatabaseSetLocationCommand", "databaseDescs" : [ { "fieldName" : "databaseName", "fieldExtractor" : "StringDatabaseExtractor", "catalogDesc" : null, - "isInput" : false + "isInput" : false, + "comment" : "" } ], - "opType" : "ALTERDATABASE_LOCATION" + "opType" : "ALTERDATABASE_LOCATION", + "uriDescs" : [ { + "fieldName" : "location", + "fieldExtractor" : "StringURIExtractor", + "isInput" : false, + "comment" : "" + } ] }, { "classname" : "org.apache.spark.sql.execution.command.AnalyzeTablesCommand", "databaseDescs" : [ { "fieldName" : "databaseName", "fieldExtractor" : "StringOptionDatabaseExtractor", "catalogDesc" : null, - "isInput" : true + "isInput" : true, + "comment" : "" } ], - "opType" : "ANALYZE_TABLE" + "opType" : "ANALYZE_TABLE", + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.execution.command.CreateDatabaseCommand", "databaseDescs" : [ { "fieldName" : "databaseName", "fieldExtractor" : "StringDatabaseExtractor", "catalogDesc" : null, - "isInput" : false + "isInput" : false, + "comment" : "" } ], - "opType" : "CREATEDATABASE" + "opType" : "CREATEDATABASE", + "uriDescs" : [ { + "fieldName" : "path", + "fieldExtractor" : "StringURIExtractor", + "isInput" : false, + "comment" : "" + } ] }, { "classname" : "org.apache.spark.sql.execution.command.DescribeDatabaseCommand", "databaseDescs" : [ { "fieldName" : "databaseName", "fieldExtractor" : "StringDatabaseExtractor", "catalogDesc" : null, - "isInput" : true + "isInput" : true, + "comment" : "" } ], - "opType" : "DESCDATABASE" + "opType" : "DESCDATABASE", + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.execution.command.DropDatabaseCommand", "databaseDescs" : [ { "fieldName" : "databaseName", "fieldExtractor" : "StringDatabaseExtractor", "catalogDesc" : null, - "isInput" : false + "isInput" : false, + "comment" : "" } ], - "opType" : "DROPDATABASE" + "opType" : "DROPDATABASE", + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.execution.command.SetDatabaseCommand", "databaseDescs" : [ { "fieldName" : "databaseName", "fieldExtractor" : "StringDatabaseExtractor", "catalogDesc" : null, - "isInput" : true + "isInput" : true, + "comment" : "" } ], - "opType" : "SWITCHDATABASE" + "opType" : "SWITCHDATABASE", + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.execution.command.SetNamespaceCommand", "databaseDescs" : [ { "fieldName" : "namespace", "fieldExtractor" : "StringSeqDatabaseExtractor", "catalogDesc" : null, - "isInput" : true + "isInput" : true, + "comment" : "" } ], - "opType" : "SWITCHDATABASE" + "opType" : "SWITCHDATABASE", + "uriDescs" : [ ] } ] \ No newline at end of file diff --git a/extensions/spark/kyuubi-spark-authz/src/main/resources/function_command_spec.json b/extensions/spark/kyuubi-spark-authz/src/main/resources/function_command_spec.json index 0b71245d218..14dad8e2a3f 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/resources/function_command_spec.json +++ b/extensions/spark/kyuubi-spark-authz/src/main/resources/function_command_spec.json @@ -7,9 +7,11 @@ "functionTypeDesc" : { "fieldName" : "isTemp", "fieldExtractor" : "TempMarkerFunctionTypeExtractor", - "skipTypes" : [ "TEMP" ] + "skipTypes" : [ "TEMP" ], + "comment" : "" }, - "isInput" : false + "isInput" : false, + "comment" : "" }, { "fieldName" : "functionName", "fieldExtractor" : "StringFunctionExtractor", @@ -17,14 +19,17 @@ "fieldName" : "databaseName", "fieldExtractor" : "StringOptionDatabaseExtractor", "catalogDesc" : null, - "isInput" : false + "isInput" : false, + "comment" : "" }, "functionTypeDesc" : { "fieldName" : "isTemp", "fieldExtractor" : "TempMarkerFunctionTypeExtractor", - "skipTypes" : [ "TEMP" ] + "skipTypes" : [ "TEMP" ], + "comment" : "" }, - "isInput" : false + "isInput" : false, + "comment" : "" } ], "opType" : "CREATEFUNCTION" }, { @@ -36,9 +41,11 @@ "functionTypeDesc" : { "fieldName" : "info", "fieldExtractor" : "ExpressionInfoFunctionTypeExtractor", - "skipTypes" : [ "TEMP", "SYSTEM" ] + "skipTypes" : [ "TEMP", "SYSTEM" ], + "comment" : "" }, - "isInput" : true + "isInput" : true, + "comment" : "" }, { "fieldName" : "functionName", "fieldExtractor" : "FunctionIdentifierFunctionExtractor", @@ -46,9 +53,11 @@ "functionTypeDesc" : { "fieldName" : "functionName", "fieldExtractor" : "FunctionIdentifierFunctionTypeExtractor", - "skipTypes" : [ "TEMP", "SYSTEM" ] + "skipTypes" : [ "TEMP", "SYSTEM" ], + "comment" : "" }, - "isInput" : true + "isInput" : true, + "comment" : "" } ], "opType" : "DESCFUNCTION" }, { @@ -60,9 +69,11 @@ "functionTypeDesc" : { "fieldName" : "isTemp", "fieldExtractor" : "TempMarkerFunctionTypeExtractor", - "skipTypes" : [ "TEMP" ] + "skipTypes" : [ "TEMP" ], + "comment" : "" }, - "isInput" : false + "isInput" : false, + "comment" : "" }, { "fieldName" : "functionName", "fieldExtractor" : "StringFunctionExtractor", @@ -70,14 +81,17 @@ "fieldName" : "databaseName", "fieldExtractor" : "StringOptionDatabaseExtractor", "catalogDesc" : null, - "isInput" : false + "isInput" : false, + "comment" : "" }, "functionTypeDesc" : { "fieldName" : "isTemp", "fieldExtractor" : "TempMarkerFunctionTypeExtractor", - "skipTypes" : [ "TEMP" ] + "skipTypes" : [ "TEMP" ], + "comment" : "" }, - "isInput" : false + "isInput" : false, + "comment" : "" } ], "opType" : "DROPFUNCTION" }, { @@ -89,10 +103,12 @@ "fieldName" : "databaseName", "fieldExtractor" : "StringOptionDatabaseExtractor", "catalogDesc" : null, - "isInput" : false + "isInput" : false, + "comment" : "" }, "functionTypeDesc" : null, - "isInput" : false + "isInput" : false, + "comment" : "" } ], "opType" : "RELOADFUNCTION" } ] \ No newline at end of file diff --git a/extensions/spark/kyuubi-spark-authz/src/main/resources/scan_command_spec.json b/extensions/spark/kyuubi-spark-authz/src/main/resources/scan_command_spec.json index 3273ccbeaf0..1145adbe07a 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/resources/scan_command_spec.json +++ b/extensions/spark/kyuubi-spark-authz/src/main/resources/scan_command_spec.json @@ -1,35 +1,48 @@ [ { - "classname" : "org.apache.kyuubi.plugin.spark.authz.util.PermanentViewMarker", + "classname" : "org.apache.kyuubi.plugin.spark.authz.rule.permanentview.PermanentViewMarker", "scanDescs" : [ { "fieldName" : "catalogTable", "fieldExtractor" : "CatalogTableTableExtractor", - "catalogDesc" : null + "catalogDesc" : null, + "comment" : "" } ], - "functionDescs" : [ ] + "functionDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.catalyst.catalog.HiveTableRelation", "scanDescs" : [ { "fieldName" : "tableMeta", "fieldExtractor" : "CatalogTableTableExtractor", - "catalogDesc" : null + "catalogDesc" : null, + "comment" : "" } ], - "functionDescs" : [ ] + "functionDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.execution.datasources.LogicalRelation", "scanDescs" : [ { "fieldName" : "catalogTable", "fieldExtractor" : "CatalogTableOptionTableExtractor", - "catalogDesc" : null + "catalogDesc" : null, + "comment" : "" } ], - "functionDescs" : [ ] + "functionDescs" : [ ], + "uriDescs" : [ { + "fieldName" : "relation", + "fieldExtractor" : "BaseRelationFileIndexURIExtractor", + "isInput" : false, + "comment" : "" + } ] }, { "classname" : "org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation", "scanDescs" : [ { "fieldName" : null, "fieldExtractor" : "DataSourceV2RelationTableExtractor", - "catalogDesc" : null + "catalogDesc" : null, + "comment" : "" } ], - "functionDescs" : [ ] + "functionDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.hive.HiveGenericUDF", "scanDescs" : [ ], @@ -40,10 +53,13 @@ "functionTypeDesc" : { "fieldName" : "name", "fieldExtractor" : "FunctionNameFunctionTypeExtractor", - "skipTypes" : [ "TEMP", "SYSTEM" ] + "skipTypes" : [ "TEMP", "SYSTEM" ], + "comment" : "" }, - "isInput" : true - } ] + "isInput" : true, + "comment" : "" + } ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.hive.HiveGenericUDTF", "scanDescs" : [ ], @@ -54,10 +70,13 @@ "functionTypeDesc" : { "fieldName" : "name", "fieldExtractor" : "FunctionNameFunctionTypeExtractor", - "skipTypes" : [ "TEMP", "SYSTEM" ] + "skipTypes" : [ "TEMP", "SYSTEM" ], + "comment" : "" }, - "isInput" : true - } ] + "isInput" : true, + "comment" : "" + } ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.hive.HiveSimpleUDF", "scanDescs" : [ ], @@ -68,10 +87,13 @@ "functionTypeDesc" : { "fieldName" : "name", "fieldExtractor" : "FunctionNameFunctionTypeExtractor", - "skipTypes" : [ "TEMP", "SYSTEM" ] + "skipTypes" : [ "TEMP", "SYSTEM" ], + "comment" : "" }, - "isInput" : true - } ] + "isInput" : true, + "comment" : "" + } ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.hive.HiveUDAFFunction", "scanDescs" : [ ], @@ -82,8 +104,11 @@ "functionTypeDesc" : { "fieldName" : "name", "fieldExtractor" : "FunctionNameFunctionTypeExtractor", - "skipTypes" : [ "TEMP", "SYSTEM" ] + "skipTypes" : [ "TEMP", "SYSTEM" ], + "comment" : "" }, - "isInput" : true - } ] + "isInput" : true, + "comment" : "" + } ], + "uriDescs" : [ ] } ] \ No newline at end of file diff --git a/extensions/spark/kyuubi-spark-authz/src/main/resources/table_command_spec.json b/extensions/spark/kyuubi-spark-authz/src/main/resources/table_command_spec.json index ea6e2757621..b555bbcf8be 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/resources/table_command_spec.json +++ b/extensions/spark/kyuubi-spark-authz/src/main/resources/table_command_spec.json @@ -8,10 +8,17 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "ALTERTABLE_ADDCOLS", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ { + "fieldName" : "child", + "fieldExtractor" : "ResolvedTableURIExtractor", + "isInput" : false, + "comment" : "" + } ] }, { "classname" : "org.apache.spark.sql.catalyst.plans.logical.AddPartitions", "tableDescs" : [ { @@ -22,10 +29,12 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "ALTERTABLE_ADDPARTS", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.catalyst.plans.logical.AlterColumn", "tableDescs" : [ { @@ -36,10 +45,17 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "ALTERTABLE_ADDCOLS", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ { + "fieldName" : "child", + "fieldExtractor" : "ResolvedTableURIExtractor", + "isInput" : false, + "comment" : "" + } ] }, { "classname" : "org.apache.spark.sql.catalyst.plans.logical.AlterTable", "tableDescs" : [ { @@ -50,10 +66,17 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "ALTERTABLE_PROPERTIES", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ { + "fieldName" : "ident", + "fieldExtractor" : "IdentifierURIExtractor", + "isInput" : false, + "comment" : "" + } ] }, { "classname" : "org.apache.spark.sql.catalyst.plans.logical.AppendData", "tableDescs" : [ { @@ -63,17 +86,26 @@ "actionTypeDesc" : { "fieldName" : null, "fieldExtractor" : null, - "actionType" : "INSERT" + "actionType" : "INSERT", + "comment" : "" }, "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "QUERY", "queryDescs" : [ { "fieldName" : "query", - "fieldExtractor" : "LogicalPlanQueryExtractor" + "fieldExtractor" : "LogicalPlanQueryExtractor", + "comment" : "" + } ], + "uriDescs" : [ { + "fieldName" : "table", + "fieldExtractor" : "DataSourceV2RelationURIExtractor", + "isInput" : false, + "comment" : "" } ] }, { "classname" : "org.apache.spark.sql.catalyst.plans.logical.CacheTable", @@ -81,30 +113,20 @@ "opType" : "CREATEVIEW", "queryDescs" : [ { "fieldName" : "table", - "fieldExtractor" : "LogicalPlanQueryExtractor" - } ] + "fieldExtractor" : "LogicalPlanQueryExtractor", + "comment" : "" + } ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.catalyst.plans.logical.CacheTableAsSelect", "tableDescs" : [ ], "opType" : "CREATEVIEW", "queryDescs" : [ { "fieldName" : "plan", - "fieldExtractor" : "LogicalPlanQueryExtractor" - } ] -}, { - "classname" : "org.apache.spark.sql.catalyst.plans.logical.Call", - "tableDescs" : [ { - "fieldName" : "args", - "fieldExtractor" : "ExpressionSeqTableExtractor", - "columnDesc" : null, - "actionTypeDesc" : null, - "tableTypeDesc" : null, - "catalogDesc" : null, - "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "fieldExtractor" : "LogicalPlanQueryExtractor", + "comment" : "" } ], - "opType" : "ALTERTABLE_PROPERTIES", - "queryDescs" : [ ] + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.catalyst.plans.logical.CommentOnTable", "tableDescs" : [ { @@ -115,10 +137,12 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "ALTERTABLE_PROPERTIES", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.catalyst.plans.logical.CreateTable", "tableDescs" : [ { @@ -129,7 +153,8 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" }, { "fieldName" : "tableName", "fieldExtractor" : "IdentifierTableExtractor", @@ -138,10 +163,12 @@ "tableTypeDesc" : null, "catalogDesc" : { "fieldName" : "catalog", - "fieldExtractor" : "CatalogPluginCatalogExtractor" + "fieldExtractor" : "CatalogPluginCatalogExtractor", + "comment" : "" }, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" }, { "fieldName" : "child", "fieldExtractor" : "ResolvedDbObjectNameTableExtractor", @@ -150,10 +177,27 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "CREATETABLE", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ { + "fieldName" : "tableSpec", + "fieldExtractor" : "TableSpecURIExtractor", + "isInput" : false, + "comment" : "" + }, { + "fieldName" : "properties", + "fieldExtractor" : "PropertiesLocationUriExtractor", + "isInput" : false, + "comment" : "" + }, { + "fieldName" : "tableName", + "fieldExtractor" : "IdentifierURIExtractor", + "isInput" : false, + "comment" : "" + } ] }, { "classname" : "org.apache.spark.sql.catalyst.plans.logical.CreateTableAsSelect", "tableDescs" : [ { @@ -164,7 +208,8 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" }, { "fieldName" : "tableName", "fieldExtractor" : "IdentifierTableExtractor", @@ -173,10 +218,12 @@ "tableTypeDesc" : null, "catalogDesc" : { "fieldName" : "catalog", - "fieldExtractor" : "CatalogPluginCatalogExtractor" + "fieldExtractor" : "CatalogPluginCatalogExtractor", + "comment" : "" }, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" }, { "fieldName" : "name", "fieldExtractor" : "ResolvedDbObjectNameTableExtractor", @@ -185,12 +232,25 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "CREATETABLE_AS_SELECT", "queryDescs" : [ { "fieldName" : "query", - "fieldExtractor" : "LogicalPlanQueryExtractor" + "fieldExtractor" : "LogicalPlanQueryExtractor", + "comment" : "" + } ], + "uriDescs" : [ { + "fieldName" : "tableSpec", + "fieldExtractor" : "TableSpecURIExtractor", + "isInput" : false, + "comment" : "" + }, { + "fieldName" : "properties", + "fieldExtractor" : "PropertiesLocationUriExtractor", + "isInput" : false, + "comment" : "" } ] }, { "classname" : "org.apache.spark.sql.catalyst.plans.logical.CreateV2Table", @@ -202,31 +262,26 @@ "tableTypeDesc" : null, "catalogDesc" : { "fieldName" : "catalog", - "fieldExtractor" : "CatalogPluginCatalogExtractor" + "fieldExtractor" : "CatalogPluginCatalogExtractor", + "comment" : "" }, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "CREATETABLE", - "queryDescs" : [ ] -}, { - "classname" : "org.apache.spark.sql.catalyst.plans.logical.DeleteFromIcebergTable", - "tableDescs" : [ { - "fieldName" : "table", - "fieldExtractor" : "DataSourceV2RelationTableExtractor", - "columnDesc" : null, - "actionTypeDesc" : { - "fieldName" : null, - "fieldExtractor" : null, - "actionType" : "UPDATE" - }, - "tableTypeDesc" : null, - "catalogDesc" : null, + "queryDescs" : [ ], + "uriDescs" : [ { + "fieldName" : "properties", + "fieldExtractor" : "PropertiesLocationUriExtractor", "isInput" : false, - "setCurrentDatabaseIfMissing" : false - } ], - "opType" : "QUERY", - "queryDescs" : [ ] + "comment" : "" + }, { + "fieldName" : "tableName", + "fieldExtractor" : "IdentifierURIExtractor", + "isInput" : false, + "comment" : "" + } ] }, { "classname" : "org.apache.spark.sql.catalyst.plans.logical.DeleteFromTable", "tableDescs" : [ { @@ -236,15 +291,18 @@ "actionTypeDesc" : { "fieldName" : null, "fieldExtractor" : null, - "actionType" : "UPDATE" + "actionType" : "UPDATE", + "comment" : "" }, "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "QUERY", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.catalyst.plans.logical.DescribeRelation", "tableDescs" : [ { @@ -255,10 +313,12 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : true, - "setCurrentDatabaseIfMissing" : true + "setCurrentDatabaseIfMissing" : true, + "comment" : "" } ], "opType" : "DESCTABLE", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.catalyst.plans.logical.DropColumns", "tableDescs" : [ { @@ -269,10 +329,17 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "ALTERTABLE_ADDCOLS", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ { + "fieldName" : "child", + "fieldExtractor" : "ResolvedTableURIExtractor", + "isInput" : false, + "comment" : "" + } ] }, { "classname" : "org.apache.spark.sql.catalyst.plans.logical.DropPartitions", "tableDescs" : [ { @@ -283,10 +350,12 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "ALTERTABLE_DROPPARTS", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.catalyst.plans.logical.DropTable", "tableDescs" : [ { @@ -297,7 +366,8 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" }, { "fieldName" : "child", "fieldExtractor" : "ResolvedTableTableExtractor", @@ -306,31 +376,12 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "DROPTABLE", - "queryDescs" : [ ] -}, { - "classname" : "org.apache.spark.sql.catalyst.plans.logical.MergeIntoIcebergTable", - "tableDescs" : [ { - "fieldName" : "targetTable", - "fieldExtractor" : "DataSourceV2RelationTableExtractor", - "columnDesc" : null, - "actionTypeDesc" : { - "fieldName" : null, - "fieldExtractor" : null, - "actionType" : "UPDATE" - }, - "tableTypeDesc" : null, - "catalogDesc" : null, - "isInput" : false, - "setCurrentDatabaseIfMissing" : false - } ], - "opType" : "QUERY", - "queryDescs" : [ { - "fieldName" : "sourceTable", - "fieldExtractor" : "LogicalPlanQueryExtractor" - } ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.catalyst.plans.logical.MergeIntoTable", "tableDescs" : [ { @@ -340,18 +391,22 @@ "actionTypeDesc" : { "fieldName" : null, "fieldExtractor" : null, - "actionType" : "UPDATE" + "actionType" : "UPDATE", + "comment" : "" }, "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "QUERY", "queryDescs" : [ { "fieldName" : "sourceTable", - "fieldExtractor" : "LogicalPlanQueryExtractor" - } ] + "fieldExtractor" : "LogicalPlanQueryExtractor", + "comment" : "" + } ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.catalyst.plans.logical.OverwriteByExpression", "tableDescs" : [ { @@ -361,17 +416,26 @@ "actionTypeDesc" : { "fieldName" : null, "fieldExtractor" : null, - "actionType" : "INSERT_OVERWRITE" + "actionType" : "INSERT_OVERWRITE", + "comment" : "" }, "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "QUERY", "queryDescs" : [ { "fieldName" : "query", - "fieldExtractor" : "LogicalPlanQueryExtractor" + "fieldExtractor" : "LogicalPlanQueryExtractor", + "comment" : "" + } ], + "uriDescs" : [ { + "fieldName" : "table", + "fieldExtractor" : "DataSourceV2RelationURIExtractor", + "isInput" : false, + "comment" : "" } ] }, { "classname" : "org.apache.spark.sql.catalyst.plans.logical.OverwritePartitionsDynamic", @@ -382,17 +446,26 @@ "actionTypeDesc" : { "fieldName" : null, "fieldExtractor" : null, - "actionType" : "INSERT_OVERWRITE" + "actionType" : "INSERT_OVERWRITE", + "comment" : "" }, "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "QUERY", "queryDescs" : [ { "fieldName" : "query", - "fieldExtractor" : "LogicalPlanQueryExtractor" + "fieldExtractor" : "LogicalPlanQueryExtractor", + "comment" : "" + } ], + "uriDescs" : [ { + "fieldName" : "table", + "fieldExtractor" : "DataSourceV2RelationURIExtractor", + "isInput" : false, + "comment" : "" } ] }, { "classname" : "org.apache.spark.sql.catalyst.plans.logical.RefreshTable", @@ -404,10 +477,12 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : true, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "QUERY", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.catalyst.plans.logical.RenameColumn", "tableDescs" : [ { @@ -418,10 +493,17 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "ALTERTABLE_RENAMECOL", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ { + "fieldName" : "child", + "fieldExtractor" : "ResolvedTableURIExtractor", + "isInput" : false, + "comment" : "" + } ] }, { "classname" : "org.apache.spark.sql.catalyst.plans.logical.RenamePartitions", "tableDescs" : [ { @@ -432,10 +514,12 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "ALTERTABLE_RENAMEPART", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.catalyst.plans.logical.RepairTable", "tableDescs" : [ { @@ -446,10 +530,12 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "MSCK", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.catalyst.plans.logical.ReplaceColumns", "tableDescs" : [ { @@ -460,10 +546,17 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "ALTERTABLE_REPLACECOLS", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ { + "fieldName" : "child", + "fieldExtractor" : "ResolvedTableURIExtractor", + "isInput" : false, + "comment" : "" + } ] }, { "classname" : "org.apache.spark.sql.catalyst.plans.logical.ReplaceData", "tableDescs" : [ { @@ -473,18 +566,22 @@ "actionTypeDesc" : { "fieldName" : null, "fieldExtractor" : null, - "actionType" : "UPDATE" + "actionType" : "UPDATE", + "comment" : "" }, "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "QUERY", "queryDescs" : [ { "fieldName" : "query", - "fieldExtractor" : "LogicalPlanQueryExtractor" - } ] + "fieldExtractor" : "LogicalPlanQueryExtractor", + "comment" : "" + } ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.catalyst.plans.logical.ReplaceTable", "tableDescs" : [ { @@ -495,7 +592,8 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" }, { "fieldName" : "tableName", "fieldExtractor" : "IdentifierTableExtractor", @@ -504,10 +602,12 @@ "tableTypeDesc" : null, "catalogDesc" : { "fieldName" : "catalog", - "fieldExtractor" : "CatalogPluginCatalogExtractor" + "fieldExtractor" : "CatalogPluginCatalogExtractor", + "comment" : "" }, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" }, { "fieldName" : "child", "fieldExtractor" : "ResolvedDbObjectNameTableExtractor", @@ -516,10 +616,27 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "CREATETABLE", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ { + "fieldName" : "tableSpec", + "fieldExtractor" : "TableSpecURIExtractor", + "isInput" : false, + "comment" : "" + }, { + "fieldName" : "properties", + "fieldExtractor" : "PropertiesLocationUriExtractor", + "isInput" : false, + "comment" : "" + }, { + "fieldName" : "tableName", + "fieldExtractor" : "IdentifierURIExtractor", + "isInput" : false, + "comment" : "" + } ] }, { "classname" : "org.apache.spark.sql.catalyst.plans.logical.ReplaceTableAsSelect", "tableDescs" : [ { @@ -530,7 +647,8 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" }, { "fieldName" : "tableName", "fieldExtractor" : "IdentifierTableExtractor", @@ -539,10 +657,12 @@ "tableTypeDesc" : null, "catalogDesc" : { "fieldName" : "catalog", - "fieldExtractor" : "CatalogPluginCatalogExtractor" + "fieldExtractor" : "CatalogPluginCatalogExtractor", + "comment" : "" }, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" }, { "fieldName" : "name", "fieldExtractor" : "ResolvedDbObjectNameTableExtractor", @@ -551,12 +671,46 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "CREATETABLE_AS_SELECT", "queryDescs" : [ { "fieldName" : "query", - "fieldExtractor" : "LogicalPlanQueryExtractor" + "fieldExtractor" : "LogicalPlanQueryExtractor", + "comment" : "" + } ], + "uriDescs" : [ { + "fieldName" : "tableSpec", + "fieldExtractor" : "TableSpecURIExtractor", + "isInput" : false, + "comment" : "" + }, { + "fieldName" : "properties", + "fieldExtractor" : "PropertiesLocationUriExtractor", + "isInput" : false, + "comment" : "" + } ] +}, { + "classname" : "org.apache.spark.sql.catalyst.plans.logical.SetTableProperties", + "tableDescs" : [ { + "fieldName" : "table", + "fieldExtractor" : "ResolvedTableTableExtractor", + "columnDesc" : null, + "actionTypeDesc" : null, + "tableTypeDesc" : null, + "catalogDesc" : null, + "isInput" : false, + "setCurrentDatabaseIfMissing" : false, + "comment" : "" + } ], + "opType" : "ALTERTABLE_PROPERTIES", + "queryDescs" : [ ], + "uriDescs" : [ { + "fieldName" : "table", + "fieldExtractor" : "ResolvedTableURIExtractor", + "isInput" : false, + "comment" : "" } ] }, { "classname" : "org.apache.spark.sql.catalyst.plans.logical.ShowCreateTable", @@ -568,10 +722,12 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : true, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "SHOW_CREATETABLE", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.catalyst.plans.logical.ShowTableProperties", "tableDescs" : [ { @@ -582,10 +738,12 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : true, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "SHOW_TBLPROPERTIES", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.catalyst.plans.logical.TruncatePartition", "tableDescs" : [ { @@ -596,10 +754,12 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "ALTERTABLE_DROPPARTS", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.catalyst.plans.logical.TruncateTable", "tableDescs" : [ { @@ -610,67 +770,88 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "TRUNCATETABLE", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { - "classname" : "org.apache.spark.sql.catalyst.plans.logical.UnresolvedMergeIntoIcebergTable", + "classname" : "org.apache.spark.sql.catalyst.plans.logical.UpdateTable", "tableDescs" : [ { - "fieldName" : "targetTable", + "fieldName" : "table", "fieldExtractor" : "DataSourceV2RelationTableExtractor", "columnDesc" : null, "actionTypeDesc" : { "fieldName" : null, "fieldExtractor" : null, - "actionType" : "UPDATE" + "actionType" : "UPDATE", + "comment" : "" }, "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "QUERY", - "queryDescs" : [ { - "fieldName" : "sourceTable", - "fieldExtractor" : "LogicalPlanQueryExtractor" + "queryDescs" : [ ], + "uriDescs" : [ ] +}, { + "classname" : "org.apache.spark.sql.execution.command.AddArchivesCommand", + "tableDescs" : [ ], + "opType" : "ADD", + "queryDescs" : [ ], + "uriDescs" : [ { + "fieldName" : "paths", + "fieldExtractor" : "StringSeqURIExtractor", + "isInput" : true, + "comment" : "" } ] }, { - "classname" : "org.apache.spark.sql.catalyst.plans.logical.UpdateIcebergTable", - "tableDescs" : [ { - "fieldName" : "table", - "fieldExtractor" : "DataSourceV2RelationTableExtractor", - "columnDesc" : null, - "actionTypeDesc" : { - "fieldName" : null, - "fieldExtractor" : null, - "actionType" : "UPDATE" - }, - "tableTypeDesc" : null, - "catalogDesc" : null, - "isInput" : false, - "setCurrentDatabaseIfMissing" : false - } ], - "opType" : "QUERY", - "queryDescs" : [ ] + "classname" : "org.apache.spark.sql.execution.command.AddFileCommand", + "tableDescs" : [ ], + "opType" : "ADD", + "queryDescs" : [ ], + "uriDescs" : [ { + "fieldName" : "path", + "fieldExtractor" : "StringURIExtractor", + "isInput" : true, + "comment" : "" + } ] }, { - "classname" : "org.apache.spark.sql.catalyst.plans.logical.UpdateTable", - "tableDescs" : [ { - "fieldName" : "table", - "fieldExtractor" : "DataSourceV2RelationTableExtractor", - "columnDesc" : null, - "actionTypeDesc" : { - "fieldName" : null, - "fieldExtractor" : null, - "actionType" : "UPDATE" - }, - "tableTypeDesc" : null, - "catalogDesc" : null, - "isInput" : false, - "setCurrentDatabaseIfMissing" : false - } ], - "opType" : "QUERY", - "queryDescs" : [ ] + "classname" : "org.apache.spark.sql.execution.command.AddFilesCommand", + "tableDescs" : [ ], + "opType" : "ADD", + "queryDescs" : [ ], + "uriDescs" : [ { + "fieldName" : "paths", + "fieldExtractor" : "StringSeqURIExtractor", + "isInput" : true, + "comment" : "" + } ] +}, { + "classname" : "org.apache.spark.sql.execution.command.AddJarCommand", + "tableDescs" : [ ], + "opType" : "ADD", + "queryDescs" : [ ], + "uriDescs" : [ { + "fieldName" : "path", + "fieldExtractor" : "StringURIExtractor", + "isInput" : true, + "comment" : "" + } ] +}, { + "classname" : "org.apache.spark.sql.execution.command.AddJarsCommand", + "tableDescs" : [ ], + "opType" : "ADD", + "queryDescs" : [ ], + "uriDescs" : [ { + "fieldName" : "paths", + "fieldExtractor" : "StringSeqURIExtractor", + "isInput" : true, + "comment" : "" + } ] }, { "classname" : "org.apache.spark.sql.execution.command.AlterTableAddColumnsCommand", "tableDescs" : [ { @@ -678,16 +859,19 @@ "fieldExtractor" : "TableIdentifierTableExtractor", "columnDesc" : { "fieldName" : "colsToAdd", - "fieldExtractor" : "StructFieldSeqColumnExtractor" + "fieldExtractor" : "StructFieldSeqColumnExtractor", + "comment" : "" }, "actionTypeDesc" : null, "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "ALTERTABLE_ADDCOLS", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.execution.command.AlterTableAddPartitionCommand", "tableDescs" : [ { @@ -695,16 +879,24 @@ "fieldExtractor" : "TableIdentifierTableExtractor", "columnDesc" : { "fieldName" : "partitionSpecsAndLocs", - "fieldExtractor" : "PartitionLocsSeqColumnExtractor" + "fieldExtractor" : "PartitionLocsSeqColumnExtractor", + "comment" : "" }, "actionTypeDesc" : null, "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "ALTERTABLE_ADDPARTS", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ { + "fieldName" : "partitionSpecsAndLocs", + "fieldExtractor" : "PartitionLocsSeqURIExtractor", + "isInput" : false, + "comment" : "" + } ] }, { "classname" : "org.apache.spark.sql.execution.command.AlterTableChangeColumnCommand", "tableDescs" : [ { @@ -712,16 +904,19 @@ "fieldExtractor" : "TableIdentifierTableExtractor", "columnDesc" : { "fieldName" : "columnName", - "fieldExtractor" : "StringColumnExtractor" + "fieldExtractor" : "StringColumnExtractor", + "comment" : "" }, "actionTypeDesc" : null, "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "ALTERTABLE_REPLACECOLS", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.execution.command.AlterTableDropPartitionCommand", "tableDescs" : [ { @@ -729,16 +924,19 @@ "fieldExtractor" : "TableIdentifierTableExtractor", "columnDesc" : { "fieldName" : "specs", - "fieldExtractor" : "PartitionSeqColumnExtractor" + "fieldExtractor" : "PartitionSeqColumnExtractor", + "comment" : "" }, "actionTypeDesc" : null, "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "ALTERTABLE_DROPPARTS", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.execution.command.AlterTableRecoverPartitionsCommand", "tableDescs" : [ { @@ -749,10 +947,12 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "MSCK", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.execution.command.AlterTableRenameCommand", "tableDescs" : [ { @@ -763,14 +963,17 @@ "tableTypeDesc" : { "fieldName" : "oldName", "fieldExtractor" : "TableIdentifierTableTypeExtractor", - "skipTypes" : [ "TEMP_VIEW" ] + "skipTypes" : [ "TEMP_VIEW" ], + "comment" : "" }, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "ALTERTABLE_RENAME", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.execution.command.AlterTableRenamePartitionCommand", "tableDescs" : [ { @@ -778,16 +981,19 @@ "fieldExtractor" : "TableIdentifierTableExtractor", "columnDesc" : { "fieldName" : "oldPartition", - "fieldExtractor" : "PartitionColumnExtractor" + "fieldExtractor" : "PartitionColumnExtractor", + "comment" : "" }, "actionTypeDesc" : null, "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "ALTERTABLE_RENAMEPART", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.execution.command.AlterTableSerDePropertiesCommand", "tableDescs" : [ { @@ -795,16 +1001,19 @@ "fieldExtractor" : "TableIdentifierTableExtractor", "columnDesc" : { "fieldName" : "partSpec", - "fieldExtractor" : "PartitionOptionColumnExtractor" + "fieldExtractor" : "PartitionOptionColumnExtractor", + "comment" : "" }, "actionTypeDesc" : null, "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "ALTERTABLE_SERDEPROPERTIES", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.execution.command.AlterTableSetLocationCommand", "tableDescs" : [ { @@ -812,16 +1021,24 @@ "fieldExtractor" : "TableIdentifierTableExtractor", "columnDesc" : { "fieldName" : "partitionSpec", - "fieldExtractor" : "PartitionOptionColumnExtractor" + "fieldExtractor" : "PartitionOptionColumnExtractor", + "comment" : "" }, "actionTypeDesc" : null, "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "ALTERTABLE_LOCATION", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ { + "fieldName" : "location", + "fieldExtractor" : "StringURIExtractor", + "isInput" : false, + "comment" : "" + } ] }, { "classname" : "org.apache.spark.sql.execution.command.AlterTableSetPropertiesCommand", "tableDescs" : [ { @@ -832,10 +1049,12 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "ALTERTABLE_PROPERTIES", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.execution.command.AlterTableUnsetPropertiesCommand", "tableDescs" : [ { @@ -846,10 +1065,12 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "ALTERTABLE_PROPERTIES", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.execution.command.AlterViewAsCommand", "tableDescs" : [ { @@ -860,17 +1081,21 @@ "tableTypeDesc" : { "fieldName" : "name", "fieldExtractor" : "TableIdentifierTableTypeExtractor", - "skipTypes" : [ "TEMP_VIEW" ] + "skipTypes" : [ "TEMP_VIEW" ], + "comment" : "" }, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "ALTERVIEW_AS", "queryDescs" : [ { "fieldName" : "query", - "fieldExtractor" : "LogicalPlanQueryExtractor" - } ] + "fieldExtractor" : "LogicalPlanQueryExtractor", + "comment" : "" + } ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.execution.command.AnalyzeColumnCommand", "tableDescs" : [ { @@ -881,34 +1106,40 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" }, { "fieldName" : "tableIdent", "fieldExtractor" : "TableIdentifierTableExtractor", "columnDesc" : { "fieldName" : "columnNames", - "fieldExtractor" : "StringSeqColumnExtractor" + "fieldExtractor" : "StringSeqColumnExtractor", + "comment" : "" }, "actionTypeDesc" : null, "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : true, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" }, { "fieldName" : "tableIdent", "fieldExtractor" : "TableIdentifierTableExtractor", "columnDesc" : { "fieldName" : "columnNames", - "fieldExtractor" : "StringSeqOptionColumnExtractor" + "fieldExtractor" : "StringSeqOptionColumnExtractor", + "comment" : "" }, "actionTypeDesc" : null, "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : true, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "ALTERTABLE_PROPERTIES", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.execution.command.AnalyzePartitionCommand", "tableDescs" : [ { @@ -919,22 +1150,26 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" }, { "fieldName" : "tableIdent", "fieldExtractor" : "TableIdentifierTableExtractor", "columnDesc" : { "fieldName" : "partitionSpec", - "fieldExtractor" : "PartitionColumnExtractor" + "fieldExtractor" : "PartitionColumnExtractor", + "comment" : "" }, "actionTypeDesc" : null, "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : true, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "ALTERTABLE_PROPERTIES", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.execution.command.AnalyzeTableCommand", "tableDescs" : [ { @@ -945,7 +1180,8 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" }, { "fieldName" : "tableIdent", "fieldExtractor" : "TableIdentifierTableExtractor", @@ -954,18 +1190,22 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : true, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "ALTERTABLE_PROPERTIES", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.execution.command.CacheTableCommand", "tableDescs" : [ ], "opType" : "CREATEVIEW", "queryDescs" : [ { "fieldName" : "plan", - "fieldExtractor" : "LogicalPlanOptionQueryExtractor" - } ] + "fieldExtractor" : "LogicalPlanOptionQueryExtractor", + "comment" : "" + } ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.execution.command.CreateDataSourceTableAsSelectCommand", "tableDescs" : [ { @@ -976,12 +1216,20 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : true + "setCurrentDatabaseIfMissing" : true, + "comment" : "" } ], "opType" : "CREATETABLE_AS_SELECT", "queryDescs" : [ { "fieldName" : "query", - "fieldExtractor" : "LogicalPlanQueryExtractor" + "fieldExtractor" : "LogicalPlanQueryExtractor", + "comment" : "" + } ], + "uriDescs" : [ { + "fieldName" : "table", + "fieldExtractor" : "CatalogTableURIExtractor", + "isInput" : false, + "comment" : "" } ] }, { "classname" : "org.apache.spark.sql.execution.command.CreateDataSourceTableCommand", @@ -993,10 +1241,17 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : true + "setCurrentDatabaseIfMissing" : true, + "comment" : "" } ], "opType" : "CREATETABLE", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ { + "fieldName" : "table", + "fieldExtractor" : "CatalogTableURIExtractor", + "isInput" : false, + "comment" : "" + } ] }, { "classname" : "org.apache.spark.sql.execution.command.CreateTableCommand", "tableDescs" : [ { @@ -1007,10 +1262,17 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : true + "setCurrentDatabaseIfMissing" : true, + "comment" : "" } ], "opType" : "CREATETABLE", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ { + "fieldName" : "table", + "fieldExtractor" : "CatalogTableURIExtractor", + "isInput" : false, + "comment" : "" + } ] }, { "classname" : "org.apache.spark.sql.execution.command.CreateTableLikeCommand", "tableDescs" : [ { @@ -1021,7 +1283,8 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : true + "setCurrentDatabaseIfMissing" : true, + "comment" : "" }, { "fieldName" : "sourceTable", "fieldExtractor" : "TableIdentifierTableExtractor", @@ -1030,10 +1293,17 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : true, - "setCurrentDatabaseIfMissing" : true + "setCurrentDatabaseIfMissing" : true, + "comment" : "" } ], "opType" : "CREATETABLE", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ { + "fieldName" : "fileFormat", + "fieldExtractor" : "CatalogStorageFormatURIExtractor", + "isInput" : false, + "comment" : "" + } ] }, { "classname" : "org.apache.spark.sql.execution.command.CreateViewCommand", "tableDescs" : [ { @@ -1044,20 +1314,25 @@ "tableTypeDesc" : { "fieldName" : "viewType", "fieldExtractor" : "ViewTypeTableTypeExtractor", - "skipTypes" : [ "TEMP_VIEW", "GLOBAL_TEMP_VIEW" ] + "skipTypes" : [ "TEMP_VIEW", "GLOBAL_TEMP_VIEW" ], + "comment" : "" }, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "CREATEVIEW", "queryDescs" : [ { "fieldName" : "plan", - "fieldExtractor" : "LogicalPlanQueryExtractor" + "fieldExtractor" : "LogicalPlanQueryExtractor", + "comment" : "" }, { "fieldName" : "child", - "fieldExtractor" : "LogicalPlanQueryExtractor" - } ] + "fieldExtractor" : "LogicalPlanQueryExtractor", + "comment" : "" + } ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.execution.command.DescribeColumnCommand", "tableDescs" : [ { @@ -1065,16 +1340,19 @@ "fieldExtractor" : "TableIdentifierTableExtractor", "columnDesc" : { "fieldName" : "colNameParts", - "fieldExtractor" : "StringSeqLastColumnExtractor" + "fieldExtractor" : "StringSeqLastColumnExtractor", + "comment" : "" }, "actionTypeDesc" : null, "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : true, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "DESCTABLE", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.execution.command.DescribeTableCommand", "tableDescs" : [ { @@ -1082,16 +1360,19 @@ "fieldExtractor" : "TableIdentifierTableExtractor", "columnDesc" : { "fieldName" : "partitionSpec", - "fieldExtractor" : "PartitionColumnExtractor" + "fieldExtractor" : "PartitionColumnExtractor", + "comment" : "" }, "actionTypeDesc" : null, "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : true, - "setCurrentDatabaseIfMissing" : true + "setCurrentDatabaseIfMissing" : true, + "comment" : "" } ], "opType" : "DESCTABLE", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.execution.command.DropTableCommand", "tableDescs" : [ { @@ -1102,21 +1383,31 @@ "tableTypeDesc" : { "fieldName" : "tableName", "fieldExtractor" : "TableIdentifierTableTypeExtractor", - "skipTypes" : [ "TEMP_VIEW" ] + "skipTypes" : [ "TEMP_VIEW" ], + "comment" : "" }, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "DROPTABLE", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.execution.command.InsertIntoDataSourceDirCommand", "tableDescs" : [ ], "opType" : "QUERY", "queryDescs" : [ { "fieldName" : "query", - "fieldExtractor" : "LogicalPlanQueryExtractor" + "fieldExtractor" : "LogicalPlanQueryExtractor", + "comment" : "" + } ], + "uriDescs" : [ { + "fieldName" : "storage", + "fieldExtractor" : "CatalogStorageFormatURIExtractor", + "isInput" : false, + "comment" : "" } ] }, { "classname" : "org.apache.spark.sql.execution.command.LoadDataCommand", @@ -1125,20 +1416,29 @@ "fieldExtractor" : "TableIdentifierTableExtractor", "columnDesc" : { "fieldName" : "partition", - "fieldExtractor" : "PartitionOptionColumnExtractor" + "fieldExtractor" : "PartitionOptionColumnExtractor", + "comment" : "" }, "actionTypeDesc" : { "fieldName" : "isOverwrite", "fieldExtractor" : "OverwriteOrInsertActionTypeExtractor", - "actionType" : null + "actionType" : null, + "comment" : "" }, "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "LOAD", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ { + "fieldName" : "path", + "fieldExtractor" : "StringURIExtractor", + "isInput" : true, + "comment" : "" + } ] }, { "classname" : "org.apache.spark.sql.execution.command.RefreshTableCommand", "tableDescs" : [ { @@ -1149,10 +1449,12 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : true, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "QUERY", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.execution.command.RepairTableCommand", "tableDescs" : [ { @@ -1163,10 +1465,12 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "MSCK", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.execution.command.ShowColumnsCommand", "tableDescs" : [ { @@ -1177,10 +1481,12 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : true, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "SHOWCOLUMNS", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.execution.command.ShowCreateTableAsSerdeCommand", "tableDescs" : [ { @@ -1191,10 +1497,12 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : true, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "SHOW_CREATETABLE", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.execution.command.ShowCreateTableCommand", "tableDescs" : [ { @@ -1205,10 +1513,12 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : true, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "SHOW_CREATETABLE", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.execution.command.ShowPartitionsCommand", "tableDescs" : [ { @@ -1216,16 +1526,19 @@ "fieldExtractor" : "TableIdentifierTableExtractor", "columnDesc" : { "fieldName" : "spec", - "fieldExtractor" : "PartitionOptionColumnExtractor" + "fieldExtractor" : "PartitionOptionColumnExtractor", + "comment" : "" }, "actionTypeDesc" : null, "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : true, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "SHOWPARTITIONS", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.execution.command.ShowTablePropertiesCommand", "tableDescs" : [ { @@ -1236,10 +1549,12 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : true, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "SHOW_TBLPROPERTIES", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.execution.command.TruncateTableCommand", "tableDescs" : [ { @@ -1247,16 +1562,19 @@ "fieldExtractor" : "TableIdentifierTableExtractor", "columnDesc" : { "fieldName" : "partitionSpec", - "fieldExtractor" : "PartitionOptionColumnExtractor" + "fieldExtractor" : "PartitionOptionColumnExtractor", + "comment" : "" }, "actionTypeDesc" : null, "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "TRUNCATETABLE", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.execution.datasources.CreateTable", "tableDescs" : [ { @@ -1267,18 +1585,27 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "CREATETABLE", "queryDescs" : [ { "fieldName" : "query", - "fieldExtractor" : "LogicalPlanOptionQueryExtractor" + "fieldExtractor" : "LogicalPlanOptionQueryExtractor", + "comment" : "" + } ], + "uriDescs" : [ { + "fieldName" : "tableDesc", + "fieldExtractor" : "CatalogTableURIExtractor", + "isInput" : false, + "comment" : "" } ] }, { "classname" : "org.apache.spark.sql.execution.datasources.CreateTempViewUsing", "tableDescs" : [ ], "opType" : "CREATEVIEW", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.execution.datasources.InsertIntoDataSourceCommand", "tableDescs" : [ { @@ -1288,18 +1615,22 @@ "actionTypeDesc" : { "fieldName" : "overwrite", "fieldExtractor" : "OverwriteOrInsertActionTypeExtractor", - "actionType" : null + "actionType" : null, + "comment" : "" }, "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "QUERY", "queryDescs" : [ { "fieldName" : "query", - "fieldExtractor" : "LogicalPlanQueryExtractor" - } ] + "fieldExtractor" : "LogicalPlanQueryExtractor", + "comment" : "" + } ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand", "tableDescs" : [ { @@ -1307,23 +1638,28 @@ "fieldExtractor" : "CatalogTableOptionTableExtractor", "columnDesc" : { "fieldName" : "outputColumnNames", - "fieldExtractor" : "StringSeqColumnExtractor" + "fieldExtractor" : "StringSeqColumnExtractor", + "comment" : "" }, "actionTypeDesc" : { "fieldName" : "mode", "fieldExtractor" : "SaveModeActionTypeExtractor", - "actionType" : null + "actionType" : null, + "comment" : "" }, "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "QUERY", "queryDescs" : [ { "fieldName" : "query", - "fieldExtractor" : "LogicalPlanQueryExtractor" - } ] + "fieldExtractor" : "LogicalPlanQueryExtractor", + "comment" : "" + } ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.execution.datasources.RefreshTable", "tableDescs" : [ { @@ -1334,17 +1670,26 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : true, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "QUERY", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand", "tableDescs" : [ ], "opType" : "QUERY", "queryDescs" : [ { "fieldName" : "query", - "fieldExtractor" : "LogicalPlanQueryExtractor" + "fieldExtractor" : "LogicalPlanQueryExtractor", + "comment" : "" + } ], + "uriDescs" : [ { + "fieldName" : "options", + "fieldExtractor" : "PropertiesPathUriExtractor", + "isInput" : false, + "comment" : "" } ] }, { "classname" : "org.apache.spark.sql.hive.execution.CreateHiveTableAsSelectCommand", @@ -1353,18 +1698,27 @@ "fieldExtractor" : "CatalogTableTableExtractor", "columnDesc" : { "fieldName" : "outputColumnNames", - "fieldExtractor" : "StringSeqColumnExtractor" + "fieldExtractor" : "StringSeqColumnExtractor", + "comment" : "" }, "actionTypeDesc" : null, "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "CREATETABLE_AS_SELECT", "queryDescs" : [ { "fieldName" : "query", - "fieldExtractor" : "LogicalPlanQueryExtractor" + "fieldExtractor" : "LogicalPlanQueryExtractor", + "comment" : "" + } ], + "uriDescs" : [ { + "fieldName" : "tableDesc", + "fieldExtractor" : "CatalogTableURIExtractor", + "isInput" : false, + "comment" : "" } ] }, { "classname" : "org.apache.spark.sql.hive.execution.InsertIntoHiveDirCommand", @@ -1372,7 +1726,14 @@ "opType" : "QUERY", "queryDescs" : [ { "fieldName" : "query", - "fieldExtractor" : "LogicalPlanQueryExtractor" + "fieldExtractor" : "LogicalPlanQueryExtractor", + "comment" : "" + } ], + "uriDescs" : [ { + "fieldName" : "storage", + "fieldExtractor" : "CatalogStorageFormatURIExtractor", + "isInput" : false, + "comment" : "" } ] }, { "classname" : "org.apache.spark.sql.hive.execution.InsertIntoHiveTable", @@ -1381,23 +1742,28 @@ "fieldExtractor" : "CatalogTableTableExtractor", "columnDesc" : { "fieldName" : "outputColumnNames", - "fieldExtractor" : "StringSeqColumnExtractor" + "fieldExtractor" : "StringSeqColumnExtractor", + "comment" : "" }, "actionTypeDesc" : { "fieldName" : "overwrite", "fieldExtractor" : "OverwriteOrInsertActionTypeExtractor", - "actionType" : null + "actionType" : null, + "comment" : "" }, "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "QUERY", "queryDescs" : [ { "fieldName" : "query", - "fieldExtractor" : "LogicalPlanQueryExtractor" - } ] + "fieldExtractor" : "LogicalPlanQueryExtractor", + "comment" : "" + } ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.hive.execution.OptimizedCreateHiveTableAsSelectCommand", "tableDescs" : [ { @@ -1405,19 +1771,136 @@ "fieldExtractor" : "CatalogTableTableExtractor", "columnDesc" : { "fieldName" : "outputColumnNames", - "fieldExtractor" : "StringSeqColumnExtractor" + "fieldExtractor" : "StringSeqColumnExtractor", + "comment" : "" }, "actionTypeDesc" : null, "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "CREATETABLE_AS_SELECT", "queryDescs" : [ { "fieldName" : "query", - "fieldExtractor" : "LogicalPlanQueryExtractor" + "fieldExtractor" : "LogicalPlanQueryExtractor", + "comment" : "" + } ], + "uriDescs" : [ { + "fieldName" : "tableDesc", + "fieldExtractor" : "CatalogTableURIExtractor", + "isInput" : false, + "comment" : "" } ] +}, { + "classname" : "org.apache.spark.sql.catalyst.plans.logical.Call", + "tableDescs" : [ { + "fieldName" : "args", + "fieldExtractor" : "ExpressionSeqTableExtractor", + "columnDesc" : null, + "actionTypeDesc" : null, + "tableTypeDesc" : null, + "catalogDesc" : null, + "isInput" : false, + "setCurrentDatabaseIfMissing" : false, + "comment" : "Iceberg" + } ], + "opType" : "ALTERTABLE_PROPERTIES", + "queryDescs" : [ ], + "uriDescs" : [ ] +}, { + "classname" : "org.apache.spark.sql.catalyst.plans.logical.DeleteFromIcebergTable", + "tableDescs" : [ { + "fieldName" : "table", + "fieldExtractor" : "DataSourceV2RelationTableExtractor", + "columnDesc" : null, + "actionTypeDesc" : { + "fieldName" : null, + "fieldExtractor" : null, + "actionType" : "UPDATE", + "comment" : "" + }, + "tableTypeDesc" : null, + "catalogDesc" : null, + "isInput" : false, + "setCurrentDatabaseIfMissing" : false, + "comment" : "Iceberg" + } ], + "opType" : "QUERY", + "queryDescs" : [ ], + "uriDescs" : [ ] +}, { + "classname" : "org.apache.spark.sql.catalyst.plans.logical.MergeIntoIcebergTable", + "tableDescs" : [ { + "fieldName" : "targetTable", + "fieldExtractor" : "DataSourceV2RelationTableExtractor", + "columnDesc" : null, + "actionTypeDesc" : { + "fieldName" : null, + "fieldExtractor" : null, + "actionType" : "UPDATE", + "comment" : "" + }, + "tableTypeDesc" : null, + "catalogDesc" : null, + "isInput" : false, + "setCurrentDatabaseIfMissing" : false, + "comment" : "Iceberg" + } ], + "opType" : "QUERY", + "queryDescs" : [ { + "fieldName" : "sourceTable", + "fieldExtractor" : "LogicalPlanQueryExtractor", + "comment" : "" + } ], + "uriDescs" : [ ] +}, { + "classname" : "org.apache.spark.sql.catalyst.plans.logical.UnresolvedMergeIntoIcebergTable", + "tableDescs" : [ { + "fieldName" : "targetTable", + "fieldExtractor" : "DataSourceV2RelationTableExtractor", + "columnDesc" : null, + "actionTypeDesc" : { + "fieldName" : null, + "fieldExtractor" : null, + "actionType" : "UPDATE", + "comment" : "" + }, + "tableTypeDesc" : null, + "catalogDesc" : null, + "isInput" : false, + "setCurrentDatabaseIfMissing" : false, + "comment" : "Iceberg" + } ], + "opType" : "QUERY", + "queryDescs" : [ { + "fieldName" : "sourceTable", + "fieldExtractor" : "LogicalPlanQueryExtractor", + "comment" : "" + } ], + "uriDescs" : [ ] +}, { + "classname" : "org.apache.spark.sql.catalyst.plans.logical.UpdateIcebergTable", + "tableDescs" : [ { + "fieldName" : "table", + "fieldExtractor" : "DataSourceV2RelationTableExtractor", + "columnDesc" : null, + "actionTypeDesc" : { + "fieldName" : null, + "fieldExtractor" : null, + "actionType" : "UPDATE", + "comment" : "" + }, + "tableTypeDesc" : null, + "catalogDesc" : null, + "isInput" : false, + "setCurrentDatabaseIfMissing" : false, + "comment" : "Iceberg" + } ], + "opType" : "QUERY", + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.hudi.command.AlterHoodieTableAddColumnsCommand", "tableDescs" : [ { @@ -1425,16 +1908,19 @@ "fieldExtractor" : "TableIdentifierTableExtractor", "columnDesc" : { "fieldName" : "colsToAdd", - "fieldExtractor" : "StructFieldSeqColumnExtractor" + "fieldExtractor" : "StructFieldSeqColumnExtractor", + "comment" : "Hudi" }, "actionTypeDesc" : null, "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "Hudi" } ], "opType" : "ALTERTABLE_ADDCOLS", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.hudi.command.AlterHoodieTableChangeColumnCommand", "tableDescs" : [ { @@ -1442,16 +1928,19 @@ "fieldExtractor" : "TableIdentifierTableExtractor", "columnDesc" : { "fieldName" : "columnName", - "fieldExtractor" : "StringColumnExtractor" + "fieldExtractor" : "StringColumnExtractor", + "comment" : "Hudi" }, "actionTypeDesc" : null, "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "Hudi" } ], "opType" : "ALTERTABLE_REPLACECOLS", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.hudi.command.AlterHoodieTableDropPartitionCommand", "tableDescs" : [ { @@ -1459,16 +1948,19 @@ "fieldExtractor" : "TableIdentifierTableExtractor", "columnDesc" : { "fieldName" : "partitionSpecs", - "fieldExtractor" : "PartitionSeqColumnExtractor" + "fieldExtractor" : "PartitionSeqColumnExtractor", + "comment" : "Hudi" }, "actionTypeDesc" : null, "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "Hudi" } ], "opType" : "ALTERTABLE_DROPPARTS", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.hudi.command.AlterHoodieTableRenameCommand", "tableDescs" : [ { @@ -1479,14 +1971,17 @@ "tableTypeDesc" : { "fieldName" : "oldName", "fieldExtractor" : "TableIdentifierTableTypeExtractor", - "skipTypes" : [ "TEMP_VIEW" ] + "skipTypes" : [ "TEMP_VIEW" ], + "comment" : "Hudi" }, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "Hudi" } ], "opType" : "ALTERTABLE_RENAME", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.hudi.command.AlterTableCommand", "tableDescs" : [ { @@ -1497,10 +1992,12 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "Hudi" } ], "opType" : "ALTERTABLE_PROPERTIES", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.hudi.command.CallProcedureHoodieCommand", "tableDescs" : [ { @@ -1510,12 +2007,14 @@ "actionTypeDesc" : { "fieldName" : null, "fieldExtractor" : null, - "actionType" : "OTHER" + "actionType" : "OTHER", + "comment" : "" }, "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : true, - "setCurrentDatabaseIfMissing" : true + "setCurrentDatabaseIfMissing" : true, + "comment" : "" }, { "fieldName" : "clone", "fieldExtractor" : "HudiCallProcedureOutputTableExtractor", @@ -1523,15 +2022,29 @@ "actionTypeDesc" : { "fieldName" : null, "fieldExtractor" : null, - "actionType" : "UPDATE" + "actionType" : "UPDATE", + "comment" : "" }, "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : true + "setCurrentDatabaseIfMissing" : true, + "comment" : "" } ], "opType" : "QUERY", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] +}, { + "classname" : "org.apache.spark.sql.hudi.command.CompactionHoodiePathCommand", + "tableDescs" : [ ], + "opType" : "CREATETABLE", + "queryDescs" : [ ], + "uriDescs" : [ { + "fieldName" : "path", + "fieldExtractor" : "StringURIExtractor", + "isInput" : false, + "comment" : "Hudi" + } ] }, { "classname" : "org.apache.spark.sql.hudi.command.CompactionHoodieTableCommand", "tableDescs" : [ { @@ -1542,19 +2055,23 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false - }, { - "fieldName" : "table", - "fieldExtractor" : "CatalogTableTableExtractor", - "columnDesc" : null, - "actionTypeDesc" : null, - "tableTypeDesc" : null, - "catalogDesc" : null, - "isInput" : true, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "Hudi" } ], "opType" : "CREATETABLE", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] +}, { + "classname" : "org.apache.spark.sql.hudi.command.CompactionShowHoodiePathCommand", + "tableDescs" : [ ], + "opType" : "SHOW_TBLPROPERTIES", + "queryDescs" : [ ], + "uriDescs" : [ { + "fieldName" : "path", + "fieldExtractor" : "StringURIExtractor", + "isInput" : true, + "comment" : "Hudi" + } ] }, { "classname" : "org.apache.spark.sql.hudi.command.CompactionShowHoodieTableCommand", "tableDescs" : [ { @@ -1565,10 +2082,12 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : true, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "Hudi" } ], "opType" : "SHOW_TBLPROPERTIES", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.hudi.command.CreateHoodieTableAsSelectCommand", "tableDescs" : [ { @@ -1579,13 +2098,16 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "Hudi" } ], "opType" : "CREATETABLE_AS_SELECT", "queryDescs" : [ { "fieldName" : "query", - "fieldExtractor" : "LogicalPlanQueryExtractor" - } ] + "fieldExtractor" : "LogicalPlanQueryExtractor", + "comment" : "" + } ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.hudi.command.CreateHoodieTableCommand", "tableDescs" : [ { @@ -1596,10 +2118,12 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "Hudi" } ], "opType" : "CREATETABLE", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.hudi.command.CreateHoodieTableLikeCommand", "tableDescs" : [ { @@ -1610,7 +2134,8 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : true + "setCurrentDatabaseIfMissing" : true, + "comment" : "Hudi" }, { "fieldName" : "sourceTable", "fieldExtractor" : "TableIdentifierTableExtractor", @@ -1619,10 +2144,12 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : true, - "setCurrentDatabaseIfMissing" : true + "setCurrentDatabaseIfMissing" : true, + "comment" : "Hudi" } ], "opType" : "CREATETABLE", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.hudi.command.CreateIndexCommand", "tableDescs" : [ { @@ -1633,10 +2160,12 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "Hudi" } ], "opType" : "CREATEINDEX", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.hudi.command.DeleteHoodieTableCommand", "tableDescs" : [ { @@ -1646,15 +2175,18 @@ "actionTypeDesc" : { "fieldName" : null, "fieldExtractor" : null, - "actionType" : "UPDATE" + "actionType" : "UPDATE", + "comment" : "Hudi" }, "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "Hudi" } ], "opType" : "QUERY", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.hudi.command.DropHoodieTableCommand", "tableDescs" : [ { @@ -1665,14 +2197,17 @@ "tableTypeDesc" : { "fieldName" : "tableIdentifier", "fieldExtractor" : "TableIdentifierTableTypeExtractor", - "skipTypes" : [ "TEMP_VIEW" ] + "skipTypes" : [ "TEMP_VIEW" ], + "comment" : "Hudi" }, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "DROPTABLE", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.hudi.command.DropIndexCommand", "tableDescs" : [ { @@ -1683,10 +2218,12 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "Hudi" } ], "opType" : "DROPINDEX", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.hudi.command.InsertIntoHoodieTableCommand", "tableDescs" : [ { @@ -1696,18 +2233,22 @@ "actionTypeDesc" : { "fieldName" : "overwrite", "fieldExtractor" : "OverwriteOrInsertActionTypeExtractor", - "actionType" : null + "actionType" : null, + "comment" : "Hudi" }, "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "Hudi" } ], "opType" : "QUERY", "queryDescs" : [ { "fieldName" : "query", - "fieldExtractor" : "LogicalPlanQueryExtractor" - } ] + "fieldExtractor" : "LogicalPlanQueryExtractor", + "comment" : "" + } ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.hudi.command.MergeIntoHoodieTableCommand", "tableDescs" : [ { @@ -1717,18 +2258,22 @@ "actionTypeDesc" : { "fieldName" : null, "fieldExtractor" : null, - "actionType" : "UPDATE" + "actionType" : "UPDATE", + "comment" : "Hudi" }, "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "Hudi" } ], "opType" : "QUERY", "queryDescs" : [ { "fieldName" : "mergeInto", - "fieldExtractor" : "HudiMergeIntoSourceTableExtractor" - } ] + "fieldExtractor" : "HudiMergeIntoSourceTableExtractor", + "comment" : "Hudi" + } ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.hudi.command.RefreshIndexCommand", "tableDescs" : [ { @@ -1739,10 +2284,12 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "Hudi" } ], "opType" : "ALTERINDEX_REBUILD", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.hudi.command.RepairHoodieTableCommand", "tableDescs" : [ { @@ -1753,10 +2300,12 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "" } ], "opType" : "MSCK", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.hudi.command.ShowHoodieTablePartitionsCommand", "tableDescs" : [ { @@ -1764,16 +2313,19 @@ "fieldExtractor" : "TableIdentifierTableExtractor", "columnDesc" : { "fieldName" : "specOpt", - "fieldExtractor" : "PartitionOptionColumnExtractor" + "fieldExtractor" : "PartitionOptionColumnExtractor", + "comment" : "Hudi" }, "actionTypeDesc" : null, "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : true, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "Hudi" } ], "opType" : "SHOWPARTITIONS", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.hudi.command.ShowIndexesCommand", "tableDescs" : [ { @@ -1784,10 +2336,12 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : true, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "Hudi" } ], "opType" : "SHOWINDEXES", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.hudi.command.Spark31AlterTableCommand", "tableDescs" : [ { @@ -1798,10 +2352,12 @@ "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "Hudi" } ], "opType" : "ALTERTABLE_PROPERTIES", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.hudi.command.TruncateHoodieTableCommand", "tableDescs" : [ { @@ -1809,16 +2365,19 @@ "fieldExtractor" : "TableIdentifierTableExtractor", "columnDesc" : { "fieldName" : "partitionSpec", - "fieldExtractor" : "PartitionOptionColumnExtractor" + "fieldExtractor" : "PartitionOptionColumnExtractor", + "comment" : "Hudi" }, "actionTypeDesc" : null, "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "Hudi" } ], "opType" : "TRUNCATETABLE", - "queryDescs" : [ ] + "queryDescs" : [ ], + "uriDescs" : [ ] }, { "classname" : "org.apache.spark.sql.hudi.command.UpdateHoodieTableCommand", "tableDescs" : [ { @@ -1828,13 +2387,180 @@ "actionTypeDesc" : { "fieldName" : null, "fieldExtractor" : null, - "actionType" : "UPDATE" + "actionType" : "UPDATE", + "comment" : "Hudi" + }, + "tableTypeDesc" : null, + "catalogDesc" : null, + "isInput" : false, + "setCurrentDatabaseIfMissing" : false, + "comment" : "Hudi" + } ], + "opType" : "QUERY", + "queryDescs" : [ ], + "uriDescs" : [ ] +}, { + "classname" : "io.delta.tables.execution.VacuumTableCommand", + "tableDescs" : [ { + "fieldName" : "child", + "fieldExtractor" : "ResolvedTableTableExtractor", + "columnDesc" : null, + "actionTypeDesc" : null, + "tableTypeDesc" : null, + "catalogDesc" : null, + "isInput" : false, + "setCurrentDatabaseIfMissing" : false, + "comment" : "Delta" + }, { + "fieldName" : "table", + "fieldExtractor" : "TableIdentifierOptionTableExtractor", + "columnDesc" : null, + "actionTypeDesc" : null, + "tableTypeDesc" : null, + "catalogDesc" : null, + "isInput" : false, + "setCurrentDatabaseIfMissing" : false, + "comment" : "Delta" + } ], + "opType" : "MSCK", + "queryDescs" : [ ], + "uriDescs" : [ { + "fieldName" : "child", + "fieldExtractor" : "ResolvedTableURIExtractor", + "isInput" : false, + "comment" : "Delta" + }, { + "fieldName" : "table", + "fieldExtractor" : "TableIdentifierOptionURIExtractor", + "isInput" : false, + "comment" : "Delta" + }, { + "fieldName" : "path", + "fieldExtractor" : "StringURIExtractor", + "isInput" : false, + "comment" : "Delta" + } ] +}, { + "classname" : "org.apache.spark.sql.delta.commands.DeleteCommand", + "tableDescs" : [ { + "fieldName" : "target", + "fieldExtractor" : "SubqueryAliasTableExtractor", + "columnDesc" : null, + "actionTypeDesc" : { + "fieldName" : null, + "fieldExtractor" : null, + "actionType" : "UPDATE", + "comment" : "Delta" + }, + "tableTypeDesc" : null, + "catalogDesc" : null, + "isInput" : false, + "setCurrentDatabaseIfMissing" : false, + "comment" : "Delta" + } ], + "opType" : "QUERY", + "queryDescs" : [ ], + "uriDescs" : [ { + "fieldName" : "target", + "fieldExtractor" : "SubqueryAliasURIExtractor", + "isInput" : false, + "comment" : "Delta" + } ] +}, { + "classname" : "org.apache.spark.sql.delta.commands.MergeIntoCommand", + "tableDescs" : [ { + "fieldName" : "target", + "fieldExtractor" : "SubqueryAliasTableExtractor", + "columnDesc" : null, + "actionTypeDesc" : { + "fieldName" : null, + "fieldExtractor" : null, + "actionType" : "UPDATE", + "comment" : "Delta" }, "tableTypeDesc" : null, "catalogDesc" : null, "isInput" : false, - "setCurrentDatabaseIfMissing" : false + "setCurrentDatabaseIfMissing" : false, + "comment" : "Delta" } ], "opType" : "QUERY", - "queryDescs" : [ ] + "queryDescs" : [ { + "fieldName" : "source", + "fieldExtractor" : "LogicalPlanQueryExtractor", + "comment" : "Delta" + } ], + "uriDescs" : [ { + "fieldName" : "target", + "fieldExtractor" : "SubqueryAliasURIExtractor", + "isInput" : false, + "comment" : "Delta" + } ] +}, { + "classname" : "org.apache.spark.sql.delta.commands.OptimizeTableCommand", + "tableDescs" : [ { + "fieldName" : "child", + "fieldExtractor" : "ResolvedTableTableExtractor", + "columnDesc" : null, + "actionTypeDesc" : null, + "tableTypeDesc" : null, + "catalogDesc" : null, + "isInput" : false, + "setCurrentDatabaseIfMissing" : false, + "comment" : "Delta" + }, { + "fieldName" : "tableId", + "fieldExtractor" : "TableIdentifierOptionTableExtractor", + "columnDesc" : null, + "actionTypeDesc" : null, + "tableTypeDesc" : null, + "catalogDesc" : null, + "isInput" : false, + "setCurrentDatabaseIfMissing" : false, + "comment" : "Delta" + } ], + "opType" : "ALTERTABLE_COMPACT", + "queryDescs" : [ ], + "uriDescs" : [ { + "fieldName" : "child", + "fieldExtractor" : "ResolvedTableURIExtractor", + "isInput" : false, + "comment" : "Delta" + }, { + "fieldName" : "tableId", + "fieldExtractor" : "TableIdentifierOptionURIExtractor", + "isInput" : false, + "comment" : "Delta" + }, { + "fieldName" : "path", + "fieldExtractor" : "StringURIExtractor", + "isInput" : false, + "comment" : "Delta" + } ] +}, { + "classname" : "org.apache.spark.sql.delta.commands.UpdateCommand", + "tableDescs" : [ { + "fieldName" : "target", + "fieldExtractor" : "SubqueryAliasTableExtractor", + "columnDesc" : null, + "actionTypeDesc" : { + "fieldName" : null, + "fieldExtractor" : null, + "actionType" : "UPDATE", + "comment" : "Delta" + }, + "tableTypeDesc" : null, + "catalogDesc" : null, + "isInput" : false, + "setCurrentDatabaseIfMissing" : false, + "comment" : "Delta" + } ], + "opType" : "QUERY", + "queryDescs" : [ ], + "uriDescs" : [ { + "fieldName" : "target", + "fieldExtractor" : "SubqueryAliasURIExtractor", + "isInput" : false, + "comment" : "Delta" + } ] } ] \ No newline at end of file diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ObjectType.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ObjectType.scala index c94bf4f8d20..c8662f29d18 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ObjectType.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ObjectType.scala @@ -23,7 +23,7 @@ object ObjectType extends Enumeration { type ObjectType = Value - val DATABASE, TABLE, VIEW, COLUMN, FUNCTION, INDEX = Value + val DATABASE, TABLE, VIEW, COLUMN, FUNCTION, INDEX, URI = Value def apply(obj: PrivilegeObject, opType: OperationType): ObjectType = { obj.privilegeObjectType match { @@ -33,6 +33,7 @@ object ObjectType extends Enumeration { case PrivilegeObjectType.TABLE_OR_VIEW if opType.toString.contains("VIEW") => VIEW case PrivilegeObjectType.TABLE_OR_VIEW => TABLE case PrivilegeObjectType.FUNCTION => FUNCTION + case PrivilegeObjectType.DFS_URI | PrivilegeObjectType.LOCAL_URI => URI } } } diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/OperationType.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/OperationType.scala index 3f2062b20a0..07066cc270e 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/OperationType.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/OperationType.scala @@ -22,14 +22,14 @@ object OperationType extends Enumeration { type OperationType = Value // According to https://scalameta.org/scalafmt/docs/known-issues.html // format: off - val ALTERDATABASE, ALTERDATABASE_LOCATION, ALTERTABLE_ADDCOLS, ALTERTABLE_ADDPARTS, - ALTERTABLE_RENAMECOL, ALTERTABLE_REPLACECOLS, ALTERTABLE_DROPPARTS, ALTERTABLE_RENAMEPART, - ALTERTABLE_RENAME, ALTERTABLE_PROPERTIES, ALTERTABLE_SERDEPROPERTIES, ALTERTABLE_LOCATION, - ALTERVIEW_AS, ALTERVIEW_RENAME, ANALYZE_TABLE, CREATEDATABASE, CREATETABLE, - CREATETABLE_AS_SELECT, CREATEFUNCTION, CREATEVIEW, DESCDATABASE, DESCFUNCTION, DESCTABLE, - DROPDATABASE, DROPFUNCTION, DROPTABLE, DROPVIEW, EXPLAIN, LOAD, MSCK, QUERY, RELOADFUNCTION, - SHOWCONF, SHOW_CREATETABLE, SHOWCOLUMNS, SHOWDATABASES, SHOWFUNCTIONS, SHOWPARTITIONS, - SHOWTABLES, SHOW_TBLPROPERTIES, SWITCHDATABASE, TRUNCATETABLE, + val ADD, ALTERDATABASE, ALTERDATABASE_LOCATION, ALTERTABLE_ADDCOLS, ALTERTABLE_ADDPARTS, + ALTERTABLE_COMPACT, ALTERTABLE_RENAMECOL, ALTERTABLE_REPLACECOLS, ALTERTABLE_DROPPARTS, + ALTERTABLE_RENAMEPART, ALTERTABLE_RENAME, ALTERTABLE_PROPERTIES, ALTERTABLE_SERDEPROPERTIES, + ALTERTABLE_LOCATION, ALTERVIEW_AS, ALTERVIEW_RENAME, ANALYZE_TABLE, CREATEDATABASE, + CREATETABLE, CREATETABLE_AS_SELECT, CREATEFUNCTION, CREATEVIEW, DESCDATABASE, DESCFUNCTION, + DESCTABLE, DROPDATABASE, DROPFUNCTION, DROPTABLE, DROPVIEW, EXPLAIN, LOAD, MSCK, QUERY, + RELOADFUNCTION, SHOWCONF, SHOW_CREATETABLE, SHOWCOLUMNS, SHOWDATABASES, SHOWFUNCTIONS, + SHOWPARTITIONS, SHOWTABLES, SHOW_TBLPROPERTIES, SWITCHDATABASE, TRUNCATETABLE, CREATEINDEX, DROPINDEX, ALTERINDEX_REBUILD, SHOWINDEXES = Value // format: on } diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegeObject.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegeObject.scala index 195aa79892c..228aaeb11a7 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegeObject.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegeObject.scala @@ -17,11 +17,12 @@ package org.apache.kyuubi.plugin.spark.authz +import java.net.URI import javax.annotation.Nonnull import org.apache.kyuubi.plugin.spark.authz.PrivilegeObjectActionType.PrivilegeObjectActionType import org.apache.kyuubi.plugin.spark.authz.PrivilegeObjectType._ -import org.apache.kyuubi.plugin.spark.authz.serde.{Database, Function, Table} +import org.apache.kyuubi.plugin.spark.authz.serde.{Database, Function, Table, Uri} /** * Build a Spark logical plan to different `PrivilegeObject`s @@ -86,4 +87,19 @@ object PrivilegeObject { None ) // TODO: Support catalog for function } + + def apply(uri: Uri): PrivilegeObject = { + val privilegeObjectType = Option(new URI(uri.path).getScheme) match { + case Some("file") => LOCAL_URI + case _ => DFS_URI + } + new PrivilegeObject( + privilegeObjectType, + PrivilegeObjectActionType.OTHER, + uri.path, + null, + Nil, + None, + None) + } } diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegeObjectType.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegeObjectType.scala index f514fcb828c..28b9588eaa2 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegeObjectType.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegeObjectType.scala @@ -20,5 +20,5 @@ package org.apache.kyuubi.plugin.spark.authz object PrivilegeObjectType extends Enumeration { type PrivilegeObjectType = Value - val DATABASE, TABLE_OR_VIEW, FUNCTION = Value + val DATABASE, TABLE_OR_VIEW, FUNCTION, LOCAL_URI, DFS_URI = Value } diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegesBuilder.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegesBuilder.scala index a0ed5fb6a14..2d452ba9d67 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegesBuilder.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegesBuilder.scala @@ -22,13 +22,15 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.expressions.{Expression, NamedExpression} import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.execution.command.ExplainCommand import org.slf4j.LoggerFactory import org.apache.kyuubi.plugin.spark.authz.OperationType.OperationType import org.apache.kyuubi.plugin.spark.authz.PrivilegeObjectActionType._ +import org.apache.kyuubi.plugin.spark.authz.rule.Authorization._ +import org.apache.kyuubi.plugin.spark.authz.rule.rowfilter._ import org.apache.kyuubi.plugin.spark.authz.serde._ import org.apache.kyuubi.plugin.spark.authz.util.AuthZUtils._ -import org.apache.kyuubi.plugin.spark.authz.util.PermanentViewMarker import org.apache.kyuubi.util.reflect.ReflectUtils._ object PrivilegesBuilder { @@ -74,6 +76,8 @@ object PrivilegesBuilder { } plan match { + case p if p.getTagValue(KYUUBI_AUTHZ_TAG).nonEmpty => + case p: Project => buildQuery(p.child, privilegeObjects, p.projectList, conditionList, spark) case j: Join => @@ -103,13 +107,15 @@ object PrivilegesBuilder { val cols = conditionList ++ aggCols buildQuery(a.child, privilegeObjects, projectionList, cols, spark) - case pvm: PermanentViewMarker => - getScanSpec(pvm).tables(pvm, spark).foreach { table => - privilegeObjects += PrivilegeObject(table, pvm.visitColNames) - } - case scan if isKnownScan(scan) && scan.resolved => - getScanSpec(scan).tables(scan, spark).foreach(mergeProjection(_, scan)) + val tables = getScanSpec(scan).tables(scan, spark) + // If the the scan is table-based, we check privileges on the table we found + // otherwise, we check privileges on the uri we found + if (tables.nonEmpty) { + tables.foreach(mergeProjection(_, scan)) + } else { + getScanSpec(scan).uris(scan).foreach(privilegeObjects += PrivilegeObject(_)) + } case u if u.nodeName == "UnresolvedRelation" => val parts = invokeAs[String](u, "tableName").split("\\.") @@ -178,6 +184,19 @@ object PrivilegesBuilder { LOG.debug(databaseDesc.error(plan, e)) } } + desc.uriDescs.foreach { ud => + try { + val uris = ud.extract(plan, spark) + if (ud.isInput) { + inputObjs ++= uris.map(PrivilegeObject(_)) + } else { + outputObjs ++= uris.map(PrivilegeObject(_)) + } + } catch { + case e: Exception => + LOG.debug(ud.error(plan, e)) + } + } desc.operationType case classname if TABLE_COMMAND_SPECS.contains(classname) => @@ -189,6 +208,19 @@ object PrivilegesBuilder { outputObjs ++= getTablePriv(td) } } + spec.uriDescs.foreach { ud => + try { + val uris = ud.extract(plan, spark) + if (ud.isInput) { + inputObjs ++= uris.map(PrivilegeObject(_)) + } else { + outputObjs ++= uris.map(PrivilegeObject(_)) + } + } catch { + case e: Exception => + LOG.debug(ud.error(plan, e)) + } + } spec.queries(plan).foreach(buildQuery(_, inputObjs, spark = spark)) spec.operationType @@ -265,6 +297,20 @@ object PrivilegesBuilder { val inputObjs = new ArrayBuffer[PrivilegeObject] val outputObjs = new ArrayBuffer[PrivilegeObject] val opType = plan match { + case ObjectFilterPlaceHolder(child) if child.nodeName == "ShowTables" => + OperationType.SHOWTABLES + case ObjectFilterPlaceHolder(child) if child.nodeName == "ShowNamespaces" => + OperationType.SHOWDATABASES + case _: FilteredShowTablesCommand => OperationType.SHOWTABLES + case _: FilteredShowFunctionsCommand => OperationType.SHOWFUNCTIONS + case _: FilteredShowColumnsCommand => OperationType.SHOWCOLUMNS + + // ExplainCommand run will execute the plan, should avoid check privilege for the plan. + case _: ExplainCommand => + setExplainCommandExecutionId(spark) + OperationType.EXPLAIN + case _ if isExplainCommandChild(spark) => + OperationType.EXPLAIN // RunnableCommand case cmd: Command => buildCommand(cmd, inputObjs, outputObjs, spark) // Queries diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/AccessResource.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/AccessResource.scala index 23cd87b2745..858dc1c3733 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/AccessResource.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/AccessResource.scala @@ -17,6 +17,9 @@ package org.apache.kyuubi.plugin.spark.authz.ranger +import java.io.File +import java.util + import scala.language.implicitConversions import org.apache.ranger.plugin.policyengine.RangerAccessResourceImpl @@ -35,6 +38,7 @@ class AccessResource private (val objectType: ObjectType, val catalog: Option[St val columnStr = getColumn if (columnStr == null) Nil else columnStr.split(",").filter(_.nonEmpty) } + def getUrl: String = getValue("url") } object AccessResource { @@ -60,6 +64,16 @@ object AccessResource { case TABLE | VIEW | INDEX => resource.setValue("database", firstLevelResource) resource.setValue("table", secondLevelResource) + case URI => + val objectList = new util.ArrayList[String] + Option(firstLevelResource) + .filter(_.nonEmpty) + .foreach { path => + val s = path.stripSuffix(File.separator) + objectList.add(s) + objectList.add(s + File.separator) + } + resource.setValue("url", objectList) } resource.setServiceDef(SparkRangerAdminPlugin.getServiceDef) owner.foreach(resource.setOwnerUser) diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/AccessType.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/AccessType.scala index d533d638bac..3a836df372f 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/AccessType.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/AccessType.scala @@ -25,11 +25,18 @@ object AccessType extends Enumeration { type AccessType = Value - val NONE, CREATE, ALTER, DROP, SELECT, UPDATE, USE, READ, WRITE, ALL, ADMIN, INDEX = Value + val NONE, CREATE, ALTER, DROP, SELECT, UPDATE, USE, READ, WRITE, ALL, ADMIN, INDEX, TEMPUDFADMIN = + Value def apply(obj: PrivilegeObject, opType: OperationType, isInput: Boolean): AccessType = { + if (obj.privilegeObjectType == DFS_URI || obj.privilegeObjectType == LOCAL_URI) { + // This is equivalent to ObjectType.URI + return if (isInput) READ else WRITE + } + obj.actionType match { case PrivilegeObjectActionType.OTHER => opType match { + case ADD => TEMPUDFADMIN case CREATEDATABASE if obj.privilegeObjectType == DATABASE => CREATE case CREATEFUNCTION if obj.privilegeObjectType == FUNCTION => CREATE case CREATETABLE | CREATEVIEW | CREATETABLE_AS_SELECT @@ -39,6 +46,7 @@ object AccessType extends Enumeration { ALTERDATABASE_LOCATION | ALTERTABLE_ADDCOLS | ALTERTABLE_ADDPARTS | + ALTERTABLE_COMPACT | ALTERTABLE_DROPPARTS | ALTERTABLE_LOCATION | ALTERTABLE_RENAME | diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/RangerConfigProvider.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerConfigProvider.scala similarity index 88% rename from extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/RangerConfigProvider.scala rename to extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerConfigProvider.scala index a61d94a8fc8..05d8cc64f40 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/RangerConfigProvider.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerConfigProvider.scala @@ -15,12 +15,12 @@ * limitations under the License. */ -package org.apache.kyuubi.plugin.spark.authz.util +package org.apache.kyuubi.plugin.spark.authz.ranger import org.apache.hadoop.conf.Configuration -import org.apache.kyuubi.plugin.spark.authz.util.AuthZUtils._ -import org.apache.kyuubi.util.reflect.ReflectUtils._ +import org.apache.kyuubi.plugin.spark.authz.util.AuthZUtils.isRanger21orGreater +import org.apache.kyuubi.util.reflect.ReflectUtils.invokeAs trait RangerConfigProvider { diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtension.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtension.scala index f8e941d9def..288719f07bf 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtension.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtension.scala @@ -19,9 +19,12 @@ package org.apache.kyuubi.plugin.spark.authz.ranger import org.apache.spark.sql.SparkSessionExtensions -import org.apache.kyuubi.plugin.spark.authz.ranger.datamasking.{RuleApplyDataMaskingStage0, RuleApplyDataMaskingStage1} -import org.apache.kyuubi.plugin.spark.authz.ranger.rowfilter.RuleApplyRowFilter -import org.apache.kyuubi.plugin.spark.authz.util.{RuleEliminateMarker, RuleEliminateViewMarker} +import org.apache.kyuubi.plugin.spark.authz.rule.{RuleEliminateMarker, RuleEliminatePermanentViewMarker, RuleEliminateTypeOf} +import org.apache.kyuubi.plugin.spark.authz.rule.config.AuthzConfigurationChecker +import org.apache.kyuubi.plugin.spark.authz.rule.datamasking.{RuleApplyDataMaskingStage0, RuleApplyDataMaskingStage1} +import org.apache.kyuubi.plugin.spark.authz.rule.expression.RuleApplyTypeOfMarker +import org.apache.kyuubi.plugin.spark.authz.rule.permanentview.RuleApplyPermanentViewMarker +import org.apache.kyuubi.plugin.spark.authz.rule.rowfilter.{FilterDataSourceV2Strategy, RuleApplyRowFilter, RuleReplaceShowObjectCommands} /** * ACL Management for Apache Spark SQL with Apache Ranger, enabling: @@ -42,14 +45,16 @@ class RangerSparkExtension extends (SparkSessionExtensions => Unit) { override def apply(v1: SparkSessionExtensions): Unit = { v1.injectCheckRule(AuthzConfigurationChecker) - v1.injectResolutionRule(_ => new RuleReplaceShowObjectCommands()) - v1.injectResolutionRule(_ => new RuleApplyPermanentViewMarker()) + v1.injectResolutionRule(_ => RuleReplaceShowObjectCommands) + v1.injectResolutionRule(_ => RuleApplyPermanentViewMarker) + v1.injectResolutionRule(_ => RuleApplyTypeOfMarker) v1.injectResolutionRule(RuleApplyRowFilter) v1.injectResolutionRule(RuleApplyDataMaskingStage0) v1.injectResolutionRule(RuleApplyDataMaskingStage1) - v1.injectOptimizerRule(_ => new RuleEliminateMarker()) - v1.injectOptimizerRule(new RuleAuthorization(_)) - v1.injectOptimizerRule(_ => new RuleEliminateViewMarker()) - v1.injectPlannerStrategy(new FilterDataSourceV2Strategy(_)) + v1.injectOptimizerRule(_ => RuleEliminateMarker) + v1.injectOptimizerRule(RuleAuthorization) + v1.injectOptimizerRule(RuleEliminatePermanentViewMarker) + v1.injectOptimizerRule(_ => RuleEliminateTypeOf) + v1.injectPlannerStrategy(FilterDataSourceV2Strategy) } } diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RuleAuthorization.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RuleAuthorization.scala index 3203108dfae..e25cd2a7004 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RuleAuthorization.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RuleAuthorization.scala @@ -22,36 +22,19 @@ import scala.collection.mutable.ArrayBuffer import org.apache.ranger.plugin.policyengine.RangerAccessRequest import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.catalyst.trees.TreeNodeTag import org.apache.kyuubi.plugin.spark.authz._ import org.apache.kyuubi.plugin.spark.authz.ObjectType._ -import org.apache.kyuubi.plugin.spark.authz.ranger.RuleAuthorization._ import org.apache.kyuubi.plugin.spark.authz.ranger.SparkRangerAdminPlugin._ +import org.apache.kyuubi.plugin.spark.authz.rule.Authorization import org.apache.kyuubi.plugin.spark.authz.util.AuthZUtils._ -class RuleAuthorization(spark: SparkSession) extends Rule[LogicalPlan] { - override def apply(plan: LogicalPlan): LogicalPlan = { - plan match { - case plan if isAuthChecked(plan) => plan // do nothing if checked privileges already. - case p => checkPrivileges(spark, p) - } - } -} -object RuleAuthorization { - - val KYUUBI_AUTHZ_TAG = TreeNodeTag[Boolean]("__KYUUBI_AUTHZ_TAG") - - private def checkPrivileges(spark: SparkSession, plan: LogicalPlan): LogicalPlan = { +case class RuleAuthorization(spark: SparkSession) extends Authorization(spark) { + override def checkPrivileges(spark: SparkSession, plan: LogicalPlan): Unit = { val auditHandler = new SparkRangerAuditHandler val ugi = getAuthzUgi(spark.sparkContext) val (inputs, outputs, opType) = PrivilegesBuilder.build(plan, spark) val requests = new ArrayBuffer[AccessRequest]() - if (inputs.isEmpty && opType == OperationType.SHOWDATABASES) { - val resource = AccessResource(DATABASE, null, None) - requests += AccessRequest(resource, ugi, opType, AccessType.USE) - } def addAccessRequest(objects: Iterable[PrivilegeObject], isInput: Boolean): Unit = { objects.foreach { obj => @@ -93,17 +76,5 @@ object RuleAuthorization { verify(Seq(req), auditHandler) } } - markAuthChecked(plan) - } - - private def markAuthChecked(plan: LogicalPlan): LogicalPlan = { - plan.transformUp { case p => - p.setTagValue(KYUUBI_AUTHZ_TAG, true) - p - } - } - - private def isAuthChecked(plan: LogicalPlan): Boolean = { - plan.find(_.getTagValue(KYUUBI_AUTHZ_TAG).contains(true)).nonEmpty } } diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/SparkRangerAdminPlugin.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/SparkRangerAdminPlugin.scala index d3059ef2dd3..66f34db9106 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/SparkRangerAdminPlugin.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/SparkRangerAdminPlugin.scala @@ -26,7 +26,6 @@ import org.apache.ranger.plugin.service.RangerBasePlugin import org.slf4j.LoggerFactory import org.apache.kyuubi.plugin.spark.authz.AccessControlException -import org.apache.kyuubi.plugin.spark.authz.util.RangerConfigProvider object SparkRangerAdminPlugin extends RangerBasePlugin("spark", "sparkSql") with RangerConfigProvider { diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/Authorization.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/Authorization.scala new file mode 100644 index 00000000000..d1494266e85 --- /dev/null +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/Authorization.scala @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.plugin.spark.authz.rule + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, View} +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.catalyst.trees.TreeNodeTag +import org.apache.spark.sql.execution.SQLExecution.EXECUTION_ID_KEY + +import org.apache.kyuubi.plugin.spark.authz.rule.Authorization._ +import org.apache.kyuubi.plugin.spark.authz.util.ReservedKeys._ + +abstract class Authorization(spark: SparkSession) extends Rule[LogicalPlan] { + override def apply(plan: LogicalPlan): LogicalPlan = { + plan match { + case plan if isAuthChecked(plan) => plan // do nothing if checked privileges already. + case p => + checkPrivileges(spark, p) + markAuthChecked(p) + } + } + + def checkPrivileges(spark: SparkSession, plan: LogicalPlan): Unit +} + +object Authorization { + + val KYUUBI_AUTHZ_TAG = TreeNodeTag[Unit]("__KYUUBI_AUTHZ_TAG") + + private def markAllNodesAuthChecked(plan: LogicalPlan): LogicalPlan = { + plan.transformDown { case p => + p.setTagValue(KYUUBI_AUTHZ_TAG, ()) + p + } + } + + def markAuthChecked(plan: LogicalPlan): LogicalPlan = { + plan.setTagValue(KYUUBI_AUTHZ_TAG, ()) + plan transformDown { + // TODO: Add this line Support for spark3.1, we can remove this + // after spark 3.2 since https://issues.apache.org/jira/browse/SPARK-34269 + case view: View => + markAllNodesAuthChecked(view.child) + } + } + + protected def isAuthChecked(plan: LogicalPlan): Boolean = { + plan.getTagValue(KYUUBI_AUTHZ_TAG).nonEmpty + } + + def setExplainCommandExecutionId(sparkSession: SparkSession): Unit = { + sparkSession.sparkContext.setLocalProperty( + KYUUBI_EXPLAIN_COMMAND_EXECUTION_ID, + executionId(sparkSession)) + } + + def isExplainCommandChild(sparkSession: SparkSession): Boolean = { + if (null == executionId(sparkSession)) { + false + } else { + executionId(sparkSession).equals( + sparkSession.sparkContext.getLocalProperty(KYUUBI_EXPLAIN_COMMAND_EXECUTION_ID)) + } + } + + private def executionId(sparkSession: SparkSession): String = { + sparkSession.sparkContext.getLocalProperty(EXECUTION_ID_KEY) + } +} diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/RuleEliminateMarker.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/RuleEliminateMarker.scala similarity index 82% rename from extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/RuleEliminateMarker.scala rename to extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/RuleEliminateMarker.scala index 448439b8426..a3a22a5f321 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/RuleEliminateMarker.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/RuleEliminateMarker.scala @@ -15,16 +15,16 @@ * limitations under the License. */ -package org.apache.kyuubi.plugin.spark.authz.util +package org.apache.kyuubi.plugin.spark.authz.rule import org.apache.spark.sql.catalyst.expressions.SubqueryExpression import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.kyuubi.plugin.spark.authz.ranger.datamasking.{DataMaskingStage0Marker, DataMaskingStage1Marker} -import org.apache.kyuubi.plugin.spark.authz.ranger.rowfilter.RowFilterMarker +import org.apache.kyuubi.plugin.spark.authz.rule.datamasking.{DataMaskingStage0Marker, DataMaskingStage1Marker} +import org.apache.kyuubi.plugin.spark.authz.rule.rowfilter.RowFilterMarker -class RuleEliminateMarker extends Rule[LogicalPlan] { +object RuleEliminateMarker extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = { plan.transformUp { case p => p.transformExpressionsUp { diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/RuleEliminatePermanentViewMarker.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/RuleEliminatePermanentViewMarker.scala new file mode 100644 index 00000000000..003521c727b --- /dev/null +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/RuleEliminatePermanentViewMarker.scala @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.plugin.spark.authz.rule + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.expressions.SubqueryExpression +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.rules.Rule + +import org.apache.kyuubi.plugin.spark.authz.rule.permanentview.PermanentViewMarker + +/** + * Transforming up [[PermanentViewMarker]] + */ +case class RuleEliminatePermanentViewMarker(sparkSession: SparkSession) extends Rule[LogicalPlan] { + + def eliminatePVM(plan: LogicalPlan): LogicalPlan = { + plan.transformUp { + case pvm: PermanentViewMarker => + val ret = pvm.child.transformAllExpressions { + case s: SubqueryExpression => s.withNewPlan(eliminatePVM(s.plan)) + } + // For each SubqueryExpression's PVM, we should mark as resolved to + // avoid check privilege of PVM's internal Subquery. + Authorization.markAuthChecked(ret) + ret + } + } + + override def apply(plan: LogicalPlan): LogicalPlan = { + var matched = false + val eliminatedPVM = plan.transformUp { + case pvm: PermanentViewMarker => + matched = true + pvm.child.transformAllExpressions { + case s: SubqueryExpression => s.withNewPlan(eliminatePVM(s.plan)) + } + } + if (matched) { + Authorization.markAuthChecked(eliminatedPVM) + sparkSession.sessionState.optimizer.execute(eliminatedPVM) + } else { + eliminatedPVM + } + } +} diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/RuleEliminateViewMarker.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/RuleEliminateTypeOf.scala similarity index 68% rename from extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/RuleEliminateViewMarker.scala rename to extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/RuleEliminateTypeOf.scala index 8044f1283e5..0f3ae136c4a 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/RuleEliminateViewMarker.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/RuleEliminateTypeOf.scala @@ -15,21 +15,20 @@ * limitations under the License. */ -package org.apache.kyuubi.plugin.spark.authz.util +package org.apache.kyuubi.plugin.spark.authz.rule -import org.apache.spark.sql.catalyst.expressions.SubqueryExpression +import org.apache.spark.sql.catalyst.expressions.TypeOf import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule -/** - * Transforming up [[org.apache.kyuubi.plugin.spark.authz.util.PermanentViewMarker]] - */ -class RuleEliminateViewMarker extends Rule[LogicalPlan] { +import org.apache.kyuubi.plugin.spark.authz.rule.expression.TypeOfPlaceHolder + +object RuleEliminateTypeOf extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = { - plan.transformUp { - case pvm: PermanentViewMarker => pvm.child.transformAllExpressions { - case s: SubqueryExpression => s.withNewPlan(apply(s.plan)) - } + plan.transformUp { case p => + p.transformExpressionsUp { + case toph: TypeOfPlaceHolder => TypeOf(toph.expr) + } } } } diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RuleHelper.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/RuleHelper.scala similarity index 97% rename from extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RuleHelper.scala rename to extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/RuleHelper.scala index 3cfe2b9406b..c163cafe931 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RuleHelper.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/RuleHelper.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.kyuubi.plugin.spark.authz.ranger +package org.apache.kyuubi.plugin.spark.authz.rule import org.apache.hadoop.security.UserGroupInformation import org.apache.spark.sql.SparkSession diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/AuthzConfigurationChecker.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/config/AuthzConfigurationChecker.scala similarity index 97% rename from extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/AuthzConfigurationChecker.scala rename to extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/config/AuthzConfigurationChecker.scala index 56ab27d2244..3ab2c3fd640 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/AuthzConfigurationChecker.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/config/AuthzConfigurationChecker.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.kyuubi.plugin.spark.authz.ranger +package org.apache.kyuubi.plugin.spark.authz.rule.config import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingStage0Marker.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/datamasking/DataMaskingStage0Marker.scala similarity index 95% rename from extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingStage0Marker.scala rename to extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/datamasking/DataMaskingStage0Marker.scala index b4314938324..c1d3a75321e 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingStage0Marker.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/datamasking/DataMaskingStage0Marker.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.kyuubi.plugin.spark.authz.ranger.datamasking +package org.apache.kyuubi.plugin.spark.authz.rule.datamasking import org.apache.spark.sql.catalyst.expressions.{Attribute, ExprId} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, UnaryNode} diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingStage1Marker.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/datamasking/DataMaskingStage1Marker.scala similarity index 95% rename from extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingStage1Marker.scala rename to extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/datamasking/DataMaskingStage1Marker.scala index aed0ac693b1..1c30879e4e6 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/DataMaskingStage1Marker.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/datamasking/DataMaskingStage1Marker.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.kyuubi.plugin.spark.authz.ranger.datamasking +package org.apache.kyuubi.plugin.spark.authz.rule.datamasking import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, UnaryNode} diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/RuleApplyDataMaskingStage0.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/datamasking/RuleApplyDataMaskingStage0.scala similarity index 95% rename from extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/RuleApplyDataMaskingStage0.scala rename to extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/datamasking/RuleApplyDataMaskingStage0.scala index de125550ac9..27cde162113 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/RuleApplyDataMaskingStage0.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/datamasking/RuleApplyDataMaskingStage0.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.kyuubi.plugin.spark.authz.ranger.datamasking +package org.apache.kyuubi.plugin.spark.authz.rule.datamasking import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.expressions.Alias @@ -24,6 +24,7 @@ import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} import org.apache.kyuubi.plugin.spark.authz.ObjectType import org.apache.kyuubi.plugin.spark.authz.OperationType.QUERY import org.apache.kyuubi.plugin.spark.authz.ranger._ +import org.apache.kyuubi.plugin.spark.authz.rule.RuleHelper import org.apache.kyuubi.plugin.spark.authz.serde._ /** diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/RuleApplyDataMaskingStage1.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/datamasking/RuleApplyDataMaskingStage1.scala similarity index 96% rename from extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/RuleApplyDataMaskingStage1.scala rename to extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/datamasking/RuleApplyDataMaskingStage1.scala index 9589be2e97b..b0069c9a543 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/datamasking/RuleApplyDataMaskingStage1.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/datamasking/RuleApplyDataMaskingStage1.scala @@ -15,13 +15,13 @@ * limitations under the License. */ -package org.apache.kyuubi.plugin.spark.authz.ranger.datamasking +package org.apache.kyuubi.plugin.spark.authz.rule.datamasking import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.expressions.NamedExpression import org.apache.spark.sql.catalyst.plans.logical.{Command, LogicalPlan} -import org.apache.kyuubi.plugin.spark.authz.ranger.RuleHelper +import org.apache.kyuubi.plugin.spark.authz.rule.RuleHelper import org.apache.kyuubi.plugin.spark.authz.serde._ /** diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/expression/RuleApplyTypeOfMarker.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/expression/RuleApplyTypeOfMarker.scala new file mode 100644 index 00000000000..8d47c56f7af --- /dev/null +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/expression/RuleApplyTypeOfMarker.scala @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.plugin.spark.authz.rule.expression + +import org.apache.spark.sql.catalyst.expressions.TypeOf +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.rules.Rule + +object RuleApplyTypeOfMarker extends Rule[LogicalPlan] { + + override def apply(plan: LogicalPlan): LogicalPlan = { + plan transformAllExpressions { + case typeof: TypeOf => TypeOfPlaceHolder(typeof.child) + } + } +} diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/expression/TypeOfPlaceHolder.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/expression/TypeOfPlaceHolder.scala new file mode 100644 index 00000000000..ebc9cecf5d5 --- /dev/null +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/expression/TypeOfPlaceHolder.scala @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.plugin.spark.authz.rule.expression + +import org.apache.spark.sql.catalyst.expressions.{Expression, UnaryExpression} +import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode} +import org.apache.spark.sql.types.{DataType, StringType} + +import org.apache.kyuubi.plugin.spark.authz.util.WithInternalExpressionChild + +case class TypeOfPlaceHolder(expr: Expression) extends UnaryExpression + with WithInternalExpressionChild { + override def dataType: DataType = StringType + + // Avoid fold constant expression by Spark Optimizer + override def foldable: Boolean = false + + override def child: Expression = expr + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + defineCodeGen(ctx, ev, _ => s"""UTF8String.fromString(${child.dataType.catalogString})""") + } + + override def withNewChildInternal(newChild: Expression): Expression = + copy(expr = newChild) +} diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/permanentview/PermanentViewMarker.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/permanentview/PermanentViewMarker.scala new file mode 100644 index 00000000000..fc52adc0458 --- /dev/null +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/permanentview/PermanentViewMarker.scala @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.plugin.spark.authz.rule.permanentview + +import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation +import org.apache.spark.sql.catalyst.catalog.CatalogTable +import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, Cast} +import org.apache.spark.sql.catalyst.plans.QueryPlan +import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, Project, Statistics} + +case class PermanentViewMarker(child: LogicalPlan, catalogTable: CatalogTable) + extends LeafNode with MultiInstanceRelation { + + override def output: Seq[Attribute] = child.output + + override def argString(maxFields: Int): String = "" + + override def innerChildren: Seq[QueryPlan[_]] = child :: Nil + + override def computeStats(): Statistics = child.stats + + override def newInstance(): LogicalPlan = { + val projectList = child.output.map { case attr => + Alias(Cast(attr, attr.dataType), attr.name)(explicitMetadata = Some(attr.metadata)) + } + this.copy(child = Project(projectList, child), catalogTable = catalogTable) + } +} diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RuleApplyPermanentViewMarker.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/permanentview/RuleApplyPermanentViewMarker.scala similarity index 62% rename from extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RuleApplyPermanentViewMarker.scala rename to extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/permanentview/RuleApplyPermanentViewMarker.scala index 909cd9e93d3..a84ecec8c31 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RuleApplyPermanentViewMarker.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/permanentview/RuleApplyPermanentViewMarker.scala @@ -15,40 +15,42 @@ * limitations under the License. */ -package org.apache.kyuubi.plugin.spark.authz.ranger +package org.apache.kyuubi.plugin.spark.authz.rule.permanentview +import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.catalyst.expressions.SubqueryExpression import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, View} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.kyuubi.plugin.spark.authz.util.AuthZUtils._ -import org.apache.kyuubi.plugin.spark.authz.util.PermanentViewMarker /** - * Adding [[org.apache.kyuubi.plugin.spark.authz.util.PermanentViewMarker]] for permanent views + * Adding [[PermanentViewMarker]] for permanent views * for marking catalogTable of views used by privilege checking * in [[org.apache.kyuubi.plugin.spark.authz.ranger.RuleAuthorization]]. - * [[org.apache.kyuubi.plugin.spark.authz.util.PermanentViewMarker]] must be transformed up later - * in [[org.apache.kyuubi.plugin.spark.authz.util.RuleEliminateViewMarker]] optimizer. + * [[PermanentViewMarker]] must be transformed up later + * in [[org.apache.kyuubi.plugin.spark.authz.rule.RuleEliminatePermanentViewMarker]] optimizer. */ -class RuleApplyPermanentViewMarker extends Rule[LogicalPlan] { +object RuleApplyPermanentViewMarker extends Rule[LogicalPlan] { + + private def resolveSubqueryExpression( + plan: LogicalPlan, + catalogTable: CatalogTable): LogicalPlan = { + plan.transformAllExpressions { + case subquery: SubqueryExpression => + subquery.withNewPlan(plan = PermanentViewMarker( + resolveSubqueryExpression(subquery.plan, catalogTable), + catalogTable)) + } + } override def apply(plan: LogicalPlan): LogicalPlan = { plan mapChildren { case p: PermanentViewMarker => p case permanentView: View if hasResolvedPermanentView(permanentView) => - val resolvedSubquery = permanentView.transformAllExpressions { - case subquery: SubqueryExpression => - subquery.withNewPlan(plan = - PermanentViewMarker( - subquery.plan, - permanentView.desc, - permanentView.output.map(_.name))) - } PermanentViewMarker( - resolvedSubquery, - resolvedSubquery.desc, - resolvedSubquery.output.map(_.name)) + resolveSubqueryExpression(permanentView, permanentView.desc), + permanentView.desc) case other => apply(other) } } diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/FilterDataSourceV2Strategy.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/rowfilter/FilterDataSourceV2Strategy.scala similarity index 90% rename from extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/FilterDataSourceV2Strategy.scala rename to extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/rowfilter/FilterDataSourceV2Strategy.scala index cbf79581ed6..e268ed6bc7c 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/FilterDataSourceV2Strategy.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/rowfilter/FilterDataSourceV2Strategy.scala @@ -14,15 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.kyuubi.plugin.spark.authz.ranger +package org.apache.kyuubi.plugin.spark.authz.rule.rowfilter import org.apache.spark.sql.{SparkSession, Strategy} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} import org.apache.spark.sql.execution.SparkPlan -import org.apache.kyuubi.plugin.spark.authz.util.ObjectFilterPlaceHolder - -class FilterDataSourceV2Strategy(spark: SparkSession) extends Strategy { +case class FilterDataSourceV2Strategy(spark: SparkSession) extends Strategy { override def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { // For Spark 3.1 and below, `ColumnPruning` rule will set `ObjectFilterPlaceHolder#child` to // `Project` diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/FilteredShowObjectsExec.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/rowfilter/FilteredShowObjectsExec.scala similarity index 94% rename from extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/FilteredShowObjectsExec.scala rename to extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/rowfilter/FilteredShowObjectsExec.scala index 67519118ecc..0bb4213561c 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/FilteredShowObjectsExec.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/rowfilter/FilteredShowObjectsExec.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.kyuubi.plugin.spark.authz.ranger +package org.apache.kyuubi.plugin.spark.authz.rule.rowfilter import org.apache.hadoop.security.UserGroupInformation import org.apache.spark.SparkContext @@ -24,6 +24,7 @@ import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.execution.{LeafExecNode, SparkPlan} import org.apache.kyuubi.plugin.spark.authz.{ObjectType, OperationType} +import org.apache.kyuubi.plugin.spark.authz.ranger.{AccessRequest, AccessResource, AccessType, SparkRangerAdminPlugin} import org.apache.kyuubi.plugin.spark.authz.util.AuthZUtils trait FilteredShowObjectsExec extends LeafExecNode { diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/ObjectFilterPlaceHolder.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/rowfilter/ObjectFilterPlaceHolder.scala similarity index 91% rename from extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/ObjectFilterPlaceHolder.scala rename to extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/rowfilter/ObjectFilterPlaceHolder.scala index 0d3c39adb69..6a7f1beab18 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/ObjectFilterPlaceHolder.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/rowfilter/ObjectFilterPlaceHolder.scala @@ -15,11 +15,13 @@ * limitations under the License. */ -package org.apache.kyuubi.plugin.spark.authz.util +package org.apache.kyuubi.plugin.spark.authz.rule.rowfilter import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, UnaryNode} +import org.apache.kyuubi.plugin.spark.authz.util.WithInternalChild + case class ObjectFilterPlaceHolder(child: LogicalPlan) extends UnaryNode with WithInternalChild { diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfilter/RowFilterMarker.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/rowfilter/RowFilterMarker.scala similarity index 95% rename from extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfilter/RowFilterMarker.scala rename to extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/rowfilter/RowFilterMarker.scala index 8817958b585..f4295a0942f 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfilter/RowFilterMarker.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/rowfilter/RowFilterMarker.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.kyuubi.plugin.spark.authz.ranger.rowfilter +package org.apache.kyuubi.plugin.spark.authz.rule.rowfilter import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, UnaryNode} diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfilter/RuleApplyRowFilter.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/rowfilter/RuleApplyRowFilter.scala similarity index 94% rename from extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfilter/RuleApplyRowFilter.scala rename to extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/rowfilter/RuleApplyRowFilter.scala index 22bcfae49d9..defee4005b6 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/rowfilter/RuleApplyRowFilter.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/rowfilter/RuleApplyRowFilter.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.kyuubi.plugin.spark.authz.ranger.rowfilter +package org.apache.kyuubi.plugin.spark.authz.rule.rowfilter import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan} @@ -23,6 +23,7 @@ import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan} import org.apache.kyuubi.plugin.spark.authz.ObjectType import org.apache.kyuubi.plugin.spark.authz.OperationType.QUERY import org.apache.kyuubi.plugin.spark.authz.ranger._ +import org.apache.kyuubi.plugin.spark.authz.rule.RuleHelper import org.apache.kyuubi.plugin.spark.authz.serde._ case class RuleApplyRowFilter(spark: SparkSession) extends RuleHelper { diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RuleReplaceShowObjectCommands.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/rowfilter/RuleReplaceShowObjectCommands.scala similarity index 93% rename from extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RuleReplaceShowObjectCommands.scala rename to extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/rowfilter/RuleReplaceShowObjectCommands.scala index bf762109cba..06982d70106 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RuleReplaceShowObjectCommands.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/rule/rowfilter/RuleReplaceShowObjectCommands.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.kyuubi.plugin.spark.authz.ranger +package org.apache.kyuubi.plugin.spark.authz.rule.rowfilter import org.apache.hadoop.security.UserGroupInformation import org.apache.spark.sql.{Row, SparkSession} @@ -25,10 +25,11 @@ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.command.{RunnableCommand, ShowColumnsCommand} import org.apache.kyuubi.plugin.spark.authz.{ObjectType, OperationType} -import org.apache.kyuubi.plugin.spark.authz.util.{AuthZUtils, ObjectFilterPlaceHolder, WithInternalChildren} +import org.apache.kyuubi.plugin.spark.authz.ranger.{AccessRequest, AccessResource, AccessType, SparkRangerAdminPlugin} +import org.apache.kyuubi.plugin.spark.authz.util.{AuthZUtils, WithInternalChildren} import org.apache.kyuubi.util.reflect.ReflectUtils._ -class RuleReplaceShowObjectCommands extends Rule[LogicalPlan] { +object RuleReplaceShowObjectCommands extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan match { case r: RunnableCommand if r.nodeName == "ShowTablesCommand" => FilteredShowTablesCommand(r) case n: LogicalPlan if n.nodeName == "ShowTables" => diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/CommandSpec.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/CommandSpec.scala index 32ad30e211f..c4fd721ca98 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/CommandSpec.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/CommandSpec.scala @@ -43,6 +43,10 @@ trait CommandSpec extends { final def operationType: OperationType = OperationType.withName(opType) } +trait CommandSpecs[T <: CommandSpec] { + def specs: Seq[T] +} + /** * A specification describe a database command * @@ -53,7 +57,8 @@ trait CommandSpec extends { case class DatabaseCommandSpec( classname: String, databaseDescs: Seq[DatabaseDesc], - opType: String = "QUERY") extends CommandSpec {} + opType: String = OperationType.QUERY.toString, + uriDescs: Seq[UriDesc] = Nil) extends CommandSpec {} /** * A specification describe a function command @@ -79,7 +84,8 @@ case class TableCommandSpec( classname: String, tableDescs: Seq[TableDesc], opType: String = OperationType.QUERY.toString, - queryDescs: Seq[QueryDesc] = Nil) extends CommandSpec { + queryDescs: Seq[QueryDesc] = Nil, + uriDescs: Seq[UriDesc] = Nil) extends CommandSpec { def queries: LogicalPlan => Seq[LogicalPlan] = plan => { queryDescs.flatMap { qd => try { @@ -96,7 +102,8 @@ case class TableCommandSpec( case class ScanSpec( classname: String, scanDescs: Seq[ScanDesc], - functionDescs: Seq[FunctionDesc] = Seq.empty) extends CommandSpec { + functionDescs: Seq[FunctionDesc] = Seq.empty, + uriDescs: Seq[UriDesc] = Seq.empty) extends CommandSpec { override def opType: String = OperationType.QUERY.toString def tables: (LogicalPlan, SparkSession) => Seq[Table] = (plan, spark) => { scanDescs.flatMap { td => @@ -110,6 +117,18 @@ case class ScanSpec( } } + def uris: LogicalPlan => Seq[Uri] = plan => { + uriDescs.flatMap { ud => + try { + ud.extract(plan) + } catch { + case e: Exception => + LOG.debug(ud.error(plan, e)) + None + } + } + } + def functions: (Expression) => Seq[Function] = (expr) => { functionDescs.flatMap { fd => try { diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/Descriptor.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/Descriptor.scala index fc660ce143e..4c0cf2a141d 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/Descriptor.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/Descriptor.scala @@ -54,11 +54,15 @@ sealed trait Descriptor { */ def extract(v: AnyRef): AnyRef + def comment: String + final def error(v: AnyRef, e: Throwable): String = { val resourceName = getClass.getSimpleName.stripSuffix("Desc") val objectClass = v.getClass.getName s"[Spark$SPARK_VERSION] failed to get $resourceName from $objectClass by" + - s" $fieldExtractor/$fieldName, due to ${e.getMessage}" + s" $fieldExtractor/$fieldName, " + + (if (comment.nonEmpty) s"desc comment: ${comment}") + + s"due to ${e.getMessage}" } } @@ -70,7 +74,8 @@ sealed trait Descriptor { */ case class ColumnDesc( fieldName: String, - fieldExtractor: String) extends Descriptor { + fieldExtractor: String, + comment: String = "") extends Descriptor { override def extract(v: AnyRef): Seq[String] = { val columnsVal = invokeAs[AnyRef](v, fieldName) val columnExtractor = lookupExtractor[ColumnExtractor](fieldExtractor) @@ -89,7 +94,8 @@ case class DatabaseDesc( fieldName: String, fieldExtractor: String, catalogDesc: Option[CatalogDesc] = None, - isInput: Boolean = false) extends Descriptor { + isInput: Boolean = false, + comment: String = "") extends Descriptor { override def extract(v: AnyRef): Database = { val databaseVal = invokeAs[AnyRef](v, fieldName) val databaseExtractor = lookupExtractor[DatabaseExtractor](fieldExtractor) @@ -113,7 +119,8 @@ case class DatabaseDesc( case class FunctionTypeDesc( fieldName: String, fieldExtractor: String, - skipTypes: Seq[String]) extends Descriptor { + skipTypes: Seq[String], + comment: String = "") extends Descriptor { override def extract(v: AnyRef): FunctionType = { extract(v, SparkSession.active) } @@ -143,7 +150,8 @@ case class FunctionDesc( fieldExtractor: String, databaseDesc: Option[DatabaseDesc] = None, functionTypeDesc: Option[FunctionTypeDesc] = None, - isInput: Boolean = false) extends Descriptor { + isInput: Boolean = false, + comment: String = "") extends Descriptor { override def extract(v: AnyRef): Function = { val functionVal = invokeAs[AnyRef](v, fieldName) val functionExtractor = lookupExtractor[FunctionExtractor](fieldExtractor) @@ -168,7 +176,8 @@ case class FunctionDesc( */ case class QueryDesc( fieldName: String, - fieldExtractor: String = "LogicalPlanQueryExtractor") extends Descriptor { + fieldExtractor: String = "LogicalPlanQueryExtractor", + comment: String = "") extends Descriptor { override def extract(v: AnyRef): Option[LogicalPlan] = { val queryVal = invokeAs[AnyRef](v, fieldName) val queryExtractor = lookupExtractor[QueryExtractor](fieldExtractor) @@ -186,7 +195,8 @@ case class QueryDesc( case class TableTypeDesc( fieldName: String, fieldExtractor: String, - skipTypes: Seq[String]) extends Descriptor { + skipTypes: Seq[String], + comment: String = "") extends Descriptor { override def extract(v: AnyRef): TableType = { extract(v, SparkSession.active) } @@ -224,7 +234,8 @@ case class TableDesc( tableTypeDesc: Option[TableTypeDesc] = None, catalogDesc: Option[CatalogDesc] = None, isInput: Boolean = false, - setCurrentDatabaseIfMissing: Boolean = false) extends Descriptor { + setCurrentDatabaseIfMissing: Boolean = false, + comment: String = "") extends Descriptor { override def extract(v: AnyRef): Option[Table] = { extract(v, SparkSession.active) } @@ -254,7 +265,8 @@ case class TableDesc( case class ActionTypeDesc( fieldName: String = null, fieldExtractor: String = null, - actionType: Option[String] = None) extends Descriptor { + actionType: Option[String] = None, + comment: String = "") extends Descriptor { override def extract(v: AnyRef): PrivilegeObjectActionType = { actionType.map(PrivilegeObjectActionType.withName).getOrElse { val actionTypeVal = invokeAs[AnyRef](v, fieldName) @@ -272,7 +284,8 @@ case class ActionTypeDesc( */ case class CatalogDesc( fieldName: String = "catalog", - fieldExtractor: String = "CatalogPluginCatalogExtractor") extends Descriptor { + fieldExtractor: String = "CatalogPluginCatalogExtractor", + comment: String = "") extends Descriptor { override def extract(v: AnyRef): Option[String] = { val catalogVal = invokeAs[AnyRef](v, fieldName) val catalogExtractor = lookupExtractor[CatalogExtractor](fieldExtractor) @@ -283,7 +296,8 @@ case class CatalogDesc( case class ScanDesc( fieldName: String, fieldExtractor: String, - catalogDesc: Option[CatalogDesc] = None) extends Descriptor { + catalogDesc: Option[CatalogDesc] = None, + comment: String = "") extends Descriptor { override def extract(v: AnyRef): Option[Table] = { extract(v, SparkSession.active) } @@ -306,3 +320,26 @@ case class ScanDesc( } } } + +/** + * URI Descriptor + * + * @param fieldName the field name or method name of this uri field + * @param fieldExtractor the key of a [[URIExtractor]] instance + * @param isInput read or write + */ +case class UriDesc( + fieldName: String, + fieldExtractor: String, + isInput: Boolean = false, + comment: String = "") extends Descriptor { + override def extract(v: AnyRef): Seq[Uri] = { + extract(v, SparkSession.active) + } + + def extract(v: AnyRef, spark: SparkSession): Seq[Uri] = { + val uriVal = invokeAs[AnyRef](v, fieldName) + val uriExtractor = lookupExtractor[URIExtractor](fieldExtractor) + uriExtractor(spark, uriVal) + } +} diff --git a/kyuubi-server/web-ui/src/router/operation/index.ts b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/Uri.scala similarity index 69% rename from kyuubi-server/web-ui/src/router/operation/index.ts rename to extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/Uri.scala index 03ba4c28575..aa9af87327d 100644 --- a/kyuubi-server/web-ui/src/router/operation/index.ts +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/Uri.scala @@ -15,17 +15,12 @@ * limitations under the License. */ -const routes = [ - { - path: '/operation/runningJobs', - name: 'operation-runningJobs', - component: () => import('@/views/operation/runningJobs/index.vue') - }, - { - path: '/operation/completedJobs', - name: 'operation-completedJobs', - component: () => import('@/views/operation/completedJobs/index.vue') - } -] +package org.apache.kyuubi.plugin.spark.authz.serde -export default routes +/** + * :: Developer API :: + * + * Represents a URI identity + * @param path + */ +case class Uri(path: String) diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/package.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/package.scala index 6863516b698..1c5ffb6299a 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/package.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/package.scala @@ -34,6 +34,7 @@ import org.apache.kyuubi.plugin.spark.authz.serde.FunctionTypeExtractor.function import org.apache.kyuubi.plugin.spark.authz.serde.QueryExtractor.queryExtractors import org.apache.kyuubi.plugin.spark.authz.serde.TableExtractor.tableExtractors import org.apache.kyuubi.plugin.spark.authz.serde.TableTypeExtractor.tableTypeExtractors +import org.apache.kyuubi.plugin.spark.authz.serde.URIExtractor.uriExtractors import org.apache.kyuubi.util.reflect.ReflectUtils._ package object serde { @@ -129,6 +130,7 @@ package object serde { case c if classOf[FunctionExtractor].isAssignableFrom(c) => functionExtractors case c if classOf[FunctionTypeExtractor].isAssignableFrom(c) => functionTypeExtractors case c if classOf[ActionTypeExtractor].isAssignableFrom(c) => actionTypeExtractors + case c if classOf[URIExtractor].isAssignableFrom(c) => uriExtractors case _ => throw new IllegalArgumentException(s"Unknown extractor type: $ct") } extractors(extractorKey).asInstanceOf[T] diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/tableExtractors.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/tableExtractors.scala index 2c212cc5cdb..8a7bc452293 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/tableExtractors.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/tableExtractors.scala @@ -17,8 +17,7 @@ package org.apache.kyuubi.plugin.spark.authz.serde -import java.util.{Map => JMap} -import java.util.LinkedHashMap +import java.util.{LinkedHashMap, Map => JMap} import scala.collection.JavaConverters._ @@ -27,10 +26,13 @@ import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier} import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias} +import org.apache.spark.sql.connector.catalog.Identifier +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.types.DataType import org.apache.spark.unsafe.types.UTF8String import org.apache.kyuubi.plugin.spark.authz.util.AuthZUtils._ +import org.apache.kyuubi.plugin.spark.authz.util.PathIdentifier._ import org.apache.kyuubi.util.reflect.ReflectUtils._ /** @@ -78,14 +80,28 @@ object TableExtractor { class TableIdentifierTableExtractor extends TableExtractor { override def apply(spark: SparkSession, v1: AnyRef): Option[Table] = { val identifier = v1.asInstanceOf[TableIdentifier] - val owner = - try { - val catalogTable = spark.sessionState.catalog.getTableMetadata(identifier) - Option(catalogTable.owner).filter(_.nonEmpty) - } catch { - case _: Exception => None - } - Some(Table(None, identifier.database, identifier.table, owner)) + if (isPathIdentifier(identifier.table, spark)) { + None + } else { + val owner = + try { + val catalogTable = spark.sessionState.catalog.getTableMetadata(identifier) + Option(catalogTable.owner).filter(_.nonEmpty) + } catch { + case _: Exception => None + } + Some(Table(None, identifier.database, identifier.table, owner)) + } + } +} + +/** + * org.apache.spark.sql.catalyst.TableIdentifier Option + */ +class TableIdentifierOptionTableExtractor extends TableExtractor { + override def apply(spark: SparkSession, v1: AnyRef): Option[Table] = { + val tableIdentifier = v1.asInstanceOf[Option[TableIdentifier]] + tableIdentifier.flatMap(lookupExtractor[TableIdentifierTableExtractor].apply(spark, _)) } } @@ -133,10 +149,10 @@ class ResolvedTableTableExtractor extends TableExtractor { * org.apache.spark.sql.connector.catalog.Identifier */ class IdentifierTableExtractor extends TableExtractor { - override def apply(spark: SparkSession, v1: AnyRef): Option[Table] = { - val namespace = invokeAs[Array[String]](v1, "namespace") - val table = invokeAs[String](v1, "name") - Some(Table(None, Some(quote(namespace)), table, None)) + override def apply(spark: SparkSession, v1: AnyRef): Option[Table] = v1 match { + case identifier: Identifier if !isPathIdentifier(identifier.name(), spark) => + Some(Table(None, Some(quote(identifier.namespace())), identifier.name(), None)) + case _ => None } } @@ -174,18 +190,18 @@ class ExpressionSeqTableExtractor extends TableExtractor { class DataSourceV2RelationTableExtractor extends TableExtractor { override def apply(spark: SparkSession, v1: AnyRef): Option[Table] = { val plan = v1.asInstanceOf[LogicalPlan] - val maybeV2Relation = plan.find(_.getClass.getSimpleName == "DataSourceV2Relation") - maybeV2Relation match { - case None => None - case Some(v2Relation) => - val maybeCatalogPlugin = invokeAs[Option[AnyRef]](v2Relation, "catalog") - val maybeCatalog = maybeCatalogPlugin.flatMap(catalogPlugin => + plan.find(_.getClass.getSimpleName == "DataSourceV2Relation").get match { + case v2Relation: DataSourceV2Relation + if v2Relation.identifier.isEmpty || + !isPathIdentifier(v2Relation.identifier.get.name(), spark) => + val maybeCatalog = v2Relation.catalog.flatMap(catalogPlugin => lookupExtractor[CatalogPluginCatalogExtractor].apply(catalogPlugin)) - lookupExtractor[TableTableExtractor].apply(spark, invokeAs[AnyRef](v2Relation, "table")) + lookupExtractor[TableTableExtractor].apply(spark, v2Relation.table) .map { table => val maybeOwner = TableExtractor.getOwner(v2Relation) table.copy(catalog = maybeCatalog, owner = maybeOwner) } + case _ => None } } } @@ -207,12 +223,16 @@ class LogicalRelationTableExtractor extends TableExtractor { */ class ResolvedDbObjectNameTableExtractor extends TableExtractor { override def apply(spark: SparkSession, v1: AnyRef): Option[Table] = { - val catalogVal = invokeAs[AnyRef](v1, "catalog") - val catalog = lookupExtractor[CatalogPluginCatalogExtractor].apply(catalogVal) val nameParts = invokeAs[Seq[String]](v1, "nameParts") - val namespace = nameParts.init.toArray val table = nameParts.last - Some(Table(catalog, Some(quote(namespace)), table, None)) + if (isPathIdentifier(table, spark)) { + None + } else { + val catalogVal = invokeAs[AnyRef](v1, "catalog") + val catalog = lookupExtractor[CatalogPluginCatalogExtractor].apply(catalogVal) + val namespace = nameParts.init.toArray + Some(Table(catalog, Some(quote(namespace)), table, None)) + } } } @@ -234,6 +254,25 @@ class ResolvedIdentifierTableExtractor extends TableExtractor { } } +/** + * org.apache.spark.sql.catalyst.plans.logical.SubqueryAlias + */ +class SubqueryAliasTableExtractor extends TableExtractor { + override def apply(spark: SparkSession, v1: AnyRef): Option[Table] = { + v1.asInstanceOf[SubqueryAlias] match { + case SubqueryAlias(_, SubqueryAlias(identifier, _)) => + if (isPathIdentifier(identifier.name, spark)) { + None + } else { + lookupExtractor[StringTableExtractor].apply(spark, identifier.toString()) + } + case SubqueryAlias(identifier, _) if !isPathIdentifier(identifier.name, spark) => + lookupExtractor[StringTableExtractor].apply(spark, identifier.toString()) + case _ => None + } + } +} + /** * org.apache.spark.sql.connector.catalog.Table */ @@ -249,10 +288,11 @@ class HudiDataSourceV2RelationTableExtractor extends TableExtractor { invokeAs[LogicalPlan](v1, "table") match { // Match multipartIdentifier with tableAlias case SubqueryAlias(_, SubqueryAlias(identifier, _)) => - new StringTableExtractor().apply(spark, identifier.toString()) + lookupExtractor[StringTableExtractor].apply(spark, identifier.toString()) // Match multipartIdentifier without tableAlias case SubqueryAlias(identifier, _) => - new StringTableExtractor().apply(spark, identifier.toString()) + lookupExtractor[StringTableExtractor].apply(spark, identifier.toString()) + case _ => None } } } @@ -262,10 +302,11 @@ class HudiMergeIntoTargetTableExtractor extends TableExtractor { invokeAs[LogicalPlan](v1, "targetTable") match { // Match multipartIdentifier with tableAlias case SubqueryAlias(_, SubqueryAlias(identifier, relation)) => - new StringTableExtractor().apply(spark, identifier.toString()) + lookupExtractor[StringTableExtractor].apply(spark, identifier.toString()) // Match multipartIdentifier without tableAlias case SubqueryAlias(identifier, _) => - new StringTableExtractor().apply(spark, identifier.toString()) + lookupExtractor[StringTableExtractor].apply(spark, identifier.toString()) + case _ => None } } } diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/uriExtractors.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/uriExtractors.scala new file mode 100644 index 00000000000..434cc769927 --- /dev/null +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/serde/uriExtractors.scala @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.plugin.spark.authz.serde + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable} +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias} +import org.apache.spark.sql.connector.catalog.Identifier +import org.apache.spark.sql.execution.datasources.HadoopFsRelation +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation + +import org.apache.kyuubi.plugin.spark.authz.util.PathIdentifier._ +import org.apache.kyuubi.util.reflect.ReflectUtils.invokeAs + +trait URIExtractor extends ((SparkSession, AnyRef) => Seq[Uri]) with Extractor + +object URIExtractor { + val uriExtractors: Map[String, URIExtractor] = { + loadExtractorsToMap[URIExtractor] + } +} + +/** + * String + */ +class StringURIExtractor extends URIExtractor { + override def apply(spark: SparkSession, v1: AnyRef): Seq[Uri] = { + v1 match { + case uriPath: String => Seq(Uri(uriPath)) + case Some(uriPath: String) => Seq(Uri(uriPath)) + case _ => Nil + } + } +} + +class StringSeqURIExtractor extends URIExtractor { + override def apply(spark: SparkSession, v1: AnyRef): Seq[Uri] = { + v1.asInstanceOf[Seq[String]].map(Uri) + } +} + +class CatalogStorageFormatURIExtractor extends URIExtractor { + override def apply(spark: SparkSession, v1: AnyRef): Seq[Uri] = { + v1.asInstanceOf[CatalogStorageFormat].locationUri.map(uri => Uri(uri.getPath)).toSeq + } +} + +class PropertiesPathUriExtractor extends URIExtractor { + override def apply(spark: SparkSession, v1: AnyRef): Seq[Uri] = { + v1.asInstanceOf[Map[String, String]].get("path").map(Uri).toSeq + } +} + +class PropertiesLocationUriExtractor extends URIExtractor { + override def apply(spark: SparkSession, v1: AnyRef): Seq[Uri] = { + v1.asInstanceOf[Map[String, String]].get("location").map(Uri).toSeq + } +} + +class BaseRelationFileIndexURIExtractor extends URIExtractor { + override def apply(spark: SparkSession, v1: AnyRef): Seq[Uri] = { + v1 match { + case h: HadoopFsRelation => h.location.rootPaths.map(_.toString).map(Uri) + case _ => Nil + } + } +} + +class TableSpecURIExtractor extends URIExtractor { + override def apply(spark: SparkSession, v1: AnyRef): Seq[Uri] = { + new StringURIExtractor().apply(spark, invokeAs[Option[String]](v1, "location")) + } +} + +class CatalogTableURIExtractor extends URIExtractor { + override def apply(spark: SparkSession, v1: AnyRef): Seq[Uri] = { + v1.asInstanceOf[CatalogTable].storage.locationUri.map(_.toString).map(Uri).toSeq + } +} + +class PartitionLocsSeqURIExtractor extends URIExtractor { + override def apply(spark: SparkSession, v1: AnyRef): Seq[Uri] = { + v1.asInstanceOf[Seq[(_, Option[String])]].flatMap(_._2).map(Uri) + } +} + +class IdentifierURIExtractor extends URIExtractor { + override def apply(spark: SparkSession, v1: AnyRef): Seq[Uri] = v1 match { + case identifier: Identifier if isPathIdentifier(identifier.name(), spark) => + Seq(identifier.name()).map(Uri) + case _ => Nil + } +} + +class SubqueryAliasURIExtractor extends URIExtractor { + override def apply(spark: SparkSession, v1: AnyRef): Seq[Uri] = v1 match { + case SubqueryAlias(_, SubqueryAlias(identifier, _)) => + if (isPathIdentifier(identifier.name, spark)) { + Seq(identifier.name).map(Uri) + } else { + Nil + } + case SubqueryAlias(identifier, _) if isPathIdentifier(identifier.name, spark) => + Seq(identifier.name).map(Uri) + case _ => Nil + } +} + +class DataSourceV2RelationURIExtractor extends URIExtractor { + override def apply(spark: SparkSession, v1: AnyRef): Seq[Uri] = { + val plan = v1.asInstanceOf[LogicalPlan] + plan.find(_.getClass.getSimpleName == "DataSourceV2Relation").get match { + case v2Relation: DataSourceV2Relation + if v2Relation.identifier.isDefined && + isPathIdentifier(v2Relation.identifier.get.name, spark) => + Seq(v2Relation.identifier.get.name).map(Uri) + case _ => Nil + } + } +} + +class ResolvedTableURIExtractor extends URIExtractor { + override def apply(spark: SparkSession, v1: AnyRef): Seq[Uri] = { + val identifier = invokeAs[AnyRef](v1, "identifier") + lookupExtractor[IdentifierURIExtractor].apply(spark, identifier) + } +} + +class TableIdentifierURIExtractor extends URIExtractor { + override def apply(spark: SparkSession, v1: AnyRef): Seq[Uri] = v1 match { + case tableIdentifier: TableIdentifier if isPathIdentifier(tableIdentifier.table, spark) => + Seq(tableIdentifier.table).map(Uri) + case _ => Nil + } +} + +class TableIdentifierOptionURIExtractor extends URIExtractor { + override def apply(spark: SparkSession, v1: AnyRef): Seq[Uri] = v1 match { + case Some(tableIdentifier: TableIdentifier) => + lookupExtractor[TableIdentifierURIExtractor].apply(spark, tableIdentifier) + case _ => Nil + } +} diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/PathIdentifier.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/PathIdentifier.scala new file mode 100644 index 00000000000..2666c37c3d3 --- /dev/null +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/PathIdentifier.scala @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.plugin.spark.authz.util + +import java.io.File + +import org.apache.spark.sql.SparkSession + +/** + * An object for handling table access on path-based table. This is a stop-gap solution + * until PathIdentifiers are implemented in Apache Spark. + */ +object PathIdentifier { + def isPathIdentifier(path: String, spark: SparkSession): Boolean = + spark.sessionState.conf.runSQLonFile && path != null && path.startsWith(File.separator) +} diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/ReservedKeys.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/ReservedKeys.scala index 60d9898452d..81259be2a0e 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/ReservedKeys.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/ReservedKeys.scala @@ -22,4 +22,5 @@ object ReservedKeys { final val KYUUBI_SESSION_USER = "kyuubi.session.user" final val KYUUBI_SESSION_SIGN_PUBLICKEY = "kyuubi.session.sign.publickey" final val KYUUBI_SESSION_USER_SIGN = "kyuubi.session.user.sign" + final var KYUUBI_EXPLAIN_COMMAND_EXECUTION_ID = "kyuubi.authz.command.explain.executionid" } diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/WithInternalChildren.scala b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/WithInternalChildren.scala index bbce1dff89e..582b34abee4 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/WithInternalChildren.scala +++ b/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/WithInternalChildren.scala @@ -17,6 +17,7 @@ package org.apache.kyuubi.plugin.spark.authz.util +import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan trait WithInternalChildren { @@ -26,3 +27,7 @@ trait WithInternalChildren { trait WithInternalChild { def withNewChildInternal(newChild: LogicalPlan): LogicalPlan } + +trait WithInternalExpressionChild { + def withNewChildInternal(newChild: Expression): Expression +} diff --git a/extensions/spark/kyuubi-spark-authz/src/test/gen/scala/org/apache/kyuubi/plugin/spark/authz/gen/PolicyJsonFileGenerator.scala b/extensions/spark/kyuubi-spark-authz/src/test/gen/scala/org/apache/kyuubi/plugin/spark/authz/gen/PolicyJsonFileGenerator.scala index 7faddd0c7fa..afc7a5fde53 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/gen/scala/org/apache/kyuubi/plugin/spark/authz/gen/PolicyJsonFileGenerator.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/gen/scala/org/apache/kyuubi/plugin/spark/authz/gen/PolicyJsonFileGenerator.scala @@ -27,9 +27,9 @@ import com.fasterxml.jackson.databind.json.JsonMapper import com.fasterxml.jackson.databind.node.ObjectNode import com.fasterxml.jackson.module.scala.DefaultScalaModule import org.apache.ranger.plugin.model.RangerPolicy +// scalastyle:off import org.scalatest.funsuite.AnyFunSuite -// scalastyle:off import org.apache.kyuubi.plugin.spark.authz.RangerTestNamespace._ import org.apache.kyuubi.plugin.spark.authz.RangerTestUsers._ import org.apache.kyuubi.plugin.spark.authz.gen.KRangerPolicyItemAccess.allowTypes @@ -37,6 +37,7 @@ import org.apache.kyuubi.plugin.spark.authz.gen.KRangerPolicyResource._ import org.apache.kyuubi.plugin.spark.authz.gen.RangerAccessType._ import org.apache.kyuubi.plugin.spark.authz.gen.RangerClassConversions._ import org.apache.kyuubi.util.AssertionUtils._ +import org.apache.kyuubi.util.GoldenFileUtils._ /** * Generates the policy file to test/main/resources dir. @@ -59,11 +60,9 @@ class PolicyJsonFileGenerator extends AnyFunSuite { .build() test("check ranger policy file") { - val pluginHome = getClass.getProtectionDomain.getCodeSource.getLocation.getPath - .split("target").head val policyFileName = "sparkSql_hive_jenkins.json" - val policyFilePath = - Paths.get(pluginHome, "src", "test", "resources", policyFileName) + val policyFilePath = Paths.get( + s"${getCurrentModuleHome(this)}/src/test/resources/$policyFileName") val generatedStr = mapper.writerWithDefaultPrettyPrinter() .writeValueAsString(servicePolicies) @@ -108,6 +107,7 @@ class PolicyJsonFileGenerator extends AnyFunSuite { policyAccessForDefaultBobUse, policyAccessForDefaultBobSelect, policyAccessForPermViewAccessOnly, + policyAccessForTable2AccessOnly, // row filter policyFilterForSrcTableKeyLessThan20, policyFilterForPermViewKeyLessThan20, @@ -345,4 +345,16 @@ class PolicyJsonFileGenerator extends AnyFunSuite { users = List(permViewOnlyUser), accesses = allowTypes(select), delegateAdmin = true))) + + private val policyAccessForTable2AccessOnly = KRangerPolicy( + name = "someone_access_table2", + resources = Map( + databaseRes(defaultDb), + tableRes("table2"), + allColumnRes), + policyItems = List( + KRangerPolicyItem( + users = List(table2OnlyUser), + accesses = allowTypes(select), + delegateAdmin = true))) } diff --git a/extensions/spark/kyuubi-spark-authz/src/test/resources/sparkSql_hive_jenkins.json b/extensions/spark/kyuubi-spark-authz/src/test/resources/sparkSql_hive_jenkins.json index 6c160d3216a..76d8c788a22 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/resources/sparkSql_hive_jenkins.json +++ b/extensions/spark/kyuubi-spark-authz/src/test/resources/sparkSql_hive_jenkins.json @@ -544,6 +544,55 @@ "isEnabled" : true, "version" : 1, "service" : "hive_jenkins", + "name" : "someone_access_table2", + "policyType" : 0, + "policyPriority" : 0, + "description" : "", + "isAuditEnabled" : true, + "resources" : { + "database" : { + "values" : [ "default" ], + "isExcludes" : false, + "isRecursive" : false + }, + "column" : { + "values" : [ "*" ], + "isExcludes" : false, + "isRecursive" : false + }, + "table" : { + "values" : [ "table2" ], + "isExcludes" : false, + "isRecursive" : false + } + }, + "conditions" : [ ], + "policyItems" : [ { + "accesses" : [ { + "type" : "select", + "isAllowed" : true + } ], + "users" : [ "user_table2_only" ], + "groups" : [ ], + "roles" : [ ], + "conditions" : [ ], + "delegateAdmin" : true + } ], + "denyPolicyItems" : [ ], + "allowExceptions" : [ ], + "denyExceptions" : [ ], + "dataMaskPolicyItems" : [ ], + "rowFilterPolicyItems" : [ ], + "options" : { }, + "validitySchedules" : [ ], + "policyLabels" : [ ], + "isDenyAllElse" : false + }, { + "id" : 9, + "guid" : "45c48cce-2e2d-3fbd-aa1a-fc51c7c6ad26", + "isEnabled" : true, + "version" : 1, + "service" : "hive_jenkins", "name" : "src_key_less_than_20", "policyType" : 2, "policyPriority" : 0, @@ -586,8 +635,8 @@ "policyLabels" : [ ], "isDenyAllElse" : false }, { - "id" : 9, - "guid" : "45c48cce-2e2d-3fbd-aa1a-fc51c7c6ad26", + "id" : 10, + "guid" : "d3d94468-02a4-3259-b55d-38e6d163e820", "isEnabled" : true, "version" : 1, "service" : "hive_jenkins", @@ -633,8 +682,8 @@ "policyLabels" : [ ], "isDenyAllElse" : false }, { - "id" : 10, - "guid" : "d3d94468-02a4-3259-b55d-38e6d163e820", + "id" : 11, + "guid" : "6512bd43-d9ca-36e0-ac99-0b0a82652dca", "isEnabled" : true, "version" : 1, "service" : "hive_jenkins", @@ -685,8 +734,8 @@ "policyLabels" : [ ], "isDenyAllElse" : false }, { - "id" : 11, - "guid" : "6512bd43-d9ca-36e0-ac99-0b0a82652dca", + "id" : 12, + "guid" : "c20ad4d7-6fe9-3759-aa27-a0c99bff6710", "isEnabled" : true, "version" : 1, "service" : "hive_jenkins", @@ -737,8 +786,8 @@ "policyLabels" : [ ], "isDenyAllElse" : false }, { - "id" : 12, - "guid" : "c20ad4d7-6fe9-3759-aa27-a0c99bff6710", + "id" : 13, + "guid" : "c51ce410-c124-310e-8db5-e4b97fc2af39", "isEnabled" : true, "version" : 1, "service" : "hive_jenkins", @@ -789,8 +838,8 @@ "policyLabels" : [ ], "isDenyAllElse" : false }, { - "id" : 13, - "guid" : "c51ce410-c124-310e-8db5-e4b97fc2af39", + "id" : 14, + "guid" : "aab32389-22bc-325a-af60-6eb525ffdc56", "isEnabled" : true, "version" : 1, "service" : "hive_jenkins", @@ -841,8 +890,8 @@ "policyLabels" : [ ], "isDenyAllElse" : false }, { - "id" : 14, - "guid" : "aab32389-22bc-325a-af60-6eb525ffdc56", + "id" : 15, + "guid" : "9bf31c7f-f062-336a-96d3-c8bd1f8f2ff3", "isEnabled" : true, "version" : 1, "service" : "hive_jenkins", @@ -893,8 +942,8 @@ "policyLabels" : [ ], "isDenyAllElse" : false }, { - "id" : 15, - "guid" : "9bf31c7f-f062-336a-96d3-c8bd1f8f2ff3", + "id" : 16, + "guid" : "c74d97b0-1eae-357e-84aa-9d5bade97baf", "isEnabled" : true, "version" : 1, "service" : "hive_jenkins", diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegesBuilderSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegesBuilderSuite.scala index 54b91eb2837..214a0375485 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegesBuilderSuite.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/PrivilegesBuilderSuite.scala @@ -306,17 +306,26 @@ abstract class PrivilegesBuilderSuite extends AnyFunSuite val (in, out, operationType) = PrivilegesBuilder.build(plan, spark) assert(in.isEmpty) - assert(out.size === 1) - val po = out.head - assert(po.actionType === PrivilegeObjectActionType.OTHER) - assert(po.privilegeObjectType === PrivilegeObjectType.TABLE_OR_VIEW) - assert(po.catalog.isEmpty) - assertEqualsIgnoreCase(reusedDb)(po.dbname) - assertEqualsIgnoreCase(reusedPartTableShort)(po.objectName) - assert(po.columns.head === "pid") - checkTableOwner(po) - val accessType = ranger.AccessType(po, operationType, isInput = false) - assert(accessType === AccessType.ALTER) + assert(out.size === 2) + val po0 = out.head + assert(po0.actionType === PrivilegeObjectActionType.OTHER) + assert(po0.privilegeObjectType === PrivilegeObjectType.TABLE_OR_VIEW) + assert(po0.catalog.isEmpty) + assertEqualsIgnoreCase(reusedDb)(po0.dbname) + assertEqualsIgnoreCase(reusedPartTableShort)(po0.objectName) + assert(po0.columns.head === "pid") + checkTableOwner(po0) + val accessType0 = ranger.AccessType(po0, operationType, isInput = false) + assert(accessType0 === AccessType.ALTER) + + val po1 = out.last + assert(po1.actionType === PrivilegeObjectActionType.OTHER) + assert(po1.catalog.isEmpty) + assert(po1.dbname === newLoc) + assert(po1.columns === Seq.empty) + checkTableOwner(po1) + val accessType1 = ranger.AccessType(po1, operationType, isInput = false) + assert(accessType1 === AccessType.WRITE) } test("AlterTable(Un)SetPropertiesCommand") { @@ -1292,16 +1301,25 @@ class InMemoryPrivilegeBuilderSuite extends PrivilegesBuilderSuite { "org.apache.spark.sql.execution.command.AlterDatabaseSetLocationCommand") assert(operationType === ALTERDATABASE_LOCATION) assert(in.isEmpty) - assert(out.size === 1) - val po = out.head - assert(po.actionType === PrivilegeObjectActionType.OTHER) - assert(po.privilegeObjectType === PrivilegeObjectType.DATABASE) - assert(po.catalog.isEmpty) - assertEqualsIgnoreCase(defaultDb)(po.dbname) - assertEqualsIgnoreCase(defaultDb)(po.objectName) - assert(po.columns.isEmpty) - val accessType = ranger.AccessType(po, operationType, isInput = false) - assert(accessType === AccessType.ALTER) + assert(out.size === 2) + val po0 = out.head + assert(po0.actionType === PrivilegeObjectActionType.OTHER) + assert(po0.privilegeObjectType === PrivilegeObjectType.DATABASE) + assert(po0.catalog.isEmpty) + assertEqualsIgnoreCase(defaultDb)(po0.dbname) + assertEqualsIgnoreCase(defaultDb)(po0.objectName) + assert(po0.columns.isEmpty) + val accessType0 = ranger.AccessType(po0, operationType, isInput = false) + assert(accessType0 === AccessType.ALTER) + + val po1 = out.last + assert(po1.actionType === PrivilegeObjectActionType.OTHER) + assert(po1.catalog.isEmpty) + assertEqualsIgnoreCase(defaultDb)(po0.dbname) + assertEqualsIgnoreCase(defaultDb)(po0.objectName) + assert(po1.columns.isEmpty) + val accessType1 = ranger.AccessType(po1, operationType, isInput = false) + assert(accessType1 === AccessType.WRITE) } test("CreateDataSourceTableAsSelectCommand") { @@ -1430,18 +1448,27 @@ class HiveCatalogPrivilegeBuilderSuite extends PrivilegesBuilderSuite { .queryExecution.analyzed val (in, out, operationType) = PrivilegesBuilder.build(plan, spark) assert(operationType === LOAD) - assert(in.isEmpty) - - assert(out.size === 1) - val po0 = out.head - assert(po0.actionType === PrivilegeObjectActionType.INSERT_OVERWRITE) - assert(po0.privilegeObjectType === PrivilegeObjectType.TABLE_OR_VIEW) - assertEqualsIgnoreCase(reusedDb)(po0.dbname) - assert(po0.objectName equalsIgnoreCase tableName.split("\\.").last) + assert(in.size === 1) + val po0 = in.head + assert(po0.actionType === PrivilegeObjectActionType.OTHER) + assert(po0.privilegeObjectType === PrivilegeObjectType.DFS_URI) + assert(po0.dbname === dataPath) + assert(po0.objectName === null) assert(po0.columns.isEmpty) checkTableOwner(po0) - val accessType0 = ranger.AccessType(po0, operationType, isInput = false) - assert(accessType0 === AccessType.UPDATE) + val accessType0 = ranger.AccessType(po0, operationType, isInput = true) + assert(accessType0 === AccessType.READ) + + assert(out.size === 1) + val po1 = out.head + assert(po1.actionType === PrivilegeObjectActionType.INSERT_OVERWRITE) + assert(po1.privilegeObjectType === PrivilegeObjectType.TABLE_OR_VIEW) + assertEqualsIgnoreCase(reusedDb)(po1.dbname) + assert(po1.objectName equalsIgnoreCase tableName.split("\\.").last) + assert(po1.columns.isEmpty) + checkTableOwner(po1) + val accessType1 = ranger.AccessType(po1, operationType, isInput = false) + assert(accessType1 === AccessType.UPDATE) } } @@ -1450,7 +1477,7 @@ class HiveCatalogPrivilegeBuilderSuite extends PrivilegesBuilderSuite { val directory = File(tableDirectory).createDirectory() val plan = sql( s""" - |INSERT OVERWRITE DIRECTORY '$directory.path' + |INSERT OVERWRITE DIRECTORY '${directory.path}' |USING parquet |SELECT * FROM $reusedPartTable""".stripMargin) .queryExecution.analyzed @@ -1467,7 +1494,15 @@ class HiveCatalogPrivilegeBuilderSuite extends PrivilegesBuilderSuite { val accessType0 = ranger.AccessType(po0, operationType, isInput = true) assert(accessType0 === AccessType.SELECT) - assert(out.isEmpty) + assert(out.size == 1) + val po1 = out.head + assert(po1.actionType === PrivilegeObjectActionType.OTHER) + assert(po1.privilegeObjectType === PrivilegeObjectType.DFS_URI) + assert(po1.dbname === directory.path) + assert(po1.objectName === null) + assert(po1.columns === Seq.empty) + val accessType1 = ranger.AccessType(po1, operationType, isInput = false) + assert(accessType1 == AccessType.WRITE) } test("InsertIntoDataSourceCommand") { @@ -1523,7 +1558,6 @@ class HiveCatalogPrivilegeBuilderSuite extends PrivilegesBuilderSuite { checkTableOwner(po) val accessType = ranger.AccessType(po, operationType, isInput = false) assert(accessType === AccessType.UPDATE) - } } } @@ -1574,7 +1608,7 @@ class HiveCatalogPrivilegeBuilderSuite extends PrivilegesBuilderSuite { val directory = File(tableDirectory).createDirectory() val plan = sql( s""" - |INSERT OVERWRITE DIRECTORY '$directory.path' + |INSERT OVERWRITE DIRECTORY '${directory.path}' |USING parquet |SELECT * FROM $reusedPartTable""".stripMargin) .queryExecution.analyzed @@ -1591,7 +1625,15 @@ class HiveCatalogPrivilegeBuilderSuite extends PrivilegesBuilderSuite { val accessType0 = ranger.AccessType(po0, operationType, isInput = true) assert(accessType0 === AccessType.SELECT) - assert(out.isEmpty) + assert(out.size == 1) + val po1 = out.head + assert(po1.actionType === PrivilegeObjectActionType.OTHER) + assert(po1.privilegeObjectType === PrivilegeObjectType.DFS_URI) + assert(po1.dbname === directory.path) + assert(po1.objectName === null) + assert(po1.columns === Seq.empty) + val accessType1 = ranger.AccessType(po1, operationType, isInput = false) + assert(accessType1 == AccessType.WRITE) } test("InsertIntoHiveDirCommand") { @@ -1599,7 +1641,7 @@ class HiveCatalogPrivilegeBuilderSuite extends PrivilegesBuilderSuite { val directory = File(tableDirectory).createDirectory() val plan = sql( s""" - |INSERT OVERWRITE DIRECTORY '$directory.path' + |INSERT OVERWRITE DIRECTORY '${directory.path}' |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' |SELECT * FROM $reusedPartTable""".stripMargin) .queryExecution.analyzed @@ -1616,7 +1658,15 @@ class HiveCatalogPrivilegeBuilderSuite extends PrivilegesBuilderSuite { val accessType0 = ranger.AccessType(po0, operationType, isInput = true) assert(accessType0 === AccessType.SELECT) - assert(out.isEmpty) + assert(out.size == 1) + val po1 = out.head + assert(po1.actionType === PrivilegeObjectActionType.OTHER) + assert(po1.privilegeObjectType === PrivilegeObjectType.DFS_URI) + assert(po1.dbname === directory.path) + assert(po1.objectName === null) + assert(po1.columns === Seq.empty) + val accessType1 = ranger.AccessType(po1, operationType, isInput = false) + assert(accessType1 == AccessType.WRITE) } test("InsertIntoHiveTableCommand") { diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/RangerTestResources.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/RangerTestResources.scala index 0b1df64da78..4f870d504f5 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/RangerTestResources.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/RangerTestResources.scala @@ -28,6 +28,7 @@ object RangerTestUsers { val createOnlyUser = "create_only_user" val defaultTableOwner = "default_table_owner" val permViewOnlyUser = "user_perm_view_only" + val table2OnlyUser = "user_table2_only" // non-authorized users val invisibleUser = "i_am_invisible" @@ -41,6 +42,7 @@ object RangerTestNamespace { val sparkCatalog = "spark_catalog" val icebergNamespace = "iceberg_ns" val hudiNamespace = "hudi_ns" + val deltaNamespace = "delta_ns" val namespace1 = "ns1" val namespace2 = "ns2" } diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/SparkSessionProvider.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/SparkSessionProvider.scala index c7e541ef525..7aa4d99e45c 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/SparkSessionProvider.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/SparkSessionProvider.scala @@ -28,6 +28,7 @@ import org.scalatest.Assertions._ import org.apache.kyuubi.Utils import org.apache.kyuubi.plugin.spark.authz.RangerTestUsers._ import org.apache.kyuubi.plugin.spark.authz.V2JdbcTableCatalogPrivilegesBuilderSuite._ +import org.apache.kyuubi.plugin.spark.authz.ranger.DeltaCatalogRangerSparkExtensionSuite._ import org.apache.kyuubi.plugin.spark.authz.util.AuthZUtils._ trait SparkSessionProvider { @@ -106,7 +107,7 @@ trait SparkSessionProvider { } private def isCatalogSupportPurge(catalogName: String): Boolean = { - val unsupportedCatalogs = Set(v2JdbcTableCatalogClassName) + val unsupportedCatalogs = Set(v2JdbcTableCatalogClassName, deltaCatalogClassName) spark.conf.getOption(s"spark.sql.catalog.$catalogName") match { case Some(catalog) if !unsupportedCatalogs.contains(catalog) => true case _ => false diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/V2CommandsPrivilegesSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/V2CommandsPrivilegesSuite.scala index 149c9ba8f6b..62b7939b3cb 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/V2CommandsPrivilegesSuite.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/V2CommandsPrivilegesSuite.scala @@ -738,16 +738,25 @@ abstract class V2CommandsPrivilegesSuite extends PrivilegesBuilderSuite { "org.apache.spark.sql.catalyst.plans.logical.SetNamespaceLocation") assert(operationType === ALTERDATABASE_LOCATION) assert(in.isEmpty) - assert(out.size === 1) - val po = out.head - assert(po.actionType === PrivilegeObjectActionType.OTHER) - assert(po.privilegeObjectType === PrivilegeObjectType.DATABASE) - assert(po.catalog.get === sparkSessionCatalogName) - assertEqualsIgnoreCase(defaultDb)(po.dbname) - assertEqualsIgnoreCase(defaultDb)(po.objectName) - assert(po.columns.isEmpty) - val accessType = ranger.AccessType(po, operationType, isInput = false) - assert(accessType === AccessType.ALTER) + assert(out.size === 2) + val po0 = out.head + assert(po0.actionType === PrivilegeObjectActionType.OTHER) + assert(po0.privilegeObjectType === PrivilegeObjectType.DATABASE) + assert(po0.catalog.get === sparkSessionCatalogName) + assertEqualsIgnoreCase(defaultDb)(po0.dbname) + assertEqualsIgnoreCase(defaultDb)(po0.objectName) + assert(po0.columns.isEmpty) + val accessType0 = ranger.AccessType(po0, operationType, isInput = false) + assert(accessType0 === AccessType.ALTER) + + val po1 = out.last + assert(po1.actionType === PrivilegeObjectActionType.OTHER) + assert(po1.catalog.isEmpty) + assertEqualsIgnoreCase(defaultDb)(po0.dbname) + assertEqualsIgnoreCase(defaultDb)(po0.objectName) + assert(po1.columns.isEmpty) + val accessType1 = ranger.AccessType(po1, operationType, isInput = false) + assert(accessType1 === AccessType.WRITE) } test("DescribeNamespace") { diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/CheckAuthzExtractorSPISuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/CheckAuthzExtractorSPISuite.scala new file mode 100644 index 00000000000..7a66e99eafa --- /dev/null +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/CheckAuthzExtractorSPISuite.scala @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.plugin.spark.authz.gen + +import java.nio.file.Paths + +// scalastyle:off +import org.scalatest.funsuite.AnyFunSuite + +import org.apache.kyuubi.util.AssertionUtils._ +import org.apache.kyuubi.util.GoldenFileUtils._ + +class CheckAuthzExtractorSPISuite extends AnyFunSuite { + // scalastyle:on + + test("check authz extractor SPI service file sorted") { + Seq( + "org.apache.kyuubi.plugin.spark.authz.serde.ActionTypeExtractor", + "org.apache.kyuubi.plugin.spark.authz.serde.CatalogExtractor", + "org.apache.kyuubi.plugin.spark.authz.serde.ColumnExtractor", + "org.apache.kyuubi.plugin.spark.authz.serde.DatabaseExtractor", + "org.apache.kyuubi.plugin.spark.authz.serde.FunctionExtractor", + "org.apache.kyuubi.plugin.spark.authz.serde.FunctionTypeExtractor", + "org.apache.kyuubi.plugin.spark.authz.serde.QueryExtractor", + "org.apache.kyuubi.plugin.spark.authz.serde.TableExtractor", + "org.apache.kyuubi.plugin.spark.authz.serde.TableTypeExtractor", + "org.apache.kyuubi.plugin.spark.authz.serde.URIExtractor") + .foreach { fileName => + val filePath = Paths.get( + s"${getCurrentModuleHome(this)}/src/main/resources/META-INF/services/$fileName") + assertFileContentSorted(filePath) + } + } +} diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/DatabaseCommands.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/DatabaseCommands.scala index a61c142edb5..ebaddf6228a 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/DatabaseCommands.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/DatabaseCommands.scala @@ -20,7 +20,23 @@ package org.apache.kyuubi.plugin.spark.authz.gen import org.apache.kyuubi.plugin.spark.authz.OperationType._ import org.apache.kyuubi.plugin.spark.authz.serde._ -object DatabaseCommands { +object DatabaseCommands extends CommandSpecs[DatabaseCommandSpec] { + + val CreateDatabaseCommand = { + DatabaseCommandSpec( + "org.apache.spark.sql.execution.command.CreateDatabaseCommand", + Seq(DatabaseDesc("databaseName", classOf[StringDatabaseExtractor])), + CREATEDATABASE, + Seq(UriDesc("path", classOf[StringURIExtractor]))) + } + + val AlterDatabaseSetLocationCommand = { + DatabaseCommandSpec( + "org.apache.spark.sql.execution.command.AlterDatabaseSetLocationCommand", + Seq(DatabaseDesc("databaseName", classOf[StringDatabaseExtractor])), + ALTERDATABASE_LOCATION, + Seq(UriDesc("location", classOf[StringURIExtractor]))) + } val AlterDatabaseProperties = { DatabaseCommandSpec( @@ -47,7 +63,8 @@ object DatabaseCommands { DatabaseCommandSpec( "org.apache.spark.sql.catalyst.plans.logical.SetNamespaceLocation", Seq(DatabaseDesc("namespace", classOf[ResolvedNamespaceDatabaseExtractor])), - ALTERDATABASE_LOCATION) + ALTERDATABASE_LOCATION, + Seq(UriDesc("location", classOf[StringURIExtractor]))) } val CreateNamespace = { @@ -62,7 +79,8 @@ object DatabaseCommands { DatabaseCommandSpec( "org.apache.spark.sql.catalyst.plans.logical.CreateNamespace", Seq(databaseDesc1, databaseDesc2, databaseDesc3), - CREATEDATABASE) + CREATEDATABASE, + Seq(UriDesc("properties", classOf[PropertiesLocationUriExtractor]))) } val DropNamespace = { @@ -141,18 +159,14 @@ object DatabaseCommands { DatabaseCommandSpec(cmd, Seq(databaseDesc), DESCDATABASE) } - val data: Array[DatabaseCommandSpec] = Array( + override def specs: Seq[DatabaseCommandSpec] = Seq( AlterDatabaseProperties, - AlterDatabaseProperties.copy( - classname = "org.apache.spark.sql.execution.command.AlterDatabaseSetLocationCommand", - opType = ALTERDATABASE_LOCATION), - AlterDatabaseProperties.copy( - classname = "org.apache.spark.sql.execution.command.CreateDatabaseCommand", - opType = CREATEDATABASE), + AlterDatabaseSetLocationCommand, AlterDatabaseProperties.copy( classname = "org.apache.spark.sql.execution.command.DropDatabaseCommand", opType = DROPDATABASE), AnalyzeTables, + CreateDatabaseCommand, CreateNamespace, CommentOnNamespace, DescribeDatabase, diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/DeltaCommands.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/DeltaCommands.scala new file mode 100644 index 00000000000..12f434a50c1 --- /dev/null +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/DeltaCommands.scala @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.plugin.spark.authz.gen + +import org.apache.kyuubi.plugin.spark.authz.OperationType._ +import org.apache.kyuubi.plugin.spark.authz.PrivilegeObjectActionType._ +import org.apache.kyuubi.plugin.spark.authz.serde._ + +object DeltaCommands extends CommandSpecs[TableCommandSpec] { + + val DeleteCommand = { + val cmd = "org.apache.spark.sql.delta.commands.DeleteCommand" + val actionTypeDesc = ActionTypeDesc(actionType = Some(UPDATE), comment = "Delta") + val tableDesc = TableDesc( + "target", + classOf[SubqueryAliasTableExtractor], + actionTypeDesc = Some(actionTypeDesc), + comment = "Delta") + val uriDescs = Seq(UriDesc("target", classOf[SubqueryAliasURIExtractor], comment = "Delta")) + TableCommandSpec(cmd, Seq(tableDesc), uriDescs = uriDescs) + } + + val UpdateCommand = { + val cmd = "org.apache.spark.sql.delta.commands.UpdateCommand" + DeleteCommand.copy(classname = cmd) + } + + val MergeIntoCommand = { + val cmd = "org.apache.spark.sql.delta.commands.MergeIntoCommand" + val queryDesc = QueryDesc("source", comment = "Delta") + DeleteCommand.copy(classname = cmd, queryDescs = Seq(queryDesc)) + } + + val OptimizeTableCommand = { + val cmd = "org.apache.spark.sql.delta.commands.OptimizeTableCommand" + val childDesc = TableDesc("child", classOf[ResolvedTableTableExtractor], comment = "Delta") + val tableDesc = + TableDesc("tableId", classOf[TableIdentifierOptionTableExtractor], comment = "Delta") + val uriDescs = Seq( + UriDesc("child", classOf[ResolvedTableURIExtractor], comment = "Delta"), + UriDesc("tableId", classOf[TableIdentifierOptionURIExtractor], comment = "Delta"), + UriDesc("path", classOf[StringURIExtractor], comment = "Delta")) + TableCommandSpec(cmd, Seq(childDesc, tableDesc), ALTERTABLE_COMPACT, uriDescs = uriDescs) + } + + val VacuumTableCommand = { + val cmd = "io.delta.tables.execution.VacuumTableCommand" + val childDesc = TableDesc("child", classOf[ResolvedTableTableExtractor], comment = "Delta") + val tableDesc = + TableDesc("table", classOf[TableIdentifierOptionTableExtractor], comment = "Delta") + val uriDescs = Seq( + UriDesc("child", classOf[ResolvedTableURIExtractor], comment = "Delta"), + UriDesc("table", classOf[TableIdentifierOptionURIExtractor], comment = "Delta"), + UriDesc("path", classOf[StringURIExtractor], comment = "Delta")) + TableCommandSpec(cmd, Seq(childDesc, tableDesc), MSCK, uriDescs = uriDescs) + } + + override def specs: Seq[TableCommandSpec] = Seq( + DeleteCommand, + MergeIntoCommand, + OptimizeTableCommand, + UpdateCommand, + VacuumTableCommand) +} diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/FunctionCommands.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/FunctionCommands.scala index 1822e80fc8a..d5c849dd6bf 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/FunctionCommands.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/FunctionCommands.scala @@ -21,7 +21,7 @@ import org.apache.kyuubi.plugin.spark.authz.OperationType._ import org.apache.kyuubi.plugin.spark.authz.serde._ import org.apache.kyuubi.plugin.spark.authz.serde.FunctionType.{SYSTEM, TEMP} -object FunctionCommands { +object FunctionCommands extends CommandSpecs[FunctionCommandSpec] { val CreateFunction = { val cmd = "org.apache.spark.sql.execution.command.CreateFunctionCommand" @@ -83,9 +83,9 @@ object FunctionCommands { FunctionCommandSpec(cmd, Seq(functionDesc), RELOADFUNCTION) } - val data: Array[FunctionCommandSpec] = Array( + override def specs: Seq[FunctionCommandSpec] = Seq( CreateFunction, DropFunction, DescribeFunction, - RefreshFunction).sortBy(_.classname) + RefreshFunction) } diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/HudiCommands.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/HudiCommands.scala index 9b843b1f600..87fc509b5d0 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/HudiCommands.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/HudiCommands.scala @@ -22,27 +22,41 @@ import org.apache.kyuubi.plugin.spark.authz.PrivilegeObjectActionType._ import org.apache.kyuubi.plugin.spark.authz.serde._ import org.apache.kyuubi.plugin.spark.authz.serde.TableType._ -object HudiCommands { +object HudiCommands extends CommandSpecs[TableCommandSpec] { val AlterHoodieTableAddColumnsCommand = { val cmd = "org.apache.spark.sql.hudi.command.AlterHoodieTableAddColumnsCommand" - val columnDesc = ColumnDesc("colsToAdd", classOf[StructFieldSeqColumnExtractor]) - val tableDesc = TableDesc("tableId", classOf[TableIdentifierTableExtractor], Some(columnDesc)) + val columnDesc = + ColumnDesc("colsToAdd", classOf[StructFieldSeqColumnExtractor], comment = "Hudi") + val tableDesc = TableDesc( + "tableId", + classOf[TableIdentifierTableExtractor], + Some(columnDesc), + comment = "Hudi") TableCommandSpec(cmd, Seq(tableDesc), ALTERTABLE_ADDCOLS) } val AlterHoodieTableChangeColumnCommand = { val cmd = "org.apache.spark.sql.hudi.command.AlterHoodieTableChangeColumnCommand" - val columnDesc = ColumnDesc("columnName", classOf[StringColumnExtractor]) + val columnDesc = ColumnDesc("columnName", classOf[StringColumnExtractor], comment = "Hudi") val tableDesc = - TableDesc("tableIdentifier", classOf[TableIdentifierTableExtractor], Some(columnDesc)) + TableDesc( + "tableIdentifier", + classOf[TableIdentifierTableExtractor], + Some(columnDesc), + comment = "Hudi") TableCommandSpec(cmd, Seq(tableDesc), ALTERTABLE_REPLACECOLS) } val AlterHoodieTableDropPartitionCommand = { val cmd = "org.apache.spark.sql.hudi.command.AlterHoodieTableDropPartitionCommand" - val columnDesc = ColumnDesc("partitionSpecs", classOf[PartitionSeqColumnExtractor]) + val columnDesc = + ColumnDesc("partitionSpecs", classOf[PartitionSeqColumnExtractor], comment = "Hudi") val tableDesc = - TableDesc("tableIdentifier", classOf[TableIdentifierTableExtractor], Some(columnDesc)) + TableDesc( + "tableIdentifier", + classOf[TableIdentifierTableExtractor], + Some(columnDesc), + comment = "Hudi") TableCommandSpec(cmd, Seq(tableDesc), ALTERTABLE_DROPPARTS) } @@ -52,30 +66,32 @@ object HudiCommands { TableTypeDesc( "oldName", classOf[TableIdentifierTableTypeExtractor], - Seq(TEMP_VIEW)) + Seq(TEMP_VIEW), + comment = "Hudi") val oldTableD = TableDesc( "oldName", classOf[TableIdentifierTableExtractor], - tableTypeDesc = Some(oldTableTableTypeDesc)) + tableTypeDesc = Some(oldTableTableTypeDesc), + comment = "Hudi") TableCommandSpec(cmd, Seq(oldTableD), ALTERTABLE_RENAME) } val AlterTableCommand = { val cmd = "org.apache.spark.sql.hudi.command.AlterTableCommand" - val tableDesc = TableDesc("table", classOf[CatalogTableTableExtractor], None) + val tableDesc = TableDesc("table", classOf[CatalogTableTableExtractor], None, comment = "Hudi") TableCommandSpec(cmd, Seq(tableDesc), ALTERTABLE_PROPERTIES) } val Spark31AlterTableCommand = { val cmd = "org.apache.spark.sql.hudi.command.Spark31AlterTableCommand" - val tableDesc = TableDesc("table", classOf[CatalogTableTableExtractor], None) + val tableDesc = TableDesc("table", classOf[CatalogTableTableExtractor], None, comment = "Hudi") TableCommandSpec(cmd, Seq(tableDesc), ALTERTABLE_PROPERTIES) } val CreateHoodieTableCommand = { val cmd = "org.apache.spark.sql.hudi.command.CreateHoodieTableCommand" - val tableDesc = TableDesc("table", classOf[CatalogTableTableExtractor]) + val tableDesc = TableDesc("table", classOf[CatalogTableTableExtractor], comment = "Hudi") TableCommandSpec(cmd, Seq(tableDesc), CREATETABLE) } @@ -92,12 +108,14 @@ object HudiCommands { val tableDesc1 = TableDesc( "targetTable", classOf[TableIdentifierTableExtractor], - setCurrentDatabaseIfMissing = true) + setCurrentDatabaseIfMissing = true, + comment = "Hudi") val tableDesc2 = TableDesc( "sourceTable", classOf[TableIdentifierTableExtractor], isInput = true, - setCurrentDatabaseIfMissing = true) + setCurrentDatabaseIfMissing = true, + comment = "Hudi") TableCommandSpec(cmd, Seq(tableDesc1, tableDesc2), CREATETABLE) } @@ -107,7 +125,8 @@ object HudiCommands { TableTypeDesc( "tableIdentifier", classOf[TableIdentifierTableTypeExtractor], - Seq(TEMP_VIEW)) + Seq(TEMP_VIEW), + comment = "Hudi") TableCommandSpec( cmd, Seq(TableDesc( @@ -124,48 +143,68 @@ object HudiCommands { val TruncateHoodieTableCommand = { val cmd = "org.apache.spark.sql.hudi.command.TruncateHoodieTableCommand" - val columnDesc = ColumnDesc("partitionSpec", classOf[PartitionOptionColumnExtractor]) + val columnDesc = + ColumnDesc("partitionSpec", classOf[PartitionOptionColumnExtractor], comment = "Hudi") val tableDesc = TableDesc( "tableIdentifier", classOf[TableIdentifierTableExtractor], - columnDesc = Some(columnDesc)) + columnDesc = Some(columnDesc), + comment = "Hudi") TableCommandSpec(cmd, Seq(tableDesc), TRUNCATETABLE) } val CompactionHoodieTableCommand = { val cmd = "org.apache.spark.sql.hudi.command.CompactionHoodieTableCommand" - val tableDesc = TableDesc("table", classOf[CatalogTableTableExtractor]) - TableCommandSpec(cmd, Seq(tableDesc, tableDesc.copy(isInput = true)), CREATETABLE) + val tableDesc = TableDesc("table", classOf[CatalogTableTableExtractor], comment = "Hudi") + TableCommandSpec(cmd, Seq(tableDesc), CREATETABLE) } val CompactionShowHoodieTableCommand = { val cmd = "org.apache.spark.sql.hudi.command.CompactionShowHoodieTableCommand" - val tableDesc = TableDesc("table", classOf[CatalogTableTableExtractor], isInput = true) + val tableDesc = + TableDesc("table", classOf[CatalogTableTableExtractor], isInput = true, comment = "Hudi") TableCommandSpec(cmd, Seq(tableDesc), SHOW_TBLPROPERTIES) } + val CompactionHoodiePathCommand = { + val cmd = "org.apache.spark.sql.hudi.command.CompactionHoodiePathCommand" + val uriDesc = UriDesc("path", classOf[StringURIExtractor], comment = "Hudi") + TableCommandSpec( + cmd, + Seq.empty, + CREATETABLE, + uriDescs = Seq(uriDesc)) + } + + val CompactionShowHoodiePathCommand = { + val cmd = "org.apache.spark.sql.hudi.command.CompactionShowHoodiePathCommand" + val uriDesc = UriDesc("path", classOf[StringURIExtractor], isInput = true, comment = "Hudi") + TableCommandSpec(cmd, Seq.empty, SHOW_TBLPROPERTIES, uriDescs = Seq(uriDesc)) + } + val CreateIndexCommand = { val cmd = "org.apache.spark.sql.hudi.command.CreateIndexCommand" - val tableDesc = TableDesc("table", classOf[CatalogTableTableExtractor]) + val tableDesc = TableDesc("table", classOf[CatalogTableTableExtractor], comment = "Hudi") TableCommandSpec(cmd, Seq(tableDesc), CREATEINDEX) } val DropIndexCommand = { val cmd = "org.apache.spark.sql.hudi.command.DropIndexCommand" - val tableDesc = TableDesc("table", classOf[CatalogTableTableExtractor]) + val tableDesc = TableDesc("table", classOf[CatalogTableTableExtractor], comment = "Hudi") TableCommandSpec(cmd, Seq(tableDesc), DROPINDEX) } val ShowIndexCommand = { val cmd = "org.apache.spark.sql.hudi.command.ShowIndexesCommand" - val tableDesc = TableDesc("table", classOf[CatalogTableTableExtractor], isInput = true) + val tableDesc = + TableDesc("table", classOf[CatalogTableTableExtractor], isInput = true, comment = "Hudi") TableCommandSpec(cmd, Seq(tableDesc), SHOWINDEXES) } val RefreshIndexCommand = { val cmd = "org.apache.spark.sql.hudi.command.RefreshIndexCommand" - val tableDesc = TableDesc("table", classOf[CatalogTableTableExtractor]) + val tableDesc = TableDesc("table", classOf[CatalogTableTableExtractor], comment = "Hudi") TableCommandSpec(cmd, Seq(tableDesc), ALTERINDEX_REBUILD) } @@ -175,52 +214,62 @@ object HudiCommands { "logicalRelation", classOf[LogicalRelationTableExtractor], actionTypeDesc = - Some(ActionTypeDesc("overwrite", classOf[OverwriteOrInsertActionTypeExtractor]))) + Some(ActionTypeDesc( + "overwrite", + classOf[OverwriteOrInsertActionTypeExtractor], + comment = "Hudi")), + comment = "Hudi") TableCommandSpec(cmd, Seq(tableDesc), queryDescs = Seq(QueryDesc("query"))) } val ShowHoodieTablePartitionsCommand = { val cmd = "org.apache.spark.sql.hudi.command.ShowHoodieTablePartitionsCommand" - val columnDesc = ColumnDesc("specOpt", classOf[PartitionOptionColumnExtractor]) + val columnDesc = + ColumnDesc("specOpt", classOf[PartitionOptionColumnExtractor], comment = "Hudi") val tableDesc = TableDesc( "tableIdentifier", classOf[TableIdentifierTableExtractor], isInput = true, - columnDesc = Some(columnDesc)) + columnDesc = Some(columnDesc), + comment = "Hudi") TableCommandSpec(cmd, Seq(tableDesc), SHOWPARTITIONS) } val DeleteHoodieTableCommand = { val cmd = "org.apache.spark.sql.hudi.command.DeleteHoodieTableCommand" - val actionTypeDesc = ActionTypeDesc(actionType = Some(UPDATE)) + val actionTypeDesc = ActionTypeDesc(actionType = Some(UPDATE), comment = "Hudi") val tableDesc = TableDesc( "dft", classOf[HudiDataSourceV2RelationTableExtractor], - actionTypeDesc = Some(actionTypeDesc)) + actionTypeDesc = Some(actionTypeDesc), + comment = "Hudi") TableCommandSpec(cmd, Seq(tableDesc)) } val UpdateHoodieTableCommand = { val cmd = "org.apache.spark.sql.hudi.command.UpdateHoodieTableCommand" - val actionTypeDesc = ActionTypeDesc(actionType = Some(UPDATE)) + val actionTypeDesc = ActionTypeDesc(actionType = Some(UPDATE), comment = "Hudi") val tableDesc = TableDesc( "ut", classOf[HudiDataSourceV2RelationTableExtractor], - actionTypeDesc = Some(actionTypeDesc)) + actionTypeDesc = Some(actionTypeDesc), + comment = "Hudi") TableCommandSpec(cmd, Seq(tableDesc)) } val MergeIntoHoodieTableCommand = { val cmd = "org.apache.spark.sql.hudi.command.MergeIntoHoodieTableCommand" - val actionTypeDesc = ActionTypeDesc(actionType = Some(UPDATE)) + val actionTypeDesc = ActionTypeDesc(actionType = Some(UPDATE), comment = "Hudi") val tableDesc = TableDesc( "mergeInto", classOf[HudiMergeIntoTargetTableExtractor], - actionTypeDesc = Some(actionTypeDesc)) - val queryDescs = QueryDesc("mergeInto", classOf[HudiMergeIntoSourceTableExtractor]) + actionTypeDesc = Some(actionTypeDesc), + comment = "Hudi") + val queryDescs = + QueryDesc("mergeInto", classOf[HudiMergeIntoSourceTableExtractor], comment = "Hudi") TableCommandSpec(cmd, Seq(tableDesc), queryDescs = Seq(queryDescs)) } @@ -242,7 +291,7 @@ object HudiCommands { setCurrentDatabaseIfMissing = true))) } - val data: Array[TableCommandSpec] = Array( + override def specs: Seq[TableCommandSpec] = Seq( AlterHoodieTableAddColumnsCommand, AlterHoodieTableChangeColumnCommand, AlterHoodieTableDropPartitionCommand, @@ -253,7 +302,9 @@ object HudiCommands { CreateHoodieTableCommand, CreateHoodieTableLikeCommand, CreateIndexCommand, + CompactionHoodiePathCommand, CompactionHoodieTableCommand, + CompactionShowHoodiePathCommand, CompactionShowHoodieTableCommand, DeleteHoodieTableCommand, DropHoodieTableCommand, diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/IcebergCommands.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/IcebergCommands.scala index fb195b4554c..33e94d718c2 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/IcebergCommands.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/IcebergCommands.scala @@ -21,7 +21,7 @@ import org.apache.kyuubi.plugin.spark.authz.OperationType import org.apache.kyuubi.plugin.spark.authz.PrivilegeObjectActionType._ import org.apache.kyuubi.plugin.spark.authz.serde._ -object IcebergCommands { +object IcebergCommands extends CommandSpecs[TableCommandSpec] { val DeleteFromIcebergTable = { val cmd = "org.apache.spark.sql.catalyst.plans.logical.DeleteFromIcebergTable" @@ -30,7 +30,8 @@ object IcebergCommands { TableDesc( "table", classOf[DataSourceV2RelationTableExtractor], - actionTypeDesc = Some(actionTypeDesc)) + actionTypeDesc = Some(actionTypeDesc), + comment = "Iceberg") TableCommandSpec(cmd, Seq(tableDesc)) } @@ -45,18 +46,19 @@ object IcebergCommands { val tableDesc = TableDesc( "targetTable", classOf[DataSourceV2RelationTableExtractor], - actionTypeDesc = Some(actionTypeDesc)) + actionTypeDesc = Some(actionTypeDesc), + comment = "Iceberg") val queryDesc = QueryDesc("sourceTable") TableCommandSpec(cmd, Seq(tableDesc), queryDescs = Seq(queryDesc)) } val CallProcedure = { val cmd = "org.apache.spark.sql.catalyst.plans.logical.Call" - val td = TableDesc("args", classOf[ExpressionSeqTableExtractor]) + val td = TableDesc("args", classOf[ExpressionSeqTableExtractor], comment = "Iceberg") TableCommandSpec(cmd, Seq(td), opType = OperationType.ALTERTABLE_PROPERTIES) } - val data: Array[TableCommandSpec] = Array( + override def specs: Seq[TableCommandSpec] = Seq( CallProcedure, DeleteFromIcebergTable, UpdateIcebergTable, diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/JsonSpecFileGenerator.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/JsonSpecFileGenerator.scala index 1b2d330d1cb..58d161ce051 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/JsonSpecFileGenerator.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/JsonSpecFileGenerator.scala @@ -24,7 +24,9 @@ import java.nio.file.{Files, Paths, StandardOpenOption} import org.scalatest.funsuite.AnyFunSuite import org.apache.kyuubi.plugin.spark.authz.serde.{mapper, CommandSpec} +import org.apache.kyuubi.plugin.spark.authz.serde.CommandSpecs import org.apache.kyuubi.util.AssertionUtils._ +import org.apache.kyuubi.util.GoldenFileUtils._ /** * Generates the default command specs to src/main/resources dir. @@ -42,26 +44,29 @@ import org.apache.kyuubi.util.AssertionUtils._ class JsonSpecFileGenerator extends AnyFunSuite { // scalastyle:on test("check spec json files") { - writeCommandSpecJson("database", DatabaseCommands.data) - writeCommandSpecJson("table", TableCommands.data ++ IcebergCommands.data ++ HudiCommands.data) - writeCommandSpecJson("function", FunctionCommands.data) - writeCommandSpecJson("scan", Scans.data) + writeCommandSpecJson("database", Seq(DatabaseCommands)) + writeCommandSpecJson("table", Seq(TableCommands, IcebergCommands, HudiCommands, DeltaCommands)) + writeCommandSpecJson("function", Seq(FunctionCommands)) + writeCommandSpecJson("scan", Seq(Scans)) } def writeCommandSpecJson[T <: CommandSpec]( commandType: String, - specArr: Array[T]): Unit = { - val pluginHome = getClass.getProtectionDomain.getCodeSource.getLocation.getPath - .split("target").head + specsArr: Seq[CommandSpecs[T]]): Unit = { val filename = s"${commandType}_command_spec.json" - val filePath = Paths.get(pluginHome, "src", "main", "resources", filename) + val filePath = Paths.get( + s"${getCurrentModuleHome(this)}/src/main/resources/$filename") - val generatedStr = mapper.writerWithDefaultPrettyPrinter() - .writeValueAsString(specArr.sortBy(_.classname)) + val allSpecs = specsArr.flatMap(_.specs.sortBy(_.classname)) + val duplicatedClassnames = allSpecs.groupBy(_.classname).values + .filter(_.size > 1).flatMap(specs => specs.map(_.classname)).toSet + withClue(s"Unexpected duplicated classnames: $duplicatedClassnames")( + assertResult(0)(duplicatedClassnames.size)) + val generatedStr = mapper.writerWithDefaultPrettyPrinter().writeValueAsString(allSpecs) if (sys.env.get("KYUUBI_UPDATE").contains("1")) { // scalastyle:off println - println(s"writing ${specArr.length} specs to $filename") + println(s"writing ${allSpecs.length} specs to $filename") // scalastyle:on println Files.write( filePath, diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/Scans.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/Scans.scala index b2c1868a26d..ed17dc5dc43 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/Scans.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/Scans.scala @@ -20,7 +20,7 @@ package org.apache.kyuubi.plugin.spark.authz.gen import org.apache.kyuubi.plugin.spark.authz.serde._ import org.apache.kyuubi.plugin.spark.authz.serde.FunctionType._ -object Scans { +object Scans extends CommandSpecs[ScanSpec] { val HiveTableRelation = { val r = "org.apache.spark.sql.catalyst.catalog.HiveTableRelation" @@ -37,7 +37,8 @@ object Scans { ScanDesc( "catalogTable", classOf[CatalogTableOptionTableExtractor]) - ScanSpec(r, Seq(tableDesc)) + val uriDesc = UriDesc("relation", classOf[BaseRelationFileIndexURIExtractor]) + ScanSpec(r, Seq(tableDesc), uriDescs = Seq(uriDesc)) } val DataSourceV2Relation = { @@ -50,7 +51,7 @@ object Scans { } val PermanentViewMarker = { - val r = "org.apache.kyuubi.plugin.spark.authz.util.PermanentViewMarker" + val r = "org.apache.kyuubi.plugin.spark.authz.rule.permanentview.PermanentViewMarker" val tableDesc = ScanDesc( "catalogTable", @@ -79,7 +80,7 @@ object Scans { val HiveGenericUDTF = HiveSimpleUDF.copy(classname = "org.apache.spark.sql.hive.HiveGenericUDTF") - val data: Array[ScanSpec] = Array( + override def specs: Seq[ScanSpec] = Seq( HiveTableRelation, LogicalRelation, DataSourceV2Relation, diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/TableCommands.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/TableCommands.scala index 9893953afb7..aced937b9a6 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/TableCommands.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/gen/TableCommands.scala @@ -22,7 +22,7 @@ import org.apache.kyuubi.plugin.spark.authz.PrivilegeObjectActionType._ import org.apache.kyuubi.plugin.spark.authz.serde._ import org.apache.kyuubi.plugin.spark.authz.serde.TableType._ -object TableCommands { +object TableCommands extends CommandSpecs[TableCommandSpec] { // table extractors val tite = classOf[TableIdentifierTableExtractor] val tableNameDesc = TableDesc("tableName", tite) @@ -39,7 +39,8 @@ object TableCommands { val AlterTable = { val cmd = "org.apache.spark.sql.catalyst.plans.logical.AlterTable" val tableDesc = TableDesc("ident", classOf[IdentifierTableExtractor]) - TableCommandSpec(cmd, Seq(tableDesc), ALTERTABLE_PROPERTIES) + val uriDescs = Seq(UriDesc("ident", classOf[IdentifierURIExtractor])) + TableCommandSpec(cmd, Seq(tableDesc), ALTERTABLE_PROPERTIES, uriDescs = uriDescs) } val AlterTableAddColumns = { @@ -51,7 +52,8 @@ object TableCommands { val AddColumns = { val cmd = "org.apache.spark.sql.catalyst.plans.logical.AddColumns" - TableCommandSpec(cmd, Seq(resolvedTableDesc), ALTERTABLE_ADDCOLS) + val uriDescs = Seq(UriDesc("child", classOf[ResolvedTableURIExtractor])) + TableCommandSpec(cmd, Seq(resolvedTableDesc), ALTERTABLE_ADDCOLS, uriDescs = uriDescs) } val AlterColumn = { @@ -66,22 +68,24 @@ object TableCommands { val ReplaceColumns = { val cmd = "org.apache.spark.sql.catalyst.plans.logical.ReplaceColumns" - TableCommandSpec(cmd, Seq(resolvedTableDesc), ALTERTABLE_REPLACECOLS) + AddColumns.copy(classname = cmd, opType = ALTERTABLE_REPLACECOLS) } val RenameColumn = { val cmd = "org.apache.spark.sql.catalyst.plans.logical.RenameColumn" - TableCommandSpec(cmd, Seq(resolvedTableDesc), ALTERTABLE_RENAMECOL) + AddColumns.copy(classname = cmd, opType = ALTERTABLE_RENAMECOL) } val AlterTableAddPartition = { val cmd = "org.apache.spark.sql.execution.command.AlterTableAddPartitionCommand" val columnDesc = ColumnDesc("partitionSpecsAndLocs", classOf[PartitionLocsSeqColumnExtractor]) + val uriDesc = UriDesc("partitionSpecsAndLocs", classOf[PartitionLocsSeqURIExtractor]) TableCommandSpec( cmd, Seq(tableNameDesc.copy(columnDesc = Some(columnDesc))), - ALTERTABLE_ADDPARTS) + ALTERTABLE_ADDPARTS, + uriDescs = Seq(uriDesc)) } val AlterTableChangeColumn = { @@ -150,10 +154,12 @@ object TableCommands { val AlterTableSetLocation = { val cmd = "org.apache.spark.sql.execution.command.AlterTableSetLocationCommand" val columnDesc = ColumnDesc("partitionSpec", classOf[PartitionOptionColumnExtractor]) + val uriDesc = UriDesc("location", classOf[StringURIExtractor]) TableCommandSpec( cmd, Seq(tableNameDesc.copy(columnDesc = Some(columnDesc))), - ALTERTABLE_LOCATION) + ALTERTABLE_LOCATION, + uriDescs = Seq(uriDesc)) } val AlterTableSetProperties = TableCommandSpec( @@ -210,10 +216,15 @@ object TableCommands { "tableName", classOf[IdentifierTableExtractor], catalogDesc = Some(CatalogDesc())) + val uriDescs = Seq( + UriDesc("tableSpec", classOf[TableSpecURIExtractor]), + UriDesc("properties", classOf[PropertiesLocationUriExtractor]), + UriDesc("tableName", classOf[IdentifierURIExtractor])) TableCommandSpec( cmd, Seq(resolvedIdentifierTableDesc, tableDesc, resolvedDbObjectNameDesc), - CREATETABLE) + CREATETABLE, + uriDescs = uriDescs) } val CreateV2Table = { @@ -222,7 +233,10 @@ object TableCommands { "tableName", classOf[IdentifierTableExtractor], catalogDesc = Some(CatalogDesc())) - TableCommandSpec(cmd, Seq(tableDesc), CREATETABLE) + val uriDescs = Seq( + UriDesc("properties", classOf[PropertiesLocationUriExtractor]), + UriDesc("tableName", classOf[IdentifierURIExtractor])) + TableCommandSpec(cmd, Seq(tableDesc), CREATETABLE, uriDescs = uriDescs) } val CreateTableAsSelectV2 = { @@ -231,6 +245,9 @@ object TableCommands { "tableName", classOf[IdentifierTableExtractor], catalogDesc = Some(CatalogDesc())) + val uriDescs = Seq( + UriDesc("tableSpec", classOf[TableSpecURIExtractor]), + UriDesc("properties", classOf[PropertiesLocationUriExtractor])) TableCommandSpec( cmd, Seq( @@ -238,7 +255,8 @@ object TableCommands { tableDesc, resolvedDbObjectNameDesc.copy(fieldName = "name")), CREATETABLE_AS_SELECT, - Seq(queryQueryDesc)) + Seq(queryQueryDesc), + uriDescs = uriDescs) } val CommentOnTable = { @@ -254,7 +272,8 @@ object TableCommands { "table", classOf[DataSourceV2RelationTableExtractor], actionTypeDesc = Some(actionTypeDesc)) - TableCommandSpec(cmd, Seq(tableDesc), queryDescs = Seq(queryQueryDesc)) + val uriDescs = Seq(UriDesc("table", classOf[DataSourceV2RelationURIExtractor])) + TableCommandSpec(cmd, Seq(tableDesc), queryDescs = Seq(queryQueryDesc), uriDescs = uriDescs) } val ReplaceData = { @@ -292,7 +311,8 @@ object TableCommands { "table", classOf[DataSourceV2RelationTableExtractor], actionTypeDesc = Some(actionTypeDesc)) - TableCommandSpec(cmd, Seq(tableDesc), queryDescs = Seq(queryQueryDesc)) + val uriDescs = Seq(UriDesc("table", classOf[DataSourceV2RelationURIExtractor])) + TableCommandSpec(cmd, Seq(tableDesc), queryDescs = Seq(queryQueryDesc), uriDescs = uriDescs) } val OverwritePartitionsDynamic = { @@ -372,14 +392,21 @@ object TableCommands { val cmd = "org.apache.spark.sql.execution.datasources.CreateTable" val tableDesc = TableDesc("tableDesc", classOf[CatalogTableTableExtractor]) val queryDesc = QueryDesc("query", "LogicalPlanOptionQueryExtractor") - TableCommandSpec(cmd, Seq(tableDesc), CREATETABLE, queryDescs = Seq(queryDesc)) + val uriDesc = UriDesc("tableDesc", classOf[CatalogTableURIExtractor]) + TableCommandSpec( + cmd, + Seq(tableDesc), + CREATETABLE, + queryDescs = Seq(queryDesc), + uriDescs = Seq(uriDesc)) } val CreateDataSourceTable = { val cmd = "org.apache.spark.sql.execution.command.CreateDataSourceTableCommand" val tableDesc = TableDesc("table", classOf[CatalogTableTableExtractor], setCurrentDatabaseIfMissing = true) - TableCommandSpec(cmd, Seq(tableDesc), CREATETABLE) + val uriDesc = UriDesc("table", classOf[CatalogTableURIExtractor]) + TableCommandSpec(cmd, Seq(tableDesc), CREATETABLE, uriDescs = Seq(uriDesc)) } val CreateDataSourceTableAsSelect = { @@ -395,8 +422,14 @@ object TableCommands { val columnDesc = ColumnDesc("outputColumnNames", classOf[StringSeqColumnExtractor]) val tableDesc = TableDesc("tableDesc", classOf[CatalogTableTableExtractor], Some(columnDesc)) + val uriDesc = UriDesc("tableDesc", classOf[CatalogTableURIExtractor]) val queryDesc = queryQueryDesc - TableCommandSpec(cmd, Seq(tableDesc), "CREATETABLE_AS_SELECT", queryDescs = Seq(queryDesc)) + TableCommandSpec( + cmd, + Seq(tableDesc), + "CREATETABLE_AS_SELECT", + queryDescs = Seq(queryDesc), + uriDescs = Seq(uriDesc)) } val CreateTableLike = { @@ -410,7 +443,8 @@ object TableCommands { classOf[TableIdentifierTableExtractor], isInput = true, setCurrentDatabaseIfMissing = true) - TableCommandSpec(cmd, Seq(tableDesc1, tableDesc2), CREATETABLE) + val uriDesc = UriDesc("fileFormat", classOf[CatalogStorageFormatURIExtractor]) + TableCommandSpec(cmd, Seq(tableDesc1, tableDesc2), CREATETABLE, uriDescs = Seq(uriDesc)) } val DescribeColumn = { @@ -552,7 +586,15 @@ object TableCommands { val InsertIntoDataSourceDir = { val cmd = "org.apache.spark.sql.execution.command.InsertIntoDataSourceDirCommand" val queryDesc = queryQueryDesc - TableCommandSpec(cmd, Nil, queryDescs = Seq(queryDesc)) + val uriDesc = UriDesc("storage", classOf[CatalogStorageFormatURIExtractor]) + TableCommandSpec(cmd, Nil, queryDescs = Seq(queryDesc), uriDescs = Seq(uriDesc)) + } + + val SaveIntoDataSourceCommand = { + val cmd = "org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand" + val queryDesc = queryQueryDesc + val uriDesc = UriDesc("options", classOf[PropertiesPathUriExtractor]) + TableCommandSpec(cmd, Nil, queryDescs = Seq(queryDesc), uriDescs = Seq(uriDesc)) } val InsertIntoHadoopFsRelationCommand = { @@ -576,7 +618,8 @@ object TableCommands { fieldName = "table", columnDesc = Some(columnDesc), actionTypeDesc = Some(actionTypeDesc)) - TableCommandSpec(cmd, Seq(tableDesc), "LOAD") + val uriDesc = UriDesc("path", classOf[StringURIExtractor], isInput = true) + TableCommandSpec(cmd, Seq(tableDesc), LOAD, uriDescs = Seq(uriDesc)) } val RefreshTable = { @@ -594,7 +637,32 @@ object TableCommands { TableCommandSpec(cmd, Seq(tableIdentDesc.copy(isInput = true))) } - val data: Array[TableCommandSpec] = Array( + val SetTableProperties = { + val cmd = "org.apache.spark.sql.catalyst.plans.logical.SetTableProperties" + val tableDesc = TableDesc("table", classOf[ResolvedTableTableExtractor]) + val uriDescs = Seq(UriDesc("table", classOf[ResolvedTableURIExtractor])) + TableCommandSpec(cmd, Seq(tableDesc), ALTERTABLE_PROPERTIES, uriDescs = uriDescs) + } + + val AddArchivesCommand = { + val cmd = "org.apache.spark.sql.execution.command.AddArchivesCommand" + val uriDesc = UriDesc("paths", classOf[StringSeqURIExtractor], isInput = true) + TableCommandSpec(cmd, Nil, ADD, uriDescs = Seq(uriDesc)) + } + + // For spark-3.1 + val AddFileCommand = { + val cmd = "org.apache.spark.sql.execution.command.AddFileCommand" + val uriDesc = UriDesc("path", classOf[StringURIExtractor], isInput = true) + TableCommandSpec(cmd, Nil, ADD, uriDescs = Seq(uriDesc)) + } + + override def specs: Seq[TableCommandSpec] = Seq( + AddArchivesCommand, + AddArchivesCommand.copy(classname = "org.apache.spark.sql.execution.command.AddFilesCommand"), + AddArchivesCommand.copy(classname = "org.apache.spark.sql.execution.command.AddJarsCommand"), + AddFileCommand, + AddFileCommand.copy(classname = "org.apache.spark.sql.execution.command.AddJarCommand"), AddPartitions, DropPartitions, RenamePartitions, @@ -653,8 +721,7 @@ object TableCommands { DropTableV2, InsertIntoDataSource, InsertIntoDataSourceDir, - InsertIntoDataSourceDir.copy(classname = - "org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand"), + SaveIntoDataSourceCommand, InsertIntoHadoopFsRelationCommand, InsertIntoDataSourceDir.copy(classname = "org.apache.spark.sql.hive.execution.InsertIntoHiveDirCommand"), @@ -668,6 +735,7 @@ object TableCommands { RefreshTableV2, RefreshTable3d0, ReplaceData, + SetTableProperties, ShowColumns, ShowCreateTable, ShowCreateTable.copy(classname = diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/DeltaCatalogRangerSparkExtensionSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/DeltaCatalogRangerSparkExtensionSuite.scala new file mode 100644 index 00000000000..1ce8ad6765f --- /dev/null +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/DeltaCatalogRangerSparkExtensionSuite.scala @@ -0,0 +1,573 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.plugin.spark.authz.ranger + +import java.nio.file.Path + +import org.scalatest.Outcome + +import org.apache.kyuubi.Utils +import org.apache.kyuubi.plugin.spark.authz.AccessControlException +import org.apache.kyuubi.plugin.spark.authz.RangerTestNamespace._ +import org.apache.kyuubi.plugin.spark.authz.RangerTestUsers._ +import org.apache.kyuubi.plugin.spark.authz.ranger.DeltaCatalogRangerSparkExtensionSuite._ +import org.apache.kyuubi.plugin.spark.authz.util.AuthZUtils.{isSparkV32OrGreater, isSparkV35OrGreater} +import org.apache.kyuubi.tags.DeltaTest +import org.apache.kyuubi.util.AssertionUtils._ + +/** + * Tests for RangerSparkExtensionSuite on Delta Lake + */ +@DeltaTest +class DeltaCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { + override protected val catalogImpl: String = "hive" + override protected val sqlExtensions: String = "io.delta.sql.DeltaSparkSessionExtension" + + val namespace1 = deltaNamespace + val table1 = "table1_delta" + val table2 = "table2_delta" + + def propString(props: Map[String, String]): String = + if (props.isEmpty) "" + else { + props + .map { case (key, value) => s"'$key' = '$value'" } + .mkString("TBLPROPERTIES (", ",", ")") + } + + def createTableSql(namespace: String, table: String): String = + s""" + |CREATE TABLE IF NOT EXISTS $namespace.$table ( + | id INT, + | name STRING, + | gender STRING, + | birthDate TIMESTAMP + |) + |USING DELTA + |PARTITIONED BY (gender) + |""".stripMargin + + def createPathBasedTableSql(path: Path, props: Map[String, String] = Map.empty): String = + s""" + |CREATE TABLE IF NOT EXISTS delta.`$path` ( + | id INT, + | name STRING, + | gender STRING, + | birthDate TIMESTAMP + |) + |USING DELTA + |PARTITIONED BY (gender) + |${propString(props)} + |""".stripMargin + + override def withFixture(test: NoArgTest): Outcome = { + test() + } + + override def beforeAll(): Unit = { + spark.conf.set(s"spark.sql.catalog.$sparkCatalog", deltaCatalogClassName) + spark.conf.set( + s"spark.sql.catalog.$sparkCatalog.warehouse", + Utils.createTempDir("delta-hadoop").toString) + super.beforeAll() + } + + override def afterAll(): Unit = { + super.afterAll() + spark.sessionState.catalog.reset() + spark.sessionState.conf.clear() + } + + test("create table") { + withCleanTmpResources(Seq( + (s"$namespace1.$table1", "table"), + (s"$namespace1.$table2", "table"), + (s"$namespace1", "database"))) { + doAs(admin, sql(s"CREATE DATABASE IF NOT EXISTS $namespace1")) + val createNonPartitionTableSql = + s""" + |CREATE TABLE IF NOT EXISTS $namespace1.$table1 ( + | id INT, + | name STRING, + | gender STRING, + | birthDate TIMESTAMP + |) USING DELTA + |""".stripMargin + interceptEndsWith[AccessControlException] { + doAs(someone, sql(createNonPartitionTableSql)) + }(s"does not have [create] privilege on [$namespace1/$table1]") + doAs(admin, sql(createNonPartitionTableSql)) + + val createPartitionTableSql = createTableSql(namespace1, table2) + interceptEndsWith[AccessControlException] { + doAs(someone, sql(createPartitionTableSql)) + }(s"does not have [create] privilege on [$namespace1/$table2]") + doAs(admin, sql(createPartitionTableSql)) + } + } + + test("create or replace table") { + withCleanTmpResources(Seq((s"$namespace1.$table1", "table"), (s"$namespace1", "database"))) { + doAs(admin, sql(s"CREATE DATABASE IF NOT EXISTS $namespace1")) + val createOrReplaceTableSql = + s""" + |CREATE OR REPLACE TABLE $namespace1.$table1 ( + | id INT, + | name STRING, + | gender STRING, + | birthDate TIMESTAMP + |) USING DELTA + |""".stripMargin + interceptEndsWith[AccessControlException] { + doAs(someone, sql(createOrReplaceTableSql)) + }(s"does not have [create] privilege on [$namespace1/$table1]") + doAs(admin, sql(createOrReplaceTableSql)) + } + } + + test("alter table") { + withCleanTmpResources(Seq((s"$namespace1.$table1", "table"), (s"$namespace1", "database"))) { + doAs(admin, sql(s"CREATE DATABASE IF NOT EXISTS $namespace1")) + doAs(admin, sql(createTableSql(namespace1, table1))) + + // add columns + interceptEndsWith[AccessControlException]( + doAs(someone, sql(s"ALTER TABLE $namespace1.$table1 ADD COLUMNS (age int)")))( + s"does not have [alter] privilege on [$namespace1/$table1]") + + // change column + interceptEndsWith[AccessControlException]( + doAs( + someone, + sql(s"ALTER TABLE $namespace1.$table1" + + s" CHANGE COLUMN gender gender STRING AFTER birthDate")))( + s"does not have [alter] privilege on [$namespace1/$table1]") + + // replace columns + interceptEndsWith[AccessControlException]( + doAs( + someone, + sql(s"ALTER TABLE $namespace1.$table1" + + s" REPLACE COLUMNS (id INT, name STRING)")))( + s"does not have [alter] privilege on [$namespace1/$table1]") + + // rename column + interceptEndsWith[AccessControlException]( + doAs( + someone, + sql(s"ALTER TABLE $namespace1.$table1" + + s" RENAME COLUMN birthDate TO dateOfBirth")))( + s"does not have [alter] privilege on [$namespace1/$table1]") + + // drop column + interceptEndsWith[AccessControlException]( + doAs(someone, sql(s"ALTER TABLE $namespace1.$table1 DROP COLUMN birthDate")))( + s"does not have [alter] privilege on [$namespace1/$table1]") + + // set properties + interceptEndsWith[AccessControlException]( + doAs( + someone, + sql(s"ALTER TABLE $namespace1.$table1" + + s" SET TBLPROPERTIES ('delta.appendOnly' = 'true')")))( + s"does not have [alter] privilege on [$namespace1/$table1]") + } + } + + test("delete from table") { + withCleanTmpResources(Seq((s"$namespace1.$table1", "table"), (s"$namespace1", "database"))) { + doAs(admin, sql(s"CREATE DATABASE IF NOT EXISTS $namespace1")) + doAs(admin, sql(createTableSql(namespace1, table1))) + val deleteFromTableSql = s"DELETE FROM $namespace1.$table1 WHERE birthDate < '1955-01-01'" + interceptEndsWith[AccessControlException]( + doAs(someone, sql(deleteFromTableSql)))( + s"does not have [update] privilege on [$namespace1/$table1]") + doAs(admin, sql(deleteFromTableSql)) + } + } + + test("insert table") { + withSingleCallEnabled { + withCleanTmpResources(Seq( + (s"$namespace1.$table1", "table"), + (s"$namespace1.$table2", "table"), + (s"$namespace1", "database"))) { + doAs(admin, sql(s"CREATE DATABASE IF NOT EXISTS $namespace1")) + doAs(admin, sql(createTableSql(namespace1, table1))) + doAs(admin, sql(createTableSql(namespace1, table2))) + + // insert into + val insertIntoSql = s"INSERT INTO $namespace1.$table1" + + s" SELECT * FROM $namespace1.$table2" + interceptEndsWith[AccessControlException]( + doAs(someone, sql(insertIntoSql)))( + s"does not have [select] privilege on [$namespace1/$table2/id,$namespace1/$table2/name," + + s"$namespace1/$table2/gender,$namespace1/$table2/birthDate]," + + s" [update] privilege on [$namespace1/$table1]") + doAs(admin, sql(insertIntoSql)) + + // insert overwrite + val insertOverwriteSql = s"INSERT OVERWRITE $namespace1.$table1" + + s" SELECT * FROM $namespace1.$table2" + interceptEndsWith[AccessControlException]( + doAs(someone, sql(insertOverwriteSql)))( + s"does not have [select] privilege on [$namespace1/$table2/id,$namespace1/$table2/name," + + s"$namespace1/$table2/gender,$namespace1/$table2/birthDate]," + + s" [update] privilege on [$namespace1/$table1]") + doAs(admin, sql(insertOverwriteSql)) + } + } + } + + test("update table") { + withCleanTmpResources(Seq((s"$namespace1.$table1", "table"), (s"$namespace1", "database"))) { + doAs(admin, sql(s"CREATE DATABASE IF NOT EXISTS $namespace1")) + doAs(admin, sql(createTableSql(namespace1, table1))) + val updateTableSql = s"UPDATE $namespace1.$table1" + + s" SET gender = 'Female' WHERE gender = 'F'" + interceptEndsWith[AccessControlException]( + doAs(someone, sql(updateTableSql)))( + s"does not have [update] privilege on [$namespace1/$table1]") + doAs(admin, sql(updateTableSql)) + } + } + + test("merge into table") { + withSingleCallEnabled { + withCleanTmpResources(Seq( + (s"$namespace1.$table1", "table"), + (s"$namespace1.$table2", "table"), + (s"$namespace1", "database"))) { + doAs(admin, sql(s"CREATE DATABASE IF NOT EXISTS $namespace1")) + doAs(admin, sql(createTableSql(namespace1, table1))) + doAs(admin, sql(createTableSql(namespace1, table2))) + + val mergeIntoSql = + s""" + |MERGE INTO $namespace1.$table1 AS target + |USING $namespace1.$table2 AS source + |ON target.id = source.id + |WHEN MATCHED THEN + | UPDATE SET + | id = source.id, + | name = source.name, + | gender = source.gender, + | birthDate = source.birthDate + |WHEN NOT MATCHED + | THEN INSERT ( + | id, + | name, + | gender, + | birthDate + | ) + | VALUES ( + | source.id, + | source.name, + | source.gender, + | source.birthDate + | ) + |""".stripMargin + interceptEndsWith[AccessControlException]( + doAs(someone, sql(mergeIntoSql)))( + s"does not have [select] privilege on [$namespace1/$table2/id,$namespace1/$table2/name," + + s"$namespace1/$table2/gender,$namespace1/$table2/birthDate]," + + s" [update] privilege on [$namespace1/$table1]") + doAs(admin, sql(mergeIntoSql)) + } + } + } + + test("optimize table") { + assume(isSparkV32OrGreater, "optimize table is available in Delta Lake 1.2.0 and above") + + withCleanTmpResources(Seq((s"$namespace1.$table1", "table"), (s"$namespace1", "database"))) { + doAs(admin, sql(s"CREATE DATABASE IF NOT EXISTS $namespace1")) + doAs(admin, sql(createTableSql(namespace1, table1))) + val optimizeTableSql = s"OPTIMIZE $namespace1.$table1" + interceptEndsWith[AccessControlException]( + doAs(someone, sql(optimizeTableSql)))( + s"does not have [alter] privilege on [$namespace1/$table1]") + doAs(admin, sql(optimizeTableSql)) + } + } + + test("vacuum table") { + withCleanTmpResources(Seq((s"$namespace1.$table1", "table"), (s"$namespace1", "database"))) { + doAs(admin, sql(s"CREATE DATABASE IF NOT EXISTS $namespace1")) + doAs(admin, sql(createTableSql(namespace1, table1))) + val vacuumTableSql = s"VACUUM $namespace1.$table1" + interceptEndsWith[AccessControlException]( + doAs(someone, sql(vacuumTableSql)))( + s"does not have [alter] privilege on [$namespace1/$table1]") + doAs(admin, sql(vacuumTableSql)) + } + } + + test("create path-based table") { + withTempDir(path => { + val createTableSql = createPathBasedTableSql(path) + interceptEndsWith[AccessControlException] { + doAs(someone, sql(createTableSql)) + }(s"does not have [write] privilege on [[$path, $path/]]") + doAs(admin, sql(createTableSql)) + }) + } + + test("create or replace path-based table") { + withTempDir(path => { + val createOrReplaceTableSql = + s""" + |CREATE OR REPLACE TABLE delta.`$path` ( + | id INT, + | name STRING, + | gender STRING, + | birthDate TIMESTAMP + |) USING DELTA + |""".stripMargin + interceptEndsWith[AccessControlException] { + doAs(someone, sql(createOrReplaceTableSql)) + }(s"does not have [write] privilege on [[$path, $path/]]") + doAs(admin, sql(createOrReplaceTableSql)) + }) + } + + test("delete from path-based table") { + withTempDir(path => { + doAs(admin, sql(createPathBasedTableSql(path))) + val deleteFromTableSql = s"DELETE FROM delta.`$path` WHERE birthDate < '1955-01-01'" + interceptEndsWith[AccessControlException] { + doAs(someone, sql(deleteFromTableSql)) + }(s"does not have [write] privilege on [[$path, $path/]]") + doAs(admin, sql(deleteFromTableSql)) + }) + } + + test("update path-based table") { + withTempDir(path => { + doAs(admin, sql(createPathBasedTableSql(path))) + val updateTableSql = s"UPDATE delta.`$path` SET gender = 'Female' WHERE gender = 'F'" + interceptEndsWith[AccessControlException] { + doAs(someone, sql(updateTableSql)) + }(s"does not have [write] privilege on [[$path, $path/]]") + doAs(admin, sql(updateTableSql)) + }) + } + + test("insert path-based table") { + withSingleCallEnabled { + withCleanTmpResources(Seq((s"$namespace1.$table2", "table"), (s"$namespace1", "database"))) { + doAs(admin, sql(s"CREATE DATABASE IF NOT EXISTS $namespace1")) + doAs(admin, sql(createTableSql(namespace1, table2))) + withTempDir(path => { + doAs(admin, sql(createPathBasedTableSql(path))) + // insert into + val insertIntoSql = s"INSERT INTO delta.`$path` SELECT * FROM $namespace1.$table2" + interceptEndsWith[AccessControlException]( + doAs(someone, sql(insertIntoSql)))( + s"does not have [select] privilege on [$namespace1/$table2/id," + + s"$namespace1/$table2/name,$namespace1/$table2/gender," + + s"$namespace1/$table2/birthDate], [write] privilege on [[$path, $path/]]") + doAs(admin, sql(insertIntoSql)) + + // insert overwrite + val insertOverwriteSql = + s"INSERT OVERWRITE delta.`$path` SELECT * FROM $namespace1.$table2" + interceptEndsWith[AccessControlException]( + doAs(someone, sql(insertOverwriteSql)))( + s"does not have [select] privilege on [$namespace1/$table2/id," + + s"$namespace1/$table2/name,$namespace1/$table2/gender," + + s"$namespace1/$table2/birthDate], [write] privilege on [[$path, $path/]]") + doAs(admin, sql(insertOverwriteSql)) + }) + } + } + } + + test("merge into path-based table") { + withSingleCallEnabled { + withCleanTmpResources(Seq( + (s"$namespace1.$table2", "table"), + (s"$namespace1", "database"))) { + doAs(admin, sql(s"CREATE DATABASE IF NOT EXISTS $namespace1")) + doAs(admin, sql(createTableSql(namespace1, table2))) + withTempDir(path => { + doAs(admin, sql(createPathBasedTableSql(path))) + val mergeIntoSql = + s""" + |MERGE INTO delta.`$path` AS target + |USING $namespace1.$table2 AS source + |ON target.id = source.id + |WHEN MATCHED THEN + | UPDATE SET + | id = source.id, + | name = source.name, + | gender = source.gender, + | birthDate = source.birthDate + |WHEN NOT MATCHED + | THEN INSERT ( + | id, + | name, + | gender, + | birthDate + | ) + | VALUES ( + | source.id, + | source.name, + | source.gender, + | source.birthDate + | ) + |""".stripMargin + interceptEndsWith[AccessControlException]( + doAs(someone, sql(mergeIntoSql)))( + s"does not have [select] privilege on [$namespace1/$table2/id," + + s"$namespace1/$table2/name,$namespace1/$table2/gender," + + s"$namespace1/$table2/birthDate], [write] privilege on [[$path, $path/]]") + doAs(admin, sql(mergeIntoSql)) + }) + } + } + } + + test("optimize path-based table") { + assume(isSparkV32OrGreater, "optimize table is available in Delta Lake 1.2.0 and above") + + withTempDir(path => { + doAs(admin, sql(createPathBasedTableSql(path))) + val optimizeTableSql1 = s"OPTIMIZE delta.`$path`" + interceptEndsWith[AccessControlException]( + doAs(someone, sql(optimizeTableSql1)))( + s"does not have [write] privilege on [[$path, $path/]]") + doAs(admin, sql(optimizeTableSql1)) + + val optimizeTableSql2 = s"OPTIMIZE '$path'" + interceptEndsWith[AccessControlException]( + doAs(someone, sql(optimizeTableSql2)))( + s"does not have [write] privilege on [[$path, $path/]]") + doAs(admin, sql(optimizeTableSql2)) + }) + } + + test("vacuum path-based table") { + withTempDir(path => { + doAs(admin, sql(createPathBasedTableSql(path))) + val vacuumTableSql1 = s"VACUUM delta.`$path`" + interceptEndsWith[AccessControlException]( + doAs(someone, sql(vacuumTableSql1)))( + s"does not have [write] privilege on [[$path, $path/]]") + doAs(admin, sql(vacuumTableSql1)) + + val vacuumTableSql2 = s"VACUUM '$path'" + interceptEndsWith[AccessControlException]( + doAs(someone, sql(vacuumTableSql2)))( + s"does not have [write] privilege on [[$path, $path/]]") + doAs(admin, sql(vacuumTableSql2)) + }) + } + + test("alter path-based table set properties") { + withTempDir(path => { + doAs(admin, sql(createPathBasedTableSql(path))) + val setPropertiesSql = s"ALTER TABLE delta.`$path`" + + s" SET TBLPROPERTIES ('delta.appendOnly' = 'true')" + interceptEndsWith[AccessControlException]( + doAs(someone, sql(setPropertiesSql)))( + s"does not have [write] privilege on [[$path, $path/]]") + doAs(admin, sql(setPropertiesSql)) + }) + } + + test("alter path-based table add columns") { + withTempDir(path => { + doAs(admin, sql(createPathBasedTableSql(path))) + val addColumnsSql = s"ALTER TABLE delta.`$path` ADD COLUMNS (age int)" + interceptEndsWith[AccessControlException]( + doAs(someone, sql(addColumnsSql)))( + s"does not have [write] privilege on [[$path, $path/]]") + doAs(admin, sql(addColumnsSql)) + }) + } + + test("alter path-based table change column") { + withTempDir(path => { + doAs(admin, sql(createPathBasedTableSql(path))) + val changeColumnSql = s"ALTER TABLE delta.`$path`" + + s" CHANGE COLUMN gender gender STRING AFTER birthDate" + interceptEndsWith[AccessControlException]( + doAs(someone, sql(changeColumnSql)))( + s"does not have [write] privilege on [[$path, $path/]]") + doAs(admin, sql(changeColumnSql)) + }) + } + + test("alter path-based table drop column") { + assume( + isSparkV32OrGreater, + "alter table drop column is available in Delta Lake 1.2.0 and above") + + withTempDir(path => { + doAs(admin, sql(createPathBasedTableSql(path, Map("delta.columnMapping.mode" -> "name")))) + val dropColumnSql = s"ALTER TABLE delta.`$path` DROP COLUMN birthDate" + interceptEndsWith[AccessControlException]( + doAs(someone, sql(dropColumnSql)))( + s"does not have [write] privilege on [[$path, $path/]]") + doAs(admin, sql(dropColumnSql)) + }) + } + + test("alter path-based table rename column") { + assume( + isSparkV32OrGreater, + "alter table rename column is available in Delta Lake 1.2.0 and above") + + withTempDir(path => { + doAs(admin, sql(createPathBasedTableSql(path, Map("delta.columnMapping.mode" -> "name")))) + val renameColumnSql = s"ALTER TABLE delta.`$path`" + + s" RENAME COLUMN birthDate TO dateOfBirth" + interceptEndsWith[AccessControlException]( + doAs(someone, sql(renameColumnSql)))( + s"does not have [write] privilege on [[$path, $path/]]") + doAs(admin, sql(renameColumnSql)) + }) + } + + test("alter path-based table replace columns") { + withTempDir(path => { + assume( + isSparkV32OrGreater, + "alter table replace columns is not available in Delta Lake 1.0.1") + + doAs(admin, sql(createPathBasedTableSql(path, Map("delta.columnMapping.mode" -> "name")))) + val replaceColumnsSql = s"ALTER TABLE delta.`$path`" + + s" REPLACE COLUMNS (id INT, name STRING, gender STRING)" + interceptEndsWith[AccessControlException]( + doAs(someone, sql(replaceColumnsSql)))( + s"does not have [write] privilege on [[$path, $path/]]") + + // There was a bug before Delta Lake 3.0, it will throw AnalysisException message + // "Cannot drop column from a struct type with a single field: + // StructType(StructField(birthDate,TimestampType,true))". + // For details, see https://github.com/delta-io/delta/pull/1822 + if (isSparkV35OrGreater) { + doAs(admin, sql(replaceColumnsSql)) + } + }) + } +} + +object DeltaCatalogRangerSparkExtensionSuite { + val deltaCatalogClassName: String = "org.apache.spark.sql.delta.catalog.DeltaCatalog" +} diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/HudiCatalogRangerSparkExtensionSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/HudiCatalogRangerSparkExtensionSuite.scala index 04207291098..b6b9b6f31a5 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/HudiCatalogRangerSparkExtensionSuite.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/HudiCatalogRangerSparkExtensionSuite.scala @@ -25,7 +25,7 @@ import org.apache.kyuubi.plugin.spark.authz.RangerTestNamespace._ import org.apache.kyuubi.plugin.spark.authz.RangerTestUsers._ import org.apache.kyuubi.plugin.spark.authz.util.AuthZUtils._ import org.apache.kyuubi.tags.HudiTest -import org.apache.kyuubi.util.AssertionUtils.interceptContains +import org.apache.kyuubi.util.AssertionUtils.interceptEndsWith /** * Tests for RangerSparkExtensionSuite on Hudi SQL. @@ -101,24 +101,24 @@ class HudiCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { |""".stripMargin)) // AlterHoodieTableAddColumnsCommand - interceptContains[AccessControlException]( + interceptEndsWith[AccessControlException]( doAs(someone, sql(s"ALTER TABLE $namespace1.$table1 ADD COLUMNS(age int)")))( s"does not have [alter] privilege on [$namespace1/$table1/age]") // AlterHoodieTableChangeColumnCommand - interceptContains[AccessControlException]( + interceptEndsWith[AccessControlException]( doAs(someone, sql(s"ALTER TABLE $namespace1.$table1 CHANGE COLUMN id id bigint")))( s"does not have [alter] privilege" + s" on [$namespace1/$table1/id]") // AlterHoodieTableDropPartitionCommand - interceptContains[AccessControlException]( + interceptEndsWith[AccessControlException]( doAs(someone, sql(s"ALTER TABLE $namespace1.$table1 DROP PARTITION (city='test')")))( s"does not have [alter] privilege" + s" on [$namespace1/$table1/city]") // AlterHoodieTableRenameCommand - interceptContains[AccessControlException]( + interceptEndsWith[AccessControlException]( doAs(someone, sql(s"ALTER TABLE $namespace1.$table1 RENAME TO $namespace1.$table2")))( s"does not have [alter] privilege" + s" on [$namespace1/$table1]") @@ -126,7 +126,7 @@ class HudiCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { // AlterTableCommand && Spark31AlterTableCommand try { sql("set hoodie.schema.on.read.enable=true") - interceptContains[AccessControlException]( + interceptEndsWith[AccessControlException]( doAs(someone, sql(s"ALTER TABLE $namespace1.$table1 ADD COLUMNS(age int)")))( s"does not have [alter] privilege on [$namespace1/$table1]") } finally { @@ -138,7 +138,7 @@ class HudiCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { test("CreateHoodieTableCommand") { withCleanTmpResources(Seq((namespace1, "database"))) { doAs(admin, sql(s"CREATE DATABASE IF NOT EXISTS $namespace1")) - interceptContains[AccessControlException]( + interceptEndsWith[AccessControlException]( doAs( someone, sql( @@ -171,7 +171,7 @@ class HudiCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { |) |PARTITIONED BY(city) |""".stripMargin)) - interceptContains[AccessControlException]( + interceptEndsWith[AccessControlException]( doAs( someone, sql( @@ -210,7 +210,7 @@ class HudiCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { |LIKE $namespace1.$table1 |USING HUDI |""".stripMargin - interceptContains[AccessControlException] { + interceptEndsWith[AccessControlException] { doAs( someone, sql( @@ -238,7 +238,7 @@ class HudiCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { |""".stripMargin)) val dropTableSql = s"DROP TABLE IF EXISTS $namespace1.$table1" - interceptContains[AccessControlException] { + interceptEndsWith[AccessControlException] { doAs(someone, sql(dropTableSql)) }(s"does not have [drop] privilege on [$namespace1/$table1]") doAs(admin, sql(dropTableSql)) @@ -263,7 +263,7 @@ class HudiCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { |""".stripMargin)) val repairTableSql = s"MSCK REPAIR TABLE $namespace1.$table1" - interceptContains[AccessControlException] { + interceptEndsWith[AccessControlException] { doAs(someone, sql(repairTableSql)) }(s"does not have [alter] privilege on [$namespace1/$table1]") doAs(admin, sql(repairTableSql)) @@ -288,7 +288,7 @@ class HudiCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { |""".stripMargin)) val truncateTableSql = s"TRUNCATE TABLE $namespace1.$table1" - interceptContains[AccessControlException] { + interceptEndsWith[AccessControlException] { doAs(someone, sql(truncateTableSql)) }(s"does not have [update] privilege on [$namespace1/$table1]") doAs(admin, sql(truncateTableSql)) @@ -313,19 +313,58 @@ class HudiCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { |""".stripMargin)) val compactionTable = s"RUN COMPACTION ON $namespace1.$table1" - interceptContains[AccessControlException] { + interceptEndsWith[AccessControlException] { doAs(someone, sql(compactionTable)) - }(s"does not have [select] privilege on [$namespace1/$table1]") + }(s"does not have [create] privilege on [$namespace1/$table1]") doAs(admin, sql(compactionTable)) val showCompactionTable = s"SHOW COMPACTION ON $namespace1.$table1" - interceptContains[AccessControlException] { + interceptEndsWith[AccessControlException] { doAs(someone, sql(showCompactionTable)) }(s"does not have [select] privilege on [$namespace1/$table1]") doAs(admin, sql(showCompactionTable)) } } + test("CompactionHoodiePathCommand / CompactionShowHoodiePathCommand") { + withSingleCallEnabled { + withCleanTmpResources(Seq.empty) { + val path1 = "hdfs://demo/test/hudi/path" + val compactOnPath = s"RUN COMPACTION ON '$path1'" + interceptEndsWith[AccessControlException]( + doAs(someone, sql(compactOnPath)))( + s"does not have [write] privilege on [[$path1, $path1/]]") + + val showCompactOnPath = s"SHOW COMPACTION ON '$path1'" + interceptEndsWith[AccessControlException]( + doAs(someone, sql(showCompactOnPath)))( + s"does not have [read] privilege on [[$path1, $path1/]]") + + val path2 = "file:///demo/test/hudi/path" + val compactOnPath2 = s"RUN COMPACTION ON '$path2'" + interceptEndsWith[AccessControlException]( + doAs(someone, sql(compactOnPath2)))( + s"does not have [write] privilege on [[$path2, $path2/]]") + + val showCompactOnPath2 = s"SHOW COMPACTION ON '$path2'" + interceptEndsWith[AccessControlException]( + doAs(someone, sql(showCompactOnPath2)))( + s"does not have [read] privilege on [[$path2, $path2/]]") + + val path3 = "hdfs://demo/test/hudi/path" + val compactOnPath3 = s"RUN COMPACTION ON '$path3'" + interceptEndsWith[AccessControlException]( + doAs(someone, sql(compactOnPath3)))( + s"does not have [write] privilege on [[$path3, $path3/]]") + + val showCompactOnPath3 = s"SHOW COMPACTION ON '$path3/'" + interceptEndsWith[AccessControlException]( + doAs(someone, sql(showCompactOnPath3)))( + s"does not have [read] privilege on [[$path3, $path3/]]") + } + } + } + test("InsertIntoHoodieTableCommand") { withSingleCallEnabled { withCleanTmpResources(Seq( @@ -363,7 +402,7 @@ class HudiCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { |FROM $namespace1.$table2 |WHERE city = 'hangzhou' |""".stripMargin - interceptContains[AccessControlException] { + interceptEndsWith[AccessControlException] { doAs(someone, sql(insertIntoHoodieTableSql)) }(s"does not have [select] privilege on " + s"[$namespace1/$table2/id,$namespace1/$table2/name,hudi_ns/$table2/city], " + @@ -394,14 +433,14 @@ class HudiCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { |""".stripMargin)) val showPartitionsSql = s"SHOW PARTITIONS $namespace1.$table1" - interceptContains[AccessControlException] { + interceptEndsWith[AccessControlException] { doAs(someone, sql(showPartitionsSql)) }(s"does not have [select] privilege on [$namespace1/$table1]") doAs(admin, sql(showPartitionsSql)) val showPartitionSpecSql = s"SHOW PARTITIONS $namespace1.$table1 PARTITION (city = 'hangzhou')" - interceptContains[AccessControlException] { + interceptEndsWith[AccessControlException] { doAs(someone, sql(showPartitionSpecSql)) }(s"does not have [select] privilege on [$namespace1/$table1/city]") doAs(admin, sql(showPartitionSpecSql)) @@ -445,13 +484,13 @@ class HudiCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { |""".stripMargin)) val deleteFrom = s"DELETE FROM $namespace1.$table1 WHERE id = 10" - interceptContains[AccessControlException] { + interceptEndsWith[AccessControlException] { doAs(someone, sql(deleteFrom)) }(s"does not have [update] privilege on [$namespace1/$table1]") doAs(admin, sql(deleteFrom)) val updateSql = s"UPDATE $namespace1.$table1 SET name = 'test' WHERE id > 10" - interceptContains[AccessControlException] { + interceptEndsWith[AccessControlException] { doAs(someone, sql(updateSql)) }(s"does not have [update] privilege on [$namespace1/$table1]") doAs(admin, sql(updateSql)) @@ -465,10 +504,11 @@ class HudiCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { |AND target.name == 'test' | THEN UPDATE SET id = source.id, name = source.name, city = source.city |""".stripMargin - interceptContains[AccessControlException] { + interceptEndsWith[AccessControlException] { doAs(someone, sql(mergeIntoSQL)) }(s"does not have [select] privilege on " + - s"[$namespace1/$table2/id,$namespace1/$table2/name,$namespace1/$table2/city]") + s"[$namespace1/$table2/id,$namespace1/$table2/name,$namespace1/$table2/city], " + + s"[update] privilege on [$namespace1/$table1]") doAs(admin, sql(mergeIntoSQL)) } } @@ -510,13 +550,14 @@ class HudiCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { val copy_to_table = s"CALL copy_to_table(table => '$namespace1.$table1', new_table => '$namespace1.$table2')" - interceptContains[AccessControlException] { + interceptEndsWith[AccessControlException] { doAs(someone, sql(copy_to_table)) - }(s"does not have [select] privilege on [$namespace1/$table1]") + }(s"does not have [select] privilege on [$namespace1/$table1], " + + s"[update] privilege on [$namespace1/$table2]") doAs(admin, sql(copy_to_table)) val show_table_properties = s"CALL show_table_properties(table => '$namespace1.$table1')" - interceptContains[AccessControlException] { + interceptEndsWith[AccessControlException] { doAs(someone, sql(show_table_properties)) }(s"does not have [select] privilege on [$namespace1/$table1]") doAs(admin, sql(show_table_properties)) @@ -546,7 +587,7 @@ class HudiCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { // CreateIndexCommand val createIndex = s"CREATE INDEX $index1 ON $namespace1.$table1 USING LUCENE (id)" - interceptContains[AccessControlException]( + interceptEndsWith[AccessControlException]( doAs( someone, sql(createIndex)))(s"does not have [index] privilege on [$namespace1/$table1]") @@ -554,7 +595,7 @@ class HudiCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { // RefreshIndexCommand val refreshIndex = s"REFRESH INDEX $index1 ON $namespace1.$table1" - interceptContains[AccessControlException]( + interceptEndsWith[AccessControlException]( doAs( someone, sql(refreshIndex)))(s"does not have [alter] privilege on [$namespace1/$table1]") @@ -562,7 +603,7 @@ class HudiCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { // ShowIndexesCommand val showIndex = s"SHOW INDEXES FROM TABLE $namespace1.$table1" - interceptContains[AccessControlException]( + interceptEndsWith[AccessControlException]( doAs( someone, sql(showIndex)))(s"does not have [select] privilege on [$namespace1/$table1]") @@ -570,7 +611,7 @@ class HudiCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { // DropIndexCommand val dropIndex = s"DROP INDEX $index1 ON $namespace1.$table1" - interceptContains[AccessControlException]( + interceptEndsWith[AccessControlException]( doAs( someone, sql(dropIndex)))(s"does not have [drop] privilege on [$namespace1/$table1]") diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/IcebergCatalogRangerSparkExtensionSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/IcebergCatalogRangerSparkExtensionSuite.scala index 28e13aff3c0..677b3945dda 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/IcebergCatalogRangerSparkExtensionSuite.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/IcebergCatalogRangerSparkExtensionSuite.scala @@ -111,7 +111,7 @@ class IcebergCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite s" on [$namespace1/$table1/id]")) withSingleCallEnabled { - interceptContains[AccessControlException](doAs(someone, sql(mergeIntoSql)))( + interceptEndsWith[AccessControlException](doAs(someone, sql(mergeIntoSql)))( if (isSparkV35OrGreater) { s"does not have [select] privilege on [$namespace1/table1/id" + s",$namespace1/$table1/name,$namespace1/$table1/city]" @@ -121,7 +121,7 @@ class IcebergCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite s" [update] privilege on [$bobNamespace/$bobSelectTable]" }) - interceptContains[AccessControlException] { + interceptEndsWith[AccessControlException] { doAs(bob, sql(mergeIntoSql)) }(s"does not have [update] privilege on [$bobNamespace/$bobSelectTable]") } @@ -131,7 +131,7 @@ class IcebergCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite test("[KYUUBI #3515] UPDATE TABLE") { // UpdateTable - interceptContains[AccessControlException] { + interceptEndsWith[AccessControlException] { doAs(someone, sql(s"UPDATE $catalogV2.$namespace1.$table1 SET city='Guangzhou' WHERE id=1")) }(if (isSparkV35OrGreater) { s"does not have [select] privilege on [$namespace1/$table1/id]" @@ -147,7 +147,7 @@ class IcebergCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite test("[KYUUBI #3515] DELETE FROM TABLE") { // DeleteFromTable - interceptContains[AccessControlException] { + interceptEndsWith[AccessControlException] { doAs(someone, sql(s"DELETE FROM $catalogV2.$namespace1.$table1 WHERE id=2")) }(if (isSparkV34OrGreater) { s"does not have [select] privilege on [$namespace1/$table1/id]" @@ -155,7 +155,7 @@ class IcebergCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite s"does not have [update] privilege on [$namespace1/$table1]" }) - interceptContains[AccessControlException] { + interceptEndsWith[AccessControlException] { doAs(bob, sql(s"DELETE FROM $catalogV2.$bobNamespace.$bobSelectTable WHERE id=2")) }(s"does not have [update] privilege on [$bobNamespace/$bobSelectTable]") @@ -264,9 +264,9 @@ class IcebergCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite .foreach(i => sql(s"INSERT INTO $table VALUES ($i, 'user_$i')")) }) - interceptContains[AccessControlException](doAs(someone, sql(rewriteDataFiles1)))( + interceptEndsWith[AccessControlException](doAs(someone, sql(rewriteDataFiles1)))( s"does not have [alter] privilege on [$namespace1/$tableName]") - interceptContains[AccessControlException](doAs(someone, sql(rewriteDataFiles2)))( + interceptEndsWith[AccessControlException](doAs(someone, sql(rewriteDataFiles2)))( s"does not have [alter] privilege on [$namespace1/$tableName]") /** @@ -326,7 +326,7 @@ class IcebergCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite val callRollbackToSnapshot = s"CALL $catalogV2.system.rollback_to_snapshot (table => '$table', snapshot_id => $targetSnapshotId)" - interceptContains[AccessControlException](doAs(someone, sql(callRollbackToSnapshot)))( + interceptEndsWith[AccessControlException](doAs(someone, sql(callRollbackToSnapshot)))( s"does not have [alter] privilege on [$namespace1/$tableName]") doAs(admin, sql(callRollbackToSnapshot)) } @@ -344,7 +344,7 @@ class IcebergCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite s"CALL $catalogV2.system.rollback_to_timestamp (table => '$table', timestamp => TIMESTAMP '$targetTimestamp')" } - interceptContains[AccessControlException](doAs(someone, sql(callRollbackToTimestamp)))( + interceptEndsWith[AccessControlException](doAs(someone, sql(callRollbackToTimestamp)))( s"does not have [alter] privilege on [$namespace1/$tableName]") doAs(admin, sql(callRollbackToTimestamp)) } @@ -359,7 +359,7 @@ class IcebergCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite val callSetCurrentSnapshot = s"CALL $catalogV2.system.set_current_snapshot (table => '$table', snapshot_id => $targetSnapshotId)" - interceptContains[AccessControlException](doAs(someone, sql(callSetCurrentSnapshot)))( + interceptEndsWith[AccessControlException](doAs(someone, sql(callSetCurrentSnapshot)))( s"does not have [alter] privilege on [$namespace1/$tableName]") doAs(admin, sql(callSetCurrentSnapshot)) } diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/PaimonCatalogRangerSparkExtensionSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/PaimonCatalogRangerSparkExtensionSuite.scala index 62cd9d62732..1ea039ec1e1 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/PaimonCatalogRangerSparkExtensionSuite.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/PaimonCatalogRangerSparkExtensionSuite.scala @@ -76,7 +76,7 @@ class PaimonCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { |) |""".stripMargin - interceptContains[AccessControlException] { + interceptEndsWith[AccessControlException] { doAs(someone, sql(createTable)) }(s"does not have [create] privilege on [$namespace1/$table1]") doAs(admin, createTable) diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala index c2e886f0246..9dd9613d8f9 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/RangerSparkExtensionSuite.scala @@ -17,10 +17,13 @@ package org.apache.kyuubi.plugin.spark.authz.ranger +import java.lang.reflect.UndeclaredThrowableException +import java.nio.file.Path + import scala.util.Try import org.apache.hadoop.security.UserGroupInformation -import org.apache.spark.sql.SparkSessionExtensions +import org.apache.spark.sql.{Row, SparkSessionExtensions} import org.apache.spark.sql.catalyst.analysis.NoSuchTableException import org.apache.spark.sql.catalyst.catalog.HiveTableRelation import org.apache.spark.sql.catalyst.plans.logical.Statistics @@ -30,10 +33,11 @@ import org.scalatest.BeforeAndAfterAll // scalastyle:off import org.scalatest.funsuite.AnyFunSuite +import org.apache.kyuubi.Utils import org.apache.kyuubi.plugin.spark.authz.{AccessControlException, SparkSessionProvider} import org.apache.kyuubi.plugin.spark.authz.RangerTestNamespace._ import org.apache.kyuubi.plugin.spark.authz.RangerTestUsers._ -import org.apache.kyuubi.plugin.spark.authz.ranger.RuleAuthorization.KYUUBI_AUTHZ_TAG +import org.apache.kyuubi.plugin.spark.authz.rule.Authorization.KYUUBI_AUTHZ_TAG import org.apache.kyuubi.plugin.spark.authz.util.AuthZUtils._ import org.apache.kyuubi.util.AssertionUtils._ import org.apache.kyuubi.util.reflect.ReflectUtils._ @@ -90,6 +94,14 @@ abstract class RangerSparkExtensionSuite extends AnyFunSuite } } + protected def withTempDir(f: Path => Unit): Unit = { + val dir = Utils.createTempDir() + try f(dir) + finally { + Utils.deleteDirectoryRecursively(dir.toFile) + } + } + /** * Enables authorizing in single call mode, * and disables authorizing in single call mode after calling `f` @@ -113,12 +125,12 @@ abstract class RangerSparkExtensionSuite extends AnyFunSuite if (i == 1) { assert(logicalPlan.getTagValue(KYUUBI_AUTHZ_TAG).isEmpty) } else { - assert(logicalPlan.getTagValue(KYUUBI_AUTHZ_TAG).getOrElse(false)) + assert(logicalPlan.getTagValue(KYUUBI_AUTHZ_TAG).nonEmpty) } rule.apply(logicalPlan) } - assert(logicalPlan.getTagValue(KYUUBI_AUTHZ_TAG).getOrElse(false)) + assert(logicalPlan.getTagValue(KYUUBI_AUTHZ_TAG).nonEmpty) } test("[KYUUBI #3226]: Another session should also check even if the plan is cached.") { @@ -140,7 +152,7 @@ abstract class RangerSparkExtensionSuite extends AnyFunSuite // session1: first query, should auth once.[LogicalRelation] val df = sql(select) val plan1 = df.queryExecution.optimizedPlan - assert(plan1.getTagValue(KYUUBI_AUTHZ_TAG).getOrElse(false)) + assert(plan1.getTagValue(KYUUBI_AUTHZ_TAG).nonEmpty) // cache df.cache() @@ -148,7 +160,7 @@ abstract class RangerSparkExtensionSuite extends AnyFunSuite // session1: second query, should auth once.[InMemoryRelation] // (don't need to check in again, but it's okay to check in once) val plan2 = sql(select).queryExecution.optimizedPlan - assert(plan1 != plan2 && plan2.getTagValue(KYUUBI_AUTHZ_TAG).getOrElse(false)) + assert(plan1 != plan2 && plan2.getTagValue(KYUUBI_AUTHZ_TAG).nonEmpty) // session2: should auth once. val otherSessionDf = spark.newSession().sql(select) @@ -159,7 +171,7 @@ abstract class RangerSparkExtensionSuite extends AnyFunSuite // make sure it use cache. assert(plan3.isInstanceOf[InMemoryRelation]) // auth once only. - assert(plan3.getTagValue(KYUUBI_AUTHZ_TAG).getOrElse(false)) + assert(plan3.getTagValue(KYUUBI_AUTHZ_TAG).nonEmpty) }) } } @@ -877,7 +889,7 @@ class HiveCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { sql(s"SELECT id as new_id, name, max_scope FROM $db1.$view1".stripMargin).show())) assert(e2.getMessage.contains( s"does not have [select] privilege on " + - s"[$db1/$view1/id,$db1/$view1/name,$db1/$view1/max_scope,$db1/$view1/sum_age]")) + s"[$db1/$view1/id,$db1/$view1/name,$db1/$view1/max_scope]")) } } } @@ -889,7 +901,7 @@ class HiveCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { val df = doAs( admin, sql(s"SELECT * FROM VALUES(1, 100),(2, 200),(3, 300) AS t(id, scope)")).persist() - interceptContains[AccessControlException]( + interceptEndsWith[AccessControlException]( doAs(someone, df.write.mode("overwrite").saveAsTable(table1)))( s"does not have [create] privilege on [$defaultDb/$table1]") } @@ -913,31 +925,550 @@ class HiveCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite { |CREATE VIEW $db1.$view2 |AS |SELECT count(*) as cnt, sum(id) as sum_id FROM $db1.$table1 - """.stripMargin)) - val e1 = intercept[AccessControlException]( - doAs(someone, sql(s"SELECT count(*) FROM $db1.$table1").show())) - assert(e1.getMessage.contains( - s"does not have [select] privilege on [$db1/$table1/id,$db1/$table1/scope]")) + """.stripMargin)) + interceptEndsWith[AccessControlException]( + doAs(someone, sql(s"SELECT count(*) FROM $db1.$table1").show()))( + s"does not have [select] privilege on [$db1/$table1/id,$db1/$table1/scope]") + + interceptEndsWith[AccessControlException]( + doAs(someone, sql(s"SELECT count(*) FROM $db1.$view1").show()))( + s"does not have [select] privilege on [$db1/$view1/id,$db1/$view1/scope]") + + interceptEndsWith[AccessControlException]( + doAs(someone, sql(s"SELECT count(*) FROM $db1.$view2").show()))( + s"does not have [select] privilege on [$db1/$view2/cnt,$db1/$view2/sum_id]") + + interceptEndsWith[AccessControlException]( + doAs(someone, sql(s"SELECT count(id) FROM $db1.$table1 WHERE id > 10").show()))( + s"does not have [select] privilege on [$db1/$table1/id]") + + interceptEndsWith[AccessControlException]( + doAs(someone, sql(s"SELECT count(id) FROM $db1.$view1 WHERE id > 10").show()))( + s"does not have [select] privilege on [$db1/$view1/id]") + + interceptEndsWith[AccessControlException]( + doAs(someone, sql(s"SELECT count(sum_id) FROM $db1.$view2 WHERE sum_id > 10").show()))( + s"does not have [select] privilege on [$db1/$view2/sum_id]") + + interceptEndsWith[AccessControlException]( + doAs(someone, sql(s"SELECT count(scope) FROM $db1.$table1 WHERE id > 10").show()))( + s"does not have [select] privilege on [$db1/$table1/scope,$db1/$table1/id]") + + interceptEndsWith[AccessControlException]( + doAs(someone, sql(s"SELECT count(scope) FROM $db1.$view1 WHERE id > 10").show()))( + s"does not have [select] privilege on [$db1/$view1/scope,$db1/$view1/id]") + + interceptEndsWith[AccessControlException]( + doAs(someone, sql(s"SELECT count(cnt) FROM $db1.$view2 WHERE sum_id > 10").show()))( + s"does not have [select] privilege on [$db1/$view2/cnt,$db1/$view2/sum_id]") + } + } + } - val e2 = intercept[AccessControlException]( - doAs(someone, sql(s"SELECT count(*) FROM $db1.$view1").show())) - assert(e2.getMessage.contains( - s"does not have [select] privilege on [$db1/$view1/id,$db1/$view1/scope]")) - - val e3 = intercept[AccessControlException]( - doAs(someone, sql(s"SELECT count(*) FROM $db1.$view2").show())) - assert(e3.getMessage.contains( - s"does not have [select] privilege on [$db1/$view2/cnt,$db1/$view2/sum_id]")) - - val e4 = intercept[AccessControlException]( - doAs(someone, sql(s"SELECT count(*) FROM $db1.$view2 WHERE cnt > 10").show())) - assert(e4.getMessage.contains( - s"does not have [select] privilege on [$db1/$view2/cnt,$db1/$view2/sum_id]")) - - val e5 = intercept[AccessControlException]( - doAs(someone, sql(s"SELECT count(cnt) FROM $db1.$view2 WHERE cnt > 10").show())) - assert(e5.getMessage.contains( - s"does not have [select] privilege on [$db1/$view2/cnt,$db1/$view2/sum_id]")) + test("[KYUUBI #5503][AUTHZ] Check plan auth checked should not set tag to all child nodes") { + assume(isSparkV32OrGreater, "Spark 3.1 not support lateral subquery.") + val db1 = defaultDb + val table1 = "table1" + val table2 = "table2" + val perm_view = "perm_view" + withSingleCallEnabled { + withCleanTmpResources( + Seq( + (s"$db1.$table1", "table"), + (s"$db1.$table2", "table"), + (s"$db1.$perm_view", "view"))) { + doAs(admin, sql(s"CREATE TABLE IF NOT EXISTS $db1.$table1 (id int, scope int)")) + doAs(admin, sql(s"CREATE TABLE IF NOT EXISTS $db1.$table2 (id int, scope int)")) + doAs(admin, sql(s"CREATE VIEW $db1.$perm_view AS SELECT * FROM $db1.$table2")) + interceptEndsWith[AccessControlException]( + doAs( + someone, + sql( + s""" + |SELECT t1.id + |FROM $db1.$table1 t1, + |LATERAL ( + | SELECT * + | FROM $db1.$perm_view t2 + | WHERE t1.id = t2.id + |) + |""".stripMargin).show()))( + s"does not have [select] privilege on " + + s"[$db1/$perm_view/id,$db1/$perm_view/scope]") + interceptEndsWith[AccessControlException]( + doAs( + permViewOnlyUser, + sql( + s""" + |SELECT t1.id + |FROM $db1.$table1 t1, + |LATERAL ( + | SELECT * + | FROM $db1.$perm_view t2 + | WHERE t1.id = t2.id + |) + |""".stripMargin).show()))( + s"does not have [select] privilege on " + + s"[$db1/$table1/id]") + + interceptEndsWith[AccessControlException]( + doAs( + someone, + sql( + s""" + |SELECT t1.id + |FROM $db1.$table1 t1, + |LATERAL ( + | SELECT * + | FROM $db1.$table2 t2 + | WHERE t1.id = t2.id + |) + |""".stripMargin).show()))( + s"does not have [select] privilege on " + + s"[$db1/$table2/id,$db1/$table2/scope]") + interceptEndsWith[AccessControlException]( + doAs( + table2OnlyUser, + sql( + s""" + |SELECT t1.id + |FROM $db1.$table1 t1, + |LATERAL ( + | SELECT * + | FROM $db1.$table2 t2 + | WHERE t1.id = t2.id + |) + |""".stripMargin).show()))( + s"does not have [select] privilege on " + + s"[$db1/$table1/id]") + } + } + } + + test("InsertIntoHiveDirCommand") { + val db1 = defaultDb + val table1 = "table1" + withTempDir { path => + withSingleCallEnabled { + withCleanTmpResources(Seq((s"$db1.$table1", "table"))) { + doAs(admin, sql(s"CREATE TABLE IF NOT EXISTS $db1.$table1 (id int, scope int)")) + interceptEndsWith[AccessControlException](doAs( + someone, + sql( + s""" + |INSERT OVERWRITE DIRECTORY '$path' + |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' + |SELECT * FROM $db1.$table1""".stripMargin)))( + s"does not have [select] privilege on [$db1/$table1/id,$db1/$table1/scope], " + + s"[write] privilege on [[$path, $path/]]") + } + } + } + } + + test("InsertIntoDataSourceDirCommand") { + val db1 = defaultDb + val table1 = "table1" + withTempDir { path => + withSingleCallEnabled { + withCleanTmpResources(Seq((s"$db1.$table1", "table"))) { + doAs(admin, sql(s"CREATE TABLE IF NOT EXISTS $db1.$table1 (id int, scope int)")) + interceptEndsWith[AccessControlException](doAs( + someone, + sql( + s""" + |INSERT OVERWRITE DIRECTORY '$path' + |USING parquet + |SELECT * FROM $db1.$table1""".stripMargin)))( + s"does not have [select] privilege on [$db1/$table1/id,$db1/$table1/scope], " + + s"[write] privilege on [[$path, $path/]]") + } + } + } + } + + test("SaveIntoDataSourceCommand") { + withTempDir { path => + withSingleCallEnabled { + val df = sql("SELECT 1 as id, 'Tony' as name") + interceptEndsWith[AccessControlException](doAs( + someone, + df.write.format("console").mode("append").save(path.toString)))( + s"does not have [write] privilege on [[$path, $path/]]") + } + } + } + + test("HadoopFsRelation") { + val db1 = defaultDb + val table1 = "table1" + withTempDir { path => + withSingleCallEnabled { + withCleanTmpResources(Seq((s"$db1.$table1", "table"))) { + doAs(admin, sql(s"CREATE TABLE IF NOT EXISTS $db1.$table1 (id int, scope int)")) + doAs( + admin, + sql( + s""" + |INSERT OVERWRITE DIRECTORY '$path' + |USING parquet + |SELECT * FROM $db1.$table1""".stripMargin)) + + interceptEndsWith[AccessControlException]( + doAs( + someone, + sql( + s""" + |INSERT OVERWRITE DIRECTORY '$path' + |USING parquet + |SELECT * FROM $db1.$table1""".stripMargin)))( + s"does not have [select] privilege on [$db1/$table1/id,$db1/$table1/scope], " + + s"[write] privilege on [[$path, $path/]]") + + doAs(admin, sql(s"SELECT * FROM parquet.`$path`".stripMargin).explain(true)) + interceptEndsWith[AccessControlException]( + doAs(someone, sql(s"SELECT * FROM parquet.`$path`".stripMargin).explain(true)))( + s"does not have [read] privilege on " + + s"[[file:$path, file:$path/]]") + } + } + } + } + + test("LoadDataCommand") { + val db1 = defaultDb + val table1 = "table1" + withSingleCallEnabled { + withTempDir { path => + withCleanTmpResources(Seq((s"$db1.$table1", "table"))) { + doAs(admin, sql(s"CREATE TABLE IF NOT EXISTS $db1.$table1 (id int, scope int)")) + val loadDataSql = + s""" + |LOAD DATA LOCAL INPATH '$path' + |OVERWRITE INTO TABLE $db1.$table1 + |""".stripMargin + doAs(admin, sql(loadDataSql).explain(true)) + interceptEndsWith[AccessControlException]( + doAs(someone, sql(loadDataSql).explain(true)))( + s"does not have [read] privilege on [[$path, $path/]], " + + s"[update] privilege on [$db1/$table1]") + } + } + } + } + + test("Add resource command") { + withTempDir { path => + withSingleCallEnabled { + val supportedCommand = if (isSparkV32OrGreater) { + Seq("JAR", "FILE", "ARCHIVE") + } else { + Seq("JAR", "FILE") + } + supportedCommand.foreach { cmd => + interceptEndsWith[AccessControlException]( + doAs(someone, sql(s"ADD $cmd $path")))( + s"does not have [read] privilege on [[$path, $path/]]") + } + } + } + } + + test("CreateDatabaseCommand/AlterDatabaseSetLocationCommand") { + val db1 = "db1" + withSingleCallEnabled { + withTempDir { path1 => + withTempDir { path2 => + withCleanTmpResources(Seq((s"$db1", "database"))) { + interceptEndsWith[AccessControlException]( + doAs(someone, sql(s"CREATE DATABASE $db1 LOCATION '$path1'")))( + s"does not have [create] privilege on [$db1], " + + s"[write] privilege on [[$path1, $path1/]]") + doAs(admin, sql(s"CREATE DATABASE $db1 LOCATION '$path1'")) + interceptEndsWith[AccessControlException]( + doAs(someone, sql(s"ALTER DATABASE $db1 SET LOCATION '$path2'")))( + s"does not have [alter] privilege on [$db1], " + + s"[write] privilege on [[$path2, $path2/]]") + val e = intercept[UndeclaredThrowableException]( + doAs(admin, sql(s"ALTER DATABASE $db1 SET LOCATION '$path2'"))) + assert(e.getCause.getMessage.contains("does not support altering database location")) + } + } + } + } + } + + test("AlterTableSetLocationCommand/AlterTableAddPartitionCommand") { + val db1 = defaultDb + val table1 = "table1" + val table2 = "table2" + withSingleCallEnabled { + withTempDir { path1 => + withCleanTmpResources(Seq((s"$db1.$table1", "table"), (s"$db1.$table2", "table"))) { + doAs( + admin, + sql( + s""" + |CREATE TABLE IF NOT EXISTS $db1.$table1( + |id int, + |scope int, + |day string) + |PARTITIONED BY (day) + |""".stripMargin)) + interceptEndsWith[AccessControlException]( + doAs(someone, sql(s"ALTER TABLE $db1.$table1 SET LOCATION '$path1'")))( + s"does not have [alter] privilege on [$db1/$table1], " + + s"[write] privilege on [[$path1, $path1/]]") + + withTempDir { path2 => + withTempDir { path3 => + interceptEndsWith[AccessControlException]( + doAs( + someone, + sql( + s""" + |ALTER TABLE $db1.$table1 + |ADD + |PARTITION (day='2023-01-01') LOCATION '$path2' + |PARTITION (day='2023-01-02') LOCATION '$path3' + |""".stripMargin)))( + s"does not have [alter] privilege on [$db1/$table1/day], " + + s"[write] privilege on [[$path2, $path2/],[$path3, $path3/]]") + } + } + } + } + } + } + + test("Table Command location privilege") { + val db1 = defaultDb + val table1 = "table1" + val table2 = "table2" + withSingleCallEnabled { + withTempDir { path => + withCleanTmpResources(Seq((s"$db1.$table1", "table"), (s"$db1.$table2", "table"))) { + interceptEndsWith[AccessControlException](doAs( + someone, + sql( + s""" + |CREATE TABLE IF NOT EXISTS $db1.$table1(id int, scope int) + |LOCATION '$path'""".stripMargin)))( + if (!isSparkV35OrGreater) { + s"does not have [create] privilege on [$db1/$table1], " + + s"[write] privilege on [[$path, $path/]]" + } else { + s"does not have [create] privilege on [$db1/$table1], " + + s"[write] privilege on [[file://$path, file://$path/]]" + }) + doAs( + admin, + sql( + s""" + |CREATE TABLE IF NOT EXISTS $db1.$table1(id int, scope int) + |LOCATION '$path'""".stripMargin)) + interceptEndsWith[AccessControlException]( + doAs( + someone, + sql( + s""" + |CREATE TABLE $db1.$table2 + |LIKE $db1.$table1 + |LOCATION '$path' + |""".stripMargin)))( + s"does not have [select] privilege on [$db1/$table1], " + + s"[create] privilege on [$db1/$table2], " + + s"[write] privilege on [[$path, $path/]]") + interceptEndsWith[AccessControlException]( + doAs( + someone, + sql( + s""" + |CREATE TABLE $db1.$table2 + |LOCATION '$path' + |AS + |SELECT * FROM $db1.$table1 + |""".stripMargin)))( + if (!isSparkV35OrGreater) { + s"does not have [select] privilege on [$db1/$table1/id,$db1/$table1/scope], " + + s"[create] privilege on [$db1/$table2/id,$db1/$table2/scope], " + + s"[write] privilege on [[$path, $path/]]" + } else { + s"does not have [select] privilege on [$db1/$table1/id,$db1/$table1/scope], " + + s"[create] privilege on [$db1/$table2/id,$db1/$table2/scope], " + + s"[write] privilege on [[file://$path, file://$path/]]" + }) + } + } + } + } + + test("[KYUUBI #5677][AUTHZ] Typeof expression miss column information") { + val db1 = defaultDb + val table1 = "table1" + withSingleCallEnabled { + withCleanTmpResources(Seq((s"$db1.$table1", "table"))) { + doAs( + admin, + sql( + s""" + |CREATE TABLE IF NOT EXISTS $db1.$table1( + |id int, + |scope int, + |day string) + |""".stripMargin)) + doAs(admin, sql(s"INSERT INTO $db1.$table1 SELECT 1, 2, 'TONY'")) + interceptEndsWith[AccessControlException]( + doAs( + someone, + sql(s"SELECT typeof(id), typeof(typeof(day)) FROM $db1.$table1").collect()))( + s"does not have [select] privilege on [$db1/$table1/id,$db1/$table1/day]") + interceptEndsWith[AccessControlException]( + doAs( + someone, + sql( + s""" + |SELECT + |typeof(cast(id as string)), + |typeof(substring(day, 1, 3)) + |FROM $db1.$table1""".stripMargin).collect()))( + s"does not have [select] privilege on [$db1/$table1/id,$db1/$table1/day]") + checkAnswer( + admin, + s""" + |SELECT + |typeof(id), + |typeof(typeof(day)), + |typeof(cast(id as string)), + |typeof(substring(day, 1, 3)) + |FROM $db1.$table1""".stripMargin, + Seq(Row("int", "string", "string", "string"))) + } + } + } + + test("[KYUUBI #5692][Bug] Authz not skip explain command") { + val db1 = defaultDb + val table1 = "table1" + withSingleCallEnabled { + withCleanTmpResources(Seq((s"$db1.$table1", "table"))) { + doAs(admin, sql(s"CREATE TABLE IF NOT EXISTS $db1.$table1 (id int, scope int)")) + val explainSql = + s""" + |EXPLAIN + |SELECT id FROM $db1.$table1 + |""".stripMargin + doAs(admin, sql(explainSql)) + val result = doAs(someone, sql(explainSql).collect()).head.getString(0) + assert(!result.contains("Error occurred during query planning")) + assert(!result.contains(s"does not have [select] privilege on [$db1/$table1/id]")) + interceptEndsWith[AccessControlException]( + doAs(someone, sql(s"SELECT id FROM $db1.$table1").collect()))( + s"does not have [select] privilege on [$db1/$table1/id]") + } + } + } + + test("[KYUUBI #5793][BUG] PVM with nested scala-subquery should not src table privilege") { + val db1 = defaultDb + val table1 = "table1" + val table2 = "table2" + val table3 = "table3" + val view1 = "perm_view" + withSingleCallEnabled { + withCleanTmpResources( + Seq( + (s"$db1.$table1", "table"), + (s"$db1.$table2", "table"), + (s"$db1.$table3", "table"), + (s"$db1.$view1", "view"))) { + doAs(admin, sql(s"CREATE TABLE IF NOT EXISTS $db1.$table1(id int, scope int)")) + doAs( + admin, + sql( + s""" + | CREATE TABLE IF NOT EXISTS $db1.$table2( + | id int, + | name string, + | age int, + | scope int) + | """.stripMargin)) + doAs(admin, sql(s"CREATE TABLE IF NOT EXISTS $db1.$table3(id int, scope int)")) + doAs( + admin, + sql( + s""" + |CREATE VIEW $db1.$view1 + |AS + |SELECT id, name, max(scope) as max_scope, sum(age) sum_age + |FROM $db1.$table2 + |WHERE scope in ( + | SELECT max(scope) max_scope + | FROM $db1.$table1 + | WHERE id IN (SELECT id FROM $db1.$table3) + |) + |GROUP BY id, name + |""".stripMargin)) + + checkAnswer(permViewOnlyUser, s"SELECT * FROM $db1.$view1", Array.empty[Row]) + } + } + } + + test("[KYUUBI #5884] PVM should inherit MultiInstance and wrap with new exprId") { + val db1 = defaultDb + val table1 = "table1" + val perm_view = "perm_view" + val view1 = "view1" + val view2 = "view2" + val view3 = "view3" + withSingleCallEnabled { + withCleanTmpResources(Seq.empty) { + sql("set spark.sql.legacy.storeAnalyzedPlanForView=true") + doAs(admin, sql(s"CREATE TABLE IF NOT EXISTS $db1.$table1(id int, scope int)")) + doAs(admin, sql(s"CREATE VIEW $db1.$perm_view AS SELECT * FROM $db1.$table1")) + + doAs( + admin, + sql( + s""" + |CREATE OR REPLACE TEMPORARY VIEW $view1 AS + |SELECT * + |FROM $db1.$perm_view + |WHERE id > 10 + |""".stripMargin)) + + doAs( + admin, + sql( + s""" + |CREATE OR REPLACE TEMPORARY VIEW $view2 AS + |SELECT * + |FROM $view1 + |WHERE scope < 10 + |""".stripMargin)) + + doAs( + admin, + sql( + s""" + |CREATE OR REPLACE TEMPORARY VIEW $view3 AS + |SELECT * + |FROM $view1 + |WHERE scope is not null + |""".stripMargin)) + + interceptContains[AccessControlException]( + doAs( + someone, + sql( + s""" + |SELECT a.*, b.scope as new_scope + |FROM $view2 a + |JOIN $view3 b + |ON a.id == b.id + |""".stripMargin).collect()))(s"does not have [select] privilege on " + + s"[$db1/$perm_view/id,$db1/$perm_view/scope,$db1/$perm_view/scope,$db1/$perm_view/id]") } } } diff --git a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/AuthzConfigurationCheckerSuite.scala b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/rule/AuthzConfigurationCheckerSuite.scala similarity index 92% rename from extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/AuthzConfigurationCheckerSuite.scala rename to extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/rule/AuthzConfigurationCheckerSuite.scala index cd5757e545b..10fa0af9e1c 100644 --- a/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/ranger/AuthzConfigurationCheckerSuite.scala +++ b/extensions/spark/kyuubi-spark-authz/src/test/scala/org/apache/kyuubi/plugin/spark/authz/rule/AuthzConfigurationCheckerSuite.scala @@ -15,13 +15,15 @@ * limitations under the License. */ -package org.apache.kyuubi.plugin.spark.authz.ranger +package org.apache.kyuubi.plugin.spark.authz.rule import org.scalatest.BeforeAndAfterAll // scalastyle:off import org.scalatest.funsuite.AnyFunSuite import org.apache.kyuubi.plugin.spark.authz.{AccessControlException, SparkSessionProvider} +import org.apache.kyuubi.plugin.spark.authz.ranger.RuleAuthorization +import org.apache.kyuubi.plugin.spark.authz.rule.config.AuthzConfigurationChecker class AuthzConfigurationCheckerSuite extends AnyFunSuite with SparkSessionProvider with BeforeAndAfterAll { diff --git a/extensions/spark/kyuubi-spark-connector-tpcds/src/main/scala/org/apache/kyuubi/spark/connector/tpcds/KyuubiTPCDSResults.scala b/extensions/spark/kyuubi-spark-connector-tpcds/src/main/scala/org/apache/kyuubi/spark/connector/tpcds/KyuubiTPCDSResults.scala new file mode 100644 index 00000000000..b119190091e --- /dev/null +++ b/extensions/spark/kyuubi-spark-connector-tpcds/src/main/scala/org/apache/kyuubi/spark/connector/tpcds/KyuubiTPCDSResults.scala @@ -0,0 +1,167 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.spark.connector.tpcds + +import java.lang.{Iterable => JIterable} +import java.lang.reflect.InvocationTargetException +import java.util.{Iterator => JIterator} + +import com.google.common.collect.AbstractIterator +import io.trino.tpcds._ +import io.trino.tpcds.`type`.{Decimal => TPCDSDecimal} +import io.trino.tpcds.row.generator.RowGenerator +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.GenericInternalRow +import org.apache.spark.sql.catalyst.util.{DateTimeUtils, RebaseDateTime} +import org.apache.spark.sql.types.{CharType, DateType, Decimal, DecimalType, IntegerType, LongType, StringType, StructType, VarcharType} +import org.apache.spark.unsafe.types.UTF8String + +import org.apache.kyuubi.spark.connector.tpcds.KyuubiResultsIterator.{FALSE_STRING, TRUE_STRING} +import org.apache.kyuubi.spark.connector.tpcds.row.KyuubiTableRows + +class KyuubiTPCDSResults( + val table: Table, + val startingRowNumber: Long, + val rowCount: Long, + val session: Session, + val schema: StructType) extends JIterable[InternalRow] { + + override def iterator: JIterator[InternalRow] = + new KyuubiResultsIterator(table, startingRowNumber, rowCount, session, schema) +} + +object KyuubiTPCDSResults { + def constructResults(table: Table, session: Session, schema: StructType): KyuubiTPCDSResults = { + val chunkBoundaries = io.trino.tpcds.Parallel.splitWork(table, session) + new KyuubiTPCDSResults( + table, + chunkBoundaries.getFirstRow(), + chunkBoundaries.getLastRow(), + session, + schema) + } +} + +class KyuubiResultsIterator( + val table: Table, + val startingRowNumber: Long, + val endingRowNumber: Long, + val session: Session, + val sparkSchema: StructType) extends AbstractIterator[InternalRow] { + private var rowNumber: Long = 0L + private var rowGenerator: RowGenerator = _ + private var parentRowGenerator: Option[RowGenerator] = None + private var childRowGenerator: Option[RowGenerator] = None + + try { + require(table != null, "table is null") + require(session != null, "session is null") + require(startingRowNumber >= 1, s"starting row number is less than 1: $startingRowNumber") + require( + endingRowNumber <= session.getScaling.getRowCount(table), + s"starting row number is greater than the total rows in $table: $endingRowNumber") + rowNumber = startingRowNumber + rowGenerator = table.getRowGeneratorClass().getDeclaredConstructor().newInstance() + parentRowGenerator = if (table.isChild()) { + Some(table.getParent().getRowGeneratorClass().getDeclaredConstructor().newInstance()) + } else None + childRowGenerator = if (table.hasChild()) { + Some(table.getChild().getRowGeneratorClass().getDeclaredConstructor().newInstance()) + } else None + } catch { + case e @ (_: NoSuchMethodException | + _: InstantiationException | + _: InvocationTargetException | + _: IllegalAccessException) => + throw new TpcdsException(e.toString()); + } + skipRowsUntilStartingRowNumber(startingRowNumber) + + private def skipRowsUntilStartingRowNumber(startingRowNumber: Long): Unit = { + rowGenerator.skipRowsUntilStartingRowNumber(startingRowNumber) + parentRowGenerator.foreach(_.skipRowsUntilStartingRowNumber(startingRowNumber)) + childRowGenerator.foreach(_.skipRowsUntilStartingRowNumber(startingRowNumber)) + } + + override protected def computeNext(): InternalRow = { + if (rowNumber > endingRowNumber) { + return endOfData + } + val result = rowGenerator.generateRowAndChildRows( + rowNumber, + session, + parentRowGenerator.orNull, + childRowGenerator.orNull) + var row: InternalRow = null + if (!result.getRowAndChildRows.isEmpty) { + row = toInternalRow(KyuubiTableRows.getValues(result.getRowAndChildRows.get(0))) + } + + if (result.shouldEndRow) { + rowStop() + rowNumber += 1 + } + if (result.getRowAndChildRows().isEmpty()) { + row = computeNext() + } + row + } + + private def rowStop(): Unit = { + rowGenerator.consumeRemainingSeedsForRow() + parentRowGenerator.foreach(_.consumeRemainingSeedsForRow()) + childRowGenerator.foreach(_.consumeRemainingSeedsForRow()) + } + + private val reusedRow = new Array[Any](sparkSchema.length) + + def toInternalRow(values: Array[Any]): InternalRow = { + var i = 0 + while (i < values.length) { + reusedRow(i) = (values(i), sparkSchema(i).dataType) match { + case (None | null, _) => null + case (Some(Options.DEFAULT_NULL_STRING), _) => null + case (Some(v: Boolean), _) => if (v) TRUE_STRING else FALSE_STRING + case (Some(v: Int), IntegerType) => v + case (Some(v: Long), IntegerType) => v.toInt + case (Some(v: Int), LongType) => v.toLong + case (Some(v: Long), LongType) => v + case (Some(v: Long), DateType) => + RebaseDateTime.rebaseJulianToGregorianDays(v.toInt) - DateTimeUtils.JULIAN_DAY_OF_EPOCH + case (Some(v), StringType) => UTF8String.fromString(v.toString) + case (Some(v), CharType(_)) => UTF8String.fromString(v.toString) + case (Some(v), VarcharType(_)) => UTF8String.fromString(v.toString) + case (Some(v: TPCDSDecimal), t: DecimalType) => + Decimal(v.getNumber, t.precision, t.scale) + case (Some(v: Int), t: DecimalType) => + val decimal = Decimal(v) + decimal.changePrecision(t.precision, t.scale) + decimal + case (Some(v), dt) => throw new IllegalArgumentException( + s"value: $v, value class: ${v.getClass.getName} type: $dt") + } + i += 1 + } + new GenericInternalRow(reusedRow) + } +} + +object KyuubiResultsIterator { + private val TRUE_STRING = UTF8String.fromString("Y") + private val FALSE_STRING = UTF8String.fromString("N") +} diff --git a/extensions/spark/kyuubi-spark-connector-tpcds/src/main/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSBatchScan.scala b/extensions/spark/kyuubi-spark-connector-tpcds/src/main/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSBatchScan.scala index 291031c53c9..919e43342ac 100644 --- a/extensions/spark/kyuubi-spark-connector-tpcds/src/main/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSBatchScan.scala +++ b/extensions/spark/kyuubi-spark-connector-tpcds/src/main/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSBatchScan.scala @@ -17,20 +17,15 @@ package org.apache.kyuubi.spark.connector.tpcds -import java.time.LocalDate -import java.time.format.DateTimeFormatter import java.util.OptionalLong -import scala.collection.JavaConverters._ - import io.trino.tpcds._ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.connector.read._ import org.apache.spark.sql.types._ -import org.apache.spark.unsafe.types.UTF8String -case class TPCDSTableChuck(table: String, scale: Double, parallelism: Int, index: Int) +case class TPCDSTableChunk(table: String, scale: Double, parallelism: Int, index: Int) extends InputPartition class TPCDSBatchScan( @@ -62,10 +57,10 @@ class TPCDSBatchScan( override def readSchema: StructType = schema override def planInputPartitions: Array[InputPartition] = - (1 to parallelism).map { i => TPCDSTableChuck(table.getName, scale, parallelism, i) }.toArray + (1 to parallelism).map { i => TPCDSTableChunk(table.getName, scale, parallelism, i) }.toArray def createReaderFactory: PartitionReaderFactory = (partition: InputPartition) => { - val chuck = partition.asInstanceOf[TPCDSTableChuck] + val chuck = partition.asInstanceOf[TPCDSTableChunk] new TPCDSPartitionReader(chuck.table, chuck.scale, chuck.parallelism, chuck.index, schema) } @@ -90,32 +85,9 @@ class TPCDSPartitionReader( opt.toSession.withChunkNumber(index) } - private lazy val dateFmt: DateTimeFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd") - - private val reusedRow = new Array[Any](schema.length) - private val iterator = Results - .constructResults(chuckInfo.getOnlyTableToGenerate, chuckInfo) - .iterator.asScala - .map { _.get(0).asScala } // the 1st row is specific table row - .map { stringRow => - var i = 0 - while (i < stringRow.length) { - reusedRow(i) = (stringRow(i), schema(i).dataType) match { - case (null, _) => null - case (Options.DEFAULT_NULL_STRING, _) => null - case (v, IntegerType) => v.toInt - case (v, LongType) => v.toLong - case (v, DateType) => LocalDate.parse(v, dateFmt).toEpochDay.toInt - case (v, StringType) => UTF8String.fromString(v) - case (v, CharType(_)) => UTF8String.fromString(v) - case (v, VarcharType(_)) => UTF8String.fromString(v) - case (v, DecimalType()) => Decimal(v) - case (v, dt) => throw new IllegalArgumentException(s"value: $v, type: $dt") - } - i += 1 - } - InternalRow(reusedRow: _*) - } + private val iterator = KyuubiTPCDSResults + .constructResults(chuckInfo.getOnlyTableToGenerate, chuckInfo, schema) + .iterator private var currentRow: InternalRow = _ diff --git a/extensions/spark/kyuubi-spark-connector-tpcds/src/main/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSConf.scala b/extensions/spark/kyuubi-spark-connector-tpcds/src/main/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSConf.scala index dbd22dc1a97..3edbfaebf22 100644 --- a/extensions/spark/kyuubi-spark-connector-tpcds/src/main/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSConf.scala +++ b/extensions/spark/kyuubi-spark-connector-tpcds/src/main/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSConf.scala @@ -81,5 +81,5 @@ object TPCDSConf { val TPCDS_CONNECTOR_READ_CONF_PREFIX = s"$TPCDS_CONNECTOR_CONF_PREFIX.read" val MAX_PARTITION_BYTES_CONF = "maxPartitionBytes" - val MAX_PARTITION_BYTES_DEFAULT = "128m" + val MAX_PARTITION_BYTES_DEFAULT = "384m" } diff --git a/extensions/spark/kyuubi-spark-connector-tpcds/src/main/scala/org/apache/kyuubi/spark/connector/tpcds/row/KyuubiTableRows.scala b/extensions/spark/kyuubi-spark-connector-tpcds/src/main/scala/org/apache/kyuubi/spark/connector/tpcds/row/KyuubiTableRows.scala new file mode 100644 index 00000000000..544498d6e1e --- /dev/null +++ b/extensions/spark/kyuubi-spark-connector-tpcds/src/main/scala/org/apache/kyuubi/spark/connector/tpcds/row/KyuubiTableRows.scala @@ -0,0 +1,1549 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.spark.connector.tpcds.row + +import io.trino.tpcds.`type`.{Address, Decimal => TPCDSDecimal, Pricing} +import io.trino.tpcds.generator.CallCenterGeneratorColumn._ +import io.trino.tpcds.generator.CatalogPageGeneratorColumn._ +import io.trino.tpcds.generator.CatalogReturnsGeneratorColumn._ +import io.trino.tpcds.generator.CatalogSalesGeneratorColumn._ +import io.trino.tpcds.generator.CustomerAddressGeneratorColumn._ +import io.trino.tpcds.generator.CustomerDemographicsGeneratorColumn._ +import io.trino.tpcds.generator.CustomerGeneratorColumn._ +import io.trino.tpcds.generator.DateDimGeneratorColumn._ +import io.trino.tpcds.generator.DbgenVersionGeneratorColumn._ +import io.trino.tpcds.generator.GeneratorColumn +import io.trino.tpcds.generator.HouseholdDemographicsGeneratorColumn._ +import io.trino.tpcds.generator.IncomeBandGeneratorColumn._ +import io.trino.tpcds.generator.InventoryGeneratorColumn._ +import io.trino.tpcds.generator.ItemGeneratorColumn._ +import io.trino.tpcds.generator.PromotionGeneratorColumn._ +import io.trino.tpcds.generator.ReasonGeneratorColumn._ +import io.trino.tpcds.generator.ShipModeGeneratorColumn._ +import io.trino.tpcds.generator.StoreGeneratorColumn._ +import io.trino.tpcds.generator.StoreReturnsGeneratorColumn._ +import io.trino.tpcds.generator.StoreSalesGeneratorColumn._ +import io.trino.tpcds.generator.TimeDimGeneratorColumn._ +import io.trino.tpcds.generator.WarehouseGeneratorColumn._ +import io.trino.tpcds.generator.WebPageGeneratorColumn._ +import io.trino.tpcds.generator.WebReturnsGeneratorColumn._ +import io.trino.tpcds.generator.WebSalesGeneratorColumn._ +import io.trino.tpcds.generator.WebSiteGeneratorColumn._ +import io.trino.tpcds.row.{CallCenterRow, CatalogPageRow, CatalogReturnsRow, CatalogSalesRow, CustomerAddressRow, CustomerDemographicsRow, CustomerRow, DateDimRow, DbgenVersionRow, HouseholdDemographicsRow, IncomeBandRow, InventoryRow, ItemRow, PromotionRow, ReasonRow, ShipModeRow, StoreReturnsRow, StoreRow, StoreSalesRow, TableRow, TableRowWithNulls, TimeDimRow, WarehouseRow, WebPageRow, WebReturnsRow, WebSalesRow, WebSiteRow} + +import org.apache.kyuubi.spark.connector.tpcds.row.KyuubiTPCDSTableRowWithNullsUtils._ +import org.apache.kyuubi.util.reflect.{DynFields, DynMethods} + +object KyuubiTableRows { + + implicit class StoreRowImplicits(storeRow: StoreRow) { + def getStoreSk: Long = StoreRowImplicits.storeSk.get(storeRow) + def getStoreId: String = StoreRowImplicits.storeId.get(storeRow) + def getRecStartDateId: Long = StoreRowImplicits.recStartDateId.get(storeRow) + def getRecEndDateId: Long = StoreRowImplicits.recEndDateId.get(storeRow) + def getClosedDateId: Long = StoreRowImplicits.closedDateId.get(storeRow) + def getStoreName: String = StoreRowImplicits.storeName.get(storeRow) + def getEmployees: Int = StoreRowImplicits.employees.get(storeRow) + def getFloorSpace: Int = StoreRowImplicits.floorSpace.get(storeRow) + def getHours: String = StoreRowImplicits.hours.get(storeRow) + def getStoreManager: String = StoreRowImplicits.storeManager.get(storeRow) + def getMarketId: Int = StoreRowImplicits.marketId.get(storeRow) + def getDTaxPercentage: TPCDSDecimal = StoreRowImplicits.dTaxPercentage.get(storeRow) + def getGeographyClass: String = StoreRowImplicits.geographyClass.get(storeRow) + def getMarketDesc: String = StoreRowImplicits.marketDesc.get(storeRow) + def getMarketManager: String = StoreRowImplicits.marketManager.get(storeRow) + def getDivisionId: Long = StoreRowImplicits.divisionId.get(storeRow) + def getDivisionName: String = StoreRowImplicits.divisionName.get(storeRow) + def getCompanyId: Long = StoreRowImplicits.companyId.get(storeRow) + def getCompanyName: String = StoreRowImplicits.companyName.get(storeRow) + def getAddress: Address = StoreRowImplicits.address.get(storeRow) + } + object StoreRowImplicits { + def invoke[T](field: String): DynFields.UnboundField[T] = + DynFields.builder() + .hiddenImpl(classOf[StoreRow], field) + .buildChecked[T]() + + lazy val storeSk = invoke[Long]("storeSk") + lazy val storeId = invoke[String]("storeId") + lazy val recStartDateId = invoke[Long]("recStartDateId") + lazy val recEndDateId = invoke[Long]("recEndDateId") + lazy val closedDateId = invoke[Long]("closedDateId") + lazy val storeName = invoke[String]("storeName") + lazy val employees = invoke[Int]("employees") + lazy val floorSpace = invoke[Int]("floorSpace") + lazy val hours = invoke[String]("hours") + lazy val storeManager = invoke[String]("storeManager") + lazy val marketId = invoke[Int]("marketId") + lazy val dTaxPercentage = invoke[TPCDSDecimal]("dTaxPercentage") + lazy val geographyClass = invoke[String]("geographyClass") + lazy val marketDesc = invoke[String]("marketDesc") + lazy val marketManager = invoke[String]("marketManager") + lazy val divisionId = invoke[Long]("divisionId") + lazy val divisionName = invoke[String]("divisionName") + lazy val companyId = invoke[Long]("companyId") + lazy val companyName = invoke[String]("companyName") + lazy val address = invoke[Address]("address") + + def values(row: StoreRow): Array[Any] = Array( + getOrNullForKey(row, row.getStoreSk, W_STORE_SK), + getOrNull(row, row.getStoreId, W_STORE_ID), + getDateOrNullFromJulianDays(row, row.getRecStartDateId, W_STORE_REC_START_DATE_ID), + getDateOrNullFromJulianDays(row, row.getRecEndDateId, W_STORE_REC_END_DATE_ID), + getOrNullForKey(row, row.getClosedDateId, W_STORE_CLOSED_DATE_ID), + getOrNull(row, row.getStoreName, W_STORE_NAME), + getOrNull(row, row.getEmployees, W_STORE_EMPLOYEES), + getOrNull(row, row.getFloorSpace, W_STORE_FLOOR_SPACE), + getOrNull(row, row.getHours, W_STORE_HOURS), + getOrNull(row, row.getStoreManager, W_STORE_MANAGER), + getOrNull(row, row.getMarketId, W_STORE_MARKET_ID), + getOrNull(row, row.getGeographyClass, W_STORE_GEOGRAPHY_CLASS), + getOrNull(row, row.getMarketDesc, W_STORE_MARKET_DESC), + getOrNull(row, row.getMarketManager, W_STORE_MARKET_MANAGER), + getOrNullForKey(row, row.getDivisionId, W_STORE_DIVISION_ID), + getOrNull(row, row.getDivisionName, W_STORE_DIVISION_NAME), + getOrNullForKey(row, row.getCompanyId, W_STORE_COMPANY_ID), + getOrNull(row, row.getCompanyName, W_STORE_COMPANY_NAME), + getOrNull(row, row.getAddress.getStreetNumber, W_STORE_ADDRESS_STREET_NUM), + getOrNull(row, row.getAddress.getStreetName, W_STORE_ADDRESS_STREET_NAME1), + getOrNull(row, row.getAddress.getStreetType, W_STORE_ADDRESS_STREET_TYPE), + getOrNull(row, row.getAddress.getSuiteNumber, W_STORE_ADDRESS_SUITE_NUM), + getOrNull(row, row.getAddress.getCity, W_STORE_ADDRESS_CITY), + getOrNull(row, row.getAddress.getCounty, W_STORE_ADDRESS_COUNTY), + getOrNull(row, row.getAddress.getState, W_STORE_ADDRESS_STATE), + getOrNull( + row, + java.lang.String.format("%05d", row.getAddress.getZip.asInstanceOf[Object]), + W_STORE_ADDRESS_ZIP), + getOrNull(row, row.getAddress.getCountry, W_STORE_ADDRESS_COUNTRY), + getOrNull(row, row.getAddress.getGmtOffset, W_STORE_ADDRESS_GMT_OFFSET), + getOrNull(row, row.getDTaxPercentage, W_STORE_TAX_PERCENTAGE)) + } + + implicit class ReasonRowImplicits(reasonRow: ReasonRow) { + def getRReasonSk: Long = ReasonRowImplicits.rReasonSk.get(reasonRow) + def getRReasonId: String = ReasonRowImplicits.rReasonId.get(reasonRow) + def getRReasonDescription: String = ReasonRowImplicits.rReasonDescription.get(reasonRow) + } + + object ReasonRowImplicits { + def invoke[T](field: String): DynFields.UnboundField[T] = + DynFields.builder() + .hiddenImpl(classOf[ReasonRow], field) + .buildChecked[T]() + lazy val rReasonSk = invoke[Long]("rReasonSk") + lazy val rReasonId = invoke[String]("rReasonId") + lazy val rReasonDescription = invoke[String]("rReasonDescription") + + def values(row: ReasonRow): Array[Any] = Array( + getOrNullForKey(row, row.getRReasonSk, R_REASON_SK), + getOrNull(row, row.getRReasonId, R_REASON_ID), + getOrNull(row, row.getRReasonDescription, R_REASON_DESCRIPTION)) + } + + implicit class DbgenVersionRowImplicits(dbgenVersionRow: DbgenVersionRow) { + def getDvVersion: String = DbgenVersionRowImplicits.dvVersion.get(dbgenVersionRow) + def getDvCreateDate: String = DbgenVersionRowImplicits.dvCreateDate.get(dbgenVersionRow) + def getDvCreateTime: String = DbgenVersionRowImplicits.dvCreateTime.get(dbgenVersionRow) + def getDvCmdlineArgs: String = DbgenVersionRowImplicits.dvCmdlineArgs.get(dbgenVersionRow) + } + + object DbgenVersionRowImplicits { + def invoke[T](field: String): DynFields.UnboundField[T] = + DynFields.builder() + .hiddenImpl(classOf[DbgenVersionRow], field) + .buildChecked[T]() + + lazy val dvVersion = invoke[String]("dvVersion") + lazy val dvCreateDate = invoke[String]("dvCreateDate") + lazy val dvCreateTime = invoke[String]("dvCreateTime") + lazy val dvCmdlineArgs = invoke[String]("dvCmdlineArgs") + + def values(row: DbgenVersionRow): Array[Any] = Array( + getOrNull(row, row.getDvVersion, DV_VERSION), + getOrNull(row, row.getDvCreateDate, DV_CREATE_DATE), + getOrNull(row, row.getDvCreateTime, DV_CREATE_TIME), + getOrNull(row, row.getDvCmdlineArgs, DV_CMDLINE_ARGS)) + } + + implicit class ShipModeRowImplicits(shipModeRow: ShipModeRow) { + def getSmShipModeSk: Long = ShipModeRowImplicits.smShipModeSk.get(shipModeRow) + def getSmShipModeId: String = ShipModeRowImplicits.smShipModeId.get(shipModeRow) + def getSmType: String = ShipModeRowImplicits.smType.get(shipModeRow) + def getSmCode: String = ShipModeRowImplicits.smCode.get(shipModeRow) + def getSmCarrier: String = ShipModeRowImplicits.smCarrier.get(shipModeRow) + def getSmContract: String = ShipModeRowImplicits.smContract.get(shipModeRow) + } + + object ShipModeRowImplicits { + def invoke[T](field: String): DynFields.UnboundField[T] = + DynFields.builder() + .hiddenImpl(classOf[ShipModeRow], field) + .buildChecked[T]() + + lazy val smShipModeSk = invoke[Long]("smShipModeSk") + lazy val smShipModeId = invoke[String]("smShipModeId") + lazy val smType = invoke[String]("smType") + lazy val smCode = invoke[String]("smCode") + lazy val smCarrier = invoke[String]("smCarrier") + lazy val smContract = invoke[String]("smContract") + + def values(row: ShipModeRow): Array[Any] = Array( + getOrNullForKey(row, row.getSmShipModeSk, SM_SHIP_MODE_SK), + getOrNull(row, row.getSmShipModeId, SM_SHIP_MODE_ID), + getOrNull(row, row.getSmType, SM_TYPE), + getOrNull(row, row.getSmCode, SM_CODE), + getOrNull(row, row.getSmCarrier, SM_CARRIER), + getOrNull(row, row.getSmContract, SM_CONTRACT)) + } + + implicit class IncomeBandRowImplicits(incomeBandRow: IncomeBandRow) { + def getIbIncomeBandId: Int = IncomeBandRowImplicits.ibIncomeBandId.get(incomeBandRow) + def getIbLowerBound: Int = IncomeBandRowImplicits.ibLowerBound.get(incomeBandRow) + def getIbUpperBound: Int = IncomeBandRowImplicits.ibUpperBound.get(incomeBandRow) + } + + object IncomeBandRowImplicits { + def invoke[T](field: String): DynFields.UnboundField[T] = + DynFields.builder() + .hiddenImpl(classOf[IncomeBandRow], field) + .buildChecked[T]() + + lazy val ibIncomeBandId = invoke[Int]("ibIncomeBandId") + lazy val ibLowerBound = invoke[Int]("ibLowerBound") + lazy val ibUpperBound = invoke[Int]("ibUpperBound") + + def values(row: IncomeBandRow): Array[Any] = Array( + getOrNull(row, row.getIbIncomeBandId, IB_INCOME_BAND_ID), + getOrNull(row, row.getIbLowerBound, IB_LOWER_BOUND), + getOrNull(row, row.getIbUpperBound, IB_UPPER_BOUND)) + } + + implicit class ItemRowImplicits(itemRow: ItemRow) { + def getIItemSk: Long = ItemRowImplicits.iItemSk.get(itemRow) + def getIItemId: String = ItemRowImplicits.iItemId.get(itemRow) + def getIRecStartDateId: Long = ItemRowImplicits.iRecStartDateId.get(itemRow) + def getIRecEndDateId: Long = ItemRowImplicits.iRecEndDateId.get(itemRow) + def getIItemDesc: String = ItemRowImplicits.iItemDesc.get(itemRow) + def getICurrentPrice: TPCDSDecimal = ItemRowImplicits.iCurrentPrice.get(itemRow) + def getIWholesaleCost: TPCDSDecimal = ItemRowImplicits.iWholesaleCost.get(itemRow) + def getIBrandId: Long = ItemRowImplicits.iBrandId.get(itemRow) + def getIBrand: String = ItemRowImplicits.iBrand.get(itemRow) + def getIClassId: Long = ItemRowImplicits.iClassId.get(itemRow) + def getIClass: String = ItemRowImplicits.iClass.get(itemRow) + def getICategoryId: Long = ItemRowImplicits.iCategoryId.get(itemRow) + def getICategory: String = ItemRowImplicits.iCategory.get(itemRow) + def getIManufactId: Long = ItemRowImplicits.iManufactId.get(itemRow) + def getIManufact: String = ItemRowImplicits.iManufact.get(itemRow) + def getISize: String = ItemRowImplicits.iSize.get(itemRow) + def getIFormulation: String = ItemRowImplicits.iFormulation.get(itemRow) + def getIColor: String = ItemRowImplicits.iColor.get(itemRow) + def getIUnits: String = ItemRowImplicits.iUnits.get(itemRow) + def getIContainer: String = ItemRowImplicits.iContainer.get(itemRow) + def getIManagerId: Long = ItemRowImplicits.iManagerId.get(itemRow) + def getIProductName: String = ItemRowImplicits.iProductName.get(itemRow) + def getIPromoSk: Long = ItemRowImplicits.iPromoSk.get(itemRow) + } + + object ItemRowImplicits { + def invoke[T](field: String): DynFields.UnboundField[T] = + DynFields.builder() + .hiddenImpl(classOf[ItemRow], field) + .buildChecked[T]() + + lazy val iItemSk = invoke[Long]("iItemSk") + lazy val iItemId = invoke[String]("iItemId") + lazy val iRecStartDateId = invoke[Long]("iRecStartDateId") + lazy val iRecEndDateId = invoke[Long]("iRecEndDateId") + lazy val iItemDesc = invoke[String]("iItemDesc") + lazy val iCurrentPrice = invoke[TPCDSDecimal]("iCurrentPrice") + lazy val iWholesaleCost = invoke[TPCDSDecimal]("iWholesaleCost") + lazy val iBrandId = invoke[Long]("iBrandId") + lazy val iBrand = invoke[String]("iBrand") + lazy val iClassId = invoke[Long]("iClassId") + lazy val iClass = invoke[String]("iClass") + lazy val iCategoryId = invoke[Long]("iCategoryId") + lazy val iCategory = invoke[String]("iCategory") + lazy val iManufactId = invoke[Long]("iManufactId") + lazy val iManufact = invoke[String]("iManufact") + lazy val iSize = invoke[String]("iSize") + lazy val iFormulation = invoke[String]("iFormulation") + lazy val iColor = invoke[String]("iColor") + lazy val iUnits = invoke[String]("iUnits") + lazy val iContainer = invoke[String]("iContainer") + lazy val iManagerId = invoke[Long]("iManagerId") + lazy val iProductName = invoke[String]("iProductName") + lazy val iPromoSk = invoke[Long]("iPromoSk") + + def values(row: ItemRow): Array[Any] = Array( + getOrNullForKey(row, row.getIItemSk, I_ITEM_SK), + getOrNull(row, row.getIItemId, I_ITEM_ID), + getDateOrNullFromJulianDays(row, row.getIRecStartDateId, I_REC_START_DATE_ID), + getDateOrNullFromJulianDays(row, row.getIRecEndDateId, I_REC_END_DATE_ID), + getOrNull(row, row.getIItemDesc, I_ITEM_DESC), + getOrNull(row, row.getICurrentPrice, I_CURRENT_PRICE), + getOrNull(row, row.getIWholesaleCost, I_WHOLESALE_COST), + getOrNullForKey(row, row.getIBrandId, I_BRAND_ID), + getOrNull(row, row.getIBrand, I_BRAND), + getOrNullForKey(row, row.getIClassId, I_CLASS_ID), + getOrNull(row, row.getIClass, I_CLASS), + getOrNullForKey(row, row.getICategoryId, I_CATEGORY_ID), + getOrNull(row, row.getICategory, I_CATEGORY), + getOrNullForKey(row, row.getIManufactId, I_MANUFACT_ID), + getOrNull(row, row.getIManufact, I_MANUFACT), + getOrNull(row, row.getISize, I_SIZE), + getOrNull(row, row.getIFormulation, I_FORMULATION), + getOrNull(row, row.getIColor, I_COLOR), + getOrNull(row, row.getIUnits, I_UNITS), + getOrNull(row, row.getIContainer, I_CONTAINER), + getOrNullForKey(row, row.getIManagerId, I_MANAGER_ID), + getOrNull(row, row.getIProductName, I_PRODUCT_NAME)) + } + + implicit class CustomerDemographicsRowImplicits( + customerDemographicsRow: CustomerDemographicsRow) { + def getCdDemoSk: Long = CustomerDemographicsRowImplicits.cdDemoSk.get(customerDemographicsRow) + def getCdGender: String = CustomerDemographicsRowImplicits.cdGender.get(customerDemographicsRow) + def getCdMaritalStatus: String = + CustomerDemographicsRowImplicits.cdMaritalStatus.get(customerDemographicsRow) + def getCdEducationStatus: String = + CustomerDemographicsRowImplicits.cdEducationStatus.get(customerDemographicsRow) + def getCdPurchaseEstimate: Int = + CustomerDemographicsRowImplicits.cdPurchaseEstimate.get(customerDemographicsRow) + def getCdCreditRating: String = + CustomerDemographicsRowImplicits.cdCreditRating.get(customerDemographicsRow) + def getCdDepCount: Int = + CustomerDemographicsRowImplicits.cdDepCount.get(customerDemographicsRow) + def getCdDepEmployedCount: Int = + CustomerDemographicsRowImplicits.cdDepEmployedCount.get(customerDemographicsRow) + def getCdDepCollegeCount: Int = + CustomerDemographicsRowImplicits.cdDepCollegeCount.get(customerDemographicsRow) + } + + object CustomerDemographicsRowImplicits { + def invoke[T](field: String): DynFields.UnboundField[T] = + DynFields.builder() + .hiddenImpl(classOf[CustomerDemographicsRow], field) + .buildChecked[T]() + + lazy val cdDemoSk = invoke[Long]("cdDemoSk") + lazy val cdGender = invoke[String]("cdGender") + lazy val cdMaritalStatus = invoke[String]("cdMaritalStatus") + lazy val cdEducationStatus = invoke[String]("cdEducationStatus") + lazy val cdPurchaseEstimate = invoke[Int]("cdPurchaseEstimate") + lazy val cdCreditRating = invoke[String]("cdCreditRating") + lazy val cdDepCount = invoke[Int]("cdDepCount") + lazy val cdDepEmployedCount = invoke[Int]("cdDepEmployedCount") + lazy val cdDepCollegeCount = invoke[Int]("cdDepCollegeCount") + + def values(row: CustomerDemographicsRow): Array[Any] = Array( + getOrNullForKey(row, row.getCdDemoSk, CD_DEMO_SK), + getOrNull(row, row.getCdGender, CD_GENDER), + getOrNull(row, row.getCdMaritalStatus, CD_MARITAL_STATUS), + getOrNull(row, row.getCdEducationStatus, CD_EDUCATION_STATUS), + getOrNull(row, row.getCdPurchaseEstimate, CD_PURCHASE_ESTIMATE), + getOrNull(row, row.getCdCreditRating, CD_CREDIT_RATING), + getOrNull(row, row.getCdDepCount, CD_DEP_COUNT), + getOrNull(row, row.getCdDepEmployedCount, CD_DEP_EMPLOYED_COUNT), + getOrNull(row, row.getCdDepCollegeCount, CD_DEP_COLLEGE_COUNT)) + } + + implicit class TimeDimRowImplicits(timeDimRow: TimeDimRow) { + def getTTimeSk: Long = TimeDimRowImplicits.tTimeSk.get(timeDimRow) + def getTTimeId: String = TimeDimRowImplicits.tTimeId.get(timeDimRow) + def getTTime: Int = TimeDimRowImplicits.tTime.get(timeDimRow) + def getTHour: Int = TimeDimRowImplicits.tHour.get(timeDimRow) + def getTMinute: Int = TimeDimRowImplicits.tMinute.get(timeDimRow) + def getTSecond: Int = TimeDimRowImplicits.tSecond.get(timeDimRow) + def getTAmPm: String = TimeDimRowImplicits.tAmPm.get(timeDimRow) + def getTShift: String = TimeDimRowImplicits.tShift.get(timeDimRow) + def getTSubShift: String = TimeDimRowImplicits.tSubShift.get(timeDimRow) + def getTMealTime: String = TimeDimRowImplicits.tMealTime.get(timeDimRow) + } + + object TimeDimRowImplicits { + def invoke[T](field: String): DynFields.UnboundField[T] = + DynFields.builder() + .hiddenImpl(classOf[TimeDimRow], field) + .buildChecked[T]() + + lazy val tTimeSk = invoke[Long]("tTimeSk") + lazy val tTimeId = invoke[String]("tTimeId") + lazy val tTime = invoke[Int]("tTime") + lazy val tHour = invoke[Int]("tHour") + lazy val tMinute = invoke[Int]("tMinute") + lazy val tSecond = invoke[Int]("tSecond") + lazy val tAmPm = invoke[String]("tAmPm") + lazy val tShift = invoke[String]("tShift") + lazy val tSubShift = invoke[String]("tSubShift") + lazy val tMealTime = invoke[String]("tMealTime") + + def values(row: TimeDimRow): Array[Any] = Array( + getOrNullForKey(row, row.getTTimeSk, T_TIME_SK), + getOrNull(row, row.getTTimeId, T_TIME_ID), + getOrNull(row, row.getTTime, T_TIME), + getOrNull(row, row.getTHour, T_HOUR), + getOrNull(row, row.getTMinute, T_MINUTE), + getOrNull(row, row.getTSecond, T_SECOND), + getOrNull(row, row.getTAmPm, T_AM_PM), + getOrNull(row, row.getTShift, T_SHIFT), + getOrNull(row, row.getTSubShift, T_SUB_SHIFT), + getOrNull(row, row.getTMealTime, T_MEAL_TIME)) + } + + implicit class WebSiteRowImplicits(webSiteRow: WebSiteRow) { + def getWebSiteSk: Long = WebSiteRowImplicits.webSiteSk.get(webSiteRow) + def getWebSiteId: String = WebSiteRowImplicits.webSiteId.get(webSiteRow) + def getWebRecStartDateId: Long = WebSiteRowImplicits.webRecStartDateId.get(webSiteRow) + def getWebRecEndDateId: Long = WebSiteRowImplicits.webRecEndDateId.get(webSiteRow) + def getWebName: String = WebSiteRowImplicits.webName.get(webSiteRow) + def getWebOpenDate: Long = WebSiteRowImplicits.webOpenDate.get(webSiteRow) + def getWebCloseDate: Long = WebSiteRowImplicits.webCloseDate.get(webSiteRow) + def getWebClass: String = WebSiteRowImplicits.webClass.get(webSiteRow) + def getWebManager: String = WebSiteRowImplicits.webManager.get(webSiteRow) + def getWebMarketId: Int = WebSiteRowImplicits.webMarketId.get(webSiteRow) + def getWebMarketClass: String = WebSiteRowImplicits.webMarketClass.get(webSiteRow) + def getWebMarketDesc: String = WebSiteRowImplicits.webMarketDesc.get(webSiteRow) + def getWebMarketManager: String = WebSiteRowImplicits.webMarketManager.get(webSiteRow) + def getWebCompanyId: Int = WebSiteRowImplicits.webCompanyId.get(webSiteRow) + def getWebCompanyName: String = WebSiteRowImplicits.webCompanyName.get(webSiteRow) + def getWebAddress: Address = WebSiteRowImplicits.webAddress.get(webSiteRow) + def getWebTaxPercentage: TPCDSDecimal = WebSiteRowImplicits.webTaxPercentage.get(webSiteRow) + } + + object WebSiteRowImplicits { + def invoke[T](field: String): DynFields.UnboundField[T] = + DynFields.builder() + .hiddenImpl(classOf[WebSiteRow], field) + .buildChecked[T]() + + lazy val webSiteSk = invoke[Long]("webSiteSk") + lazy val webSiteId = invoke[String]("webSiteId") + lazy val webRecStartDateId = invoke[Long]("webRecStartDateId") + lazy val webRecEndDateId = invoke[Long]("webRecEndDateId") + lazy val webName = invoke[String]("webName") + lazy val webOpenDate = invoke[Long]("webOpenDate") + lazy val webCloseDate = invoke[Long]("webCloseDate") + lazy val webClass = invoke[String]("webClass") + lazy val webManager = invoke[String]("webManager") + lazy val webMarketId = invoke[Int]("webMarketId") + lazy val webMarketClass = invoke[String]("webMarketClass") + lazy val webMarketDesc = invoke[String]("webMarketDesc") + lazy val webMarketManager = invoke[String]("webMarketManager") + lazy val webCompanyId = invoke[Int]("webCompanyId") + lazy val webCompanyName = invoke[String]("webCompanyName") + lazy val webAddress = invoke[Address]("webAddress") + lazy val webTaxPercentage = invoke[TPCDSDecimal]("webTaxPercentage") + + def values(row: WebSiteRow): Array[Any] = Array( + getOrNullForKey(row, row.getWebSiteSk, WEB_SITE_SK), + getOrNull(row, row.getWebSiteId, WEB_SITE_ID), + getDateOrNullFromJulianDays(row, row.getWebRecStartDateId, WEB_REC_START_DATE_ID), + getDateOrNullFromJulianDays(row, row.getWebRecEndDateId, WEB_REC_END_DATE_ID), + getOrNull(row, row.getWebName, WEB_NAME), + getOrNullForKey(row, row.getWebOpenDate, WEB_OPEN_DATE), + getOrNullForKey(row, row.getWebCloseDate, WEB_CLOSE_DATE), + getOrNull(row, row.getWebClass, WEB_CLASS), + getOrNull(row, row.getWebManager, WEB_MANAGER), + getOrNull(row, row.getWebMarketId, WEB_MARKET_ID), + getOrNull(row, row.getWebMarketClass, WEB_MARKET_CLASS), + getOrNull(row, row.getWebMarketDesc, WEB_MARKET_DESC), + getOrNull(row, row.getWebMarketManager, WEB_MARKET_MANAGER), + getOrNull(row, row.getWebCompanyId, WEB_COMPANY_ID), + getOrNull(row, row.getWebCompanyName, WEB_COMPANY_NAME), + getOrNull(row, row.getWebAddress.getStreetNumber(), WEB_ADDRESS_STREET_NUM), + getOrNull(row, row.getWebAddress.getStreetName(), WEB_ADDRESS_STREET_NAME1), + getOrNull(row, row.getWebAddress.getStreetType(), WEB_ADDRESS_STREET_TYPE), + getOrNull(row, row.getWebAddress.getSuiteNumber(), WEB_ADDRESS_SUITE_NUM), + getOrNull(row, row.getWebAddress.getCity(), WEB_ADDRESS_CITY), + getOrNull(row, row.getWebAddress.getCounty(), WEB_ADDRESS_COUNTY), + getOrNull(row, row.getWebAddress.getState(), WEB_ADDRESS_STATE), + getOrNull( + row, + java.lang.String.format("%05d", row.getWebAddress.getZip().asInstanceOf[Object]), + WEB_ADDRESS_ZIP), + getOrNull(row, row.getWebAddress.getCountry(), WEB_ADDRESS_COUNTRY), + getOrNull(row, row.getWebAddress.getGmtOffset(), WEB_ADDRESS_GMT_OFFSET), + getOrNull(row, row.getWebTaxPercentage, WEB_TAX_PERCENTAGE)) + } + + implicit class HouseholdDemographicsRowImplicits( + householdDemographicsRow: HouseholdDemographicsRow) { + def getHdDemoSk: Long = HouseholdDemographicsRowImplicits.hdDemoSk.get(householdDemographicsRow) + def getHdIncomeBandId: Long = + HouseholdDemographicsRowImplicits.hdIncomeBandId.get(householdDemographicsRow) + def getHdBuyPotential: String = + HouseholdDemographicsRowImplicits.hdBuyPotential.get(householdDemographicsRow) + def getHdDepCount: Int = + HouseholdDemographicsRowImplicits.hdDepCount.get(householdDemographicsRow) + def getHdVehicleCount: Int = + HouseholdDemographicsRowImplicits.hdVehicleCount.get(householdDemographicsRow) + } + + object HouseholdDemographicsRowImplicits { + def invoke[T](field: String): DynFields.UnboundField[T] = + DynFields.builder() + .hiddenImpl(classOf[HouseholdDemographicsRow], field) + .buildChecked[T]() + + lazy val hdDemoSk = invoke[Long]("hdDemoSk") + lazy val hdIncomeBandId = invoke[Long]("hdIncomeBandId") + lazy val hdBuyPotential = invoke[String]("hdBuyPotential") + lazy val hdDepCount = invoke[Int]("hdDepCount") + lazy val hdVehicleCount = invoke[Int]("hdVehicleCount") + + def values(row: HouseholdDemographicsRow): Array[Any] = Array( + getOrNullForKey(row, row.getHdDemoSk, HD_DEMO_SK), + getOrNullForKey(row, row.getHdIncomeBandId, HD_INCOME_BAND_ID), + getOrNull(row, row.getHdBuyPotential, HD_BUY_POTENTIAL), + getOrNull(row, row.getHdDepCount, HD_DEP_COUNT), + getOrNull(row, row.getHdVehicleCount, HD_VEHICLE_COUNT)) + } + + implicit class PromotionRowImplicits(promotionRow: PromotionRow) { + def getPPromoSk: Long = PromotionRowImplicits.pPromoSk.get(promotionRow) + def getPPromoId: String = PromotionRowImplicits.pPromoId.get(promotionRow) + def getPStartDateId: Long = PromotionRowImplicits.pStartDateId.get(promotionRow) + def getPEndDateId: Long = PromotionRowImplicits.pEndDateId.get(promotionRow) + def getPItemSk: Long = PromotionRowImplicits.pItemSk.get(promotionRow) + def getPCost: TPCDSDecimal = PromotionRowImplicits.pCost.get(promotionRow) + def getPResponseTarget: Int = PromotionRowImplicits.pResponseTarget.get(promotionRow) + def getPPromoName: String = PromotionRowImplicits.pPromoName.get(promotionRow) + def isPChannelDmail: Boolean = PromotionRowImplicits.pChannelDmail.get(promotionRow) + def isPChannelEmail: Boolean = PromotionRowImplicits.pChannelEmail.get(promotionRow) + def isPChannelCatalog: Boolean = PromotionRowImplicits.pChannelCatalog.get(promotionRow) + def isPChannelTv: Boolean = PromotionRowImplicits.pChannelTv.get(promotionRow) + def isPChannelRadio: Boolean = PromotionRowImplicits.pChannelRadio.get(promotionRow) + def isPChannelPress: Boolean = PromotionRowImplicits.pChannelPress.get(promotionRow) + def isPChannelEvent: Boolean = PromotionRowImplicits.pChannelEvent.get(promotionRow) + def isPChannelDemo: Boolean = PromotionRowImplicits.pChannelDemo.get(promotionRow) + def getPChannelDetails: String = PromotionRowImplicits.pChannelDetails.get(promotionRow) + def getPPurpose: String = PromotionRowImplicits.pPurpose.get(promotionRow) + def isPDiscountActive: Boolean = PromotionRowImplicits.pDiscountActive.get(promotionRow) + } + + object PromotionRowImplicits { + def invoke[T](field: String): DynFields.UnboundField[T] = + DynFields.builder() + .hiddenImpl(classOf[PromotionRow], field) + .buildChecked[T]() + + lazy val pPromoSk = invoke[Long]("pPromoSk") + lazy val pPromoId = invoke[String]("pPromoId") + lazy val pStartDateId = invoke[Long]("pStartDateId") + lazy val pEndDateId = invoke[Long]("pEndDateId") + lazy val pItemSk = invoke[Long]("pItemSk") + lazy val pCost = invoke[TPCDSDecimal]("pCost") + lazy val pResponseTarget = invoke[Int]("pResponseTarget") + lazy val pPromoName = invoke[String]("pPromoName") + lazy val pChannelDmail = invoke[Boolean]("pChannelDmail") + lazy val pChannelEmail = invoke[Boolean]("pChannelEmail") + lazy val pChannelCatalog = invoke[Boolean]("pChannelCatalog") + lazy val pChannelTv = invoke[Boolean]("pChannelTv") + lazy val pChannelRadio = invoke[Boolean]("pChannelRadio") + lazy val pChannelPress = invoke[Boolean]("pChannelPress") + lazy val pChannelEvent = invoke[Boolean]("pChannelEvent") + lazy val pChannelDemo = invoke[Boolean]("pChannelDemo") + lazy val pChannelDetails = invoke[String]("pChannelDetails") + lazy val pPurpose = invoke[String]("pPurpose") + lazy val pDiscountActive = invoke[Boolean]("pDiscountActive") + + def values(row: PromotionRow): Array[Any] = Array( + getOrNullForKey(row, row.getPPromoSk, P_PROMO_SK), + getOrNull(row, row.getPPromoId, P_PROMO_ID), + getOrNullForKey(row, row.getPStartDateId, P_START_DATE_ID), + getOrNullForKey(row, row.getPEndDateId, P_END_DATE_ID), + getOrNullForKey(row, row.getPItemSk, P_ITEM_SK), + getOrNull(row, row.getPCost, P_COST), + getOrNull(row, row.getPResponseTarget, P_RESPONSE_TARGET), + getOrNull(row, row.getPPromoName, P_PROMO_NAME), + getOrNullForBoolean(row, row.isPChannelDmail, P_CHANNEL_DMAIL), + getOrNullForBoolean(row, row.isPChannelEmail, P_CHANNEL_EMAIL), + getOrNullForBoolean(row, row.isPChannelCatalog, P_CHANNEL_CATALOG), + getOrNullForBoolean(row, row.isPChannelTv, P_CHANNEL_TV), + getOrNullForBoolean(row, row.isPChannelRadio, P_CHANNEL_RADIO), + getOrNullForBoolean(row, row.isPChannelPress, P_CHANNEL_PRESS), + getOrNullForBoolean(row, row.isPChannelEvent, P_CHANNEL_EVENT), + getOrNullForBoolean(row, row.isPChannelDemo, P_CHANNEL_DEMO), + getOrNull(row, row.getPChannelDetails, P_CHANNEL_DETAILS), + getOrNull(row, row.getPPurpose, P_PURPOSE), + getOrNullForBoolean(row, row.isPDiscountActive, P_DISCOUNT_ACTIVE)) + } + + implicit class CatalogPageRowImplicits(catalogPageRow: CatalogPageRow) { + def getCpCatalogPageSk: Long = CatalogPageRowImplicits.cpCatalogPageSk.get(catalogPageRow) + def getCpCatalogPageId: String = CatalogPageRowImplicits.cpCatalogPageId.get(catalogPageRow) + def getCpStartDateId: Long = CatalogPageRowImplicits.cpStartDateId.get(catalogPageRow) + def getCpEndDateId: Long = CatalogPageRowImplicits.cpEndDateId.get(catalogPageRow) + def getCpDepartment: String = CatalogPageRowImplicits.cpDepartment.get(catalogPageRow) + def getCpCatalogNumber: Int = CatalogPageRowImplicits.cpCatalogNumber.get(catalogPageRow) + def getCpCatalogPageNumber: Int = + CatalogPageRowImplicits.cpCatalogPageNumber.get(catalogPageRow) + def getCpDescription: String = CatalogPageRowImplicits.cpDescription.get(catalogPageRow) + def getCpType: String = CatalogPageRowImplicits.cpType.get(catalogPageRow) + } + + object CatalogPageRowImplicits { + def invoke[T](field: String): DynFields.UnboundField[T] = + DynFields.builder() + .hiddenImpl(classOf[CatalogPageRow], field) + .buildChecked[T]() + + lazy val cpCatalogPageSk = invoke[Long]("cpCatalogPageSk") + lazy val cpCatalogPageId = invoke[String]("cpCatalogPageId") + lazy val cpStartDateId = invoke[Long]("cpStartDateId") + lazy val cpEndDateId = invoke[Long]("cpEndDateId") + lazy val cpDepartment = invoke[String]("cpDepartment") + lazy val cpCatalogNumber = invoke[Int]("cpCatalogNumber") + lazy val cpCatalogPageNumber = invoke[Int]("cpCatalogPageNumber") + lazy val cpDescription = invoke[String]("cpDescription") + lazy val cpType = invoke[String]("cpType") + + def values(row: CatalogPageRow): Array[Any] = Array( + getOrNullForKey(row, row.getCpCatalogPageSk, CP_CATALOG_PAGE_SK), + getOrNull(row, row.getCpCatalogPageId, CP_CATALOG_PAGE_ID), + getOrNullForKey(row, row.getCpStartDateId, CP_START_DATE_ID), + getOrNullForKey(row, row.getCpEndDateId, CP_END_DATE_ID), + getOrNull(row, row.getCpDepartment, CP_DEPARTMENT), + getOrNull(row, row.getCpCatalogNumber, CP_CATALOG_NUMBER), + getOrNull(row, row.getCpCatalogPageNumber, CP_CATALOG_PAGE_NUMBER), + getOrNull(row, row.getCpDescription, CP_DESCRIPTION), + getOrNull(row, row.getCpType, CP_TYPE)) + } + + implicit class WebSalesRowImplicits(webSalesRow: WebSalesRow) { + def getWsSoldDateSk: Long = WebSalesRowImplicits.wsSoldDateSk.get(webSalesRow) + def getWsSoldTimeSk: Long = WebSalesRowImplicits.wsSoldTimeSk.get(webSalesRow) + def getWsShipDateSk: Long = WebSalesRowImplicits.wsShipDateSk.get(webSalesRow) + def getWsItemSk: Long = WebSalesRowImplicits.wsItemSk.get(webSalesRow) + def getWsBillCustomerSk: Long = WebSalesRowImplicits.wsBillCustomerSk.get(webSalesRow) + def getWsBillCdemoSk: Long = WebSalesRowImplicits.wsBillCdemoSk.get(webSalesRow) + def getWsBillHdemoSk: Long = WebSalesRowImplicits.wsBillHdemoSk.get(webSalesRow) + def getWsBillAddrSk: Long = WebSalesRowImplicits.wsBillAddrSk.get(webSalesRow) + def getWsShipCustomerSk: Long = WebSalesRowImplicits.wsShipCustomerSk.get(webSalesRow) + def getWsShipCdemoSk: Long = WebSalesRowImplicits.wsShipCdemoSk.get(webSalesRow) + def getWsShipHdemoSk: Long = WebSalesRowImplicits.wsShipHdemoSk.get(webSalesRow) + def getWsShipAddrSk: Long = WebSalesRowImplicits.wsShipAddrSk.get(webSalesRow) + def getWsWebPageSk: Long = WebSalesRowImplicits.wsWebPageSk.get(webSalesRow) + def getWsWebSiteSk: Long = WebSalesRowImplicits.wsWebSiteSk.get(webSalesRow) + def getWsShipModeSk: Long = WebSalesRowImplicits.wsShipModeSk.get(webSalesRow) + def getWsWarehouseSk: Long = WebSalesRowImplicits.wsWarehouseSk.get(webSalesRow) + def getWsPromoSk: Long = WebSalesRowImplicits.wsPromoSk.get(webSalesRow) + def getWsOrderNumber: Long = WebSalesRowImplicits.wsOrderNumber.get(webSalesRow) + def getWsPricing: Pricing = WebSalesRowImplicits.wsPricing.get(webSalesRow) + } + + object WebSalesRowImplicits { + def invoke[T](field: String): DynFields.UnboundField[T] = + DynFields.builder() + .hiddenImpl(classOf[WebSalesRow], field) + .buildChecked[T]() + + lazy val wsSoldDateSk = invoke[Long]("wsSoldDateSk") + lazy val wsSoldTimeSk = invoke[Long]("wsSoldTimeSk") + lazy val wsShipDateSk = invoke[Long]("wsShipDateSk") + lazy val wsItemSk = invoke[Long]("wsItemSk") + lazy val wsBillCustomerSk = invoke[Long]("wsBillCustomerSk") + lazy val wsBillCdemoSk = invoke[Long]("wsBillCdemoSk") + lazy val wsBillHdemoSk = invoke[Long]("wsBillHdemoSk") + lazy val wsBillAddrSk = invoke[Long]("wsBillAddrSk") + lazy val wsShipCustomerSk = invoke[Long]("wsShipCustomerSk") + lazy val wsShipCdemoSk = invoke[Long]("wsShipCdemoSk") + lazy val wsShipHdemoSk = invoke[Long]("wsShipHdemoSk") + lazy val wsShipAddrSk = invoke[Long]("wsShipAddrSk") + lazy val wsWebPageSk = invoke[Long]("wsWebPageSk") + lazy val wsWebSiteSk = invoke[Long]("wsWebSiteSk") + lazy val wsShipModeSk = invoke[Long]("wsShipModeSk") + lazy val wsWarehouseSk = invoke[Long]("wsWarehouseSk") + lazy val wsPromoSk = invoke[Long]("wsPromoSk") + lazy val wsOrderNumber = invoke[Long]("wsOrderNumber") + lazy val wsPricing = invoke[Pricing]("wsPricing") + + def values(row: WebSalesRow): Array[Any] = Array( + getOrNullForKey(row, row.getWsSoldDateSk, WS_SOLD_DATE_SK), + getOrNullForKey(row, row.getWsSoldTimeSk, WS_SOLD_TIME_SK), + getOrNullForKey(row, row.getWsShipDateSk, WS_SHIP_DATE_SK), + getOrNullForKey(row, row.getWsItemSk, WS_ITEM_SK), + getOrNullForKey(row, row.getWsBillCustomerSk, WS_BILL_CUSTOMER_SK), + getOrNullForKey(row, row.getWsBillCdemoSk, WS_BILL_CDEMO_SK), + getOrNullForKey(row, row.getWsBillHdemoSk, WS_BILL_HDEMO_SK), + getOrNullForKey(row, row.getWsBillAddrSk, WS_BILL_ADDR_SK), + getOrNullForKey(row, row.getWsShipCustomerSk, WS_SHIP_CUSTOMER_SK), + getOrNullForKey(row, row.getWsShipCdemoSk, WS_SHIP_CDEMO_SK), + getOrNullForKey(row, row.getWsShipHdemoSk, WS_SHIP_HDEMO_SK), + getOrNullForKey(row, row.getWsShipAddrSk, WS_SHIP_ADDR_SK), + getOrNullForKey(row, row.getWsWebPageSk, WS_WEB_PAGE_SK), + getOrNullForKey(row, row.getWsWebSiteSk, WS_WEB_SITE_SK), + getOrNullForKey(row, row.getWsShipModeSk, WS_SHIP_MODE_SK), + getOrNullForKey(row, row.getWsWarehouseSk, WS_WAREHOUSE_SK), + getOrNullForKey(row, row.getWsPromoSk, WS_PROMO_SK), + getOrNullForKey(row, row.getWsOrderNumber, WS_ORDER_NUMBER), + getOrNull(row, row.getWsPricing.getQuantity(), WS_PRICING_QUANTITY), + getOrNull(row, row.getWsPricing.getWholesaleCost(), WS_PRICING_WHOLESALE_COST), + getOrNull(row, row.getWsPricing.getListPrice(), WS_PRICING_LIST_PRICE), + getOrNull(row, row.getWsPricing.getSalesPrice(), WS_PRICING_SALES_PRICE), + getOrNull(row, row.getWsPricing.getExtDiscountAmount(), WS_PRICING_EXT_DISCOUNT_AMT), + getOrNull(row, row.getWsPricing.getExtSalesPrice(), WS_PRICING_EXT_SALES_PRICE), + getOrNull(row, row.getWsPricing.getExtWholesaleCost(), WS_PRICING_EXT_WHOLESALE_COST), + getOrNull(row, row.getWsPricing.getExtListPrice(), WS_PRICING_EXT_LIST_PRICE), + getOrNull(row, row.getWsPricing.getExtTax(), WS_PRICING_EXT_TAX), + getOrNull(row, row.getWsPricing.getCouponAmount(), WS_PRICING_COUPON_AMT), + getOrNull(row, row.getWsPricing.getExtShipCost(), WS_PRICING_EXT_SHIP_COST), + getOrNull(row, row.getWsPricing.getNetPaid(), WS_PRICING_NET_PAID), + getOrNull(row, row.getWsPricing.getNetPaidIncludingTax(), WS_PRICING_NET_PAID_INC_TAX), + getOrNull(row, row.getWsPricing.getNetPaidIncludingShipping(), WS_PRICING_NET_PAID_INC_SHIP), + getOrNull( + row, + row.getWsPricing.getNetPaidIncludingShippingAndTax(), + WS_PRICING_NET_PAID_INC_SHIP_TAX), + getOrNull(row, row.getWsPricing.getNetProfit(), WS_PRICING_NET_PROFIT)) + } + + implicit class StoreSalesRowImplicits(storeSalesRow: StoreSalesRow) { + def getSsSoldDateSk: Long = StoreSalesRowImplicits.ssSoldDateSk.get(storeSalesRow) + def getSsSoldTimeSk: Long = StoreSalesRowImplicits.ssSoldTimeSk.get(storeSalesRow) + def getSsSoldItemSk: Long = StoreSalesRowImplicits.ssSoldItemSk.get(storeSalesRow) + def getSsSoldCustomerSk: Long = StoreSalesRowImplicits.ssSoldCustomerSk.get(storeSalesRow) + def getSsSoldCdemoSk: Long = StoreSalesRowImplicits.ssSoldCdemoSk.get(storeSalesRow) + def getSsSoldHdemoSk: Long = StoreSalesRowImplicits.ssSoldHdemoSk.get(storeSalesRow) + def getSsSoldAddrSk: Long = StoreSalesRowImplicits.ssSoldAddrSk.get(storeSalesRow) + def getSsSoldStoreSk: Long = StoreSalesRowImplicits.ssSoldStoreSk.get(storeSalesRow) + def getSsSoldPromoSk: Long = StoreSalesRowImplicits.ssSoldPromoSk.get(storeSalesRow) + def getSsTicketNumber: Long = StoreSalesRowImplicits.ssTicketNumber.get(storeSalesRow) + def getSsPricing: Pricing = StoreSalesRowImplicits.ssPricing.get(storeSalesRow) + } + + object StoreSalesRowImplicits { + def invoke[T](field: String): DynFields.UnboundField[T] = + DynFields.builder() + .hiddenImpl(classOf[StoreSalesRow], field) + .buildChecked[T]() + + lazy val ssSoldDateSk = invoke[Long]("ssSoldDateSk") + lazy val ssSoldTimeSk = invoke[Long]("ssSoldTimeSk") + lazy val ssSoldItemSk = invoke[Long]("ssSoldItemSk") + lazy val ssSoldCustomerSk = invoke[Long]("ssSoldCustomerSk") + lazy val ssSoldCdemoSk = invoke[Long]("ssSoldCdemoSk") + lazy val ssSoldHdemoSk = invoke[Long]("ssSoldHdemoSk") + lazy val ssSoldAddrSk = invoke[Long]("ssSoldAddrSk") + lazy val ssSoldStoreSk = invoke[Long]("ssSoldStoreSk") + lazy val ssSoldPromoSk = invoke[Long]("ssSoldPromoSk") + lazy val ssTicketNumber = invoke[Long]("ssTicketNumber") + lazy val ssPricing = invoke[Pricing]("ssPricing") + + def values(row: StoreSalesRow): Array[Any] = Array( + getOrNullForKey(row, row.getSsSoldDateSk, SS_SOLD_DATE_SK), + getOrNullForKey(row, row.getSsSoldTimeSk, SS_SOLD_TIME_SK), + getOrNullForKey(row, row.getSsSoldItemSk, SS_SOLD_ITEM_SK), + getOrNullForKey(row, row.getSsSoldCustomerSk, SS_SOLD_CUSTOMER_SK), + getOrNullForKey(row, row.getSsSoldCdemoSk, SS_SOLD_CDEMO_SK), + getOrNullForKey(row, row.getSsSoldHdemoSk, SS_SOLD_HDEMO_SK), + getOrNullForKey(row, row.getSsSoldAddrSk, SS_SOLD_ADDR_SK), + getOrNullForKey(row, row.getSsSoldStoreSk, SS_SOLD_STORE_SK), + getOrNullForKey(row, row.getSsSoldPromoSk, SS_SOLD_PROMO_SK), + getOrNullForKey(row, row.getSsTicketNumber, SS_TICKET_NUMBER), + getOrNull(row, row.getSsPricing.getQuantity(), SS_PRICING_QUANTITY), + getOrNull(row, row.getSsPricing.getWholesaleCost(), SS_PRICING_WHOLESALE_COST), + getOrNull(row, row.getSsPricing.getListPrice(), SS_PRICING_LIST_PRICE), + getOrNull(row, row.getSsPricing.getSalesPrice(), SS_PRICING_SALES_PRICE), + getOrNull(row, row.getSsPricing.getCouponAmount(), SS_PRICING_COUPON_AMT), + getOrNull(row, row.getSsPricing.getExtSalesPrice(), SS_PRICING_EXT_SALES_PRICE), + getOrNull(row, row.getSsPricing.getExtWholesaleCost(), SS_PRICING_EXT_WHOLESALE_COST), + getOrNull(row, row.getSsPricing.getExtListPrice(), SS_PRICING_EXT_LIST_PRICE), + getOrNull(row, row.getSsPricing.getExtTax(), SS_PRICING_EXT_TAX), + getOrNull(row, row.getSsPricing.getCouponAmount(), SS_PRICING_COUPON_AMT), + getOrNull(row, row.getSsPricing.getNetPaid(), SS_PRICING_NET_PAID), + getOrNull(row, row.getSsPricing.getNetPaidIncludingTax(), SS_PRICING_NET_PAID_INC_TAX), + getOrNull(row, row.getSsPricing.getNetProfit(), SS_PRICING_NET_PROFIT)) + } + + implicit class InventoryRowImplicits(inventoryRow: InventoryRow) { + def getInvDateSk: Long = InventoryRowImplicits.invDateSk.get(inventoryRow) + def getInvItemSk: Long = InventoryRowImplicits.invItemSk.get(inventoryRow) + def getInvWarehouseSk: Long = InventoryRowImplicits.invWarehouseSk.get(inventoryRow) + def getInvQuantityOnHand: Int = InventoryRowImplicits.invQuantityOnHand.get(inventoryRow) + } + + object InventoryRowImplicits { + def invoke[T](field: String): DynFields.UnboundField[T] = + DynFields.builder() + .hiddenImpl(classOf[InventoryRow], field) + .buildChecked[T]() + + lazy val invDateSk = invoke[Long]("invDateSk") + lazy val invItemSk = invoke[Long]("invItemSk") + lazy val invWarehouseSk = invoke[Long]("invWarehouseSk") + lazy val invQuantityOnHand = invoke[Int]("invQuantityOnHand") + + def values(row: InventoryRow): Array[Any] = Array( + getOrNullForKey(row, row.getInvDateSk, INV_DATE_SK), + getOrNullForKey(row, row.getInvItemSk, INV_ITEM_SK), + getOrNullForKey(row, row.getInvWarehouseSk, INV_WAREHOUSE_SK), + getOrNull(row, row.getInvQuantityOnHand, INV_QUANTITY_ON_HAND)) + } + + implicit class WebReturnsRowImplicits(webReturnsRow: WebReturnsRow) { + def getWrReturnedDateSk: Long = WebReturnsRowImplicits.wrReturnedDateSk.get(webReturnsRow) + def getWrReturnedTimeSk: Long = WebReturnsRowImplicits.wrReturnedTimeSk.get(webReturnsRow) + def getWrItemSk: Long = WebReturnsRowImplicits.wrItemSk.get(webReturnsRow) + def getWrRefundedCustomerSk: Long = + WebReturnsRowImplicits.wrRefundedCustomerSk.get(webReturnsRow) + def getWrRefundedCdemoSk: Long = WebReturnsRowImplicits.wrRefundedCdemoSk.get(webReturnsRow) + def getWrRefundedHdemoSk: Long = WebReturnsRowImplicits.wrRefundedHdemoSk.get(webReturnsRow) + def getWrRefundedAddrSk: Long = WebReturnsRowImplicits.wrRefundedAddrSk.get(webReturnsRow) + def getWrReturningCustomerSk: Long = + WebReturnsRowImplicits.wrReturningCustomerSk.get(webReturnsRow) + def getWrReturningCdemoSk: Long = WebReturnsRowImplicits.wrReturningCdemoSk.get(webReturnsRow) + def getWrReturningHdemoSk: Long = WebReturnsRowImplicits.wrReturningHdemoSk.get(webReturnsRow) + def getWrReturningAddrSk: Long = WebReturnsRowImplicits.wrReturningAddrSk.get(webReturnsRow) + def getWrWebPageSk: Long = WebReturnsRowImplicits.wrWebPageSk.get(webReturnsRow) + def getWrReasonSk: Long = WebReturnsRowImplicits.wrReasonSk.get(webReturnsRow) + def getWrOrderNumber: Long = WebReturnsRowImplicits.wrOrderNumber.get(webReturnsRow) + def getWrPricing: Pricing = WebReturnsRowImplicits.wrPricing.get(webReturnsRow) + } + + object WebReturnsRowImplicits { + def invoke[T](field: String): DynFields.UnboundField[T] = + DynFields.builder() + .hiddenImpl(classOf[WebReturnsRow], field) + .buildChecked[T]() + + lazy val wrReturnedDateSk = invoke[Long]("wrReturnedDateSk") + lazy val wrReturnedTimeSk = invoke[Long]("wrReturnedTimeSk") + lazy val wrItemSk = invoke[Long]("wrItemSk") + lazy val wrRefundedCustomerSk = invoke[Long]("wrRefundedCustomerSk") + lazy val wrRefundedCdemoSk = invoke[Long]("wrRefundedCdemoSk") + lazy val wrRefundedHdemoSk = invoke[Long]("wrRefundedHdemoSk") + lazy val wrRefundedAddrSk = invoke[Long]("wrRefundedAddrSk") + lazy val wrReturningCustomerSk = invoke[Long]("wrReturningCustomerSk") + lazy val wrReturningCdemoSk = invoke[Long]("wrReturningCdemoSk") + lazy val wrReturningHdemoSk = invoke[Long]("wrReturningHdemoSk") + lazy val wrReturningAddrSk = invoke[Long]("wrReturningAddrSk") + lazy val wrWebPageSk = invoke[Long]("wrWebPageSk") + lazy val wrReasonSk = invoke[Long]("wrReasonSk") + lazy val wrOrderNumber = invoke[Long]("wrOrderNumber") + lazy val wrPricing = invoke[Pricing]("wrPricing") + + def values(row: WebReturnsRow): Array[Any] = Array( + getOrNullForKey(row, row.getWrReturnedDateSk, WR_RETURNED_DATE_SK), + getOrNullForKey(row, row.getWrReturnedTimeSk, WR_RETURNED_TIME_SK), + getOrNullForKey(row, row.getWrItemSk, WR_ITEM_SK), + getOrNullForKey(row, row.getWrRefundedCustomerSk, WR_REFUNDED_CUSTOMER_SK), + getOrNullForKey(row, row.getWrRefundedCdemoSk, WR_REFUNDED_CDEMO_SK), + getOrNullForKey(row, row.getWrRefundedHdemoSk, WR_REFUNDED_HDEMO_SK), + getOrNullForKey(row, row.getWrRefundedAddrSk, WR_REFUNDED_ADDR_SK), + getOrNullForKey(row, row.getWrReturningCustomerSk, WR_RETURNING_CUSTOMER_SK), + getOrNullForKey(row, row.getWrReturningCdemoSk, WR_RETURNING_CDEMO_SK), + getOrNullForKey(row, row.getWrReturningHdemoSk, WR_RETURNING_HDEMO_SK), + getOrNullForKey(row, row.getWrReturningAddrSk, WR_RETURNING_ADDR_SK), + getOrNullForKey(row, row.getWrWebPageSk, WR_WEB_PAGE_SK), + getOrNullForKey(row, row.getWrReasonSk, WR_REASON_SK), + getOrNullForKey(row, row.getWrOrderNumber, WR_ORDER_NUMBER), + getOrNull(row, row.getWrPricing.getQuantity(), WR_PRICING_QUANTITY), + getOrNull(row, row.getWrPricing.getNetPaid(), WR_PRICING_NET_PAID), + getOrNull(row, row.getWrPricing.getExtTax(), WR_PRICING_EXT_TAX), + getOrNull(row, row.getWrPricing.getNetPaidIncludingTax(), WR_PRICING_NET_PAID_INC_TAX), + getOrNull(row, row.getWrPricing.getFee(), WR_PRICING_FEE), + getOrNull(row, row.getWrPricing.getExtShipCost(), WR_PRICING_EXT_SHIP_COST), + getOrNull(row, row.getWrPricing.getRefundedCash(), WR_PRICING_REFUNDED_CASH), + getOrNull(row, row.getWrPricing.getReversedCharge(), WR_PRICING_REVERSED_CHARGE), + getOrNull(row, row.getWrPricing.getStoreCredit(), WR_PRICING_STORE_CREDIT), + getOrNull(row, row.getWrPricing.getNetLoss(), WR_PRICING_NET_LOSS)) + } + + implicit class WarehouseRowImplicits(warehouseRow: WarehouseRow) { + def getWWarehouseSk: Long = WarehouseRowImplicits.wWarehouseSk.get(warehouseRow) + def getWWarehouseId: String = WarehouseRowImplicits.wWarehouseId.get(warehouseRow) + def getWWarehouseName: String = WarehouseRowImplicits.wWarehouseName.get(warehouseRow) + def getWWarehouseSqFt: Int = WarehouseRowImplicits.wWarehouseSqFt.get(warehouseRow) + def getWAddress: Address = WarehouseRowImplicits.wAddress.get(warehouseRow) + } + + object WarehouseRowImplicits { + def invoke[T](field: String): DynFields.UnboundField[T] = + DynFields.builder() + .hiddenImpl(classOf[WarehouseRow], field) + .buildChecked[T]() + + lazy val wWarehouseSk = invoke[Long]("wWarehouseSk") + lazy val wWarehouseId = invoke[String]("wWarehouseId") + lazy val wWarehouseName = invoke[String]("wWarehouseName") + lazy val wWarehouseSqFt = invoke[Int]("wWarehouseSqFt") + lazy val wAddress = invoke[Address]("wAddress") + + def values(row: WarehouseRow): Array[Any] = Array( + getOrNullForKey(row, row.getWWarehouseSk, W_WAREHOUSE_SK), + getOrNull(row, row.getWWarehouseId, W_WAREHOUSE_ID), + getOrNull(row, row.getWWarehouseName, W_WAREHOUSE_NAME), + getOrNull(row, row.getWWarehouseSqFt, W_WAREHOUSE_SQ_FT), + getOrNull(row, row.getWAddress.getStreetNumber(), W_ADDRESS_STREET_NUM), + getOrNull(row, row.getWAddress.getStreetName(), W_ADDRESS_STREET_NAME1), + getOrNull(row, row.getWAddress.getStreetType(), W_ADDRESS_STREET_TYPE), + getOrNull(row, row.getWAddress.getSuiteNumber(), W_ADDRESS_SUITE_NUM), + getOrNull(row, row.getWAddress.getCity(), W_ADDRESS_CITY), + getOrNull(row, row.getWAddress.getCounty(), W_ADDRESS_COUNTY), + getOrNull(row, row.getWAddress.getState(), W_ADDRESS_STATE), + getOrNull( + row, + java.lang.String.format("%05d", row.getWAddress.getZip.asInstanceOf[Object]), + W_ADDRESS_ZIP), + getOrNull(row, row.getWAddress.getCountry(), W_ADDRESS_COUNTRY), + getOrNull(row, row.getWAddress.getGmtOffset(), W_ADDRESS_GMT_OFFSET)) + } + + implicit class CustomerRowImplicits(customerRow: CustomerRow) { + def getCCustomerSk: Long = CustomerRowImplicits.cCustomerSk.get(customerRow) + def getCCustomerId: String = CustomerRowImplicits.cCustomerId.get(customerRow) + def getCCurrentCdemoSk: Long = CustomerRowImplicits.cCurrentCdemoSk.get(customerRow) + def getCCurrentHdemoSk: Long = CustomerRowImplicits.cCurrentHdemoSk.get(customerRow) + def getCCurrentAddrSk: Long = CustomerRowImplicits.cCurrentAddrSk.get(customerRow) + def getCFirstShiptoDateId: Int = CustomerRowImplicits.cFirstShiptoDateId.get(customerRow) + def getCFirstSalesDateId: Int = CustomerRowImplicits.cFirstSalesDateId.get(customerRow) + def getCSalutation: String = CustomerRowImplicits.cSalutation.get(customerRow) + def getCFirstName: String = CustomerRowImplicits.cFirstName.get(customerRow) + def getCLastName: String = CustomerRowImplicits.cLastName.get(customerRow) + def isCPreferredCustFlag: Boolean = CustomerRowImplicits.cPreferredCustFlag.get(customerRow) + def getCBirthDay: Int = CustomerRowImplicits.cBirthDay.get(customerRow) + def getCBirthMonth: Int = CustomerRowImplicits.cBirthMonth.get(customerRow) + def getCBirthYear: Int = CustomerRowImplicits.cBirthYear.get(customerRow) + def getCBirthCountry: String = CustomerRowImplicits.cBirthCountry.get(customerRow) + def getCLogin: String = CustomerRowImplicits.cLogin.get(customerRow) + def getCEmailAddress: String = CustomerRowImplicits.cEmailAddress.get(customerRow) + def getCLastReviewDate: Int = CustomerRowImplicits.cLastReviewDate.get(customerRow) + } + + object CustomerRowImplicits { + def invoke[T](field: String): DynFields.UnboundField[T] = + DynFields.builder() + .hiddenImpl(classOf[CustomerRow], field) + .buildChecked[T]() + + lazy val cCustomerSk = invoke[Long]("cCustomerSk") + lazy val cCustomerId = invoke[String]("cCustomerId") + lazy val cCurrentCdemoSk = invoke[Long]("cCurrentCdemoSk") + lazy val cCurrentHdemoSk = invoke[Long]("cCurrentHdemoSk") + lazy val cCurrentAddrSk = invoke[Long]("cCurrentAddrSk") + lazy val cFirstShiptoDateId = invoke[Int]("cFirstShiptoDateId") + lazy val cFirstSalesDateId = invoke[Int]("cFirstSalesDateId") + lazy val cSalutation = invoke[String]("cSalutation") + lazy val cFirstName = invoke[String]("cFirstName") + lazy val cLastName = invoke[String]("cLastName") + lazy val cPreferredCustFlag = invoke[Boolean]("cPreferredCustFlag") + lazy val cBirthDay = invoke[Int]("cBirthDay") + lazy val cBirthMonth = invoke[Int]("cBirthMonth") + lazy val cBirthYear = invoke[Int]("cBirthYear") + lazy val cBirthCountry = invoke[String]("cBirthCountry") + lazy val cLogin = invoke[String]("cLogin") + lazy val cEmailAddress = invoke[String]("cEmailAddress") + lazy val cLastReviewDate = invoke[Int]("cLastReviewDate") + + def values(row: CustomerRow): Array[Any] = Array( + getOrNullForKey(row, row.getCCustomerSk, C_CUSTOMER_SK), + getOrNull(row, row.getCCustomerId, C_CUSTOMER_ID), + getOrNullForKey(row, row.getCCurrentCdemoSk, C_CURRENT_CDEMO_SK), + getOrNullForKey(row, row.getCCurrentHdemoSk, C_CURRENT_HDEMO_SK), + getOrNullForKey(row, row.getCCurrentAddrSk, C_CURRENT_ADDR_SK), + getOrNull(row, row.getCFirstShiptoDateId, C_FIRST_SHIPTO_DATE_ID), + getOrNull(row, row.getCFirstSalesDateId, C_FIRST_SALES_DATE_ID), + getOrNull(row, row.getCSalutation, C_SALUTATION), + getOrNull(row, row.getCFirstName, C_FIRST_NAME), + getOrNull(row, row.getCLastName, C_LAST_NAME), + getOrNullForBoolean(row, row.isCPreferredCustFlag, C_PREFERRED_CUST_FLAG), + getOrNull(row, row.getCBirthDay, C_BIRTH_DAY), + getOrNull(row, row.getCBirthMonth, C_BIRTH_MONTH), + getOrNull(row, row.getCBirthYear, C_BIRTH_YEAR), + getOrNull(row, row.getCBirthCountry, C_BIRTH_COUNTRY), + row.getCLogin, + getOrNull(row, row.getCEmailAddress, C_EMAIL_ADDRESS), + getOrNull(row, row.getCLastReviewDate, C_LAST_REVIEW_DATE)) + } + + implicit class StoreReturnsRowImplicits(storeReturnsRow: StoreReturnsRow) { + def getSrReturnedDateSk: Long = StoreReturnsRowImplicits.srReturnedDateSk.get(storeReturnsRow) + def getSrReturnedTimeSk: Long = StoreReturnsRowImplicits.srReturnedTimeSk.get(storeReturnsRow) + def getSrItemSk: Long = StoreReturnsRowImplicits.srItemSk.get(storeReturnsRow) + def getSrCustomerSk: Long = StoreReturnsRowImplicits.srCustomerSk.get(storeReturnsRow) + def getSrCdemoSk: Long = StoreReturnsRowImplicits.srCdemoSk.get(storeReturnsRow) + def getSrHdemoSk: Long = StoreReturnsRowImplicits.srHdemoSk.get(storeReturnsRow) + def getSrAddrSk: Long = StoreReturnsRowImplicits.srAddrSk.get(storeReturnsRow) + def getSrStoreSk: Long = StoreReturnsRowImplicits.srStoreSk.get(storeReturnsRow) + def getSrReasonSk: Long = StoreReturnsRowImplicits.srReasonSk.get(storeReturnsRow) + def getSrTicketNumber: Long = StoreReturnsRowImplicits.srTicketNumber.get(storeReturnsRow) + def getSrPricing: Pricing = StoreReturnsRowImplicits.srPricing.get(storeReturnsRow) + } + + object StoreReturnsRowImplicits { + def invoke[T](field: String): DynFields.UnboundField[T] = + DynFields.builder() + .hiddenImpl(classOf[StoreReturnsRow], field) + .buildChecked[T]() + + lazy val srReturnedDateSk = invoke[Long]("srReturnedDateSk") + lazy val srReturnedTimeSk = invoke[Long]("srReturnedTimeSk") + lazy val srItemSk = invoke[Long]("srItemSk") + lazy val srCustomerSk = invoke[Long]("srCustomerSk") + lazy val srCdemoSk = invoke[Long]("srCdemoSk") + lazy val srHdemoSk = invoke[Long]("srHdemoSk") + lazy val srAddrSk = invoke[Long]("srAddrSk") + lazy val srStoreSk = invoke[Long]("srStoreSk") + lazy val srReasonSk = invoke[Long]("srReasonSk") + lazy val srTicketNumber = invoke[Long]("srTicketNumber") + lazy val srPricing = invoke[Pricing]("srPricing") + + def values(row: StoreReturnsRow): Array[Any] = Array( + getOrNullForKey(row, row.getSrReturnedDateSk, SR_RETURNED_DATE_SK), + getOrNullForKey(row, row.getSrReturnedTimeSk, SR_RETURNED_TIME_SK), + getOrNullForKey(row, row.getSrItemSk, SR_ITEM_SK), + getOrNullForKey(row, row.getSrCustomerSk, SR_CUSTOMER_SK), + getOrNullForKey(row, row.getSrCdemoSk, SR_CDEMO_SK), + getOrNullForKey(row, row.getSrHdemoSk, SR_HDEMO_SK), + getOrNullForKey(row, row.getSrAddrSk, SR_ADDR_SK), + getOrNullForKey(row, row.getSrStoreSk, SR_STORE_SK), + getOrNullForKey(row, row.getSrReasonSk, SR_REASON_SK), + getOrNullForKey(row, row.getSrTicketNumber, SR_TICKET_NUMBER), + getOrNull(row, row.getSrPricing.getQuantity(), SR_PRICING_QUANTITY), + getOrNull(row, row.getSrPricing.getNetPaid(), SR_PRICING_NET_PAID), + getOrNull(row, row.getSrPricing.getExtTax(), SR_PRICING_EXT_TAX), + getOrNull(row, row.getSrPricing.getNetPaidIncludingTax(), SR_PRICING_NET_PAID_INC_TAX), + getOrNull(row, row.getSrPricing.getFee(), SR_PRICING_FEE), + getOrNull(row, row.getSrPricing.getExtShipCost(), SR_PRICING_EXT_SHIP_COST), + getOrNull(row, row.getSrPricing.getRefundedCash(), SR_PRICING_REFUNDED_CASH), + getOrNull(row, row.getSrPricing.getReversedCharge(), SR_PRICING_REVERSED_CHARGE), + getOrNull(row, row.getSrPricing.getStoreCredit(), SR_PRICING_STORE_CREDIT), + getOrNull(row, row.getSrPricing.getNetLoss(), SR_PRICING_NET_LOSS)) + } + + implicit class CatalogReturnsRowImplicits(catalogReturnsRow: CatalogReturnsRow) { + def getCrReturnedDateSk: Long = + CatalogReturnsRowImplicits.crReturnedDateSk.get(catalogReturnsRow) + def getCrReturnedTimeSk: Long = + CatalogReturnsRowImplicits.crReturnedTimeSk.get(catalogReturnsRow) + def getCrItemSk: Long = CatalogReturnsRowImplicits.crItemSk.get(catalogReturnsRow) + def getCrRefundedCustomerSk: Long = + CatalogReturnsRowImplicits.crRefundedCustomerSk.get(catalogReturnsRow) + def getCrRefundedCdemoSk: Long = + CatalogReturnsRowImplicits.crRefundedCdemoSk.get(catalogReturnsRow) + def getCrRefundedHdemoSk: Long = + CatalogReturnsRowImplicits.crRefundedHdemoSk.get(catalogReturnsRow) + def getCrRefundedAddrSk: Long = + CatalogReturnsRowImplicits.crRefundedAddrSk.get(catalogReturnsRow) + def getCrReturningCustomerSk: Long = + CatalogReturnsRowImplicits.crReturningCustomerSk.get(catalogReturnsRow) + def getCrReturningCdemoSk: Long = + CatalogReturnsRowImplicits.crReturningCdemoSk.get(catalogReturnsRow) + def getCrReturningHdemoSk: Long = + CatalogReturnsRowImplicits.crReturningHdemoSk.get(catalogReturnsRow) + def getCrReturningAddrSk: Long = + CatalogReturnsRowImplicits.crReturningAddrSk.get(catalogReturnsRow) + def getCrCallCenterSk: Long = CatalogReturnsRowImplicits.crCallCenterSk.get(catalogReturnsRow) + def getCrCatalogPageSk: Long = CatalogReturnsRowImplicits.crCatalogPageSk.get(catalogReturnsRow) + def getCrShipModeSk: Long = CatalogReturnsRowImplicits.crShipModeSk.get(catalogReturnsRow) + def getCrWarehouseSk: Long = CatalogReturnsRowImplicits.crWarehouseSk.get(catalogReturnsRow) + def getCrReasonSk: Long = CatalogReturnsRowImplicits.crReasonSk.get(catalogReturnsRow) + def getCrOrderNumber: Long = CatalogReturnsRowImplicits.crOrderNumber.get(catalogReturnsRow) + def getCrPricing: Pricing = CatalogReturnsRowImplicits.crPricing.get(catalogReturnsRow) + } + + object CatalogReturnsRowImplicits { + def invoke[T](field: String): DynFields.UnboundField[T] = + DynFields.builder() + .hiddenImpl(classOf[CatalogReturnsRow], field) + .buildChecked[T]() + + lazy val crReturnedDateSk = invoke[Long]("crReturnedDateSk") + lazy val crReturnedTimeSk = invoke[Long]("crReturnedTimeSk") + lazy val crItemSk = invoke[Long]("crItemSk") + lazy val crRefundedCustomerSk = invoke[Long]("crRefundedCustomerSk") + lazy val crRefundedCdemoSk = invoke[Long]("crRefundedCdemoSk") + lazy val crRefundedHdemoSk = invoke[Long]("crRefundedHdemoSk") + lazy val crRefundedAddrSk = invoke[Long]("crRefundedAddrSk") + lazy val crReturningCustomerSk = invoke[Long]("crReturningCustomerSk") + lazy val crReturningCdemoSk = invoke[Long]("crReturningCdemoSk") + lazy val crReturningHdemoSk = invoke[Long]("crReturningHdemoSk") + lazy val crReturningAddrSk = invoke[Long]("crReturningAddrSk") + lazy val crCallCenterSk = invoke[Long]("crCallCenterSk") + lazy val crCatalogPageSk = invoke[Long]("crCatalogPageSk") + lazy val crShipModeSk = invoke[Long]("crShipModeSk") + lazy val crWarehouseSk = invoke[Long]("crWarehouseSk") + lazy val crReasonSk = invoke[Long]("crReasonSk") + lazy val crOrderNumber = invoke[Long]("crOrderNumber") + lazy val crPricing = invoke[Pricing]("crPricing") + + def values(row: CatalogReturnsRow): Array[Any] = Array( + getOrNullForKey(row, row.getCrReturnedDateSk, CR_RETURNED_DATE_SK), + getOrNullForKey(row, row.getCrReturnedTimeSk, CR_RETURNED_TIME_SK), + getOrNullForKey(row, row.getCrItemSk, CR_ITEM_SK), + getOrNullForKey(row, row.getCrRefundedCustomerSk, CR_REFUNDED_CUSTOMER_SK), + getOrNullForKey(row, row.getCrRefundedCdemoSk, CR_REFUNDED_CDEMO_SK), + getOrNullForKey(row, row.getCrRefundedHdemoSk, CR_REFUNDED_HDEMO_SK), + getOrNullForKey(row, row.getCrRefundedAddrSk, CR_REFUNDED_ADDR_SK), + getOrNullForKey(row, row.getCrReturningCustomerSk, CR_RETURNING_CUSTOMER_SK), + getOrNullForKey(row, row.getCrReturningCdemoSk, CR_RETURNING_CDEMO_SK), + getOrNullForKey(row, row.getCrReturningHdemoSk, CR_RETURNING_HDEMO_SK), + getOrNullForKey(row, row.getCrReturningAddrSk, CR_RETURNING_ADDR_SK), + getOrNullForKey(row, row.getCrCallCenterSk, CR_CALL_CENTER_SK), + getOrNullForKey(row, row.getCrCatalogPageSk, CR_CATALOG_PAGE_SK), + getOrNullForKey(row, row.getCrShipModeSk, CR_SHIP_MODE_SK), + getOrNullForKey(row, row.getCrWarehouseSk, CR_WAREHOUSE_SK), + getOrNullForKey(row, row.getCrReasonSk, CR_REASON_SK), + getOrNull(row, row.getCrOrderNumber, CR_ORDER_NUMBER), + getOrNull(row, row.getCrPricing.getQuantity(), CR_PRICING_QUANTITY), + getOrNull(row, row.getCrPricing.getNetPaid(), CR_PRICING_NET_PAID), + getOrNull(row, row.getCrPricing.getExtTax(), CR_PRICING_EXT_TAX), + getOrNull(row, row.getCrPricing.getNetPaidIncludingTax(), CR_PRICING_NET_PAID_INC_TAX), + getOrNull(row, row.getCrPricing.getFee(), CR_PRICING_FEE), + getOrNull(row, row.getCrPricing.getExtShipCost(), CR_PRICING_EXT_SHIP_COST), + getOrNull(row, row.getCrPricing.getRefundedCash(), CR_PRICING_REFUNDED_CASH), + getOrNull(row, row.getCrPricing.getReversedCharge(), CR_PRICING_REVERSED_CHARGE), + getOrNull(row, row.getCrPricing.getStoreCredit(), CR_PRICING_STORE_CREDIT), + getOrNull(row, row.getCrPricing.getNetLoss(), CR_PRICING_NET_LOSS)) + } + + implicit class CatalogSalesRowImplicits(catalogSalesRow: CatalogSalesRow) { + def getCsSoldDateSk: Long = CatalogSalesRowImplicits.csSoldDateSk.get(catalogSalesRow) + def getCsSoldTimeSk: Long = CatalogSalesRowImplicits.csSoldTimeSk.get(catalogSalesRow) + def getCsShipDateSk: Long = CatalogSalesRowImplicits.csShipDateSk.get(catalogSalesRow) + def getCsBillCustomerSk: Long = CatalogSalesRowImplicits.csBillCustomerSk.get(catalogSalesRow) + def getCsBillCdemoSk: Long = CatalogSalesRowImplicits.csBillCdemoSk.get(catalogSalesRow) + def getCsBillHdemoSk: Long = CatalogSalesRowImplicits.csBillHdemoSk.get(catalogSalesRow) + def getCsBillAddrSk: Long = CatalogSalesRowImplicits.csBillAddrSk.get(catalogSalesRow) + def getCsShipCustomerSk: Long = CatalogSalesRowImplicits.csShipCustomerSk.get(catalogSalesRow) + def getCsShipCdemoSk: Long = CatalogSalesRowImplicits.csShipCdemoSk.get(catalogSalesRow) + def getCsShipHdemoSk: Long = CatalogSalesRowImplicits.csShipHdemoSk.get(catalogSalesRow) + def getCsShipAddrSk: Long = CatalogSalesRowImplicits.csShipAddrSk.get(catalogSalesRow) + def getCsCallCenterSk: Long = CatalogSalesRowImplicits.csCallCenterSk.get(catalogSalesRow) + def getCsCatalogPageSk: Long = CatalogSalesRowImplicits.csCatalogPageSk.get(catalogSalesRow) + def getCsShipModeSk: Long = CatalogSalesRowImplicits.csShipModeSk.get(catalogSalesRow) + def getCsWarehouseSk: Long = CatalogSalesRowImplicits.csWarehouseSk.get(catalogSalesRow) + def getCsSoldItemSk: Long = CatalogSalesRowImplicits.csSoldItemSk.get(catalogSalesRow) + def getCsPromoSk: Long = CatalogSalesRowImplicits.csPromoSk.get(catalogSalesRow) + def getCsOrderNumber: Long = CatalogSalesRowImplicits.csOrderNumber.get(catalogSalesRow) + def getCsPricing: Pricing = CatalogSalesRowImplicits.csPricing.get(catalogSalesRow) + } + + object CatalogSalesRowImplicits { + def invoke[T](field: String): DynFields.UnboundField[T] = + DynFields.builder() + .hiddenImpl(classOf[CatalogSalesRow], field) + .buildChecked[T]() + + lazy val csSoldDateSk = invoke[Long]("csSoldDateSk") + lazy val csSoldTimeSk = invoke[Long]("csSoldTimeSk") + lazy val csShipDateSk = invoke[Long]("csShipDateSk") + lazy val csBillCustomerSk = invoke[Long]("csBillCustomerSk") + lazy val csBillCdemoSk = invoke[Long]("csBillCdemoSk") + lazy val csBillHdemoSk = invoke[Long]("csBillHdemoSk") + lazy val csBillAddrSk = invoke[Long]("csBillAddrSk") + lazy val csShipCustomerSk = invoke[Long]("csShipCustomerSk") + lazy val csShipCdemoSk = invoke[Long]("csShipCdemoSk") + lazy val csShipHdemoSk = invoke[Long]("csShipHdemoSk") + lazy val csShipAddrSk = invoke[Long]("csShipAddrSk") + lazy val csCallCenterSk = invoke[Long]("csCallCenterSk") + lazy val csCatalogPageSk = invoke[Long]("csCatalogPageSk") + lazy val csShipModeSk = invoke[Long]("csShipModeSk") + lazy val csWarehouseSk = invoke[Long]("csWarehouseSk") + lazy val csSoldItemSk = invoke[Long]("csSoldItemSk") + lazy val csPromoSk = invoke[Long]("csPromoSk") + lazy val csOrderNumber = invoke[Long]("csOrderNumber") + lazy val csPricing = invoke[Pricing]("csPricing") + + def values(row: CatalogSalesRow): Array[Any] = Array( + getOrNullForKey(row, row.getCsSoldDateSk, CS_SOLD_DATE_SK), + getOrNullForKey(row, row.getCsSoldTimeSk, CS_SOLD_TIME_SK), + getOrNullForKey(row, row.getCsShipDateSk, CS_SHIP_DATE_SK), + getOrNullForKey(row, row.getCsBillCustomerSk, CS_BILL_CUSTOMER_SK), + getOrNullForKey(row, row.getCsBillCdemoSk, CS_BILL_CDEMO_SK), + getOrNullForKey(row, row.getCsBillHdemoSk, CS_BILL_HDEMO_SK), + getOrNullForKey(row, row.getCsBillAddrSk, CS_BILL_ADDR_SK), + getOrNullForKey(row, row.getCsShipCustomerSk, CS_SHIP_CUSTOMER_SK), + getOrNullForKey(row, row.getCsShipCdemoSk, CS_SHIP_CDEMO_SK), + getOrNullForKey(row, row.getCsShipHdemoSk, CS_SHIP_HDEMO_SK), + getOrNullForKey(row, row.getCsShipAddrSk, CS_SHIP_ADDR_SK), + getOrNullForKey(row, row.getCsCallCenterSk, CS_CALL_CENTER_SK), + getOrNullForKey(row, row.getCsCatalogPageSk, CS_CATALOG_PAGE_SK), + getOrNullForKey(row, row.getCsShipModeSk, CS_SHIP_MODE_SK), + getOrNull(row, row.getCsWarehouseSk, CS_WAREHOUSE_SK), + getOrNullForKey(row, row.getCsSoldItemSk, CS_SOLD_ITEM_SK), + getOrNullForKey(row, row.getCsPromoSk, CS_PROMO_SK), + getOrNull(row, row.getCsOrderNumber, CS_ORDER_NUMBER), + getOrNull(row, row.getCsPricing.getQuantity(), CS_PRICING_QUANTITY), + getOrNull(row, row.getCsPricing.getWholesaleCost(), CS_PRICING_WHOLESALE_COST), + getOrNull(row, row.getCsPricing.getListPrice(), CS_PRICING_LIST_PRICE), + getOrNull(row, row.getCsPricing.getSalesPrice(), CS_PRICING_SALES_PRICE), + getOrNull(row, row.getCsPricing.getExtDiscountAmount(), CS_PRICING_EXT_DISCOUNT_AMOUNT), + getOrNull(row, row.getCsPricing.getExtSalesPrice(), CS_PRICING_EXT_SALES_PRICE), + getOrNull(row, row.getCsPricing.getExtWholesaleCost(), CS_PRICING_EXT_WHOLESALE_COST), + getOrNull(row, row.getCsPricing.getExtListPrice(), CS_PRICING_EXT_LIST_PRICE), + getOrNull(row, row.getCsPricing.getExtTax(), CS_PRICING_EXT_TAX), + getOrNull(row, row.getCsPricing.getCouponAmount(), CS_PRICING_COUPON_AMT), + getOrNull(row, row.getCsPricing.getExtShipCost(), CS_PRICING_EXT_SHIP_COST), + getOrNull(row, row.getCsPricing.getNetPaid(), CS_PRICING_NET_PAID), + getOrNull(row, row.getCsPricing.getNetPaidIncludingTax(), CS_PRICING_NET_PAID_INC_TAX), + getOrNull(row, row.getCsPricing.getNetPaidIncludingShipping(), CS_PRICING_NET_PAID_INC_SHIP), + getOrNull( + row, + row.getCsPricing.getNetPaidIncludingShippingAndTax(), + CS_PRICING_NET_PAID_INC_SHIP_TAX), + getOrNull(row, row.getCsPricing.getNetProfit(), CS_PRICING_NET_PROFIT)) + } + + implicit class WebPageRowImplicits(webPageRow: WebPageRow) { + def getWpPageSk: Long = WebPageRowImplicits.wpPageSk.get(webPageRow) + def getWpPageId: String = WebPageRowImplicits.wpPageId.get(webPageRow) + def getWpRecStartDateId: Long = WebPageRowImplicits.wpRecStartDateId.get(webPageRow) + def getWpRecEndDateId: Long = WebPageRowImplicits.wpRecEndDateId.get(webPageRow) + def getWpCreationDateSk: Long = WebPageRowImplicits.wpCreationDateSk.get(webPageRow) + def getWpAccessDateSk: Long = WebPageRowImplicits.wpAccessDateSk.get(webPageRow) + def isWpAutogenFlag: Boolean = WebPageRowImplicits.wpAutogenFlag.get(webPageRow) + def getWpCustomerSk: Long = WebPageRowImplicits.wpCustomerSk.get(webPageRow) + def getWpUrl: String = WebPageRowImplicits.wpUrl.get(webPageRow) + def getWpType: String = WebPageRowImplicits.wpType.get(webPageRow) + def getWpCharCount: Int = WebPageRowImplicits.wpCharCount.get(webPageRow) + def getWpLinkCount: Int = WebPageRowImplicits.wpLinkCount.get(webPageRow) + def getWpImageCount: Int = WebPageRowImplicits.wpImageCount.get(webPageRow) + def getWpMaxAdCount: Int = WebPageRowImplicits.wpMaxAdCount.get(webPageRow) + } + + object WebPageRowImplicits { + def invoke[T](field: String): DynFields.UnboundField[T] = + DynFields.builder() + .hiddenImpl(classOf[WebPageRow], field) + .buildChecked[T]() + + lazy val wpPageSk = invoke[Long]("wpPageSk") + lazy val wpPageId = invoke[String]("wpPageId") + lazy val wpRecStartDateId = invoke[Long]("wpRecStartDateId") + lazy val wpRecEndDateId = invoke[Long]("wpRecEndDateId") + lazy val wpCreationDateSk = invoke[Long]("wpCreationDateSk") + lazy val wpAccessDateSk = invoke[Long]("wpAccessDateSk") + lazy val wpAutogenFlag = invoke[Boolean]("wpAutogenFlag") + lazy val wpCustomerSk = invoke[Long]("wpCustomerSk") + lazy val wpUrl = invoke[String]("wpUrl") + lazy val wpType = invoke[String]("wpType") + lazy val wpCharCount = invoke[Int]("wpCharCount") + lazy val wpLinkCount = invoke[Int]("wpLinkCount") + lazy val wpImageCount = invoke[Int]("wpImageCount") + lazy val wpMaxAdCount = invoke[Int]("wpMaxAdCount") + + def values(row: WebPageRow): Array[Any] = Array( + getOrNullForKey(row, row.getWpPageSk, WP_PAGE_SK), + getOrNull(row, row.getWpPageId, WP_PAGE_ID), + getDateOrNullFromJulianDays(row, row.getWpRecStartDateId, WP_REC_START_DATE_ID), + getDateOrNullFromJulianDays(row, row.getWpRecEndDateId, WP_REC_END_DATE_ID), + getOrNullForKey(row, row.getWpCreationDateSk, WP_CREATION_DATE_SK), + getOrNullForKey(row, row.getWpAccessDateSk, WP_ACCESS_DATE_SK), + getOrNullForBoolean(row, row.isWpAutogenFlag, WP_AUTOGEN_FLAG), + getOrNullForKey(row, row.getWpCustomerSk, WP_CUSTOMER_SK), + getOrNull(row, row.getWpUrl, WP_URL), + getOrNull(row, row.getWpType, WP_TYPE), + getOrNull(row, row.getWpCharCount, WP_CHAR_COUNT), + getOrNull(row, row.getWpLinkCount, WP_LINK_COUNT), + getOrNull(row, row.getWpImageCount, WP_IMAGE_COUNT), + getOrNull(row, row.getWpMaxAdCount, WP_MAX_AD_COUNT)) + } + + implicit class CallCenterRowImplicits(callCenterRow: CallCenterRow) { + def getCcCallCenterSk: Long = CallCenterRowImplicits.ccCallCenterSk.get(callCenterRow) + def getCcCallCenterId: String = CallCenterRowImplicits.ccCallCenterId.get(callCenterRow) + def getCcRecStartDateId: Long = CallCenterRowImplicits.ccRecStartDateId.get(callCenterRow) + def getCcRecEndDateId: Long = CallCenterRowImplicits.ccRecEndDateId.get(callCenterRow) + def getCcClosedDateId: Long = CallCenterRowImplicits.ccClosedDateId.get(callCenterRow) + def getCcOpenDateId: Long = CallCenterRowImplicits.ccOpenDateId.get(callCenterRow) + def getCcName: String = CallCenterRowImplicits.ccName.get(callCenterRow) + def getCcClass: String = CallCenterRowImplicits.ccClass.get(callCenterRow) + def getCcEmployees: Int = CallCenterRowImplicits.ccEmployees.get(callCenterRow) + def getCcSqFt: Int = CallCenterRowImplicits.ccSqFt.get(callCenterRow) + def getCcHours: String = CallCenterRowImplicits.ccHours.get(callCenterRow) + def getCcManager: String = CallCenterRowImplicits.ccManager.get(callCenterRow) + def getCcMarketId: Int = CallCenterRowImplicits.ccMarketId.get(callCenterRow) + def getCcMarketClass: String = CallCenterRowImplicits.ccMarketClass.get(callCenterRow) + def getCcMarketDesc: String = CallCenterRowImplicits.ccMarketDesc.get(callCenterRow) + def getCcMarketManager: String = CallCenterRowImplicits.ccMarketManager.get(callCenterRow) + def getCcDivisionId: Int = CallCenterRowImplicits.ccDivisionId.get(callCenterRow) + def getCcDivisionName: String = CallCenterRowImplicits.ccDivisionName.get(callCenterRow) + def getCcCompany: Int = CallCenterRowImplicits.ccCompany.get(callCenterRow) + def getCcCompanyName: String = CallCenterRowImplicits.ccCompanyName.get(callCenterRow) + def getCcAddress: Address = CallCenterRowImplicits.ccAddress.get(callCenterRow) + def getCcTaxPercentage: TPCDSDecimal = CallCenterRowImplicits.ccTaxPercentage.get(callCenterRow) + } + + object CallCenterRowImplicits { + def invoke[T](field: String): DynFields.UnboundField[T] = + DynFields.builder() + .hiddenImpl(classOf[CallCenterRow], field) + .buildChecked[T]() + + lazy val ccCallCenterSk = invoke[Long]("ccCallCenterSk") + lazy val ccCallCenterId = invoke[String]("ccCallCenterId") + lazy val ccRecStartDateId = invoke[Long]("ccRecStartDateId") + lazy val ccRecEndDateId = invoke[Long]("ccRecEndDateId") + lazy val ccClosedDateId = invoke[Long]("ccClosedDateId") + lazy val ccOpenDateId = invoke[Long]("ccOpenDateId") + lazy val ccName = invoke[String]("ccName") + lazy val ccClass = invoke[String]("ccClass") + lazy val ccEmployees = invoke[Int]("ccEmployees") + lazy val ccSqFt = invoke[Int]("ccSqFt") + lazy val ccHours = invoke[String]("ccHours") + lazy val ccManager = invoke[String]("ccManager") + lazy val ccMarketId = invoke[Int]("ccMarketId") + lazy val ccMarketClass = invoke[String]("ccMarketClass") + lazy val ccMarketDesc = invoke[String]("ccMarketDesc") + lazy val ccMarketManager = invoke[String]("ccMarketManager") + lazy val ccDivisionId = invoke[Int]("ccDivisionId") + lazy val ccDivisionName = invoke[String]("ccDivisionName") + lazy val ccCompany = invoke[Int]("ccCompany") + lazy val ccCompanyName = invoke[String]("ccCompanyName") + lazy val ccAddress = invoke[Address]("ccAddress") + lazy val ccTaxPercentage = invoke[TPCDSDecimal]("ccTaxPercentage") + + def values(row: CallCenterRow): Array[Any] = Array( + getOrNullForKey(row, row.getCcCallCenterSk, CC_CALL_CENTER_SK), + getOrNull(row, row.getCcCallCenterId, CC_CALL_CENTER_ID), + getDateOrNullFromJulianDays(row, row.getCcRecStartDateId, CC_REC_START_DATE_ID), + getDateOrNullFromJulianDays(row, row.getCcRecEndDateId, CC_REC_END_DATE_ID), + getOrNullForKey(row, row.getCcClosedDateId, CC_CLOSED_DATE_ID), + getOrNullForKey(row, row.getCcOpenDateId, CC_OPEN_DATE_ID), + getOrNull(row, row.getCcName, CC_NAME), + getOrNull(row, row.getCcClass, CC_CLASS), + getOrNull(row, row.getCcEmployees, CC_EMPLOYEES), + getOrNull(row, row.getCcSqFt, CC_SQ_FT), + getOrNull(row, row.getCcHours, CC_HOURS), + getOrNull(row, row.getCcManager, CC_MANAGER), + getOrNull(row, row.getCcMarketId, CC_MARKET_ID), + getOrNull(row, row.getCcMarketClass, CC_MARKET_CLASS), + getOrNull(row, row.getCcMarketDesc, CC_MARKET_DESC), + getOrNull(row, row.getCcMarketManager, CC_MARKET_MANAGER), + getOrNull(row, row.getCcDivisionId, CC_DIVISION), + getOrNull(row, row.getCcDivisionName, CC_DIVISION_NAME), + getOrNull(row, row.getCcCompany, CC_COMPANY), + getOrNull(row, row.getCcCompanyName, CC_COMPANY_NAME), + getOrNull(row, row.getCcAddress.getStreetNumber, CC_STREET_NUMBER), + getOrNull(row, row.getCcAddress.getStreetName, CC_STREET_NAME), + getOrNull(row, row.getCcAddress.getStreetType, CC_STREET_TYPE), + getOrNull(row, row.getCcAddress.getSuiteNumber, CC_SUITE_NUMBER), + getOrNull(row, row.getCcAddress.getCity, CC_CITY), + getOrNull(row, row.getCcAddress.getCounty, CC_ADDRESS), + getOrNull(row, row.getCcAddress.getState, CC_STATE), + getOrNull( + row, + java.lang.String.format("%05d", row.getCcAddress.getZip.asInstanceOf[Object]), + CC_ZIP), + getOrNull(row, row.getCcAddress.getCountry, CC_COUNTRY), + getOrNull(row, row.getCcAddress.getGmtOffset, CC_GMT_OFFSET), + getOrNull(row, row.getCcTaxPercentage, CC_TAX_PERCENTAGE)) + } + + implicit class CustomerAddressRowImplicits(customerAddressRow: CustomerAddressRow) { + def getCaAddrSk: Long = CustomerAddressRowImplicits.caAddrSk.get(customerAddressRow) + def getCaAddrId: String = CustomerAddressRowImplicits.caAddrId.get(customerAddressRow) + def getCaAddress: Address = CustomerAddressRowImplicits.caAddress.get(customerAddressRow) + def getCaLocationType: String = + CustomerAddressRowImplicits.caLocationType.get(customerAddressRow) + } + + object CustomerAddressRowImplicits { + def invoke[T](field: String): DynFields.UnboundField[T] = + DynFields.builder() + .hiddenImpl(classOf[CustomerAddressRow], field) + .buildChecked[T]() + + lazy val caAddrSk = invoke[Long]("caAddrSk") + lazy val caAddrId = invoke[String]("caAddrId") + lazy val caAddress = invoke[Address]("caAddress") + lazy val caLocationType = invoke[String]("caLocationType") + + def values(row: CustomerAddressRow): Array[Any] = Array( + getOrNullForKey(row, row.getCaAddrSk, CA_ADDRESS_SK), + getOrNull(row, row.getCaAddrId, CA_ADDRESS_ID), + getOrNull(row, row.getCaAddress.getStreetNumber(), CA_ADDRESS_STREET_NUM), + getOrNull(row, row.getCaAddress.getStreetName(), CA_ADDRESS_STREET_NAME), + getOrNull(row, row.getCaAddress.getStreetType(), CA_ADDRESS_STREET_TYPE), + getOrNull(row, row.getCaAddress.getSuiteNumber(), CA_ADDRESS_SUITE_NUM), + getOrNull(row, row.getCaAddress.getCity(), CA_ADDRESS_CITY), + getOrNull(row, row.getCaAddress.getCounty(), CA_ADDRESS_COUNTY), + getOrNull(row, row.getCaAddress.getState(), CA_ADDRESS_STATE), + getOrNull( + row, + java.lang.String.format("%05d", row.getCaAddress.getZip.asInstanceOf[Object]), + CA_ADDRESS_ZIP), + getOrNull(row, row.getCaAddress.getCountry(), CA_ADDRESS_COUNTRY), + getOrNull(row, row.getCaAddress.getGmtOffset(), CA_ADDRESS_GMT_OFFSET), + getOrNull(row, row.getCaLocationType, CA_LOCATION_TYPE)) + } + + implicit class DateDimRowImplicits(dateDimRow: DateDimRow) { + def getDDateSk: Long = DateDimRowImplicits.dDateSk.get(dateDimRow) + def getDDateId: String = DateDimRowImplicits.dDateId.get(dateDimRow) + def getDMonthSeq: Int = DateDimRowImplicits.dMonthSeq.get(dateDimRow) + def getDWeekSeq: Int = DateDimRowImplicits.dWeekSeq.get(dateDimRow) + def getDQuarterSeq: Int = DateDimRowImplicits.dQuarterSeq.get(dateDimRow) + def getDYear: Int = DateDimRowImplicits.dYear.get(dateDimRow) + def getDDow: Int = DateDimRowImplicits.dDow.get(dateDimRow) + def getDMoy: Int = DateDimRowImplicits.dMoy.get(dateDimRow) + def getDDom: Int = DateDimRowImplicits.dDom.get(dateDimRow) + def getDQoy: Int = DateDimRowImplicits.dQoy.get(dateDimRow) + def getDFyYear: Int = DateDimRowImplicits.dFyYear.get(dateDimRow) + def getDFyQuarterSeq: Int = DateDimRowImplicits.dFyQuarterSeq.get(dateDimRow) + def getDFyWeekSeq: Int = DateDimRowImplicits.dFyWeekSeq.get(dateDimRow) + def getDDayName: String = DateDimRowImplicits.dDayName.get(dateDimRow) + def isDHoliday: Boolean = DateDimRowImplicits.dHoliday.get(dateDimRow) + def isDWeekend: Boolean = DateDimRowImplicits.dWeekend.get(dateDimRow) + def isDFollowingHoliday: Boolean = DateDimRowImplicits.dFollowingHoliday.get(dateDimRow) + def getDFirstDom: Int = DateDimRowImplicits.dFirstDom.get(dateDimRow) + def getDLastDom: Int = DateDimRowImplicits.dLastDom.get(dateDimRow) + def getDSameDayLy: Int = DateDimRowImplicits.dSameDayLy.get(dateDimRow) + def getDSameDayLq: Int = DateDimRowImplicits.dSameDayLq.get(dateDimRow) + def isDCurrentDay: Boolean = DateDimRowImplicits.dCurrentDay.get(dateDimRow) + def isDCurrentWeek: Boolean = DateDimRowImplicits.dCurrentWeek.get(dateDimRow) + def isDCurrentMonth: Boolean = DateDimRowImplicits.dCurrentMonth.get(dateDimRow) + def isDCurrentQuarter: Boolean = DateDimRowImplicits.dCurrentQuarter.get(dateDimRow) + def isDCurrentYear: Boolean = DateDimRowImplicits.dCurrentYear.get(dateDimRow) + } + + object DateDimRowImplicits { + def invoke[T](field: String): DynFields.UnboundField[T] = + DynFields.builder() + .hiddenImpl(classOf[DateDimRow], field) + .buildChecked[T]() + + lazy val dDateSk = invoke[Long]("dDateSk") + lazy val dDateId = invoke[String]("dDateId") + lazy val dMonthSeq = invoke[Int]("dMonthSeq") + lazy val dWeekSeq = invoke[Int]("dWeekSeq") + lazy val dQuarterSeq = invoke[Int]("dQuarterSeq") + lazy val dYear = invoke[Int]("dYear") + lazy val dDow = invoke[Int]("dDow") + lazy val dMoy = invoke[Int]("dMoy") + lazy val dDom = invoke[Int]("dDom") + lazy val dQoy = invoke[Int]("dQoy") + lazy val dFyYear = invoke[Int]("dFyYear") + lazy val dFyQuarterSeq = invoke[Int]("dFyQuarterSeq") + lazy val dFyWeekSeq = invoke[Int]("dFyWeekSeq") + lazy val dDayName = invoke[String]("dDayName") + lazy val dHoliday = invoke[Boolean]("dHoliday") + lazy val dWeekend = invoke[Boolean]("dWeekend") + lazy val dFollowingHoliday = invoke[Boolean]("dFollowingHoliday") + lazy val dFirstDom = invoke[Int]("dFirstDom") + lazy val dLastDom = invoke[Int]("dLastDom") + lazy val dSameDayLy = invoke[Int]("dSameDayLy") + lazy val dSameDayLq = invoke[Int]("dSameDayLq") + lazy val dCurrentDay = invoke[Boolean]("dCurrentDay") + lazy val dCurrentWeek = invoke[Boolean]("dCurrentWeek") + lazy val dCurrentMonth = invoke[Boolean]("dCurrentMonth") + lazy val dCurrentQuarter = invoke[Boolean]("dCurrentQuarter") + lazy val dCurrentYear = invoke[Boolean]("dCurrentYear") + + def values(row: DateDimRow): Array[Any] = Array( + getOrNullForKey(row, row.getDDateSk, D_DATE_SK), + getOrNull(row, row.getDDateId, D_DATE_ID), + getDateOrNullFromJulianDays(row, row.getDDateSk, D_DATE_SK), + getOrNull(row, row.getDMonthSeq, D_MONTH_SEQ), + getOrNull(row, row.getDWeekSeq, D_WEEK_SEQ), + getOrNull(row, row.getDQuarterSeq, D_QUARTER_SEQ), + getOrNull(row, row.getDYear, D_YEAR), + getOrNull(row, row.getDDow, D_DOW), + getOrNull(row, row.getDMoy, D_MOY), + getOrNull(row, row.getDDom, D_DOM), + getOrNull(row, row.getDQoy, D_QOY), + getOrNull(row, row.getDFyYear, D_FY_YEAR), + getOrNull(row, row.getDFyQuarterSeq, D_FY_QUARTER_SEQ), + getOrNull(row, row.getDFyWeekSeq, D_FY_WEEK_SEQ), + getOrNull(row, row.getDDayName, D_DAY_NAME), + getOrNull( + row, + java.lang.String.format( + "%4dQ%d", + row.getDYear.asInstanceOf[Object], + row.getDQoy.asInstanceOf[Object]), + D_QUARTER_NAME), + getOrNullForBoolean(row, row.isDHoliday, D_HOLIDAY), + getOrNullForBoolean(row, row.isDWeekend, D_WEEKEND), + getOrNullForBoolean(row, row.isDFollowingHoliday, D_FOLLOWING_HOLIDAY), + getOrNull(row, row.getDFirstDom, D_FIRST_DOM), + getOrNull(row, row.getDLastDom, D_LAST_DOM), + getOrNull(row, row.getDSameDayLy, D_SAME_DAY_LY), + getOrNull(row, row.getDSameDayLq, D_SAME_DAY_LQ), + getOrNullForBoolean(row, row.isDCurrentDay, D_CURRENT_DAY), + getOrNullForBoolean(row, row.isDCurrentWeek, D_CURRENT_WEEK), + getOrNullForBoolean(row, row.isDCurrentMonth, D_CURRENT_MONTH), + getOrNullForBoolean(row, row.isDCurrentQuarter, D_CURRENT_QUARTER), + getOrNullForBoolean(row, row.isDCurrentYear, D_CURRENT_YEAR)) + } + + def getValues: TableRow => Array[Any] = { + case row: StoreRow => StoreRowImplicits.values(row) + case row: ReasonRow => ReasonRowImplicits.values(row) + case row: DbgenVersionRow => DbgenVersionRowImplicits.values(row) + case row: ShipModeRow => ShipModeRowImplicits.values(row) + case row: IncomeBandRow => IncomeBandRowImplicits.values(row) + case row: ItemRow => ItemRowImplicits.values(row) + case row: CustomerDemographicsRow => CustomerDemographicsRowImplicits.values(row) + case row: TimeDimRow => TimeDimRowImplicits.values(row) + case row: WebSiteRow => WebSiteRowImplicits.values(row) + case row: HouseholdDemographicsRow => HouseholdDemographicsRowImplicits.values(row) + case row: PromotionRow => PromotionRowImplicits.values(row) + case row: CatalogPageRow => CatalogPageRowImplicits.values(row) + case row: WebSalesRow => WebSalesRowImplicits.values(row) + case row: StoreSalesRow => StoreSalesRowImplicits.values(row) + case row: InventoryRow => InventoryRowImplicits.values(row) + case row: WebReturnsRow => WebReturnsRowImplicits.values(row) + case row: WarehouseRow => WarehouseRowImplicits.values(row) + case row: CustomerRow => CustomerRowImplicits.values(row) + case row: StoreReturnsRow => StoreReturnsRowImplicits.values(row) + case row: CatalogReturnsRow => CatalogReturnsRowImplicits.values(row) + case row: CatalogSalesRow => CatalogSalesRowImplicits.values(row) + case row: WebPageRow => WebPageRowImplicits.values(row) + case row: CallCenterRow => CallCenterRowImplicits.values(row) + case row: CustomerAddressRow => CustomerAddressRowImplicits.values(row) + case row: DateDimRow => DateDimRowImplicits.values(row) + } +} + +object KyuubiTPCDSTableRowWithNullsUtils { + private lazy val isNullMethod = DynMethods.builder("isNull") + .hiddenImpl( + classOf[TableRowWithNulls], + classOf[GeneratorColumn]) + .build() + + private def isNull( + row: TableRow, + column: GeneratorColumn): Boolean = isNullMethod.invoke[Boolean](row, column) + + def getDateOrNullFromJulianDays( + row: TableRow, + value: Long, + column: GeneratorColumn): Option[Long] = { + if (isNull(row, column) || value < 0) None else Some(value) + } + + def getOrNullForKey(row: TableRow, value: Long, column: GeneratorColumn): Option[Long] = { + if (isNull(row, column) || value == -1) None else Some(value) + } + + def getOrNull[T](row: TableRow, value: T, column: GeneratorColumn): Option[T] = { + if (isNull(row, column)) None else Some(value) + } + + def getOrNullForBoolean( + row: TableRow, + value: Boolean, + column: GeneratorColumn): Option[Boolean] = { + if (isNull(row, column)) None else Some(value) + } +} diff --git a/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSCatalogSuite.scala b/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSCatalogSuite.scala index f5c6563e770..0eed970a4cd 100644 --- a/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSCatalogSuite.scala +++ b/extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSCatalogSuite.scala @@ -19,6 +19,8 @@ package org.apache.kyuubi.spark.connector.tpcds import org.apache.spark.SparkConf import org.apache.spark.sql.{AnalysisException, SparkSession} +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.types.DataTypes import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.kyuubi.KyuubiFunSuite @@ -77,42 +79,6 @@ class TPCDSCatalogSuite extends KyuubiFunSuite { } } - test("tpcds.tiny count") { - val sparkConf = new SparkConf() - .setMaster("local[*]") - .set("spark.ui.enabled", "false") - .set("spark.sql.catalogImplementation", "in-memory") - .set("spark.sql.catalog.tpcds", classOf[TPCDSCatalog].getName) - .set("spark.sql.cbo.enabled", "true") - .set("spark.sql.cbo.planStats.enabled", "true") - withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark => - assert(spark.table("tpcds.tiny.call_center").count === 2) - assert(spark.table("tpcds.tiny.catalog_page").count === 11718) - assert(spark.table("tpcds.tiny.catalog_returns").count === 8923) - assert(spark.table("tpcds.tiny.catalog_sales").count === 89807) - assert(spark.table("tpcds.tiny.customer").count === 1000) - assert(spark.table("tpcds.tiny.customer_address").count === 1000) - assert(spark.table("tpcds.tiny.customer_demographics").count === 1920800) - assert(spark.table("tpcds.tiny.date_dim").count === 73049) - assert(spark.table("tpcds.tiny.household_demographics").count === 7200) - assert(spark.table("tpcds.tiny.income_band").count === 20) - assert(spark.table("tpcds.tiny.inventory").count === 261261) - assert(spark.table("tpcds.tiny.item").count === 2000) - assert(spark.table("tpcds.tiny.promotion").count === 3) - assert(spark.table("tpcds.tiny.reason").count === 1) - assert(spark.table("tpcds.tiny.ship_mode").count === 20) - assert(spark.table("tpcds.tiny.store").count === 2) - assert(spark.table("tpcds.tiny.store_returns").count === 11925) - assert(spark.table("tpcds.tiny.store_sales").count === 120527) - assert(spark.table("tpcds.tiny.time_dim").count === 86400) - assert(spark.table("tpcds.tiny.warehouse").count === 1) - assert(spark.table("tpcds.tiny.web_page").count === 2) - assert(spark.table("tpcds.tiny.web_returns").count === 1152) - assert(spark.table("tpcds.tiny.web_sales").count === 11876) - assert(spark.table("tpcds.tiny.web_site").count === 2) - } - } - test("tpcds.sf1 stats") { val sparkConf = new SparkConf() .setMaster("local[*]") @@ -174,4 +140,69 @@ class TPCDSCatalogSuite extends KyuubiFunSuite { || exception.message.contains("TABLE_OR_VIEW_NOT_FOUND")) } } + + test("tpcds.tiny count and checksum") { + val sparkConf = new SparkConf() + .setMaster("local[*]") + .set("spark.ui.enabled", "false") + .set("spark.sql.catalogImplementation", "in-memory") + .set("spark.sql.catalog.tpcds", classOf[TPCDSCatalog].getName) + .set("spark.sql.cbo.enabled", "true") + .set("spark.sql.cbo.planStats.enabled", "true") + withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark => + tableInfo.foreach { + case (table, (expectCount, expectChecksum)) => + val (count, checksum) = countAndchecksum(spark, table) + assert(count == expectCount) + assert(checksum == expectChecksum, s"table $table") + } + } + } + + def countAndchecksum(spark: SparkSession, tableName: String): (String, String) = { + val df = spark.table(tableName) + val cols = df.schema.map { field => + concat( + when(col(field.name).isNull, lit('\u0000').cast("string")) + .otherwise(col(field.name).cast("string")), + lit('\u0001').cast("string")) + } + + df.select( + crc32(concat(cols: _*)) + .cast(DataTypes.createDecimalType(38, 0)) + .as("row_checksum")) + .agg( + count("*").cast("string").as("count"), + sum("row_checksum").cast("string").as("checksum")) + .collect() + .map(r => (r.getString(0), r.getString(1))) + .head + } + + private val tableInfo = Seq( + ("tpcds.tiny.call_center", ("2", "4584365911")), + ("tpcds.tiny.catalog_page", ("11718", "25416854987711")), + ("tpcds.tiny.catalog_returns", ("8923", "19045021547122")), + ("tpcds.tiny.catalog_sales", ("89807", "192355655243815")), + ("tpcds.tiny.customer", ("1000", "2120827330356")), + ("tpcds.tiny.customer_address", ("1000", "2161077976693")), + ("tpcds.tiny.customer_demographics", ("1920800", "4124183189708148")), + ("tpcds.tiny.date_dim", ("73049", "156926081012862")), + ("tpcds.tiny.household_demographics", ("7200", "15494873325812")), + ("tpcds.tiny.income_band", ("20", "41180951007")), + ("tpcds.tiny.inventory", ("261261", "561290989772724")), + ("tpcds.tiny.item", ("2000", "4254103006936")), + ("tpcds.tiny.promotion", ("3", "4984911899")), + ("tpcds.tiny.reason", ("1", "365440741")), + ("tpcds.tiny.ship_mode", ("20", "52349078860")), + ("tpcds.tiny.store", ("2", "2964682289")), + ("tpcds.tiny.store_returns", ("11925", "25400972943896")), + ("tpcds.tiny.store_sales", ("120527", "259296406856838")), + ("tpcds.tiny.time_dim", ("86400", "186045071019485")), + ("tpcds.tiny.warehouse", ("1", "2956768503")), + ("tpcds.tiny.web_page", ("2", "3215766118")), + ("tpcds.tiny.web_returns", ("1152", "2464383243098")), + ("tpcds.tiny.web_sales", ("11876", "25458905770096")), + ("tpcds.tiny.web_site", ("2", "3798438288"))) } diff --git a/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q1.output.hash b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q1.output.hash index 2d33a627d0b..74f1f32ede1 100644 --- a/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q1.output.hash +++ b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q1.output.hash @@ -15,4 +15,4 @@ * limitations under the License. */ --2130215201 +-1796738616 diff --git a/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q1.sql b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q1.sql index 5031eb86c10..b9b382350a2 100644 --- a/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q1.sql +++ b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q1.sql @@ -19,13 +19,13 @@ select l_returnflag, l_linestatus, - sum(l_quantity) as sum_qty, - sum(l_extendedprice) as sum_base_price, - sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, - sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, - avg(l_quantity) as avg_qty, - avg(l_extendedprice) as avg_price, - avg(l_discount) as avg_disc, + round(sum(l_quantity), 2) as sum_qty, + round(sum(l_extendedprice), 2) as sum_base_price, + round(sum(l_extendedprice * (1 - l_discount)), 2) as sum_disc_price, + round(sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)), 2) as sum_charge, + round(avg(l_quantity), 2) as avg_qty, + round(avg(l_extendedprice), 2) as avg_price, + round(avg(l_discount), 2) as avg_disc, count(*) as count_order from lineitem diff --git a/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q10.output.hash b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q10.output.hash index 4a6454e519c..7b922f50772 100644 --- a/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q10.output.hash +++ b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q10.output.hash @@ -15,4 +15,4 @@ * limitations under the License. */ --4090660469 +-730770831 diff --git a/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q10.sql b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q10.sql index 87854a9ad60..89dd5247a1d 100644 --- a/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q10.sql +++ b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q10.sql @@ -19,7 +19,7 @@ select c_custkey, c_name, - sum(l_extendedprice * (1 - l_discount)) as revenue, + round(sum(l_extendedprice * (1 - l_discount)), 1) as revenue, c_acctbal, n_name, c_address, diff --git a/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q14.output.hash b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q14.output.hash index 1b8cf626c59..e0595360402 100644 --- a/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q14.output.hash +++ b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q14.output.hash @@ -15,4 +15,4 @@ * limitations under the License. */ -47333415 +799857942 diff --git a/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q14.sql b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q14.sql index 4c5c485fe30..4aa7d13cab8 100644 --- a/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q14.sql +++ b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q14.sql @@ -17,11 +17,11 @@ -- using default substitutions select - 100.00 * sum(case + round(100.00 * sum(case when p_type like 'PROMO%' then l_extendedprice * (1 - l_discount) else 0 - end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue + end) / sum(l_extendedprice * (1 - l_discount)), 2) as promo_revenue from lineitem, part diff --git a/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q15.output.hash b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q15.output.hash index 8dc6c1f42b0..a29b0607dfc 100644 --- a/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q15.output.hash +++ b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q15.output.hash @@ -15,4 +15,4 @@ * limitations under the License. */ --2021679095 +-1401614325 diff --git a/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q15.sql b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q15.sql index 52519586db1..11b9815fedc 100644 --- a/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q15.sql +++ b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q15.sql @@ -34,7 +34,7 @@ select s_name, s_address, s_phone, - total_revenue + round(total_revenue, 2) as total_revenue from supplier, revenue0 diff --git a/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q22.output.hash b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q22.output.hash index d931acdadfa..1d53348d252 100644 --- a/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q22.output.hash +++ b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q22.output.hash @@ -15,4 +15,4 @@ * limitations under the License. */ -2111900859 +2123615405 diff --git a/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q22.sql b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q22.sql index a9dea75f786..d504a839758 100644 --- a/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q22.sql +++ b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q22.sql @@ -19,7 +19,7 @@ select cntrycode, count(*) as numcust, - sum(c_acctbal) as totacctbal + round(sum(c_acctbal), 2) as totacctbal from ( select diff --git a/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q5.output.hash b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q5.output.hash index d608f5fdd77..5320ce21257 100644 --- a/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q5.output.hash +++ b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q5.output.hash @@ -15,4 +15,4 @@ * limitations under the License. */ -3717321142 +-2755325540 diff --git a/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q5.sql b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q5.sql index fc998961c3b..847f372cce5 100644 --- a/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q5.sql +++ b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q5.sql @@ -18,7 +18,7 @@ select n_name, - sum(l_extendedprice * (1 - l_discount)) as revenue + round(sum(l_extendedprice * (1 - l_discount)), 2) as revenue from customer, orders, diff --git a/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q6.output.hash b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q6.output.hash index c9efccd9e0c..fa86974aa11 100644 --- a/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q6.output.hash +++ b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q6.output.hash @@ -15,4 +15,4 @@ * limitations under the License. */ -2062248569 +223845550 diff --git a/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q6.sql b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q6.sql index efd8a50110e..3f9a7209446 100644 --- a/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q6.sql +++ b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q6.sql @@ -17,7 +17,7 @@ -- using default substitutions select - sum(l_extendedprice * l_discount) as revenue + round(sum(l_extendedprice * l_discount), 2) as revenue from lineitem where diff --git a/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q7.output.hash b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q7.output.hash index 02a826f090c..29d09404524 100644 --- a/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q7.output.hash +++ b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q7.output.hash @@ -15,4 +15,4 @@ * limitations under the License. */ --1955579146 +-95255706 diff --git a/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q7.sql b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q7.sql index 3d932e660b5..7c565e6cdf6 100644 --- a/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q7.sql +++ b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q7.sql @@ -20,7 +20,7 @@ select supp_nation, cust_nation, l_year, - sum(volume) as revenue + round(sum(volume), 2) as revenue from ( select diff --git a/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q9.output.hash b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q9.output.hash index 9f13f120a53..561cdadbd5f 100644 --- a/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q9.output.hash +++ b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q9.output.hash @@ -15,4 +15,4 @@ * limitations under the License. */ -10861514367 +-12715678387 diff --git a/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q9.sql b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q9.sql index a5f2d89cb23..8ff49a38a5b 100644 --- a/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q9.sql +++ b/extensions/spark/kyuubi-spark-connector-tpch/src/main/resources/kyuubi/tpch/q9.sql @@ -19,7 +19,7 @@ select nation, o_year, - sum(amount) as sum_profit + round(sum(amount), 2) as sum_profit from ( select diff --git a/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/TPCHQuerySuite.scala b/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/TPCHQuerySuite.scala index a409a5fe927..c651d930043 100644 --- a/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/TPCHQuerySuite.scala +++ b/extensions/spark/kyuubi-spark-connector-tpch/src/test/scala/org/apache/kyuubi/spark/connector/tpch/TPCHQuerySuite.scala @@ -31,18 +31,18 @@ import org.apache.kyuubi.spark.connector.common.LocalSparkSession.withSparkSessi /** * To run this test suite: * {{{ - * KYUUBI_UPDATE=0 dev/gen/gen_tpcdh_queries.sh + * KYUUBI_UPDATE=0 dev/gen/gen_tpch_queries.sh * }}} * * To re-generate golden files for this suite: * {{{ - * dev/gen/gen_tpcdh_queries.sh + * dev/gen/gen_tpch_queries.sh * }}} */ @Slow class TPCHQuerySuite extends KyuubiFunSuite { - val queries: Set[String] = (1 to 22).map(i => s"q$i").toSet + val queries: List[String] = (1 to 22).map(i => s"q$i").toList test("run query on tiny") { val viewSuffix = "view" @@ -59,20 +59,15 @@ class TPCHQuerySuite extends KyuubiFunSuite { in.close() queryName -> queryContent }.foreach { case (name, sql) => - try { - val result = spark.sql(sql).collect() - val schema = spark.sql(sql).schema - val schemaDDL = LICENSE_HEADER + schema.toDDL + "\n" - spark.createDataFrame(result.toList.asJava, schema).createTempView(s"$name$viewSuffix") - val sumHashResult = LICENSE_HEADER + spark.sql( - s"select sum(hash(*)) from $name$viewSuffix").collect().head.get(0) + "\n" - val tuple = generateGoldenFiles("kyuubi/tpch", name, schemaDDL, sumHashResult) - assert(schemaDDL == tuple._1) - assert(sumHashResult == tuple._2) - } catch { - case cause: Throwable => - fail(name, cause) - } + val result = spark.sql(sql).collect() + val schema = spark.sql(sql).schema + val schemaDDL = LICENSE_HEADER + schema.toDDL + "\n" + spark.createDataFrame(result.toList.asJava, schema).createTempView(s"$name$viewSuffix") + val sumHashResult = LICENSE_HEADER + spark.sql( + s"select sum(hash(*)) from $name$viewSuffix").collect().head.get(0) + "\n" + val tuple = generateGoldenFiles("kyuubi/tpch", name, schemaDDL, sumHashResult) + assert(schemaDDL == tuple._1, s"query $name schema not match") + assert(sumHashResult == tuple._2, s"query $name result not match") } } } diff --git a/extensions/spark/kyuubi-spark-lineage/src/test/scala/org/apache/kyuubi/plugin/lineage/helper/SparkSQLLineageParserHelperSuite.scala b/extensions/spark/kyuubi-spark-lineage/src/test/scala/org/apache/kyuubi/plugin/lineage/helper/SparkSQLLineageParserHelperSuite.scala index 3c19163db42..76127437983 100644 --- a/extensions/spark/kyuubi-spark-lineage/src/test/scala/org/apache/kyuubi/plugin/lineage/helper/SparkSQLLineageParserHelperSuite.scala +++ b/extensions/spark/kyuubi-spark-lineage/src/test/scala/org/apache/kyuubi/plugin/lineage/helper/SparkSQLLineageParserHelperSuite.scala @@ -472,32 +472,36 @@ class SparkSQLLineageParserHelperSuite extends KyuubiFunSuite val tableDirectory = getClass.getResource("/").getPath + "table_directory" val directory = File(tableDirectory).createDirectory() val ret0 = extractLineage(s""" - |INSERT OVERWRITE DIRECTORY '$directory.path' + |INSERT OVERWRITE DIRECTORY '${directory.path}' |USING parquet |SELECT * FROM test_db0.test_table_part0""".stripMargin) assert(ret0 == Lineage( List(s"$DEFAULT_CATALOG.test_db0.test_table_part0"), - List(s"""`$directory.path`"""), + List(s"""`${directory.path}`"""), List( - (s"""`$directory.path`.key""", Set(s"$DEFAULT_CATALOG.test_db0.test_table_part0.key")), - (s"""`$directory.path`.value""", Set(s"$DEFAULT_CATALOG.test_db0.test_table_part0.value")), - (s"""`$directory.path`.pid""", Set(s"$DEFAULT_CATALOG.test_db0.test_table_part0.pid"))))) + (s"""`${directory.path}`.key""", Set(s"$DEFAULT_CATALOG.test_db0.test_table_part0.key")), + ( + s"""`${directory.path}`.value""", + Set(s"$DEFAULT_CATALOG.test_db0.test_table_part0.value")), + (s"""`${directory.path}`.pid""", Set(s"$DEFAULT_CATALOG.test_db0.test_table_part0.pid"))))) } test("columns lineage extract - InsertIntoHiveDirCommand") { val tableDirectory = getClass.getResource("/").getPath + "table_directory" val directory = File(tableDirectory).createDirectory() val ret0 = extractLineage(s""" - |INSERT OVERWRITE DIRECTORY '$directory.path' + |INSERT OVERWRITE DIRECTORY '${directory.path}' |USING parquet |SELECT * FROM test_db0.test_table_part0""".stripMargin) assert(ret0 == Lineage( List(s"$DEFAULT_CATALOG.test_db0.test_table_part0"), - List(s"""`$directory.path`"""), + List(s"""`${directory.path}`"""), List( - (s"""`$directory.path`.key""", Set(s"$DEFAULT_CATALOG.test_db0.test_table_part0.key")), - (s"""`$directory.path`.value""", Set(s"$DEFAULT_CATALOG.test_db0.test_table_part0.value")), - (s"""`$directory.path`.pid""", Set(s"$DEFAULT_CATALOG.test_db0.test_table_part0.pid"))))) + (s"""`${directory.path}`.key""", Set(s"$DEFAULT_CATALOG.test_db0.test_table_part0.key")), + ( + s"""`${directory.path}`.value""", + Set(s"$DEFAULT_CATALOG.test_db0.test_table_part0.value")), + (s"""`${directory.path}`.pid""", Set(s"$DEFAULT_CATALOG.test_db0.test_table_part0.pid"))))) } test("columns lineage extract - InsertIntoHiveTable") { diff --git a/externals/kyuubi-chat-engine/pom.xml b/externals/kyuubi-chat-engine/pom.xml index 3639ceed329..0633143b9fa 100644 --- a/externals/kyuubi-chat-engine/pom.xml +++ b/externals/kyuubi-chat-engine/pom.xml @@ -65,6 +65,11 @@ test + + com.squareup.retrofit2 + converter-jackson + ${retrofit.version} + diff --git a/externals/kyuubi-chat-engine/src/main/java/org/apache/kyuubi/engine/chat/ernie/enums/ChatMessageRole.java b/externals/kyuubi-chat-engine/src/main/java/org/apache/kyuubi/engine/chat/ernie/enums/ChatMessageRole.java new file mode 100644 index 00000000000..8c8921fbdf0 --- /dev/null +++ b/externals/kyuubi-chat-engine/src/main/java/org/apache/kyuubi/engine/chat/ernie/enums/ChatMessageRole.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.engine.chat.ernie.enums; + +public enum ChatMessageRole { + FUNCTION("function"), + + USER("user"), + + ASSISTANT("assistant"); + + private final String value; + + private ChatMessageRole(String value) { + this.value = value; + } + + public String value() { + return this.value; + } + + @Override + public String toString() { + return "ChatMessageRole{" + "value='" + value + '\'' + '}'; + } +} diff --git a/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/api/ApiHttpException.scala b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/api/ApiHttpException.scala new file mode 100644 index 00000000000..ba54f97c840 --- /dev/null +++ b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/api/ApiHttpException.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.engine.chat.api + +class ApiHttpException(statusCode: Int, message: String, exception: Exception) + extends Exception(message, exception) {} diff --git a/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/api/ErnieBotApi.scala b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/api/ErnieBotApi.scala new file mode 100644 index 00000000000..8593f65e51a --- /dev/null +++ b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/api/ErnieBotApi.scala @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.engine.chat.api + +import io.reactivex.Single +import retrofit2.http.{Body, Path, POST, Query} + +import org.apache.kyuubi.engine.chat.ernie.bean.{ChatCompletionRequest, ChatCompletionResult} + +trait ErnieBotApi { + @POST("/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/{model}") + def createChatCompletion( + @Path("model") model: String, + @Query("access_token") accessToken: String, + @Body chatCompletionRequest: ChatCompletionRequest): Single[ChatCompletionResult] +} diff --git a/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ernie/bean/ChatCompletionRequest.scala b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ernie/bean/ChatCompletionRequest.scala new file mode 100644 index 00000000000..6cc3a6706bf --- /dev/null +++ b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ernie/bean/ChatCompletionRequest.scala @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.engine.chat.ernie.bean + +import java.lang.{Double => JDouble} +import java.util.{List => JList} + +import com.fasterxml.jackson.annotation.JsonProperty + +case class ChatCompletionRequest( + @JsonProperty("messages") messages: JList[ChatMessage], + @JsonProperty("functions") functions: JList[Function] = null, + @JsonProperty("temperature") temperature: JDouble = null, + @JsonProperty("top_p") topP: JDouble = null, + @JsonProperty("penalty_score") presenceScore: JDouble = null, + @JsonProperty("stream") stream: Boolean = false, + @JsonProperty("system") system: String = null, + @JsonProperty("stop") stop: JList[String] = null, + @JsonProperty("disable_search") disableSearch: Boolean = false, + @JsonProperty("enable_citation") enableCitation: Boolean = false, + @JsonProperty("user_id") userId: String = null) diff --git a/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ernie/bean/ChatCompletionResult.scala b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ernie/bean/ChatCompletionResult.scala new file mode 100644 index 00000000000..e029882c5e4 --- /dev/null +++ b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ernie/bean/ChatCompletionResult.scala @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.engine.chat.ernie.bean + +import java.lang.{Long => JLong} + +import com.fasterxml.jackson.annotation.JsonProperty + +case class ChatCompletionResult( + @JsonProperty("id") id: String, + @JsonProperty("object") obj: String, + @JsonProperty("created") created: JLong, + @JsonProperty("sentence_id") sentenceId: JLong, + @JsonProperty("is_end") isEnd: Boolean, + @JsonProperty("is_truncated") isTruncated: Boolean, + @JsonProperty("finish_reason") finishReason: String, + @JsonProperty("search_info") searchInfo: SearchInfo, + @JsonProperty("result") result: String, + @JsonProperty("need_clear_history") needClearHistory: Boolean, + @JsonProperty("ban_round") banRound: JLong, + @JsonProperty("usage") usage: Usage, + @JsonProperty("function_call") functionCall: FunctionCall, + @JsonProperty("error_msg") errorMsg: String, + @JsonProperty("error_code") errorCode: JLong) diff --git a/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ernie/bean/ChatMessage.scala b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ernie/bean/ChatMessage.scala new file mode 100644 index 00000000000..d2b33222d94 --- /dev/null +++ b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ernie/bean/ChatMessage.scala @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.engine.chat.ernie.bean + +import com.fasterxml.jackson.annotation.JsonProperty + +case class ChatMessage( + @JsonProperty("role") role: String, + @JsonProperty("content") content: String, + @JsonProperty("name") name: String, + @JsonProperty("function_call") functionCall: FunctionCall = null) diff --git a/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ernie/bean/Example.scala b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ernie/bean/Example.scala new file mode 100644 index 00000000000..fe25383dec4 --- /dev/null +++ b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ernie/bean/Example.scala @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.engine.chat.ernie.bean + +import com.fasterxml.jackson.annotation.JsonProperty + +case class Example( + @JsonProperty("role") role: String, + @JsonProperty("name") name: String, + @JsonProperty("content") content: String = null, + @JsonProperty("function_call") functionCall: FunctionCall = null) diff --git a/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ernie/bean/Function.scala b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ernie/bean/Function.scala new file mode 100644 index 00000000000..b0ad975bfa1 --- /dev/null +++ b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ernie/bean/Function.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.engine.chat.ernie.bean + +import java.util.{List => JList} + +import com.fasterxml.jackson.annotation.JsonProperty + +case class Function( + @JsonProperty("name") name: String, + @JsonProperty("description") description: String, + @JsonProperty("parameters") parameters: Object, + @JsonProperty("responses") responses: Object = null, + @JsonProperty("examples") examples: JList[Example] = null) diff --git a/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ernie/bean/FunctionCall.scala b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ernie/bean/FunctionCall.scala new file mode 100644 index 00000000000..a6b66d78f8b --- /dev/null +++ b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ernie/bean/FunctionCall.scala @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.engine.chat.ernie.bean + +import com.fasterxml.jackson.annotation.JsonProperty + +case class FunctionCall( + @JsonProperty("name") name: String, + @JsonProperty("thoughts") thoughts: String, + @JsonProperty("arguments") arguments: String = null) diff --git a/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ernie/bean/PluginUsage.scala b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ernie/bean/PluginUsage.scala new file mode 100644 index 00000000000..dd406c775a8 --- /dev/null +++ b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ernie/bean/PluginUsage.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.engine.chat.ernie.bean + +import java.lang.{Long => JLong} + +import com.fasterxml.jackson.annotation.JsonProperty + +case class PluginUsage( + @JsonProperty("name") name: String, + @JsonProperty("parse_tokens") parseTokens: JLong, + @JsonProperty("abstract_tokens") abstractTokens: JLong, + @JsonProperty("search_tokens") searchTokens: JLong, + @JsonProperty("total_tokens") totalTokens: JLong) diff --git a/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ernie/bean/SearchInfo.scala b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ernie/bean/SearchInfo.scala new file mode 100644 index 00000000000..f97aa6c5863 --- /dev/null +++ b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ernie/bean/SearchInfo.scala @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.engine.chat.ernie.bean + +import java.lang.{Long => JLong} +import java.util.{List => JList} + +import com.fasterxml.jackson.annotation.JsonProperty + +case class SearchInfo( + @JsonProperty("is_beset") isBeset: JLong, + @JsonProperty("rewrite_query") rewriteQuery: String, + @JsonProperty("search_results") searchResults: JList[SearchResult]) diff --git a/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ernie/bean/SearchResult.scala b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ernie/bean/SearchResult.scala new file mode 100644 index 00000000000..76b02be9243 --- /dev/null +++ b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ernie/bean/SearchResult.scala @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.engine.chat.ernie.bean + +import com.fasterxml.jackson.annotation.JsonProperty + +case class SearchResult( + @JsonProperty("index") index: java.lang.Long, + @JsonProperty("url") url: String, + @JsonProperty("title") title: String, + @JsonProperty("datasource_id") datasourceId: String) diff --git a/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ernie/bean/Usage.scala b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ernie/bean/Usage.scala new file mode 100644 index 00000000000..4696943be6b --- /dev/null +++ b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ernie/bean/Usage.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.engine.chat.ernie.bean + +import java.lang.{Long => JLong} +import java.util.{List => JList} + +import com.fasterxml.jackson.annotation.JsonProperty + +case class Usage( + @JsonProperty("prompt_tokens") promptTokens: JLong, + @JsonProperty("completion_tokens") completionTokens: JLong, + @JsonProperty("total_tokens") totalTokens: JLong, + @JsonProperty("plugins") plugins: JList[PluginUsage]) diff --git a/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ernie/service/ErnieBotService.scala b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ernie/service/ErnieBotService.scala new file mode 100644 index 00000000000..61f56421abe --- /dev/null +++ b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/ernie/service/ErnieBotService.scala @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.engine.chat.ernie.service + +import java.io.IOException +import java.time.Duration +import java.util.concurrent.TimeUnit + +import com.fasterxml.jackson.annotation.JsonInclude +import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper, PropertyNamingStrategy} +import io.reactivex.Single +import okhttp3.{ConnectionPool, OkHttpClient} +import retrofit2.{HttpException, Retrofit} +import retrofit2.adapter.rxjava2.RxJava2CallAdapterFactory +import retrofit2.converter.jackson.JacksonConverterFactory + +import org.apache.kyuubi.engine.chat.api.{ApiHttpException, ErnieBotApi} +import org.apache.kyuubi.engine.chat.ernie.bean.{ChatCompletionRequest, ChatCompletionResult} + +class ErnieBotService(api: ErnieBotApi) { + + def execute[T](apiCall: Single[T]): T = { + try apiCall.blockingGet + catch { + case httpException: HttpException => + try if (httpException.response != null && httpException.response.errorBody != null) { + val errorBody: String = httpException.response.errorBody.string + val statusCode: Int = httpException.response.code + throw new ApiHttpException(statusCode, errorBody, httpException) + } else { + throw httpException + } + catch { + case ioException: IOException => + throw httpException + } + } + } + + def createChatCompletion( + request: ChatCompletionRequest, + model: String, + accessToken: String): ChatCompletionResult = { + execute(this.api.createChatCompletion(model, accessToken, request)) + } +} + +object ErnieBotService { + final private val BASE_URL = "https://aip.baidubce.com/" + + def apply(api: ErnieBotApi): ErnieBotService = new ErnieBotService(api) + + def defaultObjectMapper: ObjectMapper = { + val mapper: ObjectMapper = new ObjectMapper + mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) + mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL) + mapper.setPropertyNamingStrategy(PropertyNamingStrategy.SNAKE_CASE) + mapper + } + + def defaultClient(timeout: Duration): OkHttpClient = { + new OkHttpClient.Builder() + .connectionPool(new ConnectionPool(5, 1, TimeUnit.SECONDS)) + .readTimeout(timeout.toMillis, TimeUnit.MILLISECONDS) + .build + } + + def defaultRetrofit(client: OkHttpClient, mapper: ObjectMapper): Retrofit = { + new Retrofit.Builder().baseUrl(BASE_URL).client(client) + .addConverterFactory(JacksonConverterFactory.create(mapper)) + .addCallAdapterFactory(RxJava2CallAdapterFactory.create) + .build + } + +} diff --git a/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/operation/ChatOperation.scala b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/operation/ChatOperation.scala index b0b1806f80c..60f15ea6534 100644 --- a/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/operation/ChatOperation.scala +++ b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/operation/ChatOperation.scala @@ -16,14 +16,14 @@ */ package org.apache.kyuubi.engine.chat.operation -import org.apache.hive.service.rpc.thrift._ - import org.apache.kyuubi.{KyuubiSQLException, Utils} import org.apache.kyuubi.config.KyuubiConf -import org.apache.kyuubi.engine.chat.schema.{RowSet, SchemaHelper} +import org.apache.kyuubi.engine.chat.schema.{ChatTRowSetGenerator, SchemaHelper} +import org.apache.kyuubi.engine.chat.schema.ChatTRowSetGenerator.COL_STRING_TYPE import org.apache.kyuubi.operation.{AbstractOperation, FetchIterator, OperationState} import org.apache.kyuubi.operation.FetchOrientation.{FETCH_FIRST, FETCH_NEXT, FETCH_PRIOR, FetchOrientation} import org.apache.kyuubi.session.Session +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ abstract class ChatOperation(session: Session) extends AbstractOperation(session) { @@ -46,8 +46,11 @@ abstract class ChatOperation(session: Session) extends AbstractOperation(session iter.fetchAbsolute(0) } - val taken = iter.take(rowSetSize) - val resultRowSet = RowSet.toTRowSet(taken.toSeq, 1, getProtocolVersion) + val taken = iter.take(rowSetSize).map(_.toSeq) + val resultRowSet = new ChatTRowSetGenerator().toTRowSet( + taken.toSeq, + Seq(COL_STRING_TYPE), + getProtocolVersion) resultRowSet.setStartRowOffset(iter.getPosition) val resp = new TFetchResultsResp(OK_STATUS) resp.setResults(resultRowSet) diff --git a/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/provider/ErnieBotProvider.scala b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/provider/ErnieBotProvider.scala new file mode 100644 index 00000000000..967ea333223 --- /dev/null +++ b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/provider/ErnieBotProvider.scala @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.engine.chat.provider + +import java.net.{InetSocketAddress, Proxy, URL} +import java.time.Duration +import java.util +import java.util.concurrent.TimeUnit + +import scala.collection.JavaConverters._ + +import com.google.common.cache.{CacheBuilder, CacheLoader, LoadingCache} +import com.theokanning.openai.service.OpenAiService.defaultObjectMapper + +import org.apache.kyuubi.config.KyuubiConf +import org.apache.kyuubi.engine.chat.api.ErnieBotApi +import org.apache.kyuubi.engine.chat.ernie.bean.{ChatCompletionRequest, ChatMessage} +import org.apache.kyuubi.engine.chat.ernie.enums.ChatMessageRole +import org.apache.kyuubi.engine.chat.ernie.service.ErnieBotService +import org.apache.kyuubi.engine.chat.ernie.service.ErnieBotService.{defaultClient, defaultRetrofit} + +class ErnieBotProvider(conf: KyuubiConf) extends ChatProvider { + + private val accessToken = conf.get(KyuubiConf.ENGINE_ERNIE_BOT_ACCESS_TOKEN).getOrElse { + throw new IllegalArgumentException( + s"'${KyuubiConf.ENGINE_ERNIE_BOT_ACCESS_TOKEN.key}' must be configured, " + + s"which could be got at https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Ilkkrb0i5") + } + + private val model = conf.get(KyuubiConf.ENGINE_ERNIE_BOT_MODEL) + + private val ernieBotService: ErnieBotService = { + val builder = defaultClient( + Duration.ofMillis(conf.get(KyuubiConf.ENGINE_ERNIE_HTTP_SOCKET_TIMEOUT))) + .newBuilder + .connectTimeout(Duration.ofMillis(conf.get(KyuubiConf.ENGINE_ERNIE_HTTP_CONNECT_TIMEOUT))) + + conf.get(KyuubiConf.ENGINE_CHAT_GPT_HTTP_PROXY) match { + case Some(httpProxyUrl) => + val url = new URL(httpProxyUrl) + val proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(url.getHost, url.getPort)) + builder.proxy(proxy) + case _ => + } + + val retrofit = defaultRetrofit(builder.build(), defaultObjectMapper) + val ernieBotApi = retrofit.create(classOf[ErnieBotApi]) + new ErnieBotService(ernieBotApi) + } + + private var sessionUser: Option[String] = None + + private val chatHistory: LoadingCache[String, util.ArrayDeque[ChatMessage]] = + CacheBuilder.newBuilder() + .expireAfterWrite(10, TimeUnit.MINUTES) + .build(new CacheLoader[String, util.ArrayDeque[ChatMessage]] { + override def load(sessionId: String): util.ArrayDeque[ChatMessage] = + new util.ArrayDeque[ChatMessage] + }) + + override def open(sessionId: String, user: Option[String]): Unit = { + sessionUser = user + chatHistory.getIfPresent(sessionId) + } + + override def ask(sessionId: String, q: String): String = { + val messages = chatHistory.get(sessionId) + try { + messages.addLast(ChatMessage(ChatMessageRole.USER.value(), q, null)) + val completionRequest = ChatCompletionRequest( + messages = messages.asScala.toList.asJava, + userId = sessionUser.orNull) + val chatCompletionResult = ernieBotService + .createChatCompletion(completionRequest, model, accessToken) + if (chatCompletionResult.errorMsg != null) { + throw new RuntimeException(chatCompletionResult.errorMsg) + } + val responseText = chatCompletionResult.result + responseText + } catch { + case e: Throwable => + messages.removeLast() + s"Chat failed. Error: ${e.getMessage}" + } + } + + override def close(sessionId: String): Unit = { + chatHistory.invalidate(sessionId) + } +} diff --git a/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/schema/ChatTRowSetGenerator.scala b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/schema/ChatTRowSetGenerator.scala new file mode 100644 index 00000000000..7e6a121bef7 --- /dev/null +++ b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/schema/ChatTRowSetGenerator.scala @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.engine.chat.schema + +import org.apache.kyuubi.engine.chat.schema.ChatTRowSetGenerator._ +import org.apache.kyuubi.engine.result.TRowSetGenerator +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ + +class ChatTRowSetGenerator + extends TRowSetGenerator[Seq[String], Seq[String], String] { + + override def getColumnSizeFromSchemaType(schema: Seq[String]): Int = schema.length + + override def getColumnType(schema: Seq[String], ordinal: Int): String = COL_STRING_TYPE + + override def isColumnNullAt(row: Seq[String], ordinal: Int): Boolean = row(ordinal) == null + + override def getColumnAs[T](row: Seq[String], ordinal: Int): T = row(ordinal).asInstanceOf[T] + + override def toTColumn(rows: Seq[Seq[String]], ordinal: Int, typ: String): TColumn = + typ match { + case COL_STRING_TYPE => asStringTColumn(rows, ordinal) + case otherType => throw new UnsupportedOperationException(s"type $otherType") + } + + override def toTColumnValue(row: Seq[String], ordinal: Int, types: Seq[String]): TColumnValue = + getColumnType(types, ordinal) match { + case COL_STRING_TYPE => asStringTColumnValue(row, ordinal) + case otherType => throw new UnsupportedOperationException(s"type $otherType") + } +} + +object ChatTRowSetGenerator { + val COL_STRING_TYPE: String = classOf[String].getSimpleName +} diff --git a/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/schema/RowSet.scala b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/schema/RowSet.scala deleted file mode 100644 index 3bb4ba7dfa9..00000000000 --- a/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/schema/RowSet.scala +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.kyuubi.engine.chat.schema - -import java.util - -import org.apache.hive.service.rpc.thrift._ - -import org.apache.kyuubi.util.RowSetUtils._ - -object RowSet { - - def emptyTRowSet(): TRowSet = { - new TRowSet(0, new java.util.ArrayList[TRow](0)) - } - - def toTRowSet( - rows: Seq[Array[String]], - columnSize: Int, - protocolVersion: TProtocolVersion): TRowSet = { - if (protocolVersion.getValue < TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6.getValue) { - toRowBasedSet(rows, columnSize) - } else { - toColumnBasedSet(rows, columnSize) - } - } - - def toRowBasedSet(rows: Seq[Array[String]], columnSize: Int): TRowSet = { - val rowSize = rows.length - val tRows = new java.util.ArrayList[TRow](rowSize) - var i = 0 - while (i < rowSize) { - val row = rows(i) - val tRow = new TRow() - var j = 0 - val columnSize = row.length - while (j < columnSize) { - val columnValue = stringTColumnValue(j, row) - tRow.addToColVals(columnValue) - j += 1 - } - i += 1 - tRows.add(tRow) - } - new TRowSet(0, tRows) - } - - def toColumnBasedSet(rows: Seq[Array[String]], columnSize: Int): TRowSet = { - val rowSize = rows.length - val tRowSet = new TRowSet(0, new util.ArrayList[TRow](rowSize)) - var i = 0 - while (i < columnSize) { - val tColumn = toTColumn(rows, i) - tRowSet.addToColumns(tColumn) - i += 1 - } - tRowSet - } - - private def toTColumn(rows: Seq[Array[String]], ordinal: Int): TColumn = { - val nulls = new java.util.BitSet() - val values = getOrSetAsNull[String](rows, ordinal, nulls, "") - TColumn.stringVal(new TStringColumn(values, nulls)) - } - - private def getOrSetAsNull[String]( - rows: Seq[Array[String]], - ordinal: Int, - nulls: util.BitSet, - defaultVal: String): util.List[String] = { - val size = rows.length - val ret = new util.ArrayList[String](size) - var idx = 0 - while (idx < size) { - val row = rows(idx) - val isNull = row(ordinal) == null - if (isNull) { - nulls.set(idx, true) - ret.add(idx, defaultVal) - } else { - ret.add(idx, row(ordinal)) - } - idx += 1 - } - ret - } - - private def stringTColumnValue(ordinal: Int, row: Array[String]): TColumnValue = { - val tStringValue = new TStringValue - if (row(ordinal) != null) tStringValue.setValue(row(ordinal)) - TColumnValue.stringVal(tStringValue) - } -} diff --git a/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/schema/SchemaHelper.scala b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/schema/SchemaHelper.scala index 8ccfdda2fe9..2b380f3845d 100644 --- a/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/schema/SchemaHelper.scala +++ b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/schema/SchemaHelper.scala @@ -19,7 +19,7 @@ package org.apache.kyuubi.engine.chat.schema import java.util.Collections -import org.apache.hive.service.rpc.thrift._ +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ object SchemaHelper { diff --git a/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/session/ChatSessionImpl.scala b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/session/ChatSessionImpl.scala index 6ec6d062600..0d836877445 100644 --- a/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/session/ChatSessionImpl.scala +++ b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/session/ChatSessionImpl.scala @@ -16,11 +16,10 @@ */ package org.apache.kyuubi.engine.chat.session -import org.apache.hive.service.rpc.thrift.{TGetInfoType, TGetInfoValue, TProtocolVersion} - import org.apache.kyuubi.{KYUUBI_VERSION, KyuubiSQLException} import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_SESSION_HANDLE_KEY import org.apache.kyuubi.session.{AbstractSession, SessionHandle, SessionManager} +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TGetInfoType, TGetInfoValue, TProtocolVersion} class ChatSessionImpl( protocol: TProtocolVersion, diff --git a/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/session/ChatSessionManager.scala b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/session/ChatSessionManager.scala index 33a9dd45066..ff5c4748e34 100644 --- a/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/session/ChatSessionManager.scala +++ b/externals/kyuubi-chat-engine/src/main/scala/org/apache/kyuubi/engine/chat/session/ChatSessionManager.scala @@ -16,8 +16,6 @@ */ package org.apache.kyuubi.engine.chat.session -import org.apache.hive.service.rpc.thrift.TProtocolVersion - import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiConf.ENGINE_SHARE_LEVEL import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_SESSION_HANDLE_KEY @@ -27,6 +25,7 @@ import org.apache.kyuubi.engine.chat.operation.ChatOperationManager import org.apache.kyuubi.engine.chat.provider.ChatProvider import org.apache.kyuubi.operation.OperationManager import org.apache.kyuubi.session.{Session, SessionHandle, SessionManager} +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TProtocolVersion class ChatSessionManager(name: String) extends SessionManager(name) { diff --git a/externals/kyuubi-flink-sql-engine/pom.xml b/externals/kyuubi-flink-sql-engine/pom.xml index eec5c1cd9e8..d01f05fed7d 100644 --- a/externals/kyuubi-flink-sql-engine/pom.xml +++ b/externals/kyuubi-flink-sql-engine/pom.xml @@ -105,6 +105,12 @@ provided + + org.apache.flink + flink-table-planner-loader + provided + + org.apache.kyuubi @@ -180,8 +186,6 @@ com.google.guava:guava commons-codec:commons-codec org.apache.commons:commons-lang3 - org.apache.hive:hive-service-rpc - org.apache.thrift:* org.apache.kyuubi:* @@ -233,27 +237,6 @@ org.apache.commons.lang ${kyuubi.shade.packageName}.org.apache.commons.lang - - org.apache.hive.service.rpc.thrift - ${kyuubi.shade.packageName}.org.apache.hive.service.rpc.thrift - - org.apache.hive.service.rpc.thrift.** - - - - com.facebook.fb303 - ${kyuubi.shade.packageName}.com.facebook.fb303 - - com.facebook.fb303.** - - - - org.apache.thrift - ${kyuubi.shade.packageName}.org.apache.thrift - - org.apache.thrift.** - - diff --git a/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/FlinkSQLEngine.scala b/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/FlinkSQLEngine.scala index 8838799bc24..dff9aa6025b 100644 --- a/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/FlinkSQLEngine.scala +++ b/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/FlinkSQLEngine.scala @@ -32,6 +32,7 @@ import org.apache.flink.table.gateway.service.context.DefaultContext import org.apache.kyuubi.{Logging, Utils} import org.apache.kyuubi.Utils.{addShutdownHook, currentUser, FLINK_ENGINE_SHUTDOWN_PRIORITY} import org.apache.kyuubi.config.KyuubiConf +import org.apache.kyuubi.config.KyuubiConf.ENGINE_FLINK_INITIALIZE_SQL import org.apache.kyuubi.config.KyuubiReservedKeys.{KYUUBI_ENGINE_NAME, KYUUBI_SESSION_USER_KEY} import org.apache.kyuubi.engine.flink.FlinkSQLEngine.{countDownLatch, currentEngine} import org.apache.kyuubi.service.Serverable @@ -102,9 +103,7 @@ object FlinkSQLEngine extends Logging { startEngine(engineContext) info("Flink engine started") - if ("yarn-application".equalsIgnoreCase(executionTarget)) { - bootstrapFlinkApplicationExecutor() - } + bootstrap(executionTarget) // blocking main thread countDownLatch.await() @@ -129,15 +128,22 @@ object FlinkSQLEngine extends Logging { } } - private def bootstrapFlinkApplicationExecutor() = { - // trigger an execution to initiate EmbeddedExecutor with the default flink conf + private def bootstrap(executionTarget: String) = { val flinkConf = new Configuration() - flinkConf.set(PipelineOptions.NAME, "kyuubi-bootstrap-sql") - debug(s"Running bootstrap Flink SQL in application mode with flink conf: $flinkConf.") val tableEnv = TableEnvironment.create(flinkConf) - val res = tableEnv.executeSql("select 'kyuubi'") - res.await() - info("Bootstrap Flink SQL finished.") + + if ("yarn-application".equalsIgnoreCase(executionTarget)) { + // trigger an execution to initiate EmbeddedExecutor with the default flink conf + flinkConf.set(PipelineOptions.NAME, "kyuubi-bootstrap-sql") + debug(s"Running bootstrap Flink SQL in application mode with flink conf: $flinkConf.") + tableEnv.executeSql("select 'kyuubi'").await() + } + + kyuubiConf.get(ENGINE_FLINK_INITIALIZE_SQL).foreach { stmt => + tableEnv.executeSql(stmt).await() + } + + info("Bootstrap SQL finished.") } private def setDeploymentConf(executionTarget: String, flinkConf: Configuration): Unit = { diff --git a/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/operation/ExecuteStatement.scala b/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/operation/ExecuteStatement.scala index 0e0c476e2d4..f30b6ab8627 100644 --- a/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/operation/ExecuteStatement.scala +++ b/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/operation/ExecuteStatement.scala @@ -17,10 +17,15 @@ package org.apache.kyuubi.engine.flink.operation +import java.util.Optional + import scala.concurrent.duration.Duration import org.apache.flink.api.common.JobID +import org.apache.flink.table.api.TableException import org.apache.flink.table.gateway.api.operation.OperationHandle +import org.apache.flink.table.operations.Operation +import org.apache.flink.table.operations.command.HelpOperation import org.apache.kyuubi.Logging import org.apache.kyuubi.engine.flink.FlinkEngineUtils @@ -28,6 +33,7 @@ import org.apache.kyuubi.engine.flink.result.ResultSetUtil import org.apache.kyuubi.operation.OperationState import org.apache.kyuubi.operation.log.OperationLog import org.apache.kyuubi.session.Session +import org.apache.kyuubi.util.reflect.{DynConstructors, DynFields, DynMethods} class ExecuteStatement( session: Session, @@ -59,6 +65,14 @@ class ExecuteStatement( private def executeStatement(): Unit = { try { setState(OperationState.RUNNING) + + val operation = parseExtendedStatement(statement) + if (operation.isPresent && operation.get().isInstanceOf[HelpOperation]) { + resultSet = ResultSetUtil.helpMessageResultSet + setState(OperationState.FINISHED) + return + } + val resultFetcher = executor.executeStatement( new OperationHandle(getHandle.identifier), statement) @@ -71,4 +85,36 @@ class ExecuteStatement( shutdownTimeoutMonitor() } } + + private def parseExtendedStatement(statement: String): Optional[Operation] = { + val plannerModuleClassLoader: ClassLoader = getPlannerModuleClassLoader + val extendedParser: AnyRef = + DynConstructors.builder() + .loader(plannerModuleClassLoader) + .impl("org.apache.flink.table.planner.parse.ExtendedParser") + .build().newInstance() + DynMethods.builder("parse") + .hiddenImpl(extendedParser.getClass, classOf[String]) + .buildChecked() + .invoke(extendedParser, statement) + } + + private def getPlannerModuleClassLoader: ClassLoader = { + try { + val plannerModule = DynMethods.builder("getInstance") + .hiddenImpl("org.apache.flink.table.planner.loader.PlannerModule") + .buildStaticChecked() + .invoke().asInstanceOf[AnyRef] + + DynFields.builder() + .hiddenImpl(plannerModule.getClass, "submoduleClassLoader") + .build[ClassLoader].bind(plannerModule).get + } catch { + case e: Exception => + throw new TableException( + "Error obtaining Flink planner module ClassLoader. " + + "Make sure a flink-table-planner-loader.jar is on the classpath", + e) + } + } } diff --git a/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/operation/FlinkOperation.scala b/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/operation/FlinkOperation.scala index 1424b721c4b..df067a888c6 100644 --- a/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/operation/FlinkOperation.scala +++ b/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/operation/FlinkOperation.scala @@ -28,16 +28,16 @@ import org.apache.flink.configuration.Configuration import org.apache.flink.table.gateway.service.context.SessionContext import org.apache.flink.table.gateway.service.operation.OperationExecutor import org.apache.flink.types.Row -import org.apache.hive.service.rpc.thrift.{TFetchResultsResp, TGetResultSetMetadataResp, TTableSchema} import org.apache.kyuubi.{KyuubiSQLException, Utils} import org.apache.kyuubi.engine.flink.result.ResultSet -import org.apache.kyuubi.engine.flink.schema.RowSet +import org.apache.kyuubi.engine.flink.schema.{FlinkTRowSetGenerator, RowSet} import org.apache.kyuubi.engine.flink.session.FlinkSessionImpl import org.apache.kyuubi.operation.{AbstractOperation, OperationState} import org.apache.kyuubi.operation.FetchOrientation.{FETCH_FIRST, FETCH_NEXT, FETCH_PRIOR, FetchOrientation} import org.apache.kyuubi.operation.log.OperationLog import org.apache.kyuubi.session.Session +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TFetchResultsResp, TGetResultSetMetadataResp, TTableSchema} abstract class FlinkOperation(session: Session) extends AbstractOperation(session) { @@ -133,10 +133,9 @@ abstract class FlinkOperation(session: Session) extends AbstractOperation(sessio case Some(tz) => ZoneId.of(tz) case None => ZoneId.systemDefault() } - val resultRowSet = RowSet.resultSetToTRowSet( + val resultRowSet = new FlinkTRowSetGenerator(zoneId).toTRowSet( batch.toList, resultSet, - zoneId, getProtocolVersion) val resp = new TFetchResultsResp(OK_STATUS) resp.setResults(resultRowSet) diff --git a/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/operation/FlinkSQLOperationManager.scala b/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/operation/FlinkSQLOperationManager.scala index d5c0629eedd..324efb6585c 100644 --- a/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/operation/FlinkSQLOperationManager.scala +++ b/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/operation/FlinkSQLOperationManager.scala @@ -37,6 +37,9 @@ class FlinkSQLOperationManager extends OperationManager("FlinkSQLOperationManage private lazy val resultMaxRowsDefault = getConf.get(ENGINE_FLINK_MAX_ROWS) + private lazy val resultFetchTimeoutDefault = getConf.get(ENGINE_FLINK_FETCH_TIMEOUT) + .map(_ milliseconds).getOrElse(Duration.Inf) + private lazy val operationConvertCatalogDatabaseDefault = getConf.get(ENGINE_OPERATION_CONVERT_CATALOG_DATABASE_ENABLED) @@ -70,8 +73,11 @@ class FlinkSQLOperationManager extends OperationManager("FlinkSQLOperationManage resultMaxRowsDefault.toString).toInt val resultFetchTimeout = - flinkSession.normalizedConf.get(ENGINE_FLINK_FETCH_TIMEOUT.key).map(_.toLong milliseconds) - .getOrElse(Duration.Inf) + flinkSession.normalizedConf + .get(ENGINE_FLINK_FETCH_TIMEOUT.key) + .map(ENGINE_FLINK_FETCH_TIMEOUT.valueConverter) + .map(_.get milliseconds) + .getOrElse(resultFetchTimeoutDefault) val op = mode match { case NoneMode => diff --git a/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/result/CommandStrings.scala b/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/result/CommandStrings.scala new file mode 100644 index 00000000000..56a199fa697 --- /dev/null +++ b/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/result/CommandStrings.scala @@ -0,0 +1,245 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.engine.flink.result + +import scala.collection.mutable.ListBuffer + +import org.apache.flink.util.Preconditions +import org.jline.utils.{AttributedString, AttributedStringBuilder, AttributedStyle} + +/** + * Utility class that contains all strings for Flink SQL commands and messages. + */ +object CommandStrings { + private val CMD_DESC_DELIMITER = "\t\t" + + private class SQLCommandsDescriptions { + private var commandMaxLength = -1 + private val commandsDescriptionList = ListBuffer[(String, String)]() + + def commandDescription(command: String, description: String): SQLCommandsDescriptions = { + Preconditions.checkState( + command.nonEmpty, + s"content of command must not be empty.", + Seq(): _*) + Preconditions.checkState( + description.nonEmpty, + s"content of command's description must not be empty.", + Seq(): _*) + + updateMaxCommandLength(command.length) + commandsDescriptionList += ((command, description)) + this + } + + private def updateMaxCommandLength(newLength: Int): Unit = { + Preconditions.checkState(newLength > 0) + if (commandMaxLength < newLength) { + commandMaxLength = newLength + } + } + + private def formatDescription(input: String): String = { + val maxLineLength = 160 + val newLinePrefix = " " * commandMaxLength + CMD_DESC_DELIMITER + val words = input.split(" ") + + val (lastLine, lines) = words.foldLeft(("", List[String]())) { + case ((line, lines), word) => + val newLine = if (line.isEmpty) word else line + " " + word + if (newLine.length > maxLineLength) (word, lines :+ line) else (newLine, lines) + } + + (lines :+ lastLine).mkString("\n" + newLinePrefix) + } + + def build(): AttributedString = { + val attributedStringBuilder = new AttributedStringBuilder + if (commandsDescriptionList.nonEmpty) { + commandsDescriptionList.foreach { + case (cmd, cmdDesc) => + attributedStringBuilder + .style(AttributedStyle.DEFAULT.bold()) + .append(cmd.padTo(commandMaxLength, " ").mkString) + .append(CMD_DESC_DELIMITER) + .style(AttributedStyle.DEFAULT) + .append(formatDescription(cmdDesc)) + .append('\n') + } + } + attributedStringBuilder.toAttributedString + } + } + + // scalastyle:off line.size.limit + val MESSAGE_HELP: AttributedString = + new AttributedStringBuilder() + .append("The following commands are available:\n\n") + .append(COMMANDS_DESCRIPTIONS) + .style(AttributedStyle.DEFAULT.underline()) + .append("\nHint") + .style(AttributedStyle.DEFAULT) + .append( + ": Make sure that a statement ends with \";\" for finalizing (multi-line) statements.") + // About Documentation Link. + .style(AttributedStyle.DEFAULT) + .append( + "\nThe above list includes only the most frequently used statements.\nYou can also type any Flink SQL statement, please visit https://nightlies.apache.org/flink/flink-docs-stable/docs/dev/table/sql/overview/ for more details.") + .toAttributedString + + def COMMANDS_DESCRIPTIONS: AttributedString = + new SQLCommandsDescriptions() + .commandDescription( + "HELP", + "Prints the available commands or the detailed description of a specified command.") + .commandDescription( + "SET", + "Sets a session configuration property. Syntax: \"SET ''='';\". Use \"SET;\" for listing all properties.") + .commandDescription( + "RESET", + "Resets a session configuration property. Syntax: \"RESET '';\". Use \"RESET;\" for reset all session properties.") + .commandDescription( + "INSERT INTO", + "Inserts the results of a SQL SELECT query into a declared table sink.") + .commandDescription( + "INSERT OVERWRITE", + "Inserts the results of a SQL SELECT query into a declared table sink and overwrite existing data.") + .commandDescription( + "SELECT", + "Executes a SQL SELECT query on the Flink cluster.") + .commandDescription( + "EXPLAIN", + "Describes the execution plan of a query or table with the given name.") + .commandDescription( + "BEGIN STATEMENT SET", + "Begins a statement set. Syntax: \"BEGIN STATEMENT SET;\"") + .commandDescription("END", "Ends a statement set. Syntax: \"END;\"") + .commandDescription( + "ADD JAR", + "Adds the specified jar file to the submitted jobs' classloader. Syntax: \"ADD JAR '.jar'\"") + .commandDescription( + "SHOW JARS", + "Shows the list of user-specified jar dependencies. This list is impacted by the ADD JAR commands.") + .commandDescription( + "SHOW CATALOGS", + "Shows the list of registered catalogs.") + .commandDescription( + "SHOW CURRENT CATALOG", + "Shows the name of the current catalog.") + .commandDescription( + "SHOW DATABASES", + "Shows all databases in the current catalog.") + .commandDescription( + "SHOW CURRENT DATABASE", + "Shows the name of the current database.") + .commandDescription( + "SHOW TABLES", + "Shows all tables for an optionally specified database. Syntax: \"SHOW TABLES [ ( FROM | IN ) [catalog_name.]database_name ] [ [NOT] LIKE ]\"") + .commandDescription( + "SHOW CREATE TABLE", + "Shows the CREATE TABLE statement that creates the specified table.") + .commandDescription( + "SHOW COLUMNS", + "Shows all columns of a table with the given name. Syntax: \"SHOW COLUMNS ( FROM | IN ) [[catalog_name.]database.] [ [NOT] LIKE ]\"") + .commandDescription( + "SHOW VIEWS", + "Shows all views in the current catalog and the current database.") + .commandDescription( + "SHOW CREATE VIEW", + "Shows the CREATE VIEW statement that creates the specified view. Syntax: \"SHOW CREATE VIEW [catalog_name.][db_name.]view_name\"") + .commandDescription( + "SHOW FUNCTIONS", + "Shows all user-defined and built-in functions in the current catalog and current database. Use \"SHOW USER FUNCTIONS\" for listing all user-defined functions in the current catalog and current database.") + .commandDescription( + "SHOW MODULES", + "Shows all enabled module names with resolution order.") + .commandDescription( + "USE CATALOG", + "Sets the current catalog. All subsequent commands that do not explicitly specify a catalog will use this one. If the provided catalog does not exist, an exception is thrown. The default current catalog is default_catalog. Syntax: \"USE CATALOG catalog_name\"") + .commandDescription( + "USE", + "Sets the current database. All subsequent commands that do not explicitly specify a database will use this one. If the provided database does not exist, an exception is thrown. The default current database is default_database. Syntax: \"USE [catalog_name.]database_name\"") + .commandDescription( + "DESC", + "Describes the schema of a table with the given name. Syntax: \"{ DESCRIBE | DESC } [catalog_name.][db_name.]table_name\"") + .commandDescription( + "ANALYZE", + "ANALYZE statements are used to collect statistics for existing tables and store the result to catalog. Only supports in batch mode. Syntax: \"ANALYZE TABLE [catalog_name.][db_name.]table_name PARTITION(partcol1[=val1] [, partcol2[=val2], ...]) COMPUTE STATISTICS [FOR COLUMNS col1 [, col2, ...] | FOR ALL COLUMNS]\"") + .commandDescription( + "ALTER TABLE", + "Renames a table or change a table's properties. Syntax: \"ALTER TABLE [catalog_name.][db_name.]table_name RENAME TO new_table_name\", the other syntax: \"ALTER TABLE [catalog_name.][db_name.]table_name SET ( key1=val1[, key2=val2, ...] )\"") + .commandDescription( + "ALTER VIEW", + "Renames a given view to a new name within the same catalog and database. Syntax: \"ALTER VIEW [catalog_name.][db_name.]view_name RENAME TO new_view_name\"") + .commandDescription( + "ALTER DATABASE", + "Changes a database's properties. Syntax: \"ALTER DATABASE [catalog_name.]db_name SET ( key1=val1[, key2=val2, ...] )\"") + .commandDescription( + "ALTER FUNCTION", + "Changes a catalog function with the new identifier and optional language tag. Syntax: \"ALTER [TEMPORARY|TEMPORARY SYSTEM] FUNCTION [IF EXISTS] [catalog_name.][db_name.]function_name AS identifier [LANGUAGE JAVA|SCALA|PYTHON]\"") + .commandDescription( + "DROP CATALOG", + "Drops a catalog with the given catalog name. Syntax: \"DROP CATALOG [IF EXISTS] catalog_name\"") + .commandDescription( + "DROP DATABASE", + "Drops a database with the given database name. Syntax: \"DROP DATABASE [IF EXISTS] [catalog_name.]db_name [ (RESTRICT | CASCADE) ]\"") + .commandDescription( + "DROP TABLE", + "Drops a table with the given table name. Syntax: \"DROP [TEMPORARY] TABLE [IF EXISTS] [catalog_name.][db_name.]table_name\"") + .commandDescription( + "DROP VIEW", + "Drops a view with the given view name. Syntax: \"DROP [TEMPORARY] VIEW [IF EXISTS] [catalog_name.][db_name.]view_name\"") + .commandDescription( + "DROP FUNCTION", + "Drops a catalog function with the given function name. Syntax: \"DROP [TEMPORARY|TEMPORARY SYSTEM] FUNCTION [IF EXISTS] [catalog_name.][db_name.]function_name\"") + .commandDescription( + "CREATE CATALOG", + "Creates a catalog with the given catalog properties. Syntax: \"CREATE CATALOG catalog_name WITH ( 'key1'='value1'[, 'key2'='value2', ...] )\"") + .commandDescription( + "CREATE DATABASE", + "Creates a database with the given database properties. Syntax: \"CREATE DATABASE [IF NOT EXISTS] [catalog_name.]db_name [COMMENT 'database_comment'] [WITH ( 'key1'='value1'[, 'key2'='value2', ...] )]\"") + .commandDescription( + "CREATE TABLE", + "Creates a table with the given table properties. Syntax: \"CREATE [TEMPORARY] TABLE [IF NOT EXISTS] [catalog_name.][db_name.]table_name ( { col_name data_type [COMMENT col_comment] [column_constraint] | table_constraint } [,...] ) [COMMENT table_comment] [PARTITIONED BY (col_name, col_name, ...)] [WITH ( 'key1'='value1'[, 'key2'='value2', ...] )] \"") + .commandDescription( + "CREATE VIEW", + "Creates a view with the given view expression. Syntax: \"CREATE [TEMPORARY] VIEW [IF NOT EXISTS] [catalog_name.][db_name.]view_name [(column_name [,...])] [COMMENT view_comment] AS query_expression\"") + .commandDescription( + "CREATE FUNCTION", + "Creates a catalog function with the given function properties. Syntax: \"CREATE [TEMPORARY|TEMPORARY SYSTEM] FUNCTION [IF NOT EXISTS] [catalog_name.][db_name.]function_name AS identifier [LANGUAGE JAVA|SCALA|PYTHON] [USING JAR '.jar' [, JAR '.jar']* ]\"") + .commandDescription( + "SHOW JOBS", + "Show the jobs in the Flink cluster. Supports in version 1.17 and later.") + .commandDescription( + "STOP JOB", + "Stop the job with the given job ID. Supports in version 1.17 and later. Syntax: \"STOP JOB '' [WITH SAVEPOINT] [WITH DRAIN]\"") + .commandDescription( + "UPDATE", + "Performs row-level updating on the target table. Only supports in batch mode. Supports in version 1.17 and later. Syntax: \"UPDATE [catalog_name.][db_name.]table_name SET col_name1 = col_val1 [, col_name2 = col_val2 ...] [WHERE condition]\"") + .commandDescription( + "DELETE", + "Performs row-level deleting on the target table. Only supports in batch mode. Supports in version 1.17 and later. Syntax: \"DELETE FROM [catalog_name.][db_name.]table_name [WHERE condition]\"") + .commandDescription( + "TRUNCATE TABLE", + "Truncates the target table. Only supports in batch mode. Supports in version 1.18 and later. Syntax: \"TRUNCATE TABLE [catalog_name.][db_name.]table_name\"") + .commandDescription( + "CALL", + "Calls a stored procedure. Supports in version 1.18 and later. Syntax: \"CALL [catalog_name.][database_name.]procedure_name ([ expression [, expression]* ] )\"") + .build() + // scalastyle:on +} diff --git a/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/result/QueryResultFetchIterator.scala b/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/result/IncrementalResultFetchIterator.scala similarity index 88% rename from externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/result/QueryResultFetchIterator.scala rename to externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/result/IncrementalResultFetchIterator.scala index 60ae08d9dd8..60c92d9afdf 100644 --- a/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/result/QueryResultFetchIterator.scala +++ b/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/result/IncrementalResultFetchIterator.scala @@ -34,10 +34,12 @@ import org.apache.flink.table.types.DataType import org.apache.flink.types.Row import org.apache.kyuubi.Logging +import org.apache.kyuubi.engine.flink.FlinkEngineUtils import org.apache.kyuubi.engine.flink.shim.FlinkResultSet import org.apache.kyuubi.operation.FetchIterator +import org.apache.kyuubi.util.reflect.DynFields -class QueryResultFetchIterator( +class IncrementalResultFetchIterator( resultFetcher: ResultFetcher, maxRows: Int = 1000000, resultFetchTimeout: Duration = Duration.Inf) extends FetchIterator[Row] with Logging { @@ -58,8 +60,17 @@ class QueryResultFetchIterator( val FETCH_INTERVAL_MS: Long = 1000 + // for Flink 1.16 and below, isQueryResult is not supported + val isQueryResult: Boolean = + FlinkEngineUtils.FLINK_RUNTIME_VERSION < "1.17" || + DynFields.builder + .hiddenImpl(classOf[ResultFetcher], "isQueryResult") + .build[Boolean](resultFetcher).get() + + val effectiveMaxRows: Int = if (isQueryResult) maxRows else Int.MaxValue + private val executor = Executors.newSingleThreadScheduledExecutor( - new ThreadFactoryBuilder().setNameFormat("flink-query-iterator-%d").setDaemon(true).build) + new ThreadFactoryBuilder().setNameFormat("flink-result-iterator-%d").setDaemon(true).build) implicit private val executionContext: ExecutionContextExecutor = ExecutionContext.fromExecutor(executor) @@ -78,7 +89,7 @@ class QueryResultFetchIterator( // if no timeout is set, this would block until some rows are fetched debug(s"Fetching from result store with timeout $resultFetchTimeout ms") while (!fetched && !Thread.interrupted()) { - val rs = resultFetcher.fetchResults(token, maxRows - bufferedRows.length) + val rs = resultFetcher.fetchResults(token, effectiveMaxRows - bufferedRows.length) val flinkRs = new FlinkResultSet(rs) // TODO: replace string-based match when Flink 1.16 support is dropped flinkRs.getResultType.name() match { @@ -144,7 +155,7 @@ class QueryResultFetchIterator( debug(s"Fetching from buffered rows at pos $pos.") val row = bufferedRows(pos.toInt) pos += 1 - if (pos >= maxRows) { + if (pos >= effectiveMaxRows) { hasNext = false } row @@ -154,7 +165,7 @@ class QueryResultFetchIterator( if (hasNext) { val row = bufferedRows(pos.toInt) pos += 1 - if (pos >= maxRows) { + if (pos >= effectiveMaxRows) { hasNext = false } row diff --git a/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/result/ResultSet.scala b/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/result/ResultSet.scala index b8d407297ac..f9d3de0ab97 100644 --- a/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/result/ResultSet.scala +++ b/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/result/ResultSet.scala @@ -53,7 +53,7 @@ case class ResultSet( def close: Unit = { data match { - case queryIte: QueryResultFetchIterator => queryIte.close() + case incIte: IncrementalResultFetchIterator => incIte.close() case _ => } } diff --git a/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/result/ResultSetUtil.scala b/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/result/ResultSetUtil.scala index 8b722f1e5e9..032c86ac13f 100644 --- a/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/result/ResultSetUtil.scala +++ b/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/result/ResultSetUtil.scala @@ -58,6 +58,13 @@ object ResultSetUtil { .data(Array[Row](Row.of("OK"))) .build + def helpMessageResultSet: ResultSet = + ResultSet.builder + .resultKind(ResultKind.SUCCESS_WITH_CONTENT) + .columns(Column.physical("result", DataTypes.STRING)) + .data(Array[Row](Row.of(CommandStrings.MESSAGE_HELP.toString))) + .build + def fromResultFetcher( resultFetcher: ResultFetcher, maxRows: Int, @@ -66,7 +73,7 @@ object ResultSetUtil { throw new IllegalArgumentException("maxRows should be positive") } val schema = resultFetcher.getResultSchema - val ite = new QueryResultFetchIterator(resultFetcher, maxRows, resultFetchTimeout) + val ite = new IncrementalResultFetchIterator(resultFetcher, maxRows, resultFetchTimeout) ResultSet.builder .resultKind(ResultKind.SUCCESS_WITH_CONTENT) .columns(schema.getColumns) diff --git a/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/schema/FlinkTRowSetGenerator.scala b/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/schema/FlinkTRowSetGenerator.scala new file mode 100644 index 00000000000..463b66111a5 --- /dev/null +++ b/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/schema/FlinkTRowSetGenerator.scala @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.flink.schema + +import java.time.{Instant, ZonedDateTime, ZoneId} + +import org.apache.flink.table.data.StringData +import org.apache.flink.table.types.logical._ +import org.apache.flink.types.Row + +import org.apache.kyuubi.engine.flink.result.ResultSet +import org.apache.kyuubi.engine.flink.schema.RowSet.{toHiveString, TIMESTAMP_LZT_FORMATTER} +import org.apache.kyuubi.engine.result.TRowSetGenerator +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ + +class FlinkTRowSetGenerator(zoneId: ZoneId) + extends TRowSetGenerator[ResultSet, Row, LogicalType] { + override def getColumnSizeFromSchemaType(schema: ResultSet): Int = schema.columns.size + + override def getColumnType(schema: ResultSet, ordinal: Int): LogicalType = + schema.columns.get(ordinal).getDataType.getLogicalType + + override def isColumnNullAt(row: Row, ordinal: Int): Boolean = row.getField(ordinal) == null + + override def getColumnAs[T](row: Row, ordinal: Int): T = row.getFieldAs[T](ordinal) + + override def toTColumnValue(row: Row, ordinal: Int, types: ResultSet): TColumnValue = { + getColumnType(types, ordinal) match { + case _: BooleanType => asBooleanTColumnValue(row, ordinal) + case _: TinyIntType => asByteTColumnValue(row, ordinal) + case _: SmallIntType => asShortTColumnValue(row, ordinal) + case _: IntType => asIntegerTColumnValue(row, ordinal) + case _: BigIntType => asLongTColumnValue(row, ordinal) + case _: DoubleType => asDoubleTColumnValue(row, ordinal) + case _: FloatType => asFloatTColumnValue(row, ordinal) + case t @ (_: VarCharType | _: CharType) => + asStringTColumnValue( + row, + ordinal, + convertFunc = { + case value: String => value + case value: StringData => value.toString + case null => null + case other => throw new IllegalArgumentException( + s"Unsupported conversion class ${other.getClass} for type ${t.getClass}.") + }) + case _: LocalZonedTimestampType => + asStringTColumnValue( + row, + ordinal, + rawValue => + TIMESTAMP_LZT_FORMATTER.format( + ZonedDateTime.ofInstant(rawValue.asInstanceOf[Instant], zoneId))) + case t => asStringTColumnValue(row, ordinal, rawValue => toHiveString((rawValue, t))) + } + } + + override def toTColumn(rows: Seq[Row], ordinal: Int, logicalType: LogicalType): TColumn = { + // for each column, determine the conversion class by sampling the first non-value value + // if there's no row, set the entire column empty + logicalType match { + case _: BooleanType => asBooleanTColumn(rows, ordinal) + case _: TinyIntType => asByteTColumn(rows, ordinal) + case _: SmallIntType => asShortTColumn(rows, ordinal) + case _: IntType => asIntegerTColumn(rows, ordinal) + case _: BigIntType => asLongTColumn(rows, ordinal) + case _: FloatType => asFloatTColumn(rows, ordinal) + case _: DoubleType => asDoubleTColumn(rows, ordinal) + case t @ (_: VarCharType | _: CharType) => + val sampleField = rows.iterator.map(_.getField(ordinal)).find(_ ne null).orNull + sampleField match { + case _: String => asStringTColumn(rows, ordinal) + case _: StringData => + asStringTColumn( + rows, + ordinal, + convertFunc = (row, ordinal) => getColumnAs[StringData](row, ordinal).toString) + case null => asStringTColumn(rows, ordinal) + case other => throw new IllegalArgumentException( + s"Unsupported conversion class ${other.getClass} for type ${t.getClass}.") + } + case _: LocalZonedTimestampType => + asStringTColumn( + rows, + ordinal, + TIMESTAMP_LZT_FORMATTER.format(ZonedDateTime.ofInstant(Instant.EPOCH, zoneId)), + (row, ordinal) => + TIMESTAMP_LZT_FORMATTER.format( + ZonedDateTime.ofInstant(getColumnAs[Instant](row, ordinal), zoneId))) + case _ => + asStringTColumn( + rows, + ordinal, + convertFunc = (row, ordinal) => toHiveString((row.getField(ordinal), logicalType))) + } + } + +} diff --git a/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/schema/RowSet.scala b/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/schema/RowSet.scala index c446396d5bb..7015d7c52b6 100644 --- a/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/schema/RowSet.scala +++ b/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/schema/RowSet.scala @@ -17,262 +17,25 @@ package org.apache.kyuubi.engine.flink.schema -import java.{lang, util} -import java.nio.ByteBuffer import java.nio.charset.StandardCharsets import java.sql.{Date, Timestamp} -import java.time.{Instant, LocalDate, LocalDateTime, ZonedDateTime, ZoneId} +import java.time.{LocalDate, LocalDateTime} import java.time.format.{DateTimeFormatter, DateTimeFormatterBuilder, TextStyle} import java.time.temporal.ChronoField import java.util.Collections import scala.collection.JavaConverters._ import scala.collection.mutable.ListBuffer -import scala.language.implicitConversions import org.apache.flink.table.catalog.Column -import org.apache.flink.table.data.StringData import org.apache.flink.table.types.logical._ import org.apache.flink.types.Row -import org.apache.hive.service.rpc.thrift._ -import org.apache.kyuubi.engine.flink.result.ResultSet +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ import org.apache.kyuubi.util.RowSetUtils._ object RowSet { - def resultSetToTRowSet( - rows: Seq[Row], - resultSet: ResultSet, - zoneId: ZoneId, - protocolVersion: TProtocolVersion): TRowSet = { - if (protocolVersion.getValue < TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6.getValue) { - toRowBaseSet(rows, resultSet, zoneId) - } else { - toColumnBasedSet(rows, resultSet, zoneId) - } - } - - def toRowBaseSet(rows: Seq[Row], resultSet: ResultSet, zoneId: ZoneId): TRowSet = { - val rowSize = rows.size - val tRows = new util.ArrayList[TRow](rowSize) - var i = 0 - while (i < rowSize) { - val row = rows(i) - val tRow = new TRow() - val columnSize = row.getArity - var j = 0 - while (j < columnSize) { - val columnValue = toTColumnValue(j, row, resultSet, zoneId) - tRow.addToColVals(columnValue) - j += 1 - } - tRows.add(tRow) - i += 1 - } - - new TRowSet(0, tRows) - } - - def toColumnBasedSet(rows: Seq[Row], resultSet: ResultSet, zoneId: ZoneId): TRowSet = { - val size = rows.length - val tRowSet = new TRowSet(0, new util.ArrayList[TRow](size)) - val columnSize = resultSet.getColumns.size() - var i = 0 - while (i < columnSize) { - val field = resultSet.getColumns.get(i) - val tColumn = toTColumn(rows, i, field.getDataType.getLogicalType, zoneId) - tRowSet.addToColumns(tColumn) - i += 1 - } - tRowSet - } - - private def toTColumnValue( - ordinal: Int, - row: Row, - resultSet: ResultSet, - zoneId: ZoneId): TColumnValue = { - - val column = resultSet.getColumns.get(ordinal) - val logicalType = column.getDataType.getLogicalType - - logicalType match { - case _: BooleanType => - val boolValue = new TBoolValue - if (row.getField(ordinal) != null) { - boolValue.setValue(row.getField(ordinal).asInstanceOf[Boolean]) - } - TColumnValue.boolVal(boolValue) - case _: TinyIntType => - val tByteValue = new TByteValue - if (row.getField(ordinal) != null) { - tByteValue.setValue(row.getField(ordinal).asInstanceOf[Byte]) - } - TColumnValue.byteVal(tByteValue) - case _: SmallIntType => - val tI16Value = new TI16Value - if (row.getField(ordinal) != null) { - tI16Value.setValue(row.getField(ordinal).asInstanceOf[Short]) - } - TColumnValue.i16Val(tI16Value) - case _: IntType => - val tI32Value = new TI32Value - if (row.getField(ordinal) != null) { - tI32Value.setValue(row.getField(ordinal).asInstanceOf[Int]) - } - TColumnValue.i32Val(tI32Value) - case _: BigIntType => - val tI64Value = new TI64Value - if (row.getField(ordinal) != null) { - tI64Value.setValue(row.getField(ordinal).asInstanceOf[Long]) - } - TColumnValue.i64Val(tI64Value) - case _: FloatType => - val tDoubleValue = new TDoubleValue - if (row.getField(ordinal) != null) { - val doubleValue = lang.Double.valueOf(row.getField(ordinal).asInstanceOf[Float].toString) - tDoubleValue.setValue(doubleValue) - } - TColumnValue.doubleVal(tDoubleValue) - case _: DoubleType => - val tDoubleValue = new TDoubleValue - if (row.getField(ordinal) != null) { - tDoubleValue.setValue(row.getField(ordinal).asInstanceOf[Double]) - } - TColumnValue.doubleVal(tDoubleValue) - case t @ (_: VarCharType | _: CharType) => - val tStringValue = new TStringValue - val fieldValue = row.getField(ordinal) - fieldValue match { - case value: String => - tStringValue.setValue(value) - case value: StringData => - tStringValue.setValue(value.toString) - case null => - tStringValue.setValue(null) - case other => - throw new IllegalArgumentException( - s"Unsupported conversion class ${other.getClass} " + - s"for type ${t.getClass}.") - } - TColumnValue.stringVal(tStringValue) - case _: LocalZonedTimestampType => - val tStringValue = new TStringValue - val fieldValue = row.getField(ordinal) - tStringValue.setValue(TIMESTAMP_LZT_FORMATTER.format( - ZonedDateTime.ofInstant(fieldValue.asInstanceOf[Instant], zoneId))) - TColumnValue.stringVal(tStringValue) - case t => - val tStringValue = new TStringValue - if (row.getField(ordinal) != null) { - tStringValue.setValue(toHiveString((row.getField(ordinal), t))) - } - TColumnValue.stringVal(tStringValue) - } - } - - implicit private def bitSetToBuffer(bitSet: java.util.BitSet): ByteBuffer = { - ByteBuffer.wrap(bitSet.toByteArray) - } - - private def toTColumn( - rows: Seq[Row], - ordinal: Int, - logicalType: LogicalType, - zoneId: ZoneId): TColumn = { - val nulls = new java.util.BitSet() - // for each column, determine the conversion class by sampling the first non-value value - // if there's no row, set the entire column empty - val sampleField = rows.iterator.map(_.getField(ordinal)).find(_ ne null).orNull - logicalType match { - case _: BooleanType => - val values = getOrSetAsNull[lang.Boolean](rows, ordinal, nulls, true) - TColumn.boolVal(new TBoolColumn(values, nulls)) - case _: TinyIntType => - val values = getOrSetAsNull[lang.Byte](rows, ordinal, nulls, 0.toByte) - TColumn.byteVal(new TByteColumn(values, nulls)) - case _: SmallIntType => - val values = getOrSetAsNull[lang.Short](rows, ordinal, nulls, 0.toShort) - TColumn.i16Val(new TI16Column(values, nulls)) - case _: IntType => - val values = getOrSetAsNull[lang.Integer](rows, ordinal, nulls, 0) - TColumn.i32Val(new TI32Column(values, nulls)) - case _: BigIntType => - val values = getOrSetAsNull[lang.Long](rows, ordinal, nulls, 0L) - TColumn.i64Val(new TI64Column(values, nulls)) - case _: FloatType => - val values = getOrSetAsNull[lang.Float](rows, ordinal, nulls, 0.0f) - .asScala.map(n => lang.Double.valueOf(n.toString)).asJava - TColumn.doubleVal(new TDoubleColumn(values, nulls)) - case _: DoubleType => - val values = getOrSetAsNull[lang.Double](rows, ordinal, nulls, 0.0) - TColumn.doubleVal(new TDoubleColumn(values, nulls)) - case t @ (_: VarCharType | _: CharType) => - val values: util.List[String] = new util.ArrayList[String](0) - sampleField match { - case _: String => - values.addAll(getOrSetAsNull[String](rows, ordinal, nulls, "")) - case _: StringData => - val stringDataValues = - getOrSetAsNull[StringData](rows, ordinal, nulls, StringData.fromString("")) - stringDataValues.forEach(e => values.add(e.toString)) - case null => - values.addAll(getOrSetAsNull[String](rows, ordinal, nulls, "")) - case other => - throw new IllegalArgumentException( - s"Unsupported conversion class ${other.getClass} " + - s"for type ${t.getClass}.") - } - TColumn.stringVal(new TStringColumn(values, nulls)) - case _: LocalZonedTimestampType => - val values = getOrSetAsNull[Instant](rows, ordinal, nulls, Instant.EPOCH) - .toArray().map(v => - TIMESTAMP_LZT_FORMATTER.format( - ZonedDateTime.ofInstant(v.asInstanceOf[Instant], zoneId))) - TColumn.stringVal(new TStringColumn(values.toList.asJava, nulls)) - case _ => - var i = 0 - val rowSize = rows.length - val values = new java.util.ArrayList[String](rowSize) - while (i < rowSize) { - val row = rows(i) - nulls.set(i, row.getField(ordinal) == null) - val value = - if (row.getField(ordinal) == null) { - "" - } else { - toHiveString((row.getField(ordinal), logicalType)) - } - values.add(value) - i += 1 - } - TColumn.stringVal(new TStringColumn(values, nulls)) - } - } - - private def getOrSetAsNull[T]( - rows: Seq[Row], - ordinal: Int, - nulls: java.util.BitSet, - defaultVal: T): java.util.List[T] = { - val size = rows.length - val ret = new java.util.ArrayList[T](size) - var idx = 0 - while (idx < size) { - val row = rows(idx) - val isNull = row.getField(ordinal) == null - if (isNull) { - nulls.set(idx, true) - ret.add(idx, defaultVal) - } else { - ret.add(idx, row.getFieldAs[T](ordinal)) - } - idx += 1 - } - ret - } - def toTColumnDesc(field: Column, pos: Int): TColumnDesc = { val tColumnDesc = new TColumnDesc() tColumnDesc.setColumnName(field.getName) diff --git a/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/session/FlinkSQLSessionManager.scala b/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/session/FlinkSQLSessionManager.scala index b7cd462172f..8627e5a2475 100644 --- a/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/session/FlinkSQLSessionManager.scala +++ b/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/session/FlinkSQLSessionManager.scala @@ -23,12 +23,12 @@ import scala.collection.JavaConverters.mapAsJavaMap import org.apache.flink.table.gateway.api.session.SessionEnvironment import org.apache.flink.table.gateway.rest.util.SqlGatewayRestAPIVersion import org.apache.flink.table.gateway.service.context.DefaultContext -import org.apache.hive.service.rpc.thrift.TProtocolVersion import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_SESSION_HANDLE_KEY import org.apache.kyuubi.engine.flink.operation.FlinkSQLOperationManager import org.apache.kyuubi.engine.flink.shim.FlinkSessionManager import org.apache.kyuubi.session.{Session, SessionHandle, SessionManager} +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TProtocolVersion class FlinkSQLSessionManager(engineContext: DefaultContext) extends SessionManager("FlinkSQLSessionManager") { diff --git a/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/session/FlinkSessionImpl.scala b/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/session/FlinkSessionImpl.scala index b8d1f85692b..624c3ad9465 100644 --- a/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/session/FlinkSessionImpl.scala +++ b/externals/kyuubi-flink-sql-engine/src/main/scala/org/apache/kyuubi/engine/flink/session/FlinkSessionImpl.scala @@ -25,13 +25,14 @@ import org.apache.flink.table.client.gateway.SqlExecutionException import org.apache.flink.table.gateway.api.operation.OperationHandle import org.apache.flink.table.gateway.service.context.SessionContext import org.apache.flink.table.gateway.service.session.{Session => FSession} -import org.apache.hive.service.rpc.thrift.{TGetInfoType, TGetInfoValue, TProtocolVersion} import org.apache.kyuubi.KyuubiSQLException +import org.apache.kyuubi.config.KyuubiConf._ import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_SESSION_HANDLE_KEY import org.apache.kyuubi.engine.flink.FlinkEngineUtils import org.apache.kyuubi.engine.flink.udf.KDFRegistry import org.apache.kyuubi.session.{AbstractSession, SessionHandle, SessionManager, USE_CATALOG, USE_DATABASE} +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TGetInfoType, TGetInfoValue, TProtocolVersion} class FlinkSessionImpl( protocol: TProtocolVersion, @@ -64,6 +65,15 @@ class FlinkSessionImpl( override def open(): Unit = { val executor = fSession.createExecutor(Configuration.fromMap(fSession.getSessionConfig)) + sessionManager.getConf.get(ENGINE_SESSION_FLINK_INITIALIZE_SQL).foreach { sql => + try { + executor.executeStatement(OperationHandle.create, sql) + } catch { + case NonFatal(e) => + throw KyuubiSQLException(s"execute ${ENGINE_SESSION_FLINK_INITIALIZE_SQL.key} $sql ", e) + } + } + val (useCatalogAndDatabaseConf, otherConf) = normalizedConf.partition { case (k, _) => Array(USE_CATALOG, USE_DATABASE).contains(k) } @@ -99,6 +109,7 @@ class FlinkSessionImpl( case TGetInfoType.CLI_SERVER_NAME | TGetInfoType.CLI_DBMS_NAME => TGetInfoValue.stringValue("Apache Flink") case TGetInfoType.CLI_DBMS_VER => TGetInfoValue.stringValue(EnvironmentInformation.getVersion) + case TGetInfoType.CLI_ODBC_KEYWORDS => TGetInfoValue.stringValue("Unimplemented") case _ => throw KyuubiSQLException(s"Unrecognized GetInfoType value: $infoType") } } diff --git a/externals/kyuubi-flink-sql-engine/src/test/scala/org/apache/kyuubi/engine/flink/WithFlinkSQLEngineLocal.scala b/externals/kyuubi-flink-sql-engine/src/test/scala/org/apache/kyuubi/engine/flink/WithFlinkSQLEngineLocal.scala index 92c1bcd83fc..1c4adce189d 100644 --- a/externals/kyuubi-flink-sql-engine/src/test/scala/org/apache/kyuubi/engine/flink/WithFlinkSQLEngineLocal.scala +++ b/externals/kyuubi-flink-sql-engine/src/test/scala/org/apache/kyuubi/engine/flink/WithFlinkSQLEngineLocal.scala @@ -23,7 +23,7 @@ import java.net.URI import java.nio.file.{Files, Paths} import scala.collection.JavaConverters._ -import scala.collection.mutable.ArrayBuffer +import scala.collection.mutable import org.apache.flink.configuration.{Configuration, RestOptions} import org.apache.flink.runtime.minicluster.{MiniCluster, MiniClusterConfiguration} @@ -32,6 +32,7 @@ import org.apache.kyuubi.{KYUUBI_VERSION, KyuubiException, KyuubiFunSuite, SCALA import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiConf._ import org.apache.kyuubi.ha.HighAvailabilityConf.HA_ADDRESSES +import org.apache.kyuubi.util.command.CommandLineUtils._ import org.apache.kyuubi.zookeeper.EmbeddedZookeeper import org.apache.kyuubi.zookeeper.ZookeeperConf.{ZK_CLIENT_PORT, ZK_CLIENT_PORT_ADDRESS} @@ -45,7 +46,7 @@ trait WithFlinkSQLEngineLocal extends KyuubiFunSuite with WithFlinkTestResources private var zkServer: EmbeddedZookeeper = _ - protected val conf: KyuubiConf = FlinkSQLEngine.kyuubiConf + protected val conf: KyuubiConf = new KyuubiConf(false) protected def engineRefId: String @@ -60,7 +61,6 @@ trait WithFlinkSQLEngineLocal extends KyuubiFunSuite with WithFlinkTestResources } } withKyuubiConf.foreach { case (k, v) => - System.setProperty(k, v) conf.set(k, v) } @@ -112,7 +112,7 @@ trait WithFlinkSQLEngineLocal extends KyuubiFunSuite with WithFlinkTestResources processBuilder.environment().putAll(envs.asJava) conf.set(ENGINE_FLINK_EXTRA_CLASSPATH, udfJar.getAbsolutePath) - val command = new ArrayBuffer[String]() + val command = new mutable.ListBuffer[String]() command += envs("JAVA_EXEC") @@ -123,8 +123,7 @@ trait WithFlinkSQLEngineLocal extends KyuubiFunSuite with WithFlinkTestResources command += javaOptions.get } - command += "-cp" - val classpathEntries = new java.util.LinkedHashSet[String] + val classpathEntries = new mutable.LinkedHashSet[String] // flink engine runtime jar mainResource(envs).foreach(classpathEntries.add) // flink sql jars @@ -164,13 +163,11 @@ trait WithFlinkSQLEngineLocal extends KyuubiFunSuite with WithFlinkTestResources classpathEntries.add(s"$devHadoopJars${File.separator}*") } } - command += classpathEntries.asScala.mkString(File.pathSeparator) + command ++= genClasspathOption(classpathEntries) + command += "org.apache.kyuubi.engine.flink.FlinkSQLEngine" - conf.getAll.foreach { case (k, v) => - command += "--conf" - command += s"$k=$v" - } + command ++= confKeyValues(conf.getAll) processBuilder.command(command.toList.asJava) processBuilder.redirectOutput(Redirect.INHERIT) diff --git a/externals/kyuubi-flink-sql-engine/src/test/scala/org/apache/kyuubi/engine/flink/WithFlinkSQLEngineOnYarn.scala b/externals/kyuubi-flink-sql-engine/src/test/scala/org/apache/kyuubi/engine/flink/WithFlinkSQLEngineOnYarn.scala index 49fb947a3ec..730a2646bed 100644 --- a/externals/kyuubi-flink-sql-engine/src/test/scala/org/apache/kyuubi/engine/flink/WithFlinkSQLEngineOnYarn.scala +++ b/externals/kyuubi-flink-sql-engine/src/test/scala/org/apache/kyuubi/engine/flink/WithFlinkSQLEngineOnYarn.scala @@ -34,6 +34,7 @@ import org.apache.kyuubi.{KYUUBI_VERSION, KyuubiFunSuite, SCALA_COMPILE_VERSION, import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiConf.{ENGINE_FLINK_APPLICATION_JARS, KYUUBI_HOME} import org.apache.kyuubi.ha.HighAvailabilityConf.HA_ADDRESSES +import org.apache.kyuubi.util.command.CommandLineUtils._ import org.apache.kyuubi.zookeeper.EmbeddedZookeeper import org.apache.kyuubi.zookeeper.ZookeeperConf.{ZK_CLIENT_PORT, ZK_CLIENT_PORT_ADDRESS} @@ -179,10 +180,7 @@ trait WithFlinkSQLEngineOnYarn extends KyuubiFunSuite with WithFlinkTestResource conf.set(k, v) } - for ((k, v) <- conf.getAll) { - command += "--conf" - command += s"$k=$v" - } + command ++= confKeyValues(conf.getAll) processBuilder.command(command.toList.asJava) processBuilder.redirectOutput(Redirect.INHERIT) diff --git a/externals/kyuubi-flink-sql-engine/src/test/scala/org/apache/kyuubi/engine/flink/operation/FlinkEngineInitializeSuite.scala b/externals/kyuubi-flink-sql-engine/src/test/scala/org/apache/kyuubi/engine/flink/operation/FlinkEngineInitializeSuite.scala new file mode 100644 index 00000000000..c98d07cc48c --- /dev/null +++ b/externals/kyuubi-flink-sql-engine/src/test/scala/org/apache/kyuubi/engine/flink/operation/FlinkEngineInitializeSuite.scala @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.engine.flink.operation + +import java.util.UUID + +import org.apache.kyuubi.config.KyuubiConf._ +import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_SESSION_USER_KEY +import org.apache.kyuubi.engine.ShareLevel +import org.apache.kyuubi.engine.flink.{WithDiscoveryFlinkSQLEngine, WithFlinkSQLEngineLocal} +import org.apache.kyuubi.ha.HighAvailabilityConf.{HA_ENGINE_REF_ID, HA_NAMESPACE} +import org.apache.kyuubi.operation.{HiveJDBCTestHelper, NoneMode} + +class FlinkEngineInitializeSuite extends HiveJDBCTestHelper + with WithDiscoveryFlinkSQLEngine with WithFlinkSQLEngineLocal { + + protected def jdbcUrl: String = getFlinkEngineServiceUrl + + protected val ENGINE_INITIALIZE_SQL_VALUE: String = + "show databases;" + + protected val ENGINE_SESSION_INITIALIZE_SQL_VALUE: String = + """create catalog cat_b with ('type'='generic_in_memory'); + |create table blackhole(i int) with ('connector'='blackhole'); + |create table datagen(i int) with ( + |'connector'='datagen', + |'fields.i.kind'='sequence', + |'fields.i.start'='1', + |'fields.i.end'='10')""".stripMargin + + override def withKyuubiConf: Map[String, String] = { + Map( + "flink.execution.target" -> "remote", + "flink.high-availability.cluster-id" -> "flink-mini-cluster", + "flink.app.name" -> "kyuubi_connection_flink_kandy", + HA_NAMESPACE.key -> namespace, + HA_ENGINE_REF_ID.key -> engineRefId, + ENGINE_TYPE.key -> "FLINK_SQL", + ENGINE_SHARE_LEVEL.key -> shareLevel, + OPERATION_PLAN_ONLY_MODE.key -> NoneMode.name, + ENGINE_FLINK_INITIALIZE_SQL.key -> ENGINE_INITIALIZE_SQL_VALUE, + ENGINE_SESSION_FLINK_INITIALIZE_SQL.key -> ENGINE_SESSION_INITIALIZE_SQL_VALUE, + KYUUBI_SESSION_USER_KEY -> "kandy") + } + + override protected def engineRefId: String = UUID.randomUUID().toString + + def namespace: String = "/kyuubi/flink-local-engine-test" + + def shareLevel: String = ShareLevel.USER.toString + + def engineType: String = "flink" + + test("execute statement - kyuubi engine initialize") { + withJdbcStatement() { statement => + var resultSet = statement.executeQuery("show catalogs") + val expectedCatalogs = Set("default_catalog", "cat_b") + var actualCatalogs = Set[String]() + while (resultSet.next()) { + actualCatalogs += resultSet.getString(1) + } + assert(expectedCatalogs.subsetOf(actualCatalogs)) + + resultSet = statement.executeQuery("show databases") + assert(resultSet.next()) + assert(resultSet.getString(1) === "default_database") + assert(!resultSet.next()) + + val expectedTables = Set("blackhole", "datagen") + resultSet = statement.executeQuery("show tables") + while (resultSet.next()) { + assert(expectedTables.contains(resultSet.getString(1))) + } + assert(!resultSet.next()) + + var dropResult = statement.executeQuery("drop catalog cat_b") + assert(dropResult.next()) + assert(dropResult.getString(1) === "OK") + + dropResult = statement.executeQuery("drop table blackhole") + assert(dropResult.next()) + assert(dropResult.getString(1) === "OK") + + dropResult = statement.executeQuery("drop table datagen") + assert(dropResult.next()) + assert(dropResult.getString(1) === "OK") + } + } +} diff --git a/externals/kyuubi-flink-sql-engine/src/test/scala/org/apache/kyuubi/engine/flink/operation/FlinkOperationSuite.scala b/externals/kyuubi-flink-sql-engine/src/test/scala/org/apache/kyuubi/engine/flink/operation/FlinkOperationSuite.scala index 8e7c35a95a4..59d5fde3467 100644 --- a/externals/kyuubi-flink-sql-engine/src/test/scala/org/apache/kyuubi/engine/flink/operation/FlinkOperationSuite.scala +++ b/externals/kyuubi-flink-sql-engine/src/test/scala/org/apache/kyuubi/engine/flink/operation/FlinkOperationSuite.scala @@ -26,18 +26,18 @@ import scala.collection.JavaConverters._ import org.apache.flink.api.common.JobID import org.apache.flink.configuration.PipelineOptions import org.apache.flink.table.types.logical.LogicalTypeRoot -import org.apache.hive.service.rpc.thrift._ import org.apache.kyuubi.Utils import org.apache.kyuubi.config.KyuubiConf._ import org.apache.kyuubi.engine.flink.FlinkEngineUtils.FLINK_RUNTIME_VERSION import org.apache.kyuubi.engine.flink.WithFlinkTestResources -import org.apache.kyuubi.engine.flink.result.Constants +import org.apache.kyuubi.engine.flink.result.{CommandStrings, Constants} import org.apache.kyuubi.engine.flink.util.TestUserClassLoaderJar import org.apache.kyuubi.jdbc.hive.{KyuubiSQLException, KyuubiStatement} import org.apache.kyuubi.jdbc.hive.common.TimestampTZ import org.apache.kyuubi.operation.HiveJDBCTestHelper import org.apache.kyuubi.operation.meta.ResultSetSchemaConstant._ +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ abstract class FlinkOperationSuite extends HiveJDBCTestHelper with WithFlinkTestResources { @@ -637,7 +637,9 @@ abstract class FlinkOperationSuite extends HiveJDBCTestHelper with WithFlinkTest test("execute statement - show/stop jobs") { if (FLINK_RUNTIME_VERSION >= "1.17") { - withSessionConf()(Map(ENGINE_FLINK_MAX_ROWS.key -> "10"))(Map.empty) { + // use a bigger value to ensure all tasks of the streaming query run until + // we explicitly stop the job. + withSessionConf()(Map(ENGINE_FLINK_MAX_ROWS.key -> "10000"))(Map.empty) { withMultipleConnectionJdbcStatement()({ statement => statement.executeQuery( "create table tbl_a (a int) with (" + @@ -1146,6 +1148,22 @@ abstract class FlinkOperationSuite extends HiveJDBCTestHelper with WithFlinkTest assert(rows === 200) } } + if (FLINK_RUNTIME_VERSION >= "1.17") { + withSessionConf()(Map(ENGINE_FLINK_MAX_ROWS.key -> "10"))(Map.empty) { + withJdbcStatement() { statement => + for (i <- 0 to 10) { + statement.execute(s"create table tbl_src$i (a bigint) " + + s"with ('connector' = 'blackhole')") + } + val resultSet = statement.executeQuery("show tables") + var rows = 0 + while (resultSet.next()) { + rows += 1 + } + assert(rows === 11) + } + } + } } test("execute statement - add/show jar") { @@ -1253,7 +1271,7 @@ abstract class FlinkOperationSuite extends HiveJDBCTestHelper with WithFlinkTest test("test result fetch timeout") { val exception = intercept[KyuubiSQLException]( - withSessionConf()(Map(ENGINE_FLINK_FETCH_TIMEOUT.key -> "60000"))() { + withSessionConf()(Map(ENGINE_FLINK_FETCH_TIMEOUT.key -> "PT60S"))() { withJdbcStatement("tbl_a") { stmt => stmt.executeQuery("create table tbl_a (a int) " + "with ('connector' = 'datagen', 'rows-per-second'='0')") @@ -1263,4 +1281,14 @@ abstract class FlinkOperationSuite extends HiveJDBCTestHelper with WithFlinkTest }) assert(exception.getMessage === "Futures timed out after [60000 milliseconds]") } + + test("execute statement - help") { + withJdbcStatement() { stmt => + val resultSet = stmt.executeQuery("help") + val metadata = resultSet.getMetaData + assert(metadata.getColumnName(1) === "result") + assert(resultSet.next()) + assert(resultSet.getString(1).equals(CommandStrings.MESSAGE_HELP.toString)) + } + } } diff --git a/externals/kyuubi-flink-sql-engine/src/test/scala/org/apache/kyuubi/engine/flink/result/ResultSetSuite.scala b/externals/kyuubi-flink-sql-engine/src/test/scala/org/apache/kyuubi/engine/flink/result/ResultSetSuite.scala index 9ee5c658bc9..5e58d433f91 100644 --- a/externals/kyuubi-flink-sql-engine/src/test/scala/org/apache/kyuubi/engine/flink/result/ResultSetSuite.scala +++ b/externals/kyuubi-flink-sql-engine/src/test/scala/org/apache/kyuubi/engine/flink/result/ResultSetSuite.scala @@ -25,7 +25,7 @@ import org.apache.flink.table.data.StringData import org.apache.flink.types.Row import org.apache.kyuubi.KyuubiFunSuite -import org.apache.kyuubi.engine.flink.schema.RowSet +import org.apache.kyuubi.engine.flink.schema.FlinkTRowSetGenerator class ResultSetSuite extends KyuubiFunSuite { @@ -47,9 +47,9 @@ class ResultSetSuite extends KyuubiFunSuite { .build val timeZone = ZoneId.of("America/Los_Angeles") - assert(RowSet.toRowBaseSet(rowsNew, resultSetNew, timeZone) - === RowSet.toRowBaseSet(rowsOld, resultSetOld, timeZone)) - assert(RowSet.toColumnBasedSet(rowsNew, resultSetNew, timeZone) - === RowSet.toColumnBasedSet(rowsOld, resultSetOld, timeZone)) + assert(new FlinkTRowSetGenerator(timeZone).toRowBasedSet(rowsNew, resultSetNew) + === new FlinkTRowSetGenerator(timeZone).toRowBasedSet(rowsOld, resultSetOld)) + assert(new FlinkTRowSetGenerator(timeZone).toColumnBasedSet(rowsNew, resultSetNew) + === new FlinkTRowSetGenerator(timeZone).toColumnBasedSet(rowsOld, resultSetOld)) } } diff --git a/externals/kyuubi-hive-sql-engine/pom.xml b/externals/kyuubi-hive-sql-engine/pom.xml index caed7e27c37..89f2395f043 100644 --- a/externals/kyuubi-hive-sql-engine/pom.xml +++ b/externals/kyuubi-hive-sql-engine/pom.xml @@ -50,18 +50,6 @@ ${project.version} - - org.apache.hive - hive-service-rpc - provided - - - - org.apache.thrift - libfb303 - provided - - com.google.code.findbugs jsr305 @@ -73,12 +61,6 @@ commons-collections - - org.apache.thrift - libthrift - provided - - com.google.guava failureaccess @@ -206,6 +188,18 @@ + + + com.fasterxml.jackson + ${kyuubi.shade.packageName}.com.fasterxml.jackson + + com.fasterxml.jackson.** + + + + + +
    diff --git a/externals/kyuubi-hive-sql-engine/src/main/scala/org/apache/kyuubi/engine/hive/HiveSQLEngine.scala b/externals/kyuubi-hive-sql-engine/src/main/scala/org/apache/kyuubi/engine/hive/HiveSQLEngine.scala index 3cc426c435a..f22e281fbaa 100644 --- a/externals/kyuubi-hive-sql-engine/src/main/scala/org/apache/kyuubi/engine/hive/HiveSQLEngine.scala +++ b/externals/kyuubi-hive-sql-engine/src/main/scala/org/apache/kyuubi/engine/hive/HiveSQLEngine.scala @@ -79,6 +79,14 @@ object HiveSQLEngine extends Logging { kyuubiConf.setIfMissing(KyuubiConf.FRONTEND_THRIFT_BINARY_BIND_PORT, 0) kyuubiConf.setIfMissing(HA_ZK_CONN_RETRY_POLICY, RetryPolicies.N_TIME.toString) + // align with the operational behavior of HiveServer2, it is necessary to + // include the `hiveserver2-site.xml` configuration within the HiveConf settings. + // for instance, upon the installation of the Hive Ranger plugin, authorization + // configurations are appended to the `hiveserver2-site.xml` file. Similarly, to activate + // the Ranger plugin for the Hive engine within Kyuubi, it is essential for the Hive engine + // to load the `hiveserver2-site.xml` file. This ensures that the Hive engine's + // security features are consistent with those managed by HiveServer2. See [KYUUBI #5878]. + hiveConf.addResource("hiveserver2-site.xml") for ((k, v) <- kyuubiConf.getAll) { hiveConf.set(k, v) } @@ -130,7 +138,15 @@ object HiveSQLEngine extends Logging { } else { val effectiveUser = UserGroupInformation.createProxyUser(sessionUser.get, realUser) effectiveUser.doAs(new PrivilegedExceptionAction[Unit] { - override def run(): Unit = startEngine() + override def run(): Unit = { + val engineCredentials = + kyuubiConf.getOption(KyuubiReservedKeys.KYUUBI_ENGINE_CREDENTIALS_KEY) + kyuubiConf.unset(KyuubiReservedKeys.KYUUBI_ENGINE_CREDENTIALS_KEY) + engineCredentials.filter(_.nonEmpty).foreach { credentials => + HiveTBinaryFrontendService.renewDelegationToken(credentials) + } + startEngine() + } }) } diff --git a/externals/kyuubi-hive-sql-engine/src/main/scala/org/apache/kyuubi/engine/hive/HiveTBinaryFrontendService.scala b/externals/kyuubi-hive-sql-engine/src/main/scala/org/apache/kyuubi/engine/hive/HiveTBinaryFrontendService.scala index d7cc801d3f6..082e4d12f69 100644 --- a/externals/kyuubi-hive-sql-engine/src/main/scala/org/apache/kyuubi/engine/hive/HiveTBinaryFrontendService.scala +++ b/externals/kyuubi-hive-sql-engine/src/main/scala/org/apache/kyuubi/engine/hive/HiveTBinaryFrontendService.scala @@ -17,11 +17,19 @@ package org.apache.kyuubi.engine.hive +import org.apache.hadoop.io.Text +import org.apache.hadoop.security.UserGroupInformation + +import org.apache.kyuubi.KyuubiSQLException import org.apache.kyuubi.ha.client.{EngineServiceDiscovery, ServiceDiscovery} import org.apache.kyuubi.service.{Serverable, Service, TBinaryFrontendService} +import org.apache.kyuubi.service.TFrontendService.OK_STATUS +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TRenewDelegationTokenReq, TRenewDelegationTokenResp} +import org.apache.kyuubi.util.KyuubiHadoopUtils class HiveTBinaryFrontendService(override val serverable: Serverable) extends TBinaryFrontendService("HiveTBinaryFrontend") { + import HiveTBinaryFrontendService._ override lazy val discoveryService: Option[Service] = { if (ServiceDiscovery.supportServiceDiscovery(conf)) { @@ -30,4 +38,39 @@ class HiveTBinaryFrontendService(override val serverable: Serverable) None } } + + override def RenewDelegationToken(req: TRenewDelegationTokenReq): TRenewDelegationTokenResp = { + debug(req.toString) + + // We hacked `TCLIService.Iface.RenewDelegationToken` to transfer Credentials from Kyuubi + // Server to Hive SQL engine + val resp = new TRenewDelegationTokenResp() + try { + renewDelegationToken(req.getDelegationToken) + resp.setStatus(OK_STATUS) + } catch { + case e: Exception => + warn("Error renew delegation tokens: ", e) + resp.setStatus(KyuubiSQLException.toTStatus(e)) + } + resp + } +} + +object HiveTBinaryFrontendService { + + def renewDelegationToken(tokenStr: String): Unit = { + val currentUser = UserGroupInformation.getCurrentUser + // `currentUser` is either `UserGroupInformation.getLoginUser` or a proxy user. + // If `currentUser` is a proxy user, it needs a HIVE_DELEGATION_TOKEN to pass + // HiveMetastoreClient authentication. + if (currentUser.getAuthenticationMethod == UserGroupInformation.AuthenticationMethod.PROXY) { + val newCreds = KyuubiHadoopUtils.decodeCredentials(tokenStr) + KyuubiHadoopUtils.getTokenMap(newCreds).values + .find(_.getKind == new Text("HIVE_DELEGATION_TOKEN")) + .foreach { token => + UserGroupInformation.getCurrentUser.addToken(token) + } + } + } } diff --git a/externals/kyuubi-hive-sql-engine/src/main/scala/org/apache/kyuubi/engine/hive/operation/HiveOperation.scala b/externals/kyuubi-hive-sql-engine/src/main/scala/org/apache/kyuubi/engine/hive/operation/HiveOperation.scala index 9759fa00be4..11cb5c5dfb5 100644 --- a/externals/kyuubi-hive-sql-engine/src/main/scala/org/apache/kyuubi/engine/hive/operation/HiveOperation.scala +++ b/externals/kyuubi-hive-sql-engine/src/main/scala/org/apache/kyuubi/engine/hive/operation/HiveOperation.scala @@ -19,16 +19,18 @@ package org.apache.kyuubi.engine.hive.operation import java.util.concurrent.Future +import org.apache.hive.service.cli.{FetchOrientation => HiveFetchOrientation} import org.apache.hive.service.cli.operation.{Operation, OperationManager} import org.apache.hive.service.cli.session.{HiveSession, SessionManager => HiveSessionManager} -import org.apache.hive.service.rpc.thrift.{TFetchResultsResp, TGetResultSetMetadataResp} import org.apache.kyuubi.KyuubiSQLException import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_SESSION_USER_KEY import org.apache.kyuubi.engine.hive.session.HiveSessionImpl +import org.apache.kyuubi.engine.hive.util.HiveRpcUtils import org.apache.kyuubi.operation.{AbstractOperation, FetchOrientation, OperationState, OperationStatus} import org.apache.kyuubi.operation.FetchOrientation.FetchOrientation import org.apache.kyuubi.session.Session +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TFetchResultsResp, TGetResultSetMetadataResp} abstract class HiveOperation(session: Session) extends AbstractOperation(session) { @@ -90,7 +92,7 @@ abstract class HiveOperation(session: Session) extends AbstractOperation(session override def getResultSetMetadata: TGetResultSetMetadataResp = { val schema = internalHiveOperation.getResultSetSchema.toTTableSchema val resp = new TGetResultSetMetadataResp - resp.setSchema(schema) + resp.setSchema(HiveRpcUtils.asKyuubi(schema)) resp.setStatus(OK_STATUS) resp } @@ -98,18 +100,18 @@ abstract class HiveOperation(session: Session) extends AbstractOperation(session override def getNextRowSetInternal( order: FetchOrientation, rowSetSize: Int): TFetchResultsResp = { - val tOrder = FetchOrientation.toTFetchOrientation(order) - val hiveOrder = org.apache.hive.service.cli.FetchOrientation.getFetchOrientation(tOrder) + val hiveTOrder = HiveRpcUtils.asHive(FetchOrientation.toTFetchOrientation(order)) + val hiveOrder = HiveFetchOrientation.getFetchOrientation(hiveTOrder) val rowSet = internalHiveOperation.getNextRowSet(hiveOrder, rowSetSize) val resp = new TFetchResultsResp(OK_STATUS) - resp.setResults(rowSet.toTRowSet) + resp.setResults(HiveRpcUtils.asKyuubi(rowSet.toTRowSet)) resp.setHasMoreRows(false) resp } def getOperationLogRowSet(order: FetchOrientation, rowSetSize: Int): TFetchResultsResp = { - val tOrder = FetchOrientation.toTFetchOrientation(order) - val hiveOrder = org.apache.hive.service.cli.FetchOrientation.getFetchOrientation(tOrder) + val hiveTOrder = HiveRpcUtils.asHive(FetchOrientation.toTFetchOrientation(order)) + val hiveOrder = HiveFetchOrientation.getFetchOrientation(hiveTOrder) val handle = internalHiveOperation.getHandle val rowSet = delegatedOperationManager.getOperationLogRowSet( handle, @@ -117,7 +119,7 @@ abstract class HiveOperation(session: Session) extends AbstractOperation(session rowSetSize, hive.getHiveConf).toTRowSet val resp = new TFetchResultsResp(OK_STATUS) - resp.setResults(rowSet) + resp.setResults(HiveRpcUtils.asKyuubi(rowSet)) resp.setHasMoreRows(false) resp } diff --git a/externals/kyuubi-hive-sql-engine/src/main/scala/org/apache/kyuubi/engine/hive/operation/HiveOperationManager.scala b/externals/kyuubi-hive-sql-engine/src/main/scala/org/apache/kyuubi/engine/hive/operation/HiveOperationManager.scala index 4e41e742e0b..faa7381ced7 100644 --- a/externals/kyuubi-hive-sql-engine/src/main/scala/org/apache/kyuubi/engine/hive/operation/HiveOperationManager.scala +++ b/externals/kyuubi-hive-sql-engine/src/main/scala/org/apache/kyuubi/engine/hive/operation/HiveOperationManager.scala @@ -20,13 +20,13 @@ package org.apache.kyuubi.engine.hive.operation import java.util.List import org.apache.hadoop.hive.conf.HiveConf.ConfVars -import org.apache.hive.service.rpc.thrift.TFetchResultsResp import org.apache.kyuubi.config.KyuubiConf._ import org.apache.kyuubi.engine.hive.session.HiveSessionImpl import org.apache.kyuubi.operation.{Operation, OperationHandle, OperationManager} import org.apache.kyuubi.operation.FetchOrientation.FetchOrientation import org.apache.kyuubi.session.Session +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TFetchResultsResp class HiveOperationManager() extends OperationManager("HiveOperationManager") { // we use hive's operation log diff --git a/externals/kyuubi-hive-sql-engine/src/main/scala/org/apache/kyuubi/engine/hive/session/HiveSessionImpl.scala b/externals/kyuubi-hive-sql-engine/src/main/scala/org/apache/kyuubi/engine/hive/session/HiveSessionImpl.scala index 5069b13798c..91db1cb8d0a 100644 --- a/externals/kyuubi-hive-sql-engine/src/main/scala/org/apache/kyuubi/engine/hive/session/HiveSessionImpl.scala +++ b/externals/kyuubi-hive-sql-engine/src/main/scala/org/apache/kyuubi/engine/hive/session/HiveSessionImpl.scala @@ -17,13 +17,12 @@ package org.apache.kyuubi.engine.hive.session -import java.util.HashMap +import java.util import scala.collection.JavaConverters._ import org.apache.hive.common.util.HiveVersionInfo import org.apache.hive.service.cli.session.HiveSession -import org.apache.hive.service.rpc.thrift.{TGetInfoType, TGetInfoValue, TProtocolVersion} import org.apache.kyuubi.KyuubiSQLException import org.apache.kyuubi.engine.hive.events.HiveSessionEvent @@ -31,6 +30,8 @@ import org.apache.kyuubi.engine.hive.udf.KDFRegistry import org.apache.kyuubi.events.EventBus import org.apache.kyuubi.operation.{Operation, OperationHandle} import org.apache.kyuubi.session.{AbstractSession, SessionHandle, SessionManager} +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TGetInfoType, TGetInfoValue, TProtocolVersion} +import org.apache.kyuubi.util.reflect.{DynFields, DynMethods} class HiveSessionImpl( protocol: TProtocolVersion, @@ -46,7 +47,7 @@ class HiveSessionImpl( private val sessionEvent = HiveSessionEvent(this) override def open(): Unit = { - val confClone = new HashMap[String, String]() + val confClone = new util.HashMap[String, String]() confClone.putAll(conf.asJava) // pass conf.asScala not support `put` method hive.open(confClone) KDFRegistry.registerAll() @@ -63,7 +64,22 @@ class HiveSessionImpl( case TGetInfoType.CLI_SERVER_NAME => TGetInfoValue.stringValue("Hive") case TGetInfoType.CLI_DBMS_NAME => TGetInfoValue.stringValue("Apache Hive") case TGetInfoType.CLI_DBMS_VER => TGetInfoValue.stringValue(HiveVersionInfo.getVersion) - case TGetInfoType.CLI_ODBC_KEYWORDS => TGetInfoValue.stringValue("Unimplemented") + case TGetInfoType.CLI_ODBC_KEYWORDS => + try { + // HIVE-17765 expose Hive keywords. + // exclude these keywords to be consistent with Hive behavior. + val excludes = DynFields.builder() + .hiddenImpl("org.apache.hive.service.cli.session.HiveSessionImpl", "ODBC_KEYWORDS") + .buildStaticChecked[util.Set[String]]().get() + val keywords = DynMethods.builder("getKeywords") + .impl("org.apache.hadoop.hive.ql.parse.ParseUtils", classOf[util.Set[String]]) + .buildStaticChecked() + .invoke[String](excludes) + TGetInfoValue.stringValue(keywords) + } catch { + case _: ReflectiveOperationException => + TGetInfoValue.stringValue("Unimplemented") + } case TGetInfoType.CLI_MAX_COLUMN_NAME_LEN | TGetInfoType.CLI_MAX_SCHEMA_NAME_LEN | TGetInfoType.CLI_MAX_TABLE_NAME_LEN => TGetInfoValue.lenValue(128) diff --git a/externals/kyuubi-hive-sql-engine/src/main/scala/org/apache/kyuubi/engine/hive/session/HiveSessionManager.scala b/externals/kyuubi-hive-sql-engine/src/main/scala/org/apache/kyuubi/engine/hive/session/HiveSessionManager.scala index d09912770cc..ef98f5b0a11 100644 --- a/externals/kyuubi-hive-sql-engine/src/main/scala/org/apache/kyuubi/engine/hive/session/HiveSessionManager.scala +++ b/externals/kyuubi-hive-sql-engine/src/main/scala/org/apache/kyuubi/engine/hive/session/HiveSessionManager.scala @@ -18,22 +18,31 @@ package org.apache.kyuubi.engine.hive.session import java.io.File +import java.util.{List => JList} import java.util.concurrent.Future import scala.collection.JavaConverters._ +import scala.language.reflectiveCalls import org.apache.hadoop.hive.conf.HiveConf +import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.apache.hive.service.cli.{SessionHandle => ImportedSessionHandle} -import org.apache.hive.service.cli.session.{HiveSessionImplwithUGI => ImportedHiveSessionImpl, HiveSessionProxy, SessionManager => ImportedHiveSessionManager} -import org.apache.hive.service.rpc.thrift.TProtocolVersion +import org.apache.hive.service.cli.session.{HiveSessionImpl => ImportedHiveSessionImpl} +import org.apache.hive.service.cli.session.{HiveSessionImplwithUGI => ImportedHiveSessionImplwithUGI} +import org.apache.hive.service.cli.session.{SessionManager => ImportedHiveSessionManager} +import org.apache.hive.service.cli.session.HiveSessionProxy +import org.apache.hive.service.rpc.thrift.{TProtocolVersion => HiveTProtocolVersion} import org.apache.kyuubi.config.KyuubiConf.ENGINE_SHARE_LEVEL import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_SESSION_HANDLE_KEY import org.apache.kyuubi.engine.ShareLevel import org.apache.kyuubi.engine.hive.HiveSQLEngine import org.apache.kyuubi.engine.hive.operation.HiveOperationManager +import org.apache.kyuubi.engine.hive.util.HiveRpcUtils import org.apache.kyuubi.operation.OperationManager import org.apache.kyuubi.session.{Session, SessionHandle, SessionManager} +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TProtocolVersion +import org.apache.kyuubi.util.reflect.DynConstructors class HiveSessionManager(engine: HiveSQLEngine) extends SessionManager("HiveSessionManager") { override protected def isServer: Boolean = false @@ -42,11 +51,14 @@ class HiveSessionManager(engine: HiveSQLEngine) extends SessionManager("HiveSess private val internalSessionManager = new ImportedHiveSessionManager(null) { + var doAsEnabled: Boolean = _ + /** * Avoid unnecessary hive initialization */ override def init(hiveConf: HiveConf): Unit = { // this.hiveConf = hiveConf + this.doAsEnabled = hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_ENABLE_DOAS) } /** @@ -75,21 +87,72 @@ class HiveSessionManager(engine: HiveSQLEngine) extends SessionManager("HiveSess conf: Map[String, String]): Session = { conf.get(KYUUBI_SESSION_HANDLE_KEY).map(SessionHandle.fromUUID).flatMap( getSessionOption).getOrElse { + val hiveProtocol = HiveRpcUtils.asHive(protocol) val sessionHandle = conf.get(KYUUBI_SESSION_HANDLE_KEY).map(SessionHandle.fromUUID).getOrElse(SessionHandle()) - val hive = { - val sessionWithUGI = new ImportedHiveSessionImpl( - new ImportedSessionHandle(sessionHandle.toTSessionHandle, protocol), - protocol, - user, - password, - HiveSQLEngine.hiveConf, - ipAddress, - null, - Seq(ipAddress).asJava) + val hiveTSessionHandle = HiveRpcUtils.asHive(sessionHandle.toTSessionHandle) + val hive = if (internalSessionManager.doAsEnabled) { + val sessionWithUGI = DynConstructors.builder() + .impl( // for Hive 3.1 + classOf[ImportedHiveSessionImplwithUGI], + classOf[ImportedSessionHandle], + classOf[HiveTProtocolVersion], + classOf[String], + classOf[String], + classOf[HiveConf], + classOf[String], + classOf[String], + classOf[JList[String]]) + .impl( // for Hive 2.3 + classOf[ImportedHiveSessionImplwithUGI], + classOf[ImportedSessionHandle], + classOf[HiveTProtocolVersion], + classOf[String], + classOf[String], + classOf[HiveConf], + classOf[String], + classOf[String]) + .build[ImportedHiveSessionImplwithUGI]() + .newInstance( + new ImportedSessionHandle(hiveTSessionHandle, hiveProtocol), + hiveProtocol, + user, + password, + HiveSQLEngine.hiveConf, + ipAddress, + null, + Seq(ipAddress).asJava) val proxy = HiveSessionProxy.getProxy(sessionWithUGI, sessionWithUGI.getSessionUgi) sessionWithUGI.setProxySession(proxy) proxy + } else { + DynConstructors.builder() + .impl( // for Hive 3.1 + classOf[ImportedHiveSessionImpl], + classOf[ImportedSessionHandle], + classOf[HiveTProtocolVersion], + classOf[String], + classOf[String], + classOf[HiveConf], + classOf[String], + classOf[JList[String]]) + .impl( // for Hive 2.3 + classOf[ImportedHiveSessionImpl], + classOf[ImportedSessionHandle], + classOf[HiveTProtocolVersion], + classOf[String], + classOf[String], + classOf[HiveConf], + classOf[String]) + .build[ImportedHiveSessionImpl]() + .newInstance( + new ImportedSessionHandle(hiveTSessionHandle, hiveProtocol), + hiveProtocol, + user, + password, + HiveSQLEngine.hiveConf, + ipAddress, + Seq(ipAddress).asJava) } hive.setSessionManager(internalSessionManager) hive.setOperationManager(internalSessionManager.getOperationManager) @@ -104,7 +167,6 @@ class HiveSessionManager(engine: HiveSQLEngine) extends SessionManager("HiveSess sessionHandle, hive) } - } override def closeSession(sessionHandle: SessionHandle): Unit = { diff --git a/externals/kyuubi-hive-sql-engine/src/main/scala/org/apache/kyuubi/engine/hive/util/HiveRpcUtils.scala b/externals/kyuubi-hive-sql-engine/src/main/scala/org/apache/kyuubi/engine/hive/util/HiveRpcUtils.scala new file mode 100644 index 00000000000..2dab173420d --- /dev/null +++ b/externals/kyuubi-hive-sql-engine/src/main/scala/org/apache/kyuubi/engine/hive/util/HiveRpcUtils.scala @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.engine.hive.util + +import org.apache.hive.service.rpc.thrift.{TFetchOrientation => HiveTFetchOrientation, THandleIdentifier => HiveTHandleIdentifier, TProtocolVersion => HiveTProtocolVersion, TRowSet => HiveTRowSet, TSessionHandle => HiveTSessionHandle, TTableSchema => HiveTTableSchema} +import org.apache.thrift.protocol.{TCompactProtocol => HiveTCompactProtocol} +import org.apache.thrift.transport.{TMemoryBuffer => HiveTMemoryBuffer} + +import org.apache.kyuubi.Logging +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ +import org.apache.kyuubi.shaded.thrift.protocol.TCompactProtocol +import org.apache.kyuubi.shaded.thrift.transport.TMemoryInputTransport + +object HiveRpcUtils extends Logging { + + def asHive(tProtocolVersion: TProtocolVersion): HiveTProtocolVersion = + Option(HiveTProtocolVersion.findByValue(tProtocolVersion.getValue)).getOrElse { + val latestHiveTProtocolVersion = HiveTProtocolVersion.values().last + warn(s"Unsupported TProtocolVersion (Kyuubi): $tProtocolVersion, " + + s"fallback to latest TProtocolVersion (Hive): $latestHiveTProtocolVersion") + latestHiveTProtocolVersion + } + + def asHive(tHandleIdentifier: THandleIdentifier): HiveTHandleIdentifier = + new HiveTHandleIdentifier( + tHandleIdentifier.bufferForGuid(), + tHandleIdentifier.bufferForSecret()) + + def asHive(tSessionHandle: TSessionHandle): HiveTSessionHandle = + new HiveTSessionHandle(asHive(tSessionHandle.getSessionId)) + + def asHive(tFetchOrientation: TFetchOrientation): HiveTFetchOrientation = + Option(HiveTFetchOrientation.findByValue(tFetchOrientation.getValue)).getOrElse { + throw new UnsupportedOperationException( + s"Unsupported TFetchOrientation (Kyuubi): $tFetchOrientation") + } + + def asKyuubi(hiveTTableSchema: HiveTTableSchema): TTableSchema = { + val hiveBuffer = new HiveTMemoryBuffer(128) + hiveTTableSchema.write(new HiveTCompactProtocol(hiveBuffer)) + val bytes = hiveBuffer.getArray + val kyuubiBuffer = new TMemoryInputTransport(bytes) + val kyuubiTTableSchema = new TTableSchema + kyuubiTTableSchema.read(new TCompactProtocol(kyuubiBuffer)) + kyuubiTTableSchema + } + + def asKyuubi(hiveTRowSet: HiveTRowSet): TRowSet = { + val hiveBuffer = new HiveTMemoryBuffer(128) + hiveTRowSet.write(new HiveTCompactProtocol(hiveBuffer)) + val bytes = hiveBuffer.getArray + val kyuubiBuffer = new TMemoryInputTransport(bytes) + val kyuubiTRowSet = new TRowSet + kyuubiTRowSet.read(new TCompactProtocol(kyuubiBuffer)) + kyuubiTRowSet + } +} diff --git a/externals/kyuubi-hive-sql-engine/src/test/scala/org/apache/kyuubi/engine/hive/operation/HiveCatalogDatabaseOperationSuite.scala b/externals/kyuubi-hive-sql-engine/src/test/scala/org/apache/kyuubi/engine/hive/operation/HiveCatalogDatabaseOperationSuite.scala index a63de20c7de..7db2d7fdca3 100644 --- a/externals/kyuubi-hive-sql-engine/src/test/scala/org/apache/kyuubi/engine/hive/operation/HiveCatalogDatabaseOperationSuite.scala +++ b/externals/kyuubi-hive-sql-engine/src/test/scala/org/apache/kyuubi/engine/hive/operation/HiveCatalogDatabaseOperationSuite.scala @@ -23,6 +23,7 @@ import org.apache.kyuubi.Utils import org.apache.kyuubi.config.KyuubiConf.ENGINE_OPERATION_CONVERT_CATALOG_DATABASE_ENABLED import org.apache.kyuubi.engine.hive.HiveSQLEngine import org.apache.kyuubi.operation.HiveJDBCTestHelper +import org.apache.kyuubi.util.command.CommandLineUtils._ class HiveCatalogDatabaseOperationSuite extends HiveJDBCTestHelper { @@ -30,9 +31,9 @@ class HiveCatalogDatabaseOperationSuite extends HiveJDBCTestHelper { val metastore = Utils.createTempDir(prefix = getClass.getSimpleName) metastore.toFile.delete() val args = Array( - "--conf", + CONF, s"javax.jdo.option.ConnectionURL=jdbc:derby:;databaseName=$metastore;create=true", - "--conf", + CONF, s"${ENGINE_OPERATION_CONVERT_CATALOG_DATABASE_ENABLED.key}=true") HiveSQLEngine.main(args) super.beforeAll() diff --git a/externals/kyuubi-hive-sql-engine/src/test/scala/org/apache/kyuubi/engine/hive/operation/HiveOperationSuite.scala b/externals/kyuubi-hive-sql-engine/src/test/scala/org/apache/kyuubi/engine/hive/operation/HiveOperationSuite.scala index eb10e0b4144..53cc9457ae1 100644 --- a/externals/kyuubi-hive-sql-engine/src/test/scala/org/apache/kyuubi/engine/hive/operation/HiveOperationSuite.scala +++ b/externals/kyuubi-hive-sql-engine/src/test/scala/org/apache/kyuubi/engine/hive/operation/HiveOperationSuite.scala @@ -22,6 +22,7 @@ import org.apache.commons.lang3.{JavaVersion, SystemUtils} import org.apache.kyuubi.{HiveEngineTests, KYUUBI_VERSION, Utils} import org.apache.kyuubi.engine.hive.HiveSQLEngine import org.apache.kyuubi.jdbc.hive.KyuubiStatement +import org.apache.kyuubi.util.command.CommandLineUtils._ class HiveOperationSuite extends HiveEngineTests { @@ -29,7 +30,7 @@ class HiveOperationSuite extends HiveEngineTests { val metastore = Utils.createTempDir(prefix = getClass.getSimpleName) metastore.toFile.delete() val args = Array( - "--conf", + CONF, s"javax.jdo.option.ConnectionURL=jdbc:derby:;databaseName=$metastore;create=true") HiveSQLEngine.main(args) super.beforeAll() diff --git a/externals/kyuubi-jdbc-engine/pom.xml b/externals/kyuubi-jdbc-engine/pom.xml index 3c21fed570f..33f84da15c4 100644 --- a/externals/kyuubi-jdbc-engine/pom.xml +++ b/externals/kyuubi-jdbc-engine/pom.xml @@ -58,6 +58,18 @@ test + + com.dimafeng + testcontainers-scala-mysql_${scala.binary.version} + test + + + + com.dimafeng + testcontainers-scala-postgresql_${scala.binary.version} + test + + org.apache.kyuubi ${hive.jdbc.artifact} @@ -76,6 +88,12 @@ phoenix-queryserver-client test + + + org.postgresql + postgresql + test + diff --git a/externals/kyuubi-jdbc-engine/src/main/resources/META-INF/services/org.apache.kyuubi.engine.jdbc.connection.JdbcConnectionProvider b/externals/kyuubi-jdbc-engine/src/main/resources/META-INF/services/org.apache.kyuubi.engine.jdbc.connection.JdbcConnectionProvider index ec68c6884a9..0d8a2c58e5c 100644 --- a/externals/kyuubi-jdbc-engine/src/main/resources/META-INF/services/org.apache.kyuubi.engine.jdbc.connection.JdbcConnectionProvider +++ b/externals/kyuubi-jdbc-engine/src/main/resources/META-INF/services/org.apache.kyuubi.engine.jdbc.connection.JdbcConnectionProvider @@ -6,7 +6,7 @@ # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, @@ -16,4 +16,7 @@ # org.apache.kyuubi.engine.jdbc.doris.DorisConnectionProvider -org.apache.kyuubi.engine.jdbc.phoenix.PhoenixConnectionProvider \ No newline at end of file +org.apache.kyuubi.engine.jdbc.mysql.MySQLConnectionProvider +org.apache.kyuubi.engine.jdbc.phoenix.PhoenixConnectionProvider +org.apache.kyuubi.engine.jdbc.postgresql.PostgreSQLConnectionProvider +org.apache.kyuubi.engine.jdbc.starrocks.StarRocksConnectionProvider diff --git a/externals/kyuubi-jdbc-engine/src/main/resources/META-INF/services/org.apache.kyuubi.engine.jdbc.dialect.JdbcDialect b/externals/kyuubi-jdbc-engine/src/main/resources/META-INF/services/org.apache.kyuubi.engine.jdbc.dialect.JdbcDialect index cf84af61253..c5a75ec9c9f 100644 --- a/externals/kyuubi-jdbc-engine/src/main/resources/META-INF/services/org.apache.kyuubi.engine.jdbc.dialect.JdbcDialect +++ b/externals/kyuubi-jdbc-engine/src/main/resources/META-INF/services/org.apache.kyuubi.engine.jdbc.dialect.JdbcDialect @@ -6,7 +6,7 @@ # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, @@ -16,4 +16,7 @@ # org.apache.kyuubi.engine.jdbc.dialect.DorisDialect -org.apache.kyuubi.engine.jdbc.dialect.PhoenixDialect \ No newline at end of file +org.apache.kyuubi.engine.jdbc.dialect.MySQLDialect +org.apache.kyuubi.engine.jdbc.dialect.PhoenixDialect +org.apache.kyuubi.engine.jdbc.dialect.PostgreSQLDialect +org.apache.kyuubi.engine.jdbc.dialect.StarRocksDialect diff --git a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/connection/ConnectionProvider.scala b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/connection/ConnectionProvider.scala index cb6e4b6c551..f8ec72dca93 100644 --- a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/connection/ConnectionProvider.scala +++ b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/connection/ConnectionProvider.scala @@ -27,7 +27,7 @@ import org.apache.kyuubi.util.reflect.ReflectUtils._ abstract class AbstractConnectionProvider extends Logging { protected val providers = loadProviders() - def getProviderClass(kyuubiConf: KyuubiConf): String = { + def getDriverClass(kyuubiConf: KyuubiConf): String = { val driverClass: Class[_ <: Driver] = Option( DynClasses.builder().impl(kyuubiConf.get(ENGINE_JDBC_DRIVER_CLASS).get) .orNull().build[Driver]()).getOrElse { @@ -38,7 +38,7 @@ abstract class AbstractConnectionProvider extends Logging { } def create(kyuubiConf: KyuubiConf): Connection = { - val filteredProviders = providers.filter(_.canHandle(getProviderClass(kyuubiConf))) + val filteredProviders = providers.filter(_.canHandle(getDriverClass(kyuubiConf))) if (filteredProviders.isEmpty) { throw new IllegalArgumentException( "Empty list of JDBC connection providers for the specified driver and options") @@ -57,10 +57,9 @@ abstract class AbstractConnectionProvider extends Logging { case None => // TODO if (filteredProviders.size != 1) { - throw new IllegalArgumentException( - "JDBC connection initiated but more than one connection provider was found. Use " + - s"${ENGINE_JDBC_CONNECTION_PROVIDER.key} option to select a specific provider. " + - s"Found active providers ${filteredProviders.mkString("[", ", ", "]")}") + warn("JDBC connection initiated but more than one connection provider was found. Use " + + s"${ENGINE_JDBC_CONNECTION_PROVIDER.key} option to select a specific provider. " + + s"Found active providers ${filteredProviders.mkString("[", ", ", "]")}") } filteredProviders.head } diff --git a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/dialect/DorisDialect.scala b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/dialect/DorisDialect.scala index f7c1ace6473..e48a12a8991 100644 --- a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/dialect/DorisDialect.scala +++ b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/dialect/DorisDialect.scala @@ -15,120 +15,15 @@ * limitations under the License. */ package org.apache.kyuubi.engine.jdbc.dialect -import java.sql.{Connection, Statement} -import java.util -import scala.collection.JavaConverters._ -import scala.collection.mutable.ArrayBuffer +import org.apache.kyuubi.engine.jdbc.doris.{DorisSchemaHelper, DorisTRowSetGenerator} +import org.apache.kyuubi.engine.jdbc.schema.{JdbcTRowSetGenerator, SchemaHelper} -import org.apache.commons.lang3.StringUtils +class DorisDialect extends MySQLDialect { -import org.apache.kyuubi.engine.jdbc.doris.{DorisRowSetHelper, DorisSchemaHelper} -import org.apache.kyuubi.engine.jdbc.schema.{RowSetHelper, SchemaHelper} -import org.apache.kyuubi.operation.meta.ResultSetSchemaConstant._ -import org.apache.kyuubi.session.Session + override def name(): String = "doris" -class DorisDialect extends JdbcDialect { + override def getTRowSetGenerator(): JdbcTRowSetGenerator = new DorisTRowSetGenerator - override def createStatement(connection: Connection, fetchSize: Int): Statement = { - val statement = super.createStatement(connection, fetchSize) - statement.setFetchSize(Integer.MIN_VALUE) - statement - } - - override def getTablesQuery( - catalog: String, - schema: String, - tableName: String, - tableTypes: util.List[String]): String = { - val tTypes = - if (tableTypes == null || tableTypes.isEmpty) { - Set("BASE TABLE", "SYSTEM VIEW", "VIEW") - } else { - tableTypes.asScala.toSet - } - val query = new StringBuilder( - s""" - |SELECT TABLE_CATALOG, TABLE_SCHEMA, TABLE_NAME, TABLE_TYPE, ENGINE, - |TABLE_ROWS, AVG_ROW_LENGTH, DATA_LENGTH, - |CREATE_TIME, UPDATE_TIME, TABLE_COLLATION, TABLE_COMMENT - |FROM INFORMATION_SCHEMA.TABLES - |""".stripMargin) - - val filters = ArrayBuffer[String]() - if (StringUtils.isNotBlank(catalog)) { - filters += s"$TABLE_CATALOG = '$catalog'" - } - - if (StringUtils.isNotBlank(schema)) { - filters += s"$TABLE_SCHEMA LIKE '$schema'" - } - - if (StringUtils.isNotBlank(tableName)) { - filters += s"$TABLE_NAME LIKE '$tableName'" - } - - if (tTypes.nonEmpty) { - filters += s"(${tTypes.map { tableType => s"$TABLE_TYPE = '$tableType'" } - .mkString(" OR ")})" - } - - if (filters.nonEmpty) { - query.append(" WHERE ") - query.append(filters.mkString(" AND ")) - } - - query.toString() - } - - override def getColumnsQuery( - session: Session, - catalogName: String, - schemaName: String, - tableName: String, - columnName: String): String = { - val query = new StringBuilder( - """ - |SELECT - |`TABLE_CATALOG`,`TABLE_SCHEMA`,`TABLE_NAME`, `COLUMN_NAME`,`ORDINAL_POSITION`, - |`COLUMN_DEFAULT`,`IS_NULLABLE`,`DATA_TYPE`,`CHARACTER_MAXIMUM_LENGTH`, - |`CHARACTER_OCTET_LENGTH`,`NUMERIC_PRECISION`,`NUMERIC_SCALE`,`DATETIME_PRECISION`, - |`CHARACTER_SET_NAME`,`COLLATION_NAME`,`COLUMN_TYPE`,`COLUMN_KEY`,`EXTRA`,`PRIVILEGES`, - |`COLUMN_COMMENT`,`COLUMN_SIZE`,`DECIMAL_DIGITS`,`GENERATION_EXPRESSION`,`SRS_ID` - |FROM information_schema.columns - |""".stripMargin) - - val filters = ArrayBuffer[String]() - if (StringUtils.isNotEmpty(catalogName)) { - filters += s"$TABLE_CATALOG = '$catalogName'" - } - if (StringUtils.isNotEmpty(schemaName)) { - filters += s"$TABLE_SCHEMA LIKE '$schemaName'" - } - if (StringUtils.isNotEmpty(tableName)) { - filters += s"$TABLE_NAME LIKE '$tableName'" - } - if (StringUtils.isNotEmpty(columnName)) { - filters += s"$COLUMN_NAME LIKE '$columnName'" - } - - if (filters.nonEmpty) { - query.append(" WHERE ") - query.append(filters.mkString(" AND ")) - } - - query.toString() - } - - override def getRowSetHelper(): RowSetHelper = { - new DorisRowSetHelper - } - - override def getSchemaHelper(): SchemaHelper = { - new DorisSchemaHelper - } - - override def name(): String = { - "doris" - } + override def getSchemaHelper(): SchemaHelper = new DorisSchemaHelper } diff --git a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/dialect/JdbcDialect.scala b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/dialect/JdbcDialect.scala index 62e20a1d258..6c2d3b1e09d 100644 --- a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/dialect/JdbcDialect.scala +++ b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/dialect/JdbcDialect.scala @@ -22,7 +22,7 @@ import java.util import org.apache.kyuubi.{KyuubiException, KyuubiSQLException, Logging} import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiConf.{ENGINE_JDBC_CONNECTION_URL, ENGINE_JDBC_SHORT_NAME} -import org.apache.kyuubi.engine.jdbc.schema.{RowSetHelper, SchemaHelper} +import org.apache.kyuubi.engine.jdbc.schema.{JdbcTRowSetGenerator, SchemaHelper} import org.apache.kyuubi.engine.jdbc.util.SupportServiceLoader import org.apache.kyuubi.operation.Operation import org.apache.kyuubi.session.Session @@ -41,11 +41,11 @@ abstract class JdbcDialect extends SupportServiceLoader with Logging { throw KyuubiSQLException.featureNotSupported() } - def getCatalogsOperation(session: Session): Operation = { + def getCatalogsOperation(): String = { throw KyuubiSQLException.featureNotSupported() } - def getSchemasOperation(session: Session): Operation = { + def getSchemasOperation(catalog: String, schema: String): String = { throw KyuubiSQLException.featureNotSupported() } @@ -78,7 +78,7 @@ abstract class JdbcDialect extends SupportServiceLoader with Logging { throw KyuubiSQLException.featureNotSupported() } - def getRowSetHelper(): RowSetHelper + def getTRowSetGenerator(): JdbcTRowSetGenerator def getSchemaHelper(): SchemaHelper } diff --git a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/dialect/MySQLDialect.scala b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/dialect/MySQLDialect.scala new file mode 100644 index 00000000000..e1392436391 --- /dev/null +++ b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/dialect/MySQLDialect.scala @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.jdbc.dialect +import java.sql.{Connection, ResultSet, Statement} +import java.util + +import scala.collection.JavaConverters._ +import scala.collection.mutable.ArrayBuffer + +import org.apache.commons.lang3.StringUtils + +import org.apache.kyuubi.engine.jdbc.mysql.{MySQLSchemaHelper, MySQLTRowSetGenerator} +import org.apache.kyuubi.engine.jdbc.schema.{JdbcTRowSetGenerator, SchemaHelper} +import org.apache.kyuubi.operation.meta.ResultSetSchemaConstant._ +import org.apache.kyuubi.session.Session + +class MySQLDialect extends JdbcDialect { + override def createStatement(connection: Connection, fetchSize: Int): Statement = { + val statement = + connection.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY) + statement.setFetchSize(Integer.MIN_VALUE) + statement + } + + override def getTablesQuery( + catalog: String, + schema: String, + tableName: String, + tableTypes: util.List[String]): String = { + val tTypes = + if (tableTypes == null || tableTypes.isEmpty) { + Set("BASE TABLE", "SYSTEM VIEW", "VIEW") + } else { + tableTypes.asScala.toSet + } + val query = new StringBuilder( + s""" + |SELECT TABLE_CATALOG, TABLE_SCHEMA, TABLE_NAME, TABLE_TYPE, ENGINE, + |TABLE_ROWS, AVG_ROW_LENGTH, DATA_LENGTH, + |CREATE_TIME, UPDATE_TIME, TABLE_COLLATION, TABLE_COMMENT + |FROM INFORMATION_SCHEMA.TABLES + |""".stripMargin) + + val filters = ArrayBuffer[String]() + if (StringUtils.isNotBlank(catalog)) { + filters += s"$TABLE_CATALOG = '$catalog'" + } + + if (StringUtils.isNotBlank(schema)) { + filters += s"$TABLE_SCHEMA LIKE '$schema'" + } + + if (StringUtils.isNotBlank(tableName)) { + filters += s"$TABLE_NAME LIKE '$tableName'" + } + + if (tTypes.nonEmpty) { + filters += s"(${ + tTypes.map { tableType => s"$TABLE_TYPE = '$tableType'" } + .mkString(" OR ") + })" + } + + if (filters.nonEmpty) { + query.append(" WHERE ") + query.append(filters.mkString(" AND ")) + } + + query.toString() + } + + override def getColumnsQuery( + session: Session, + catalogName: String, + schemaName: String, + tableName: String, + columnName: String): String = { + val query = new StringBuilder( + """ + |SELECT + |`TABLE_CATALOG`,`TABLE_SCHEMA`,`TABLE_NAME`, `COLUMN_NAME`,`ORDINAL_POSITION`, + |`COLUMN_DEFAULT`,`IS_NULLABLE`,`DATA_TYPE`,`CHARACTER_MAXIMUM_LENGTH`, + |`CHARACTER_OCTET_LENGTH`,`NUMERIC_PRECISION`,`NUMERIC_SCALE`,`DATETIME_PRECISION`, + |`CHARACTER_SET_NAME`,`COLLATION_NAME`,`COLUMN_TYPE`,`COLUMN_KEY`,`EXTRA`,`PRIVILEGES`, + |`COLUMN_COMMENT`,`GENERATION_EXPRESSION` + |FROM information_schema.columns + |""".stripMargin) + + val filters = ArrayBuffer[String]() + if (StringUtils.isNotEmpty(catalogName)) { + filters += s"$TABLE_CATALOG = '$catalogName'" + } + if (StringUtils.isNotEmpty(schemaName)) { + filters += s"$TABLE_SCHEMA LIKE '$schemaName'" + } + if (StringUtils.isNotEmpty(tableName)) { + filters += s"$TABLE_NAME LIKE '$tableName'" + } + if (StringUtils.isNotEmpty(columnName)) { + filters += s"$COLUMN_NAME LIKE '$columnName'" + } + + if (filters.nonEmpty) { + query.append(" WHERE ") + query.append(filters.mkString(" AND ")) + } + + query.toString() + } + + override def getTRowSetGenerator(): JdbcTRowSetGenerator = new MySQLTRowSetGenerator + + override def getSchemaHelper(): SchemaHelper = { + new MySQLSchemaHelper + } + + override def name(): String = { + "mysql" + } +} diff --git a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/dialect/PhoenixDialect.scala b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/dialect/PhoenixDialect.scala index 4c8e8f26549..61440ac501e 100644 --- a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/dialect/PhoenixDialect.scala +++ b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/dialect/PhoenixDialect.scala @@ -22,8 +22,8 @@ import scala.collection.mutable.ArrayBuffer import org.apache.commons.lang3.StringUtils -import org.apache.kyuubi.engine.jdbc.phoenix.{PhoenixRowSetHelper, PhoenixSchemaHelper} -import org.apache.kyuubi.engine.jdbc.schema.{RowSetHelper, SchemaHelper} +import org.apache.kyuubi.engine.jdbc.phoenix.{PhoenixSchemaHelper, PhoenixTRowSetGenerator} +import org.apache.kyuubi.engine.jdbc.schema.{JdbcTRowSetGenerator, SchemaHelper} import org.apache.kyuubi.operation.meta.ResultSetSchemaConstant._ import org.apache.kyuubi.session.Session @@ -100,9 +100,7 @@ class PhoenixDialect extends JdbcDialect { query.toString() } - override def getRowSetHelper(): RowSetHelper = { - new PhoenixRowSetHelper - } + override def getTRowSetGenerator(): JdbcTRowSetGenerator = new PhoenixTRowSetGenerator override def getSchemaHelper(): SchemaHelper = { new PhoenixSchemaHelper diff --git a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/dialect/PostgreSQLDialect.scala b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/dialect/PostgreSQLDialect.scala new file mode 100644 index 00000000000..d3d4c8297b2 --- /dev/null +++ b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/dialect/PostgreSQLDialect.scala @@ -0,0 +1,193 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.jdbc.dialect + +import java.sql.{Connection, ResultSet, Statement} +import java.util + +import scala.collection.JavaConverters._ +import scala.collection.mutable.ArrayBuffer + +import org.apache.commons.lang3.StringUtils + +import org.apache.kyuubi.KyuubiSQLException +import org.apache.kyuubi.engine.jdbc.postgresql.{PostgreSQLSchemaHelper, PostgreSQLTRowSetGenerator} +import org.apache.kyuubi.engine.jdbc.schema.{JdbcTRowSetGenerator, SchemaHelper} +import org.apache.kyuubi.operation.Operation +import org.apache.kyuubi.operation.meta.ResultSetSchemaConstant._ +import org.apache.kyuubi.session.Session + +class PostgreSQLDialect extends JdbcDialect { + + override def createStatement(connection: Connection, fetchSize: Int): Statement = { + val statement = + connection.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY) + if (connection.getAutoCommit) { + statement.setFetchSize(fetchSize) + } + statement + } + + override def getCatalogsOperation(): String = "SELECT CATALOG_NAME " + + "FROM INFORMATION_SCHEMA.INFORMATION_SCHEMA_CATALOG_NAME" + + override def getSchemasOperation( + catalog: String, + schema: String): String = { + val query = new StringBuilder( + s""" + |SELECT CATALOG_NAME, SCHEMA_NAME, SCHEMA_OWNER, + |DEFAULT_CHARACTER_SET_CATALOG, DEFAULT_CHARACTER_SET_SCHEMA, + |DEFAULT_CHARACTER_SET_NAME, SQL_PATH + |FROM INFORMATION_SCHEMA.SCHEMATA + |""".stripMargin) + + val filters = ArrayBuffer[String]() + if (StringUtils.isNotBlank(catalog)) { + filters += s"catalog_name LIKE '$catalog'" + } + + if (StringUtils.isNotBlank(schema)) { + filters += s"schema_name LIKE '$schema'" + } + + if (filters.nonEmpty) { + query.append(" WHERE ") + query.append(filters.mkString(" AND ")) + } + + query.toString() + } + + override def getTablesQuery( + catalog: String, + schema: String, + tableName: String, + tableTypes: util.List[String]): String = { + val tTypes = + if (tableTypes == null || tableTypes.isEmpty) { + Set("BASE TABLE", "VIEW") + } else { + tableTypes.asScala.toSet + } + val query = new StringBuilder( + s""" + |SELECT TABLE_CATALOG, TABLE_SCHEMA, TABLE_NAME, TABLE_TYPE, + |SELF_REFERENCING_COLUMN_NAME, REFERENCE_GENERATION, USER_DEFINED_TYPE_CATALOG, + |USER_DEFINED_TYPE_SCHEMA,USER_DEFINED_TYPE_NAME, + |IS_INSERTABLE_INTO,IS_TYPED,COMMIT_ACTION + |FROM INFORMATION_SCHEMA.TABLES + |""".stripMargin) + + val filters = ArrayBuffer[String]() + if (StringUtils.isNotBlank(catalog)) { + filters += s"$TABLE_CATALOG LIKE '$catalog'" + } + + if (StringUtils.isNotBlank(schema)) { + filters += s"$TABLE_SCHEMA LIKE '$schema'" + } + + if (StringUtils.isNotBlank(tableName)) { + filters += s"$TABLE_NAME LIKE '$tableName'" + } + + if (tTypes.nonEmpty) { + filters += s"(${ + tTypes.map { tableType => s"$TABLE_TYPE = '$tableType'" } + .mkString(" OR ") + })" + } + + if (filters.nonEmpty) { + query.append(" WHERE ") + query.append(filters.mkString(" AND ")) + } + + query.toString() + } + + override def getTableTypesOperation(session: Session): Operation = { + throw KyuubiSQLException.featureNotSupported() + } + + override def getColumnsQuery( + session: Session, + catalogName: String, + schemaName: String, + tableName: String, + columnName: String): String = { + val query = new StringBuilder( + """ + |SELECT TABLE_CATALOG, TABLE_SCHEMA, TABLE_NAME, COLUMN_NAME, ORDINAL_POSITION, + |COLUMN_DEFAULT, IS_NULLABLE, DATA_TYPE, CHARACTER_MAXIMUM_LENGTH, + |CHARACTER_OCTET_LENGTH, NUMERIC_PRECISION, NUMERIC_PRECISION_RADIX, + |NUMERIC_SCALE, DATETIME_PRECISION, INTERVAL_TYPE, INTERVAL_PRECISION, + |CHARACTER_SET_CATALOG, CHARACTER_SET_SCHEMA, CHARACTER_SET_NAME, + |COLLATION_CATALOG, COLLATION_SCHEMA, COLLATION_NAME, DOMAIN_CATALOG, + |DOMAIN_SCHEMA, DOMAIN_NAME, UDT_CATALOG, UDT_SCHEMA, UDT_NAME, SCOPE_CATALOG, + |SCOPE_SCHEMA, SCOPE_NAME, MAXIMUM_CARDINALITY, DTD_IDENTIFIER, + |IS_SELF_REFERENCING, IS_IDENTITY, IDENTITY_GENERATION, IDENTITY_START, + |IDENTITY_INCREMENT, IDENTITY_MAXIMUM, IDENTITY_MINIMUM, IDENTITY_CYCLE, + |IS_GENERATED, GENERATION_EXPRESSION, IS_UPDATABLE + |FROM INFORMATION_SCHEMA.COLUMNS + |""".stripMargin) + + val filters = ArrayBuffer[String]() + if (StringUtils.isNotEmpty(catalogName)) { + filters += s"$TABLE_CATALOG LIKE '$catalogName'" + } + if (StringUtils.isNotEmpty(schemaName)) { + filters += s"$TABLE_SCHEMA LIKE '$schemaName'" + } + if (StringUtils.isNotEmpty(tableName)) { + filters += s"$TABLE_NAME LIKE '$tableName'" + } + if (StringUtils.isNotEmpty(columnName)) { + filters += s"$COLUMN_NAME LIKE '$columnName'" + } + + if (filters.nonEmpty) { + query.append(" WHERE ") + query.append(filters.mkString(" AND ")) + } + + query.toString() + } + + override def getFunctionsOperation(session: Session): Operation = { + throw KyuubiSQLException.featureNotSupported() + } + + override def getPrimaryKeysOperation(session: Session): Operation = { + throw KyuubiSQLException.featureNotSupported() + } + + override def getCrossReferenceOperation(session: Session): Operation = { + throw KyuubiSQLException.featureNotSupported() + } + + override def getTRowSetGenerator(): JdbcTRowSetGenerator = new PostgreSQLTRowSetGenerator + + override def getSchemaHelper(): SchemaHelper = { + new PostgreSQLSchemaHelper + } + + override def name(): String = { + "postgresql" + } +} diff --git a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/dialect/StarRocksDialect.scala b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/dialect/StarRocksDialect.scala new file mode 100644 index 00000000000..aa4054eaa7c --- /dev/null +++ b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/dialect/StarRocksDialect.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.jdbc.dialect + +import org.apache.kyuubi.engine.jdbc.schema.{JdbcTRowSetGenerator, SchemaHelper} +import org.apache.kyuubi.engine.jdbc.starrocks.{StarRocksSchemaHelper, StarRocksTRowSetGenerator} + +class StarRocksDialect extends MySQLDialect { + override def name(): String = "starrocks" + + override def getTRowSetGenerator(): JdbcTRowSetGenerator = new StarRocksTRowSetGenerator + + override def getSchemaHelper(): SchemaHelper = new StarRocksSchemaHelper + +} diff --git a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/doris/DorisConnectionProvider.scala b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/doris/DorisConnectionProvider.scala index 291e85d2d67..c38bf784561 100644 --- a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/doris/DorisConnectionProvider.scala +++ b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/doris/DorisConnectionProvider.scala @@ -16,9 +16,9 @@ */ package org.apache.kyuubi.engine.jdbc.doris -import org.apache.kyuubi.engine.jdbc.mysql.Mysql8ConnectionProvider +import org.apache.kyuubi.engine.jdbc.mysql.MySQL8ConnectionProvider -class DorisConnectionProvider extends Mysql8ConnectionProvider { +class DorisConnectionProvider extends MySQL8ConnectionProvider { override val name: String = classOf[DorisConnectionProvider].getSimpleName } diff --git a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/doris/DorisSchemaHelper.scala b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/doris/DorisSchemaHelper.scala index b323d373142..a37ba4a39ac 100644 --- a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/doris/DorisSchemaHelper.scala +++ b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/doris/DorisSchemaHelper.scala @@ -16,13 +16,6 @@ */ package org.apache.kyuubi.engine.jdbc.doris -import org.apache.hive.service.rpc.thrift._ +import org.apache.kyuubi.engine.jdbc.mysql.MySQLSchemaHelper -import org.apache.kyuubi.engine.jdbc.schema.SchemaHelper - -class DorisSchemaHelper extends SchemaHelper { - - override def tinyIntToTTypeId: TTypeId = TTypeId.INT_TYPE - - override def smallIntToTTypeId: TTypeId = TTypeId.INT_TYPE -} +class DorisSchemaHelper extends MySQLSchemaHelper {} diff --git a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/doris/DorisTRowSetGenerator.scala b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/doris/DorisTRowSetGenerator.scala new file mode 100644 index 00000000000..b77a7b31096 --- /dev/null +++ b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/doris/DorisTRowSetGenerator.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.jdbc.doris + +import org.apache.kyuubi.engine.jdbc.mysql.MySQLTRowSetGenerator + +class DorisTRowSetGenerator extends MySQLTRowSetGenerator {} diff --git a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/mysql/MySQL8ConnectionProvider.scala b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/mysql/MySQL8ConnectionProvider.scala new file mode 100644 index 00000000000..563d5758bdc --- /dev/null +++ b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/mysql/MySQL8ConnectionProvider.scala @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.jdbc.mysql + +import org.apache.kyuubi.engine.jdbc.connection.JdbcConnectionProvider + +class MySQL8ConnectionProvider extends JdbcConnectionProvider { + + override val name: String = classOf[MySQL8ConnectionProvider].getSimpleName + + override val driverClass: String = MySQL8ConnectionProvider.driverClass + + override def canHandle(providerClass: String): Boolean = { + driverClass.equalsIgnoreCase(providerClass) + } + +} + +object MySQL8ConnectionProvider { + val driverClass: String = "com.mysql.cj.jdbc.Driver" +} diff --git a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/mysql/MySQLConnectionProvider.scala b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/mysql/MySQLConnectionProvider.scala new file mode 100644 index 00000000000..bd57d1f53e5 --- /dev/null +++ b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/mysql/MySQLConnectionProvider.scala @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.jdbc.mysql + +class MySQLConnectionProvider extends MySQL8ConnectionProvider { + + override val name: String = classOf[MySQLConnectionProvider].getSimpleName +} diff --git a/kyuubi-server/web-ui/src/router/contact/index.ts b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/mysql/MySQLSchemaHelper.scala similarity index 84% rename from kyuubi-server/web-ui/src/router/contact/index.ts rename to externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/mysql/MySQLSchemaHelper.scala index a83c653ecb9..b7351b26b3e 100644 --- a/kyuubi-server/web-ui/src/router/contact/index.ts +++ b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/mysql/MySQLSchemaHelper.scala @@ -14,13 +14,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +package org.apache.kyuubi.engine.jdbc.mysql -const routes = [ - { - path: '/contact', - name: 'contact', - component: () => import('@/views/contact/index.vue') - } -] +import org.apache.kyuubi.engine.jdbc.schema.SchemaHelper -export default routes +class MySQLSchemaHelper extends SchemaHelper {} diff --git a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/mysql/MySQLTRowSetGenerator.scala b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/mysql/MySQLTRowSetGenerator.scala new file mode 100644 index 00000000000..c029131fa5a --- /dev/null +++ b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/mysql/MySQLTRowSetGenerator.scala @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.jdbc.mysql + +import java.lang.{Long => JLong} +import java.sql.Types + +import org.apache.kyuubi.engine.jdbc.schema.DefaultJdbcTRowSetGenerator +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TColumn, TColumnValue} + +class MySQLTRowSetGenerator extends DefaultJdbcTRowSetGenerator { + + override def toTinyIntTColumn(rows: Seq[Seq[_]], ordinal: Int): TColumn = + asIntegerTColumn(rows, ordinal) + + override def toSmallIntTColumn(rows: Seq[Seq[_]], ordinal: Int): TColumn = + asIntegerTColumn(rows, ordinal) + + override def toTinyIntTColumnValue(row: Seq[_], ordinal: Int): TColumnValue = + asIntegerTColumnValue(row, ordinal) + + override def toSmallIntTColumnValue(row: Seq[_], ordinal: Int): TColumnValue = + asIntegerTColumnValue(row, ordinal) + + override def toIntegerTColumn(rows: Seq[Seq[_]], ordinal: Int): TColumn = { + val colHead = if (rows.isEmpty) None else rows.head(ordinal) + colHead match { + case _: Integer => super.toIntegerTColumn(rows, ordinal) + case _: JLong => super.toBigIntTColumn(rows, ordinal) + case _ => super.toDefaultTColumn(rows, ordinal, Types.INTEGER) + } + } + + override def toIntegerTColumnValue(row: Seq[_], ordinal: Int): TColumnValue = { + row(ordinal) match { + case _: Integer => super.toIntegerTColumnValue(row, ordinal) + case _: JLong => super.toBigIntTColumnValue(row, ordinal) + case _ => super.toDefaultTColumnValue(row, ordinal, Types.INTEGER) + } + } + + override def toBigIntTColumn(rows: Seq[Seq[_]], ordinal: Int): TColumn = { + val colHead = if (rows.isEmpty) None else rows.head(ordinal) + colHead match { + case _: JLong => super.toBigIntTColumn(rows, ordinal) + case _ => super.toDefaultTColumn(rows, ordinal, Types.BIGINT) + } + } + + override def toBigIntTColumnValue(row: Seq[_], ordinal: Int): TColumnValue = + row(ordinal) match { + case _: JLong => super.toBigIntTColumnValue(row, ordinal) + case _ => super.toDefaultTColumnValue(row, ordinal, Types.BIGINT) + } +} diff --git a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/operation/ExecuteStatement.scala b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/operation/ExecuteStatement.scala index ef49f2b3086..4292c320b30 100644 --- a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/operation/ExecuteStatement.scala +++ b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/operation/ExecuteStatement.scala @@ -18,11 +18,13 @@ package org.apache.kyuubi.engine.jdbc.operation import java.sql.{Connection, Statement, Types} -import org.apache.kyuubi.Logging +import org.apache.kyuubi.{KyuubiSQLException, Logging} import org.apache.kyuubi.engine.jdbc.schema.{Column, Row, Schema} import org.apache.kyuubi.engine.jdbc.session.JdbcSessionImpl import org.apache.kyuubi.engine.jdbc.util.ResultSetWrapper -import org.apache.kyuubi.operation.{ArrayFetchIterator, IterableFetchIterator, OperationState} +import org.apache.kyuubi.operation.{ArrayFetchIterator, FetchOrientation, IterableFetchIterator, OperationState} +import org.apache.kyuubi.operation.FetchOrientation.FetchOrientation +import org.apache.kyuubi.operation.OperationState.OperationState import org.apache.kyuubi.operation.log.OperationLog import org.apache.kyuubi.session.Session @@ -31,12 +33,15 @@ class ExecuteStatement( override val statement: String, override val shouldRunAsync: Boolean, queryTimeout: Long, - incrementalCollect: Boolean) + incrementalCollect: Boolean, + fetchSize: Int) extends JdbcOperation(session) with Logging { private val operationLog: OperationLog = OperationLog.createOperationLog(session, getHandle) override def getOperationLog: Option[OperationLog] = Option(operationLog) + @volatile private var jdbcStatement: Statement = _ + override protected def runInternal(): Unit = { addTimeoutMonitor(queryTimeout) if (shouldRunAsync) { @@ -55,10 +60,9 @@ class ExecuteStatement( private def executeStatement(): Unit = { setState(OperationState.RUNNING) - var jdbcStatement: Statement = null try { val connection: Connection = session.asInstanceOf[JdbcSessionImpl].sessionConnection - jdbcStatement = dialect.createStatement(connection) + jdbcStatement = dialect.createStatement(connection, fetchSize) val hasResult = jdbcStatement.execute(statement) if (hasResult) { val resultSetWrapper = new ResultSetWrapper(jdbcStatement) @@ -67,9 +71,12 @@ class ExecuteStatement( iter = if (incrementalCollect) { info("Execute in incremental collect mode") - new IterableFetchIterator(resultSetWrapper.toIterable) + new IterableFetchIterator(new Iterable[Row] { + override def iterator: Iterator[Row] = resultSetWrapper + }) } else { warn(s"Execute in full collect mode") + jdbcStatement.closeOnCompletion() new ArrayFetchIterator(resultSetWrapper.toArray()) } } else { @@ -89,10 +96,27 @@ class ExecuteStatement( } catch { onError(true) } finally { - if (jdbcStatement != null) { - jdbcStatement.closeOnCompletion() - } shutdownTimeoutMonitor() } } + + override def validateFetchOrientation(order: FetchOrientation): Unit = { + if (incrementalCollect && order != FetchOrientation.FETCH_NEXT) { + throw KyuubiSQLException(s"The fetch type $order is not supported" + + " of incremental collect mode.") + } + super.validateFetchOrientation(order) + } + + override def cleanup(targetState: OperationState): Unit = withLockRequired { + try { + super.cleanup(targetState) + } finally { + if (jdbcStatement != null && !jdbcStatement.isClosed) { + jdbcStatement.close() + jdbcStatement = null + } + } + } + } diff --git a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/operation/JdbcOperation.scala b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/operation/JdbcOperation.scala index 2ca17375717..5e5819adb55 100644 --- a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/operation/JdbcOperation.scala +++ b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/operation/JdbcOperation.scala @@ -16,8 +16,6 @@ */ package org.apache.kyuubi.engine.jdbc.operation -import org.apache.hive.service.rpc.thrift.{TFetchResultsResp, TGetResultSetMetadataResp, TRowSet} - import org.apache.kyuubi.{KyuubiSQLException, Utils} import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.engine.jdbc.dialect.{JdbcDialect, JdbcDialects} @@ -25,6 +23,7 @@ import org.apache.kyuubi.engine.jdbc.schema.{Row, Schema} import org.apache.kyuubi.operation.{AbstractOperation, FetchIterator, OperationState} import org.apache.kyuubi.operation.FetchOrientation.{FETCH_FIRST, FETCH_NEXT, FETCH_PRIOR, FetchOrientation} import org.apache.kyuubi.session.Session +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TFetchResultsResp, TGetResultSetMetadataResp, TRowSet} abstract class JdbcOperation(session: Session) extends AbstractOperation(session) { @@ -36,10 +35,13 @@ abstract class JdbcOperation(session: Session) extends AbstractOperation(session protected lazy val dialect: JdbcDialect = JdbcDialects.get(conf) + def validateFetchOrientation(order: FetchOrientation): Unit = + validateDefaultFetchOrientation(order) + override def getNextRowSetInternal( order: FetchOrientation, rowSetSize: Int): TFetchResultsResp = { - validateDefaultFetchOrientation(order) + validateFetchOrientation(order) assertState(OperationState.FINISHED) setHasResultSet(true) order match { @@ -98,11 +100,8 @@ abstract class JdbcOperation(session: Session) extends AbstractOperation(session override protected def afterRun(): Unit = {} protected def toTRowSet(taken: Iterator[Row]): TRowSet = { - val rowSetHelper = dialect.getRowSetHelper() - rowSetHelper.toTRowSet( - taken.toList.map(_.values), - schema.columns, - getProtocolVersion) + dialect.getTRowSetGenerator() + .toTRowSet(taken.toSeq.map(_.values), schema.columns, getProtocolVersion) } override def getResultSetMetadata: TGetResultSetMetadataResp = { diff --git a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/operation/JdbcOperationManager.scala b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/operation/JdbcOperationManager.scala index d10bb34cfb6..7ced3e6b87c 100644 --- a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/operation/JdbcOperationManager.scala +++ b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/operation/JdbcOperationManager.scala @@ -20,7 +20,7 @@ import java.util import org.apache.kyuubi.KyuubiSQLException import org.apache.kyuubi.config.KyuubiConf -import org.apache.kyuubi.config.KyuubiConf.OPERATION_INCREMENTAL_COLLECT +import org.apache.kyuubi.config.KyuubiConf.{ENGINE_JDBC_FETCH_SIZE, OPERATION_INCREMENTAL_COLLECT} import org.apache.kyuubi.engine.jdbc.dialect.{JdbcDialect, JdbcDialects} import org.apache.kyuubi.engine.jdbc.session.JdbcSessionImpl import org.apache.kyuubi.engine.jdbc.util.SupportServiceLoader @@ -44,13 +44,16 @@ class JdbcOperationManager(conf: KyuubiConf) extends OperationManager("JdbcOpera val incrementalCollect = normalizedConf.get(OPERATION_INCREMENTAL_COLLECT.key).map( _.toBoolean).getOrElse( session.sessionManager.getConf.get(OPERATION_INCREMENTAL_COLLECT)) + val fetchSize = normalizedConf.get(ENGINE_JDBC_FETCH_SIZE.key).map(_.toInt) + .getOrElse(session.sessionManager.getConf.get(ENGINE_JDBC_FETCH_SIZE)) val executeStatement = new ExecuteStatement( session, statement, runAsync, queryTimeout, - incrementalCollect) + incrementalCollect, + fetchSize) addOperation(executeStatement) } @@ -60,16 +63,26 @@ class JdbcOperationManager(conf: KyuubiConf) extends OperationManager("JdbcOpera } override def newGetCatalogsOperation(session: Session): Operation = { - val operation = dialect.getCatalogsOperation(session) - addOperation(operation) + val query = dialect.getCatalogsOperation() + val normalizedConf = session.asInstanceOf[JdbcSessionImpl].normalizedConf + val fetchSize = normalizedConf.get(ENGINE_JDBC_FETCH_SIZE.key).map(_.toInt) + .getOrElse(session.sessionManager.getConf.get(ENGINE_JDBC_FETCH_SIZE)) + val executeStatement = + new ExecuteStatement(session, query, false, 0L, true, fetchSize) + addOperation(executeStatement) } override def newGetSchemasOperation( session: Session, catalog: String, schema: String): Operation = { - val operation = dialect.getSchemasOperation(session) - addOperation(operation) + val query = dialect.getSchemasOperation(catalog, schema) + val normalizedConf = session.asInstanceOf[JdbcSessionImpl].normalizedConf + val fetchSize = normalizedConf.get(ENGINE_JDBC_FETCH_SIZE.key).map(_.toInt) + .getOrElse(session.sessionManager.getConf.get(ENGINE_JDBC_FETCH_SIZE)) + val executeStatement = + new ExecuteStatement(session, query, false, 0L, true, fetchSize) + addOperation(executeStatement) } override def newGetTablesOperation( @@ -79,8 +92,11 @@ class JdbcOperationManager(conf: KyuubiConf) extends OperationManager("JdbcOpera tableName: String, tableTypes: util.List[String]): Operation = { val query = dialect.getTablesQuery(catalogName, schemaName, tableName, tableTypes) + val normalizedConf = session.asInstanceOf[JdbcSessionImpl].normalizedConf + val fetchSize = normalizedConf.get(ENGINE_JDBC_FETCH_SIZE.key).map(_.toInt) + .getOrElse(session.sessionManager.getConf.get(ENGINE_JDBC_FETCH_SIZE)) val executeStatement = - new ExecuteStatement(session, query, false, 0L, true) + new ExecuteStatement(session, query, false, 0L, true, fetchSize) addOperation(executeStatement) } @@ -96,8 +112,12 @@ class JdbcOperationManager(conf: KyuubiConf) extends OperationManager("JdbcOpera tableName: String, columnName: String): Operation = { val query = dialect.getColumnsQuery(session, catalogName, schemaName, tableName, columnName) + val normalizedConf = session.asInstanceOf[JdbcSessionImpl].normalizedConf + val fetchSize = normalizedConf.get(ENGINE_JDBC_FETCH_SIZE.key).map( + _.toInt).getOrElse( + session.sessionManager.getConf.get(ENGINE_JDBC_FETCH_SIZE)) val executeStatement = - new ExecuteStatement(session, query, false, 0L, true) + new ExecuteStatement(session, query, false, 0L, true, fetchSize) addOperation(executeStatement) } diff --git a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/phoenix/PhoenixRowSetHelper.scala b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/phoenix/PhoenixTRowSetGenerator.scala similarity index 85% rename from externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/phoenix/PhoenixRowSetHelper.scala rename to externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/phoenix/PhoenixTRowSetGenerator.scala index 67d9d09e529..f8740fce483 100644 --- a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/phoenix/PhoenixRowSetHelper.scala +++ b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/phoenix/PhoenixTRowSetGenerator.scala @@ -16,6 +16,6 @@ */ package org.apache.kyuubi.engine.jdbc.phoenix -import org.apache.kyuubi.engine.jdbc.schema.RowSetHelper +import org.apache.kyuubi.engine.jdbc.schema.DefaultJdbcTRowSetGenerator -class PhoenixRowSetHelper extends RowSetHelper {} +class PhoenixTRowSetGenerator extends DefaultJdbcTRowSetGenerator {} diff --git a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/mysql/Mysql8ConnectionProvider.scala b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/postgresql/PostgreSQLConnectionProvider.scala similarity index 79% rename from externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/mysql/Mysql8ConnectionProvider.scala rename to externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/postgresql/PostgreSQLConnectionProvider.scala index 8dc930e4889..3fb392795d1 100644 --- a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/mysql/Mysql8ConnectionProvider.scala +++ b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/postgresql/PostgreSQLConnectionProvider.scala @@ -14,15 +14,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.kyuubi.engine.jdbc.mysql +package org.apache.kyuubi.engine.jdbc.postgresql import org.apache.kyuubi.engine.jdbc.connection.JdbcConnectionProvider -class Mysql8ConnectionProvider extends JdbcConnectionProvider { +class PostgreSQLConnectionProvider extends JdbcConnectionProvider { - override val name: String = classOf[Mysql8ConnectionProvider].getSimpleName + override val name: String = classOf[PostgreSQLConnectionProvider].getSimpleName - override val driverClass: String = "com.mysql.cj.jdbc.Driver" + override val driverClass: String = "org.postgresql.Driver" override def canHandle(providerClass: String): Boolean = { driverClass.equalsIgnoreCase(providerClass) diff --git a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/postgresql/PostgreSQLSchemaHelper.scala b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/postgresql/PostgreSQLSchemaHelper.scala new file mode 100644 index 00000000000..47ad314d33e --- /dev/null +++ b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/postgresql/PostgreSQLSchemaHelper.scala @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.jdbc.postgresql + +import org.apache.kyuubi.engine.jdbc.schema.SchemaHelper +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ + +class PostgreSQLSchemaHelper extends SchemaHelper { + + override def smallIntToTTypeId: TTypeId = TTypeId.INT_TYPE +} diff --git a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/postgresql/PostgreSQLTRowSetGenerator.scala b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/postgresql/PostgreSQLTRowSetGenerator.scala new file mode 100644 index 00000000000..104b3b15dde --- /dev/null +++ b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/postgresql/PostgreSQLTRowSetGenerator.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.jdbc.postgresql + +import org.apache.kyuubi.engine.jdbc.schema.DefaultJdbcTRowSetGenerator +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TColumn, TColumnValue} + +class PostgreSQLTRowSetGenerator extends DefaultJdbcTRowSetGenerator { + + override def toSmallIntTColumn(rows: Seq[Seq[_]], ordinal: Int): TColumn = + toIntegerTColumn(rows, ordinal) + + override def toSmallIntTColumnValue(row: Seq[_], ordinal: Int): TColumnValue = + toIntegerTColumnValue(row, ordinal) +} diff --git a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/schema/DefaultJdbcTRowSetGenerator.scala b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/schema/DefaultJdbcTRowSetGenerator.scala new file mode 100644 index 00000000000..2c9ddd6da3e --- /dev/null +++ b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/schema/DefaultJdbcTRowSetGenerator.scala @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.jdbc.schema + +import java.sql.Date +import java.sql.Types._ +import java.time.LocalDateTime + +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ +import org.apache.kyuubi.util.RowSetUtils.{formatDate, formatLocalDateTime} + +class DefaultJdbcTRowSetGenerator extends JdbcTRowSetGenerator { + + override def toTColumn(rows: Seq[Seq[_]], ordinal: Int, sqlType: Int): TColumn = + sqlType match { + case BIT => toBitTColumn(rows, ordinal) + case TINYINT => toTinyIntTColumn(rows, ordinal) + case SMALLINT => toSmallIntTColumn(rows, ordinal) + case INTEGER => toIntegerTColumn(rows, ordinal) + case BIGINT => toBigIntTColumn(rows, ordinal) + case REAL => toRealTColumn(rows, ordinal) + case DOUBLE => toDoubleTColumn(rows, ordinal) + case CHAR => toCharTColumn(rows, ordinal) + case VARCHAR => toVarcharTColumn(rows, ordinal) + case _ => toDefaultTColumn(rows, ordinal, sqlType) + } + + override def toTColumnValue(row: Seq[_], ordinal: Int, types: Seq[Column]): TColumnValue = { + getColumnType(types, ordinal) match { + case BIT => toBitTColumnValue(row, ordinal) + case TINYINT => toTinyIntTColumnValue(row, ordinal) + case SMALLINT => toSmallIntTColumnValue(row, ordinal) + case INTEGER => toIntegerTColumnValue(row, ordinal) + case BIGINT => toBigIntTColumnValue(row, ordinal) + case REAL => toRealTColumnValue(row, ordinal) + case DOUBLE => toDoubleTColumnValue(row, ordinal) + case CHAR => toCharTColumnValue(row, ordinal) + case VARCHAR => toVarcharTColumnValue(row, ordinal) + case otherType => toDefaultTColumnValue(row, ordinal, otherType) + } + } + + def toDefaultTColumn(rows: Seq[Seq[_]], ordinal: Int, sqlType: Int): TColumn = + asStringTColumn( + rows, + ordinal, + convertFunc = (row, ordinal) => toHiveString(row(ordinal), sqlType)) + + def toBitTColumn(rows: Seq[Seq[_]], ordinal: Int): TColumn = + asBooleanTColumn(rows, ordinal) + + def toTinyIntTColumn(rows: Seq[Seq[_]], ordinal: Int): TColumn = + asShortTColumn(rows, ordinal) + + def toSmallIntTColumn(rows: Seq[Seq[_]], ordinal: Int): TColumn = + asShortTColumn(rows, ordinal) + + def toIntegerTColumn(rows: Seq[Seq[_]], ordinal: Int): TColumn = + asIntegerTColumn(rows, ordinal) + + def toBigIntTColumn(rows: Seq[Seq[_]], ordinal: Int): TColumn = + asLongTColumn(rows, ordinal) + + def toRealTColumn(rows: Seq[Seq[_]], ordinal: Int): TColumn = + asFloatTColumn(rows, ordinal) + + def toDoubleTColumn(rows: Seq[Seq[_]], ordinal: Int): TColumn = + asDoubleTColumn(rows, ordinal) + + def toCharTColumn(rows: Seq[Seq[_]], ordinal: Int): TColumn = + asStringTColumn(rows, ordinal) + + def toVarcharTColumn(rows: Seq[Seq[_]], ordinal: Int): TColumn = + asStringTColumn(rows, ordinal) + + // ========================================================== + + def toBitTColumnValue(row: Seq[_], ordinal: Int): TColumnValue = + asBooleanTColumnValue(row, ordinal) + + def toTinyIntTColumnValue(row: Seq[_], ordinal: Int): TColumnValue = + asShortTColumnValue(row, ordinal) + + def toSmallIntTColumnValue(row: Seq[_], ordinal: Int): TColumnValue = + asShortTColumnValue(row, ordinal) + + def toIntegerTColumnValue(row: Seq[_], ordinal: Int): TColumnValue = + asIntegerTColumnValue(row, ordinal) + + def toBigIntTColumnValue(row: Seq[_], ordinal: Int): TColumnValue = + asLongTColumnValue(row, ordinal) + + def toRealTColumnValue(row: Seq[_], ordinal: Int): TColumnValue = + asFloatTColumnValue(row, ordinal) + + def toDoubleTColumnValue(row: Seq[_], ordinal: Int): TColumnValue = + asDoubleTColumnValue(row, ordinal) + + def toCharTColumnValue(row: Seq[_], ordinal: Int): TColumnValue = + asStringTColumnValue(row, ordinal) + + def toVarcharTColumnValue(row: Seq[_], ordinal: Int): TColumnValue = + asStringTColumnValue(row, ordinal) + + def toDefaultTColumnValue(row: Seq[_], ordinal: Int, sqlType: Int): TColumnValue = + asStringTColumnValue(row, ordinal, rawValue => toHiveString(rawValue, sqlType)) + + def toHiveString(data: Any, sqlType: Int): String = + (data, sqlType) match { + case (date: Date, DATE) => formatDate(date) + case (dateTime: LocalDateTime, TIMESTAMP) => formatLocalDateTime(dateTime) + case (decimal: java.math.BigDecimal, DECIMAL) => decimal.toPlainString + case (other, _) => other.toString + } +} diff --git a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/PermanentViewMarker.scala b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/schema/JdbcTRowSetGenerator.scala similarity index 58% rename from extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/PermanentViewMarker.scala rename to externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/schema/JdbcTRowSetGenerator.scala index d19f7a92314..233a6a79946 100644 --- a/extensions/spark/kyuubi-spark-authz/src/main/scala/org/apache/kyuubi/plugin/spark/authz/util/PermanentViewMarker.scala +++ b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/schema/JdbcTRowSetGenerator.scala @@ -14,22 +14,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +package org.apache.kyuubi.engine.jdbc.schema -package org.apache.kyuubi.plugin.spark.authz.util +import org.apache.kyuubi.engine.result.TRowSetGenerator -import org.apache.spark.sql.catalyst.catalog.CatalogTable -import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, UnaryNode} +trait JdbcTRowSetGenerator extends TRowSetGenerator[Seq[Column], Seq[_], Int] { + override def getColumnSizeFromSchemaType(schema: Seq[Column]): Int = schema.length -case class PermanentViewMarker( - child: LogicalPlan, - catalogTable: CatalogTable, - visitColNames: Seq[String]) extends UnaryNode - with WithInternalChild { + override def getColumnType(schema: Seq[Column], ordinal: Int): Int = schema(ordinal).sqlType - override def output: Seq[Attribute] = child.output - - override def withNewChildInternal(newChild: LogicalPlan): LogicalPlan = - copy(child = newChild) + override protected def isColumnNullAt(row: Seq[_], ordinal: Int): Boolean = row(ordinal) == null + override protected def getColumnAs[T](row: Seq[_], ordinal: Int): T = row(ordinal).asInstanceOf[T] } diff --git a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/schema/RowSetHelper.scala b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/schema/RowSetHelper.scala deleted file mode 100644 index 74b4cec108d..00000000000 --- a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/schema/RowSetHelper.scala +++ /dev/null @@ -1,323 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.kyuubi.engine.jdbc.schema - -import java.{lang, util} -import java.sql.{Date, Types} -import java.time.LocalDateTime - -import scala.collection.JavaConverters._ - -import org.apache.hive.service.rpc.thrift._ - -import org.apache.kyuubi.util.RowSetUtils.{bitSetToBuffer, formatDate, formatLocalDateTime} - -abstract class RowSetHelper { - - def toTRowSet( - rows: Seq[List[_]], - columns: List[Column], - protocolVersion: TProtocolVersion): TRowSet = { - if (protocolVersion.getValue < TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6.getValue) { - toRowBasedSet(rows, columns) - } else { - toColumnBasedSet(rows, columns) - } - } - - private def toRowBasedSet(rows: Seq[List[_]], columns: List[Column]): TRowSet = { - val rowSize = rows.length - val tRows = new util.ArrayList[TRow](rowSize) - var i = 0 - while (i < rowSize) { - val row = rows(i) - val tRow = new TRow() - val columnSize = row.size - var j = 0 - while (j < columnSize) { - val columnValue = toTColumnValue(j, row, columns) - tRow.addToColVals(columnValue) - j += 1 - } - tRows.add(tRow) - i += 1 - } - new TRowSet(0, tRows) - } - - private def toColumnBasedSet(rows: Seq[List[_]], columns: List[Column]): TRowSet = { - val size = rows.size - val tRowSet = new TRowSet(0, new java.util.ArrayList[TRow](size)) - val columnSize = columns.length - var i = 0 - while (i < columnSize) { - val field = columns(i) - val tColumn = toTColumn(rows, i, field.sqlType) - tRowSet.addToColumns(tColumn) - i += 1 - } - tRowSet - } - - protected def toTColumn( - rows: Seq[Seq[Any]], - ordinal: Int, - sqlType: Int): TColumn = { - sqlType match { - case Types.BIT => - toBitTColumn(rows, ordinal) - - case Types.TINYINT => - toTinyIntTColumn(rows, ordinal) - - case Types.SMALLINT => - toSmallIntTColumn(rows, ordinal) - - case Types.INTEGER => - toIntegerTColumn(rows, ordinal) - - case Types.BIGINT => - toBigIntTColumn(rows, ordinal) - - case Types.REAL => - toRealTColumn(rows, ordinal) - - case Types.DOUBLE => - toDoubleTColumn(rows, ordinal) - - case Types.CHAR => - toCharTColumn(rows, ordinal) - - case Types.VARCHAR => - toVarcharTColumn(rows, ordinal) - - case _ => - toDefaultTColumn(rows, ordinal, sqlType) - } - } - - protected def toTColumnValue(ordinal: Int, row: List[Any], types: List[Column]): TColumnValue = { - types(ordinal).sqlType match { - case Types.BIT => - toBitTColumnValue(row, ordinal) - - case Types.TINYINT => - toTinyIntTColumnValue(row, ordinal) - - case Types.SMALLINT => - toSmallIntTColumnValue(row, ordinal) - - case Types.INTEGER => - toIntegerTColumnValue(row, ordinal) - - case Types.BIGINT => - toBigIntTColumnValue(row, ordinal) - - case Types.REAL => - toRealTColumnValue(row, ordinal) - - case Types.DOUBLE => - toDoubleTColumnValue(row, ordinal) - - case Types.CHAR => - toCharTColumnValue(row, ordinal) - - case Types.VARCHAR => - toVarcharTColumnValue(row, ordinal) - - case _ => - toDefaultTColumnValue(row, ordinal, types) - } - } - - protected def getOrSetAsNull[T]( - rows: Seq[Seq[Any]], - ordinal: Int, - nulls: java.util.BitSet, - defaultVal: T): java.util.List[T] = { - val size = rows.length - val ret = new java.util.ArrayList[T](size) - var idx = 0 - while (idx < size) { - val row = rows(idx) - val isNull = row(ordinal) == null - if (isNull) { - nulls.set(idx, true) - ret.add(idx, defaultVal) - } else { - ret.add(idx, row(ordinal).asInstanceOf[T]) - } - idx += 1 - } - ret - } - - protected def toDefaultTColumn(rows: Seq[Seq[Any]], ordinal: Int, sqlType: Int): TColumn = { - val nulls = new java.util.BitSet() - val rowSize = rows.length - val values = new util.ArrayList[String](rowSize) - var i = 0 - while (i < rowSize) { - val row = rows(i) - nulls.set(i, row(ordinal) == null) - val value = - if (row(ordinal) == null) { - "" - } else { - toHiveString(row(ordinal), sqlType) - } - values.add(value) - i += 1 - } - TColumn.stringVal(new TStringColumn(values, nulls)) - } - - protected def toBitTColumn(rows: Seq[Seq[Any]], ordinal: Int): TColumn = { - val nulls = new java.util.BitSet() - val values = getOrSetAsNull[java.lang.Boolean](rows, ordinal, nulls, true) - TColumn.boolVal(new TBoolColumn(values, nulls)) - } - - protected def toTinyIntTColumn(rows: Seq[Seq[Any]], ordinal: Int): TColumn = { - val nulls = new java.util.BitSet() - val values = getOrSetAsNull[java.lang.Byte](rows, ordinal, nulls, 0.toByte) - TColumn.byteVal(new TByteColumn(values, nulls)) - } - - protected def toSmallIntTColumn(rows: Seq[Seq[Any]], ordinal: Int): TColumn = { - val nulls = new java.util.BitSet() - val values = getOrSetAsNull[java.lang.Short](rows, ordinal, nulls, 0.toShort) - TColumn.i16Val(new TI16Column(values, nulls)) - } - - protected def toIntegerTColumn(rows: Seq[Seq[Any]], ordinal: Int): TColumn = { - val nulls = new java.util.BitSet() - val values = getOrSetAsNull[java.lang.Integer](rows, ordinal, nulls, 0) - TColumn.i32Val(new TI32Column(values, nulls)) - } - - protected def toBigIntTColumn(rows: Seq[Seq[Any]], ordinal: Int): TColumn = { - val nulls = new java.util.BitSet() - val values = getOrSetAsNull[lang.Long](rows, ordinal, nulls, 0L) - TColumn.i64Val(new TI64Column(values, nulls)) - } - - protected def toRealTColumn(rows: Seq[Seq[Any]], ordinal: Int): TColumn = { - val nulls = new java.util.BitSet() - val values = getOrSetAsNull[lang.Float](rows, ordinal, nulls, 0.toFloat) - .asScala.map(n => java.lang.Double.valueOf(n.toString)).asJava - TColumn.doubleVal(new TDoubleColumn(values, nulls)) - } - - protected def toDoubleTColumn(rows: Seq[Seq[Any]], ordinal: Int): TColumn = { - val nulls = new java.util.BitSet() - val values = getOrSetAsNull[lang.Double](rows, ordinal, nulls, 0.toDouble) - TColumn.doubleVal(new TDoubleColumn(values, nulls)) - } - - protected def toCharTColumn(rows: Seq[Seq[Any]], ordinal: Int): TColumn = { - toVarcharTColumn(rows, ordinal) - } - - protected def toVarcharTColumn(rows: Seq[Seq[Any]], ordinal: Int): TColumn = { - val nulls = new java.util.BitSet() - val values = getOrSetAsNull[String](rows, ordinal, nulls, "") - TColumn.stringVal(new TStringColumn(values, nulls)) - } - - // ========================================================== - - protected def toBitTColumnValue(row: List[Any], ordinal: Int): TColumnValue = { - val boolValue = new TBoolValue - if (row(ordinal) != null) boolValue.setValue(row(ordinal).asInstanceOf[Boolean]) - TColumnValue.boolVal(boolValue) - } - - protected def toTinyIntTColumnValue(row: List[Any], ordinal: Int): TColumnValue = { - val byteValue = new TByteValue - if (row(ordinal) != null) byteValue.setValue(row(ordinal).asInstanceOf[Byte]) - TColumnValue.byteVal(byteValue) - } - - protected def toSmallIntTColumnValue(row: List[Any], ordinal: Int): TColumnValue = { - val tI16Value = new TI16Value - if (row(ordinal) != null) tI16Value.setValue(row(ordinal).asInstanceOf[Short]) - TColumnValue.i16Val(tI16Value) - } - - protected def toIntegerTColumnValue(row: List[Any], ordinal: Int): TColumnValue = { - val tI32Value = new TI32Value - if (row(ordinal) != null) tI32Value.setValue(row(ordinal).asInstanceOf[Int]) - TColumnValue.i32Val(tI32Value) - } - - protected def toBigIntTColumnValue(row: List[Any], ordinal: Int): TColumnValue = { - val tI64Value = new TI64Value - if (row(ordinal) != null) tI64Value.setValue(row(ordinal).asInstanceOf[Long]) - TColumnValue.i64Val(tI64Value) - } - - protected def toRealTColumnValue(row: List[Any], ordinal: Int): TColumnValue = { - val tDoubleValue = new TDoubleValue - if (row(ordinal) != null) { - val doubleValue = java.lang.Double.valueOf(row(ordinal).asInstanceOf[Float].toString) - tDoubleValue.setValue(doubleValue) - } - TColumnValue.doubleVal(tDoubleValue) - } - - protected def toDoubleTColumnValue(row: List[Any], ordinal: Int): TColumnValue = { - val tDoubleValue = new TDoubleValue - if (row(ordinal) != null) tDoubleValue.setValue(row(ordinal).asInstanceOf[Double]) - TColumnValue.doubleVal(tDoubleValue) - } - - protected def toCharTColumnValue(row: List[Any], ordinal: Int): TColumnValue = { - toVarcharTColumnValue(row, ordinal) - } - - protected def toVarcharTColumnValue(row: List[Any], ordinal: Int): TColumnValue = { - val tStringValue = new TStringValue - if (row(ordinal) != null) tStringValue.setValue(row(ordinal).asInstanceOf[String]) - TColumnValue.stringVal(tStringValue) - } - - protected def toDefaultTColumnValue( - row: List[Any], - ordinal: Int, - types: List[Column]): TColumnValue = { - val tStrValue = new TStringValue - if (row(ordinal) != null) { - tStrValue.setValue( - toHiveString(row(ordinal), types(ordinal).sqlType)) - } - TColumnValue.stringVal(tStrValue) - } - - protected def toHiveString(data: Any, sqlType: Int): String = { - (data, sqlType) match { - case (date: Date, Types.DATE) => - formatDate(date) - case (dateTime: LocalDateTime, Types.TIMESTAMP) => - formatLocalDateTime(dateTime) - case (decimal: java.math.BigDecimal, Types.DECIMAL) => - decimal.toPlainString - case (other, _) => - other.toString - } - } -} diff --git a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/schema/SchemaHelper.scala b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/schema/SchemaHelper.scala index 455eb2a9224..6b39bb3dbe4 100644 --- a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/schema/SchemaHelper.scala +++ b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/schema/SchemaHelper.scala @@ -21,7 +21,7 @@ import java.util.Collections import scala.collection.JavaConverters._ -import org.apache.hive.service.rpc.thrift._ +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ abstract class SchemaHelper { diff --git a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/session/JdbcSessionImpl.scala b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/session/JdbcSessionImpl.scala index 8b36e5a56df..09d08d2c896 100644 --- a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/session/JdbcSessionImpl.scala +++ b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/session/JdbcSessionImpl.scala @@ -20,8 +20,6 @@ import java.sql.{Connection, DatabaseMetaData} import scala.util.{Failure, Success, Try} -import org.apache.hive.service.rpc.thrift.{TGetInfoType, TGetInfoValue, TProtocolVersion} - import org.apache.kyuubi.KyuubiSQLException import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiConf._ @@ -29,6 +27,7 @@ import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_SESSION_HANDLE_KEY import org.apache.kyuubi.engine.jdbc.connection.ConnectionProvider import org.apache.kyuubi.engine.jdbc.util.KyuubiJdbcUtils import org.apache.kyuubi.session.{AbstractSession, SessionHandle, SessionManager} +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TGetInfoType, TGetInfoValue, TProtocolVersion} class JdbcSessionImpl( protocol: TProtocolVersion, diff --git a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/session/JdbcSessionManager.scala b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/session/JdbcSessionManager.scala index 09958e0507f..513e61303fd 100644 --- a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/session/JdbcSessionManager.scala +++ b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/session/JdbcSessionManager.scala @@ -16,8 +16,6 @@ */ package org.apache.kyuubi.engine.jdbc.session -import org.apache.hive.service.rpc.thrift.TProtocolVersion - import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiConf.ENGINE_SHARE_LEVEL import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_SESSION_HANDLE_KEY @@ -26,6 +24,7 @@ import org.apache.kyuubi.engine.jdbc.JdbcSQLEngine import org.apache.kyuubi.engine.jdbc.operation.JdbcOperationManager import org.apache.kyuubi.operation.OperationManager import org.apache.kyuubi.session.{Session, SessionHandle, SessionManager} +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TProtocolVersion class JdbcSessionManager(name: String) extends SessionManager(name) { diff --git a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/starrocks/StarRocksConnectionProvider.scala b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/starrocks/StarRocksConnectionProvider.scala new file mode 100644 index 00000000000..09b7efb3ff5 --- /dev/null +++ b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/starrocks/StarRocksConnectionProvider.scala @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.jdbc.starrocks + +import org.apache.kyuubi.engine.jdbc.mysql.MySQL8ConnectionProvider + +class StarRocksConnectionProvider extends MySQL8ConnectionProvider { + + override val name: String = classOf[StarRocksConnectionProvider].getSimpleName +} diff --git a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/starrocks/StarRocksSchemaHelper.scala b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/starrocks/StarRocksSchemaHelper.scala new file mode 100644 index 00000000000..e6b4e152140 --- /dev/null +++ b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/starrocks/StarRocksSchemaHelper.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.jdbc.starrocks + +import org.apache.kyuubi.engine.jdbc.mysql.MySQLSchemaHelper + +class StarRocksSchemaHelper extends MySQLSchemaHelper {} diff --git a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/starrocks/StarRocksTRowSetGenerator.scala b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/starrocks/StarRocksTRowSetGenerator.scala new file mode 100644 index 00000000000..736ce766461 --- /dev/null +++ b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/starrocks/StarRocksTRowSetGenerator.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.jdbc.starrocks + +import org.apache.kyuubi.engine.jdbc.mysql.MySQLTRowSetGenerator + +class StarRocksTRowSetGenerator extends MySQLTRowSetGenerator {} diff --git a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/util/ResultSetWrapper.scala b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/util/ResultSetWrapper.scala index 8bc7027f19b..0fead73b1a6 100644 --- a/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/util/ResultSetWrapper.scala +++ b/externals/kyuubi-jdbc-engine/src/main/scala/org/apache/kyuubi/engine/jdbc/util/ResultSetWrapper.scala @@ -30,6 +30,7 @@ class ResultSetWrapper(statement: Statement) private lazy val metadata = currentResult.getMetaData override def hasNext: Boolean = { + if (currentResult == null) return false val result = currentResult.next() if (!result) { val hasMoreResults = statement.getMoreResults(Statement.CLOSE_CURRENT_RESULT) @@ -37,6 +38,7 @@ class ResultSetWrapper(statement: Statement) currentResult = statement.getResultSet currentResult.next() } else { + currentResult = null false } } else { diff --git a/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/CheckJdbcDialectSPISuite.scala b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/CheckJdbcDialectSPISuite.scala new file mode 100644 index 00000000000..e30ebce5d73 --- /dev/null +++ b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/CheckJdbcDialectSPISuite.scala @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.engine.jdbc + +import java.nio.file.Paths + +// scalastyle:off +import org.scalatest.funsuite.AnyFunSuite + +import org.apache.kyuubi.util.AssertionUtils._ +import org.apache.kyuubi.util.GoldenFileUtils._ + +class CheckJdbcDialectSPISuite extends AnyFunSuite { + // scalastyle:on + + test("check JDBC dialect SPI service file sorted") { + Seq( + "org.apache.kyuubi.engine.jdbc.connection.JdbcConnectionProvider", + "org.apache.kyuubi.engine.jdbc.dialect.JdbcDialect") + .foreach { fileName => + val filePath = Paths.get( + s"${getCurrentModuleHome(this)}/src/main/resources/META-INF/services/$fileName") + assertFileContentSorted(filePath) + } + } +} diff --git a/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/WithJdbcServerContainer.scala b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/WithJdbcServerContainer.scala index 18c2316c1bc..89b5534be58 100644 --- a/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/WithJdbcServerContainer.scala +++ b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/WithJdbcServerContainer.scala @@ -16,8 +16,8 @@ */ package org.apache.kyuubi.engine.jdbc -import com.dimafeng.testcontainers.ForAllTestContainer +import com.dimafeng.testcontainers.scalatest.TestContainerForAll import org.apache.kyuubi.KyuubiFunSuite -trait WithJdbcServerContainer extends KyuubiFunSuite with ForAllTestContainer {} +trait WithJdbcServerContainer extends KyuubiFunSuite with TestContainerForAll {} diff --git a/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/doris/OperationWithEngineSuite.scala b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/doris/OperationWithEngineSuite.scala index d5e3f4f0fcc..31ca4dee737 100644 --- a/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/doris/OperationWithEngineSuite.scala +++ b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/doris/OperationWithEngineSuite.scala @@ -16,17 +16,16 @@ */ package org.apache.kyuubi.engine.jdbc.doris -import org.apache.hive.service.rpc.thrift.{TExecuteStatementReq, TFetchResultsReq, TGetInfoReq, TGetInfoType, TStatusCode} - import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.engine.jdbc.connection.ConnectionProvider import org.apache.kyuubi.operation.HiveJDBCTestHelper +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TExecuteStatementReq, TFetchResultsReq, TGetInfoReq, TGetInfoType, TStatusCode} class OperationWithEngineSuite extends DorisOperationSuite with HiveJDBCTestHelper { override protected def jdbcUrl: String = jdbcConnectionUrl - test("Test for Jdbc engine getInfo") { + test("doris - test for Jdbc engine getInfo") { val metaData = ConnectionProvider.create(kyuubiConf).getMetaData withSessionConf(Map(KyuubiConf.SERVER_INFO_PROVIDER.key -> "ENGINE"))()() { @@ -60,7 +59,7 @@ class OperationWithEngineSuite extends DorisOperationSuite with HiveJDBCTestHelp } } - test("JDBC ExecuteStatement operation should contain operationLog") { + test("doris - JDBC ExecuteStatement operation should contain operationLog") { withSessionHandle { (client, handle) => val tExecuteStatementReq = new TExecuteStatementReq() tExecuteStatementReq.setSessionHandle(handle) diff --git a/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/doris/SessionSuite.scala b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/doris/SessionSuite.scala index a8204105f7e..b5af0829a60 100644 --- a/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/doris/SessionSuite.scala +++ b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/doris/SessionSuite.scala @@ -20,7 +20,7 @@ import org.apache.kyuubi.operation.HiveJDBCTestHelper class SessionSuite extends WithDorisEngine with HiveJDBCTestHelper { - test("test session") { + test("doris - test session") { withJdbcStatement() { statement => val resultSet = statement.executeQuery( "select '1' as id") diff --git a/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/doris/StatementSuite.scala b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/doris/StatementSuite.scala index 663c0da3abb..b27ad880b34 100644 --- a/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/doris/StatementSuite.scala +++ b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/doris/StatementSuite.scala @@ -22,7 +22,7 @@ import org.apache.kyuubi.operation.HiveJDBCTestHelper class StatementSuite extends WithDorisEngine with HiveJDBCTestHelper { - test("test select") { + test("doris - test select") { withJdbcStatement("test1") { statement => statement.execute("create database if not exists db1") statement.execute("use db1") @@ -44,7 +44,7 @@ class StatementSuite extends WithDorisEngine with HiveJDBCTestHelper { } } - test("test types") { + test("doris - test types") { withJdbcStatement("test1") { statement => statement.execute("create database if not exists db1") statement.execute("use db1") diff --git a/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/doris/WithDorisContainer.scala b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/doris/WithDorisContainer.scala index 8092e329941..c37478e9989 100644 --- a/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/doris/WithDorisContainer.scala +++ b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/doris/WithDorisContainer.scala @@ -27,15 +27,13 @@ import org.apache.kyuubi.engine.jdbc.WithJdbcServerContainer trait WithDorisContainer extends WithJdbcServerContainer { - private val DORIS_FE_PORT = 9030 - - private val DORIS_BE_PORT = 8040 + private val DORIS_FE_MYSQL_PORT = 9030 + private val DORIS_BE_HTTTP_PORT = 8040 private val DORIS_FE_SERVICE_NAME = "doris-fe" - private val DORIS_BE_SERVICE_NAME = "doris-be" - override val container: DockerComposeContainer = + override val containerDef: DockerComposeContainer.Def = DockerComposeContainer .Def( composeFiles = new File(Utils.getContextOrKyuubiClassLoader @@ -43,25 +41,18 @@ trait WithDorisContainer extends WithJdbcServerContainer { exposedServices = Seq[ExposedService]( ExposedService( DORIS_FE_SERVICE_NAME, - DORIS_FE_PORT, + DORIS_FE_MYSQL_PORT, waitStrategy = new DockerHealthcheckWaitStrategy().withStartupTimeout(Duration.ofMinutes(5))), ExposedService( DORIS_BE_SERVICE_NAME, - DORIS_BE_PORT, + DORIS_BE_HTTTP_PORT, waitStrategy = new DockerHealthcheckWaitStrategy().withStartupTimeout(Duration.ofMinutes(5))))) - .createContainer() - - protected def feUrl: String = { - val feHost: String = container.getServiceHost(DORIS_FE_SERVICE_NAME, DORIS_FE_PORT) - val fePort: Int = container.getServicePort(DORIS_FE_SERVICE_NAME, DORIS_FE_PORT) - val url = s"$feHost:$fePort" - url - } - override def afterAll(): Unit = { - super.afterAll() - container.close() + protected def feJdbcUrl: String = withContainers { container => + val feHost: String = container.getServiceHost(DORIS_FE_SERVICE_NAME, DORIS_FE_MYSQL_PORT) + val fePort: Int = container.getServicePort(DORIS_FE_SERVICE_NAME, DORIS_FE_MYSQL_PORT) + s"jdbc:mysql://$feHost:$fePort" } } diff --git a/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/doris/WithDorisEngine.scala b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/doris/WithDorisEngine.scala index 9945fb64047..692f37b9515 100644 --- a/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/doris/WithDorisEngine.scala +++ b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/doris/WithDorisEngine.scala @@ -23,7 +23,7 @@ trait WithDorisEngine extends WithJdbcEngine with WithDorisContainer { override def withKyuubiConf: Map[String, String] = Map( ENGINE_SHARE_LEVEL.key -> "SERVER", - ENGINE_JDBC_CONNECTION_URL.key -> s"jdbc:mysql://$feUrl", + ENGINE_JDBC_CONNECTION_URL.key -> feJdbcUrl, ENGINE_JDBC_CONNECTION_USER.key -> "root", ENGINE_JDBC_CONNECTION_PASSWORD.key -> "", ENGINE_TYPE.key -> "jdbc", diff --git a/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/mysql/MySQLOperationSuite.scala b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/mysql/MySQLOperationSuite.scala new file mode 100644 index 00000000000..ffd7c0a0fe8 --- /dev/null +++ b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/mysql/MySQLOperationSuite.scala @@ -0,0 +1,253 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.jdbc.mysql + +import java.sql.ResultSet + +import scala.collection.mutable.ArrayBuffer + +import org.apache.kyuubi.operation.HiveJDBCTestHelper +import org.apache.kyuubi.operation.meta.ResultSetSchemaConstant._ + +abstract class MySQLOperationSuite extends WithMySQLEngine with HiveJDBCTestHelper { + test("mysql - get tables") { + case class Table(catalog: String, schema: String, tableName: String, tableType: String) + + withJdbcStatement() { statement => + val meta = statement.getConnection.getMetaData + val resultBuffer = ArrayBuffer[Table]() + + var tables = meta.getTables(null, null, null, null) + while (tables.next()) { + resultBuffer += + Table( + tables.getString(TABLE_CATALOG), + tables.getString(TABLE_SCHEMA), + tables.getString(TABLE_NAME), + tables.getString(TABLE_TYPE)) + } + assert(resultBuffer.contains(Table("def", "information_schema", "TABLES", "SYSTEM VIEW"))) + assert(resultBuffer.contains(Table("def", "information_schema", "VIEWS", "SYSTEM VIEW"))) + resultBuffer.clear() + + statement.execute("create database if not exists db1") + statement.execute("create table db1.test1(id bigint)" + + "ENGINE=InnoDB DEFAULT CHARSET=utf8;") + statement.execute("create table db1.test2(id bigint)" + + "ENGINE=InnoDB DEFAULT CHARSET=utf8;") + + statement.execute("create database if not exists db2") + statement.execute("create table db2.test1(id bigint)" + + "ENGINE=InnoDB DEFAULT CHARSET=utf8;") + statement.execute("create table db2.test2(id bigint)" + + "ENGINE=InnoDB DEFAULT CHARSET=utf8;") + + statement.execute("create view db1.view1 (k1) as select id from db1.test1") + + tables = meta.getTables(null, "db1", "test1", Array("BASE TABLE")) + while (tables.next()) { + val table = Table( + tables.getString(TABLE_CATALOG), + tables.getString(TABLE_SCHEMA), + tables.getString(TABLE_NAME), + tables.getString(TABLE_TYPE)) + assert(table == Table("def", "db1", "test1", "BASE TABLE")) + } + + tables = meta.getTables("def", "db1", null, null) + while (tables.next()) { + resultBuffer += Table( + tables.getString(TABLE_CATALOG), + tables.getString(TABLE_SCHEMA), + tables.getString(TABLE_NAME), + tables.getString(TABLE_TYPE)) + } + assert(resultBuffer.contains(Table("def", "db1", "test2", "BASE TABLE"))) + resultBuffer.clear() + + tables = meta.getTables(null, null, "test1", null) + while (tables.next()) { + resultBuffer += Table( + tables.getString(TABLE_CATALOG), + tables.getString(TABLE_SCHEMA), + tables.getString(TABLE_NAME), + tables.getString(TABLE_TYPE)) + } + assert(resultBuffer.contains(Table("def", "db1", "test1", "BASE TABLE"))) + assert(resultBuffer.contains(Table("def", "db2", "test1", "BASE TABLE"))) + resultBuffer.clear() + + tables = meta.getTables(null, "db%", "test1", null) + while (tables.next()) { + resultBuffer += Table( + tables.getString(TABLE_CATALOG), + tables.getString(TABLE_SCHEMA), + tables.getString(TABLE_NAME), + tables.getString(TABLE_TYPE)) + } + assert(resultBuffer.contains(Table("def", "db1", "test1", "BASE TABLE"))) + assert(resultBuffer.contains(Table("def", "db2", "test1", "BASE TABLE"))) + resultBuffer.clear() + + tables = meta.getTables(null, "db2", "test%", null) + while (tables.next()) { + resultBuffer += Table( + tables.getString(TABLE_CATALOG), + tables.getString(TABLE_SCHEMA), + tables.getString(TABLE_NAME), + tables.getString(TABLE_TYPE)) + } + assert(resultBuffer.contains(Table("def", "db2", "test1", "BASE TABLE"))) + assert(resultBuffer.contains(Table("def", "db2", "test2", "BASE TABLE"))) + resultBuffer.clear() + + tables = meta.getTables(null, "fake_db", "test1", null) + assert(!tables.next()) + + tables = meta.getTables(null, "db1", null, Array("VIEW")) + while (tables.next()) { + val table = Table( + tables.getString(TABLE_CATALOG), + tables.getString(TABLE_SCHEMA), + tables.getString(TABLE_NAME), + tables.getString(TABLE_TYPE)) + assert(table == Table("def", "db1", "view1", "VIEW")) + } + + tables = meta.getTables(null, null, null, Array("VIEW", "BASE TABLE")) + while (tables.next()) { + resultBuffer += Table( + tables.getString(TABLE_CATALOG), + tables.getString(TABLE_SCHEMA), + tables.getString(TABLE_NAME), + tables.getString(TABLE_TYPE)) + } + assert(resultBuffer.contains(Table("def", "db1", "test1", "BASE TABLE"))) + assert(resultBuffer.contains(Table("def", "db1", "test2", "BASE TABLE"))) + assert(resultBuffer.contains(Table("def", "db2", "test1", "BASE TABLE"))) + assert(resultBuffer.contains(Table("def", "db2", "test2", "BASE TABLE"))) + assert(resultBuffer.contains(Table("def", "db1", "view1", "VIEW"))) + resultBuffer.clear() + + statement.execute("drop view db1.view1") + statement.execute("drop table db1.test1") + statement.execute("drop table db1.test2") + statement.execute("drop table db2.test1") + statement.execute("drop table db2.test2") + statement.execute("drop database db1") + statement.execute("drop database db2") + } + } + + test("mysql - get columns") { + case class Column(tableSchema: String, tableName: String, columnName: String) + + def buildColumn(resultSet: ResultSet): Column = { + val schema = resultSet.getString(TABLE_SCHEMA) + val tableName = resultSet.getString(TABLE_NAME) + val columnName = resultSet.getString(COLUMN_NAME) + val column = Column(schema, tableName, columnName) + column + } + + withJdbcStatement() { statement => + val metadata = statement.getConnection.getMetaData + statement.execute("create database if not exists db1") + statement.execute("create table if not exists db1.test1" + + "(id bigint, str1 varchar(255), str2 varchar(255), age int)" + + "ENGINE=InnoDB DEFAULT CHARSET=utf8;") + statement.execute("create table if not exists db1.test2" + + "(id bigint, str1 varchar(255), str2 varchar(255), age int)" + + "ENGINE=InnoDB DEFAULT CHARSET=utf8;") + + statement.execute("create database if not exists db2") + + statement.execute("create table if not exists db2.test1" + + "(id bigint, str1 varchar(255), str2 varchar(255), age int)" + + "ENGINE=InnoDB DEFAULT CHARSET=utf8;") + + val resultBuffer = ArrayBuffer[Column]() + val resultSet1 = metadata.getColumns(null, "db1", null, null) + while (resultSet1.next()) { + val column = buildColumn(resultSet1) + resultBuffer += column + } + + assert(resultBuffer.contains(Column("db1", "test1", "id"))) + assert(resultBuffer.contains(Column("db1", "test1", "str1"))) + assert(resultBuffer.contains(Column("db1", "test1", "str2"))) + assert(resultBuffer.contains(Column("db1", "test1", "age"))) + + assert(resultBuffer.contains(Column("db1", "test2", "id"))) + assert(resultBuffer.contains(Column("db1", "test2", "str1"))) + assert(resultBuffer.contains(Column("db1", "test2", "str2"))) + assert(resultBuffer.contains(Column("db1", "test2", "age"))) + + resultBuffer.clear() + + val resultSet2 = metadata.getColumns(null, null, "test1", null) + while (resultSet2.next()) { + val column = buildColumn(resultSet2) + resultBuffer += column + } + + assert(resultBuffer.contains(Column("db1", "test1", "id"))) + assert(resultBuffer.contains(Column("db1", "test1", "str1"))) + assert(resultBuffer.contains(Column("db1", "test1", "str2"))) + assert(resultBuffer.contains(Column("db1", "test1", "age"))) + + assert(resultBuffer.contains(Column("db2", "test1", "id"))) + assert(resultBuffer.contains(Column("db2", "test1", "str1"))) + assert(resultBuffer.contains(Column("db2", "test1", "str2"))) + assert(resultBuffer.contains(Column("db2", "test1", "age"))) + + resultBuffer.clear() + + val resultSet3 = metadata.getColumns(null, null, null, "age") + while (resultSet3.next()) { + val column = buildColumn(resultSet3) + resultBuffer += column + } + + assert(resultBuffer.contains(Column("db1", "test1", "age"))) + assert(resultBuffer.contains(Column("db1", "test2", "age"))) + assert(resultBuffer.contains(Column("db2", "test1", "age"))) + + resultBuffer.clear() + + val resultSet4 = metadata.getColumns(null, "d%1", "t%1", "str%") + while (resultSet4.next()) { + val column = buildColumn(resultSet4) + resultBuffer += column + } + + assert(resultBuffer.contains(Column("db1", "test1", "str1"))) + assert(resultBuffer.contains(Column("db1", "test1", "str2"))) + + resultBuffer.clear() + + val resultSet5 = metadata.getColumns(null, "d%1", "t%1", "fake") + assert(!resultSet5.next()) + + statement.execute("drop table db1.test1") + statement.execute("drop table db1.test2") + statement.execute("drop database db1") + statement.execute("drop table db2.test1") + statement.execute("drop database db2") + } + } +} diff --git a/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/mysql/OperationWithEngineSuite.scala b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/mysql/OperationWithEngineSuite.scala new file mode 100644 index 00000000000..b8264c06992 --- /dev/null +++ b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/mysql/OperationWithEngineSuite.scala @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.jdbc.mysql + +import org.apache.kyuubi.config.KyuubiConf +import org.apache.kyuubi.engine.jdbc.connection.ConnectionProvider +import org.apache.kyuubi.operation.HiveJDBCTestHelper +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ + +class OperationWithEngineSuite extends MySQLOperationSuite with HiveJDBCTestHelper { + + override protected def jdbcUrl: String = jdbcConnectionUrl + + test("Test for Jdbc engine getInfo") { + val metaData = ConnectionProvider.create(kyuubiConf).getMetaData + + withSessionConf(Map(KyuubiConf.SERVER_INFO_PROVIDER.key -> "ENGINE"))()() { + withSessionHandle { (client, handle) => + val req = new TGetInfoReq() + req.setSessionHandle(handle) + req.setInfoType(TGetInfoType.CLI_DBMS_NAME) + assert(client.GetInfo(req).getInfoValue.getStringValue == metaData.getDatabaseProductName) + + val req2 = new TGetInfoReq() + req2.setSessionHandle(handle) + req2.setInfoType(TGetInfoType.CLI_DBMS_VER) + assert( + client.GetInfo(req2).getInfoValue.getStringValue == metaData.getDatabaseProductVersion) + + val req3 = new TGetInfoReq() + req3.setSessionHandle(handle) + req3.setInfoType(TGetInfoType.CLI_MAX_COLUMN_NAME_LEN) + assert(client.GetInfo(req3).getInfoValue.getLenValue == metaData.getMaxColumnNameLength) + + val req4 = new TGetInfoReq() + req4.setSessionHandle(handle) + req4.setInfoType(TGetInfoType.CLI_MAX_SCHEMA_NAME_LEN) + assert(client.GetInfo(req4).getInfoValue.getLenValue == metaData.getMaxSchemaNameLength) + + val req5 = new TGetInfoReq() + req5.setSessionHandle(handle) + req5.setInfoType(TGetInfoType.CLI_MAX_TABLE_NAME_LEN) + assert(client.GetInfo(req5).getInfoValue.getLenValue == metaData.getMaxTableNameLength) + } + } + } + + test("JDBC ExecuteStatement operation should contain operationLog") { + withSessionHandle { (client, handle) => + val tExecuteStatementReq = new TExecuteStatementReq() + tExecuteStatementReq.setSessionHandle(handle) + tExecuteStatementReq.setStatement("SELECT 1") + val tExecuteStatementResp = client.ExecuteStatement(tExecuteStatementReq) + + val tFetchResultsReq = new TFetchResultsReq() + tFetchResultsReq.setOperationHandle(tExecuteStatementResp.getOperationHandle) + tFetchResultsReq.setFetchType(1) + tFetchResultsReq.setMaxRows(1) + + val tFetchResultsResp = client.FetchResults(tFetchResultsReq) + assert(tFetchResultsResp.getStatus.getStatusCode === TStatusCode.SUCCESS_STATUS) + } + } +} diff --git a/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/mysql/SessionSuite.scala b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/mysql/SessionSuite.scala new file mode 100644 index 00000000000..65107603d77 --- /dev/null +++ b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/mysql/SessionSuite.scala @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.jdbc.mysql + +import org.apache.kyuubi.operation.HiveJDBCTestHelper + +class SessionSuite extends WithMySQLEngine with HiveJDBCTestHelper { + + test("test session") { + withJdbcStatement() { statement => + val resultSet = statement.executeQuery( + "select '1' as id") + val metadata = resultSet.getMetaData + for (i <- 1 to metadata.getColumnCount) { + assert(metadata.getColumnName(i) == "id") + } + while (resultSet.next()) { + val id = resultSet.getObject(1) + assert(id == "1") + } + } + } + + override protected def jdbcUrl: String = jdbcConnectionUrl +} diff --git a/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/mysql/StatementSuite.scala b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/mysql/StatementSuite.scala new file mode 100644 index 00000000000..56ae737fc80 --- /dev/null +++ b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/mysql/StatementSuite.scala @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.jdbc.mysql + +import java.sql.{Date, Timestamp} + +import org.apache.kyuubi.operation.HiveJDBCTestHelper + +class StatementSuite extends WithMySQLEngine with HiveJDBCTestHelper { + + test("test select") { + withJdbcStatement("test1") { statement => + statement.execute("create database if not exists db1") + statement.execute("use db1") + statement.execute("create table db1.test1(id bigint, name varchar(255), age int, " + + "PRIMARY KEY ( `id` ))" + + "ENGINE=InnoDB " + + "DEFAULT CHARSET=utf8;") + statement.execute("insert into db1.test1 values(1, 'a', 11)") + + val resultSet1 = statement.executeQuery("select * from db1.test1") + while (resultSet1.next()) { + val id = resultSet1.getObject(1) + assert(id == 1) + val name = resultSet1.getObject(2) + assert(name == "a") + val age = resultSet1.getObject(3) + assert(age == 11) + } + } + } + + test("test types") { + withJdbcStatement("test1") { statement => + statement.execute("create database if not exists db1") + statement.execute("use db1") + statement.execute("create table db1.type_test(" + + "id bigint, " + + "tiny_col tinyint, smallint_col smallint, " + + "int_col int, bigint_col bigint, " + + "decimal_col decimal(27, 9)," + + "date_col date, datetime_col datetime, timestamp_col timestamp," + + "char_col char, varchar_col varchar(255), " + + "boolean_col boolean, " + + "double_col double, float_col float," + + "PRIMARY KEY ( `id` )) " + + "ENGINE=InnoDB " + + "DEFAULT CHARSET=utf8") + statement.execute("insert into db1.type_test" + + "(id, " + + "tiny_col, smallint_col, int_col, bigint_col, " + + "decimal_col, " + + "date_col, datetime_col, timestamp_col," + + "char_col, varchar_col, " + + "boolean_col, " + + "double_col, float_col) " + + "VALUES (1, 2, 3, 4, 5, 6.6, '2023-10-23', '2023-10-23 15:31:45', " + + "'2023-10-23 15:31:45', 'a', 'Hello', true, 7.7, 8.8)") + + val resultSet1 = statement.executeQuery("select * from db1.type_test") + while (resultSet1.next()) { + assert(resultSet1.getObject(1) == 1) + assert(resultSet1.getObject(2) == 2) + assert(resultSet1.getObject(3) == 3) + assert(resultSet1.getObject(4) == 4) + assert(resultSet1.getObject(5) == 5) + assert(resultSet1.getObject(6) == new java.math.BigDecimal("6.600000000")) + assert(resultSet1.getObject(7) == Date.valueOf("2023-10-23")) + assert(resultSet1.getObject(8) == Timestamp.valueOf("2023-10-23 15:31:45")) + assert(resultSet1.getObject(9) == Timestamp.valueOf("2023-10-23 15:31:45")) + assert(resultSet1.getObject(10) == "a") + assert(resultSet1.getObject(11) == "Hello") + assert(resultSet1.getObject(12) == true) + assert(resultSet1.getObject(13) == 7.7) + assert(resultSet1.getObject(14) == 8.8) + } + } + } + + override protected def jdbcUrl: String = jdbcConnectionUrl +} diff --git a/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/mysql/WithMySQLEngine.scala b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/mysql/WithMySQLEngine.scala new file mode 100644 index 00000000000..39d2e0a59ec --- /dev/null +++ b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/mysql/WithMySQLEngine.scala @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.jdbc.mysql + +import com.dimafeng.testcontainers.MySQLContainer +import com.dimafeng.testcontainers.scalatest.TestContainerForAll +import org.testcontainers.utility.DockerImageName + +import org.apache.kyuubi.config.KyuubiConf._ +import org.apache.kyuubi.engine.jdbc.WithJdbcEngine + +trait WithMySQLEngine extends WithJdbcEngine with TestContainerForAll { + + private val mysqlDockerImage = "mysql:8.0.32" + + override val containerDef: MySQLContainer.Def = MySQLContainer.Def( + dockerImageName = DockerImageName.parse(mysqlDockerImage), + username = "root", + password = "kyuubi") + + override def withKyuubiConf: Map[String, String] = withContainers { mysqlContainer => + Map( + ENGINE_SHARE_LEVEL.key -> "SERVER", + ENGINE_JDBC_CONNECTION_URL.key -> mysqlContainer.jdbcUrl, + ENGINE_JDBC_CONNECTION_USER.key -> mysqlContainer.username, + ENGINE_JDBC_CONNECTION_PASSWORD.key -> mysqlContainer.password, + ENGINE_TYPE.key -> "jdbc", + ENGINE_JDBC_SHORT_NAME.key -> "mysql", + ENGINE_JDBC_DRIVER_CLASS.key -> "com.mysql.cj.jdbc.Driver") + } +} diff --git a/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/phoenix/OperationWithPhoenixEngineSuite.scala b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/phoenix/OperationWithPhoenixEngineSuite.scala index 812efe3ee54..0aacc31549d 100644 --- a/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/phoenix/OperationWithPhoenixEngineSuite.scala +++ b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/phoenix/OperationWithPhoenixEngineSuite.scala @@ -16,17 +16,16 @@ */ package org.apache.kyuubi.engine.jdbc.phoenix -import org.apache.hive.service.rpc.thrift.{TGetInfoReq, TGetInfoType} - import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.engine.jdbc.connection.ConnectionProvider import org.apache.kyuubi.operation.HiveJDBCTestHelper +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TGetInfoReq, TGetInfoType} class OperationWithPhoenixEngineSuite extends PhoenixOperationSuite with HiveJDBCTestHelper { override protected def jdbcUrl: String = jdbcConnectionUrl - test("Test for Jdbc engine getInfo") { + test("phoenix - test for Jdbc engine getInfo") { val metaData = ConnectionProvider.create(kyuubiConf).getMetaData withSessionConf(Map(KyuubiConf.SERVER_INFO_PROVIDER.key -> "ENGINE"))()() { diff --git a/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/phoenix/SessionSuite.scala b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/phoenix/SessionSuite.scala index e61d0916e5a..5072741ae27 100644 --- a/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/phoenix/SessionSuite.scala +++ b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/phoenix/SessionSuite.scala @@ -20,7 +20,7 @@ import org.apache.kyuubi.operation.HiveJDBCTestHelper class SessionSuite extends WithPhoenixEngine with HiveJDBCTestHelper { - test("test session") { + test("phoenix - test session") { withJdbcStatement() { statement => val resultSet = statement.executeQuery( "select '1' as id") diff --git a/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/phoenix/StatementSuite.scala b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/phoenix/StatementSuite.scala index d7e7ebb9b64..21fa1202200 100644 --- a/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/phoenix/StatementSuite.scala +++ b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/phoenix/StatementSuite.scala @@ -22,7 +22,7 @@ import org.apache.kyuubi.operation.HiveJDBCTestHelper class StatementSuite extends WithPhoenixEngine with HiveJDBCTestHelper { - test("test select") { + test("phoenix - test select") { withJdbcStatement("test1") { statement => statement.execute("create table db1.test1(id bigint primary key, " + "name varchar(255), age integer)") diff --git a/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/phoenix/WithPhoenixContainer.scala b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/phoenix/WithPhoenixContainer.scala index 49b4369bc46..614261e840f 100644 --- a/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/phoenix/WithPhoenixContainer.scala +++ b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/phoenix/WithPhoenixContainer.scala @@ -16,7 +16,7 @@ */ package org.apache.kyuubi.engine.jdbc.phoenix -import com.dimafeng.testcontainers.{GenericContainer, SingleContainer} +import com.dimafeng.testcontainers.GenericContainer import org.testcontainers.containers.wait.strategy.Wait import org.apache.kyuubi.engine.jdbc.WithJdbcServerContainer @@ -27,21 +27,15 @@ trait WithPhoenixContainer extends WithJdbcServerContainer { private val phoenixDockerImage = "iteblog/hbase-phoenix-docker:1.0" - override val container: SingleContainer[_] = GenericContainer( + override val containerDef: GenericContainer.Def[GenericContainer] = GenericContainer.Def( dockerImage = phoenixDockerImage, exposedPorts = Seq(PHOENIX_PORT), waitStrategy = Wait.forListeningPort) - protected def queryServerUrl: String = { + protected def queryServerUrl: String = withContainers { container => val queryServerHost: String = container.host val queryServerPort: Int = container.mappedPort(PHOENIX_PORT) val url = s"$queryServerHost:$queryServerPort" url } - - override def afterAll(): Unit = { - super.afterAll() - container.close() - } - } diff --git a/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/postgresql/OperationWithPostgreSQLEngineSuite.scala b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/postgresql/OperationWithPostgreSQLEngineSuite.scala new file mode 100644 index 00000000000..67e191297aa --- /dev/null +++ b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/postgresql/OperationWithPostgreSQLEngineSuite.scala @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.jdbc.postgresql + +import org.apache.kyuubi.config.KyuubiConf +import org.apache.kyuubi.engine.jdbc.connection.ConnectionProvider +import org.apache.kyuubi.operation.HiveJDBCTestHelper +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TGetInfoReq, TGetInfoType} + +class OperationWithPostgreSQLEngineSuite extends PostgreSQLOperationSuite with HiveJDBCTestHelper { + + override protected def jdbcUrl: String = jdbcConnectionUrl + + test("postgreSQL - test for Jdbc engine getInfo") { + val metaData = ConnectionProvider.create(kyuubiConf).getMetaData + + withSessionConf(Map(KyuubiConf.SERVER_INFO_PROVIDER.key -> "ENGINE"))()() { + withSessionHandle { (client, handle) => + val req = new TGetInfoReq() + req.setSessionHandle(handle) + req.setInfoType(TGetInfoType.CLI_DBMS_NAME) + assert(client.GetInfo(req).getInfoValue.getStringValue == metaData.getDatabaseProductName) + + val req2 = new TGetInfoReq() + req2.setSessionHandle(handle) + req2.setInfoType(TGetInfoType.CLI_DBMS_VER) + assert( + client.GetInfo(req2).getInfoValue.getStringValue == metaData.getDatabaseProductVersion) + + val req3 = new TGetInfoReq() + req3.setSessionHandle(handle) + req3.setInfoType(TGetInfoType.CLI_MAX_COLUMN_NAME_LEN) + assert(client.GetInfo(req3).getInfoValue.getLenValue == metaData.getMaxColumnNameLength) + + val req4 = new TGetInfoReq() + req4.setSessionHandle(handle) + req4.setInfoType(TGetInfoType.CLI_MAX_SCHEMA_NAME_LEN) + assert(client.GetInfo(req4).getInfoValue.getLenValue == metaData.getMaxSchemaNameLength) + + val req5 = new TGetInfoReq() + req5.setSessionHandle(handle) + req5.setInfoType(TGetInfoType.CLI_MAX_TABLE_NAME_LEN) + assert(client.GetInfo(req5).getInfoValue.getLenValue == metaData.getMaxTableNameLength) + } + } + } +} diff --git a/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/postgresql/PostgreSQLOperationSuite.scala b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/postgresql/PostgreSQLOperationSuite.scala new file mode 100644 index 00000000000..06a76a9a887 --- /dev/null +++ b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/postgresql/PostgreSQLOperationSuite.scala @@ -0,0 +1,215 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.jdbc.postgresql + +import java.sql.ResultSet + +import scala.collection.mutable.ArrayBuffer + +import org.apache.kyuubi.operation.HiveJDBCTestHelper +import org.apache.kyuubi.operation.meta.ResultSetSchemaConstant._ + +abstract class PostgreSQLOperationSuite extends WithPostgreSQLEngine with HiveJDBCTestHelper { + test("postgreSQL - get catalog") { + case class Catalog(catalog: String) + + withJdbcStatement() { statement => + val meta = statement.getConnection.getMetaData + val resultBuffer = ArrayBuffer[Catalog]() + + val catalogs = meta.getCatalogs + while (catalogs.next()) { + resultBuffer += + Catalog(catalogs.getString("catalog_name")) + } + assert(resultBuffer.contains(Catalog("postgres"))) + resultBuffer.clear() + + } + } + + test("postgreSQL - get schemas") { + case class Schema(catalog: String, schema: String) + + withJdbcStatement() { statement => + val meta = statement.getConnection.getMetaData + val resultBuffer = ArrayBuffer[Schema]() + + val schemas = meta.getSchemas + while (schemas.next()) { + resultBuffer += + Schema(schemas.getString("catalog_name"), schemas.getString("schema_name")) + } + assert(resultBuffer.contains(Schema("postgres", "information_schema"))) + resultBuffer.clear() + } + } + + test("postgreSQL - get tables") { + case class Table(catalog: String, schema: String, tableName: String, tableType: String) + + withJdbcStatement() { statement => + val meta = statement.getConnection.getMetaData + val resultBuffer = ArrayBuffer[Table]() + + var tables = meta.getTables(null, null, null, null) + while (tables.next()) { + resultBuffer += + Table( + null, + null, + tables.getString(TABLE_NAME), + tables.getString(TABLE_TYPE)) + } + assert(resultBuffer.contains(Table(null, null, "pg_statistic", "BASE TABLE"))) + assert(resultBuffer.contains(Table(null, null, "pg_roles", "VIEW"))) + resultBuffer.clear() + + statement.execute("create table public.test1(id bigint primary key)") + statement.execute("create table public.test2(id bigint primary key)") + + tables = meta.getTables(null, null, "test1", Array("BASE TABLE")) + while (tables.next()) { + val table = Table( + null, + null, + tables.getString(TABLE_NAME), + tables.getString(TABLE_TYPE)) + assert(table == Table(null, null, "test1", "BASE TABLE")) + } + + tables = meta.getTables(null, null, "test2", null) + while (tables.next()) { + resultBuffer += Table( + null, + null, + tables.getString(TABLE_NAME), + tables.getString(TABLE_TYPE)) + } + assert(resultBuffer.contains(Table(null, null, "test2", "BASE TABLE"))) + resultBuffer.clear() + + tables = meta.getTables(null, null, null, Array("BASE TABLE")) + while (tables.next()) { + resultBuffer += Table( + null, + null, + tables.getString(TABLE_NAME), + tables.getString(TABLE_TYPE)) + } + assert(resultBuffer.contains(Table(null, null, "test1", "BASE TABLE"))) + assert(resultBuffer.contains(Table(null, null, "test2", "BASE TABLE"))) + resultBuffer.clear() + + tables = meta.getTables(null, null, null, Array("BASE TABLE", "VIEW")) + while (tables.next()) { + resultBuffer += Table( + null, + null, + tables.getString(TABLE_NAME), + tables.getString(TABLE_TYPE)) + } + assert(resultBuffer.contains(Table(null, null, "test1", "BASE TABLE"))) + assert(resultBuffer.contains(Table(null, null, "test2", "BASE TABLE"))) + assert(resultBuffer.contains(Table(null, null, "pg_shadow", "VIEW"))) + assert(resultBuffer.contains(Table(null, null, "pg_roles", "VIEW"))) + resultBuffer.clear() + + statement.execute("drop table public.test1") + statement.execute("drop table public.test2") + } + } + + test("postgreSQL - get columns") { + case class Column(tableName: String, columnName: String) + + def buildColumn(resultSet: ResultSet): Column = { + val tableName = resultSet.getString(TABLE_NAME) + val columnName = resultSet.getString(COLUMN_NAME) + val column = Column(tableName, columnName) + column + } + + withJdbcStatement() { statement => + val metadata = statement.getConnection.getMetaData + statement.execute("create table if not exists public.test1" + + "(id bigint primary key, str1 varchar, str2 varchar, age integer)") + + statement.execute("create table if not exists public.test2" + + "(id bigint primary key, str1 varchar, str2 varchar, age integer)") + + val resultBuffer = ArrayBuffer[Column]() + val resultSet1 = metadata.getColumns(null, null, null, null) + while (resultSet1.next()) { + val column = buildColumn(resultSet1) + resultBuffer += column + } + + assert(resultBuffer.contains(Column("test1", "id"))) + assert(resultBuffer.contains(Column("test1", "str1"))) + assert(resultBuffer.contains(Column("test1", "str2"))) + assert(resultBuffer.contains(Column("test1", "age"))) + + assert(resultBuffer.contains(Column("test2", "id"))) + assert(resultBuffer.contains(Column("test2", "str1"))) + assert(resultBuffer.contains(Column("test2", "str2"))) + assert(resultBuffer.contains(Column("test2", "age"))) + + resultBuffer.clear() + + val resultSet2 = metadata.getColumns(null, null, "test1", null) + while (resultSet2.next()) { + val column = buildColumn(resultSet2) + resultBuffer += column + } + + assert(resultBuffer.contains(Column("test1", "id"))) + assert(resultBuffer.contains(Column("test1", "str1"))) + assert(resultBuffer.contains(Column("test1", "str2"))) + assert(resultBuffer.contains(Column("test1", "age"))) + + resultBuffer.clear() + + val resultSet3 = metadata.getColumns(null, null, null, "age") + while (resultSet3.next()) { + val column = buildColumn(resultSet3) + resultBuffer += column + } + + assert(resultBuffer.contains(Column("test1", "age"))) + assert(resultBuffer.contains(Column("test2", "age"))) + + resultBuffer.clear() + + val resultSet4 = metadata.getColumns(null, null, "t%1", "str%") + while (resultSet4.next()) { + val column = buildColumn(resultSet4) + resultBuffer += column + } + + assert(resultBuffer.contains(Column("test1", "str1"))) + + resultBuffer.clear() + + val resultSet5 = metadata.getColumns(null, null, "t%1", "fake") + assert(!resultSet5.next()) + + statement.execute("drop table public.test1") + statement.execute("drop table public.test2") + } + } +} diff --git a/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/postgresql/SessionSuite.scala b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/postgresql/SessionSuite.scala new file mode 100644 index 00000000000..d7fc4dc7b65 --- /dev/null +++ b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/postgresql/SessionSuite.scala @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.jdbc.postgresql + +import org.apache.kyuubi.operation.HiveJDBCTestHelper + +class SessionSuite extends WithPostgreSQLEngine with HiveJDBCTestHelper { + + test("postgreSQL - test session") { + withJdbcStatement() { statement => + val resultSet = statement.executeQuery( + "select '1' as id") + val metadata = resultSet.getMetaData + for (i <- 1 to metadata.getColumnCount) { + assert(metadata.getColumnName(i) == "id") + } + while (resultSet.next()) { + val id = resultSet.getObject(1) + assert(id == "1") + } + } + } + + override protected def jdbcUrl: String = jdbcConnectionUrl +} diff --git a/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/postgresql/StatementSuite.scala b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/postgresql/StatementSuite.scala new file mode 100644 index 00000000000..f2a8ecf6037 --- /dev/null +++ b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/postgresql/StatementSuite.scala @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.jdbc.postgresql + +import java.sql.{Date, Timestamp} + +import org.apache.kyuubi.operation.HiveJDBCTestHelper + +class StatementSuite extends WithPostgreSQLEngine with HiveJDBCTestHelper { + + test("postgreSQL - test select") { + withJdbcStatement("test1") { statement => + statement.execute("create table public.test1(id bigint primary key, " + + "name varchar(255), age integer)") + statement.execute("insert into public.test1 values(1, 'a', 11)") + + val resultSet1 = statement.executeQuery("select * from public.test1") + while (resultSet1.next()) { + val id = resultSet1.getObject(1) + assert(id == 1) + val name = resultSet1.getObject(2) + assert(name == "a") + val age = resultSet1.getObject(3) + assert(age == 11) + } + } + } + + test("postgreSQL - test types") { + withJdbcStatement("type_test") { statement => + statement.execute("create table public.type_test(" + + "id bigint primary key, " + + "smallint_col smallint, " + + "int_col integer, " + + "bigint_col bigint, " + + "date_col date, " + + "timestamp_col timestamp, " + + "char_col char(10), " + + "varchar_col varchar(255), " + + "boolean_col boolean, " + + "double_col double precision, " + + "float_col float)") + statement.execute("insert into public.type_test" + + "(id, " + + "smallint_col, " + + "int_col, " + + "bigint_col, " + + "date_col, " + + "timestamp_col, " + + "char_col, " + + "varchar_col, " + + "boolean_col, " + + "double_col, " + + "float_col) " + + "VALUES (1, " + + "2, " + + "3, " + + "4, " + + "'2022-05-08', " + + "'2022-05-08 17:47:45'," + + "'a', " + + "'Hello', " + + "true, " + + "8.8, " + + "9.9)") + + val resultSet1 = statement.executeQuery("select * from public.type_test") + while (resultSet1.next()) { + val id = resultSet1.getObject(1) + assert(resultSet1.getObject(1) == 1) + assert(resultSet1.getObject(2) == 2) + assert(resultSet1.getObject(3) == 3) + assert(resultSet1.getObject(4) == 4) + assert(resultSet1.getObject(5) == Date.valueOf("2022-05-08")) + assert(resultSet1.getObject(6) == Timestamp.valueOf("2022-05-08 17:47:45")) + assert(resultSet1.getString(7).trim == "a") + assert(resultSet1.getObject(8) == "Hello") + assert(resultSet1.getObject(9) == true) + assert(resultSet1.getObject(10) == 8.8) + assert(resultSet1.getObject(11) == 9.9) + } + } + } + + override protected def jdbcUrl: String = jdbcConnectionUrl +} diff --git a/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/postgresql/WithPostgreSQLContainer.scala b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/postgresql/WithPostgreSQLContainer.scala new file mode 100644 index 00000000000..32066946ae0 --- /dev/null +++ b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/postgresql/WithPostgreSQLContainer.scala @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.jdbc.postgresql + +import com.dimafeng.testcontainers.PostgreSQLContainer +import org.testcontainers.utility.DockerImageName + +import org.apache.kyuubi.engine.jdbc.WithJdbcServerContainer + +trait WithPostgreSQLContainer extends WithJdbcServerContainer { + + private val postgreSQLDockerImage = "postgres:16.1" + + override val containerDef: PostgreSQLContainer.Def = PostgreSQLContainer.Def( + dockerImageName = DockerImageName.parse(postgreSQLDockerImage), + databaseName = "postgres", + username = "kyuubi", + password = "postgres") +} diff --git a/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/postgresql/WithPostgreSQLEngine.scala b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/postgresql/WithPostgreSQLEngine.scala new file mode 100644 index 00000000000..6d453934e6f --- /dev/null +++ b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/postgresql/WithPostgreSQLEngine.scala @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.jdbc.postgresql + +import org.apache.kyuubi.config.KyuubiConf._ +import org.apache.kyuubi.engine.jdbc.WithJdbcEngine + +trait WithPostgreSQLEngine extends WithJdbcEngine with WithPostgreSQLContainer { + + override def withKyuubiConf: Map[String, String] = withContainers { container => + Map( + ENGINE_SHARE_LEVEL.key -> "SERVER", + ENGINE_JDBC_CONNECTION_URL.key -> container.jdbcUrl, + ENGINE_JDBC_CONNECTION_USER.key -> container.username, + ENGINE_JDBC_CONNECTION_PASSWORD.key -> container.password, + ENGINE_TYPE.key -> "jdbc", + ENGINE_JDBC_SHORT_NAME.key -> "postgresql", + ENGINE_JDBC_DRIVER_CLASS.key -> container.driverClassName) + } + +} diff --git a/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/starrocks/StarRocksOperationSuite.scala b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/starrocks/StarRocksOperationSuite.scala new file mode 100644 index 00000000000..575467143ff --- /dev/null +++ b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/starrocks/StarRocksOperationSuite.scala @@ -0,0 +1,261 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.jdbc.starrocks + +import java.sql.ResultSet + +import scala.collection.mutable.ArrayBuffer + +import org.apache.kyuubi.operation.HiveJDBCTestHelper +import org.apache.kyuubi.operation.meta.ResultSetSchemaConstant._ + +abstract class StarRocksOperationSuite extends WithStarRocksEngine with HiveJDBCTestHelper { + test("starrocks - get tables") { + case class Table(catalog: String, schema: String, tableName: String, tableType: String) + + withJdbcStatement() { statement => + val meta = statement.getConnection.getMetaData + val resultBuffer = ArrayBuffer[Table]() + + var tables = meta.getTables(null, null, null, null) + while (tables.next()) { + resultBuffer += + Table( + tables.getString(TABLE_CATALOG), + tables.getString(TABLE_SCHEMA), + tables.getString(TABLE_NAME), + tables.getString(TABLE_TYPE)) + } + assert(resultBuffer.contains(Table("def", "information_schema", "tables", "SYSTEM VIEW"))) + assert(resultBuffer.contains(Table("def", "information_schema", "views", "SYSTEM VIEW"))) + resultBuffer.clear() + + statement.execute("create database if not exists db1") + statement.execute("create table db1.test1(id bigint)" + + "ENGINE=OLAP DISTRIBUTED BY HASH(`id`) BUCKETS 32 " + + "PROPERTIES ('replication_num' = '1')") + statement.execute("create table db1.test2(id bigint)" + + "ENGINE=OLAP DISTRIBUTED BY HASH(`id`) BUCKETS 32 " + + "PROPERTIES ('replication_num' = '1')") + + statement.execute("create database if not exists db2") + statement.execute("create table db2.test1(id bigint)" + + "ENGINE=OLAP DISTRIBUTED BY HASH(`id`) BUCKETS 32 " + + "PROPERTIES ('replication_num' = '1')") + statement.execute("create table db2.test2(id bigint)" + + "ENGINE=OLAP DISTRIBUTED BY HASH(`id`) BUCKETS 32 " + + "PROPERTIES ('replication_num' = '1')") + + statement.execute("create view db1.view1 (k1) as select id from db1.test1") + + tables = meta.getTables(null, "db1", "test1", Array("BASE TABLE")) + while (tables.next()) { + val table = Table( + tables.getString(TABLE_CATALOG), + tables.getString(TABLE_SCHEMA), + tables.getString(TABLE_NAME), + tables.getString(TABLE_TYPE)) + assert(table == Table("def", "db1", "test1", "BASE TABLE")) + } + + tables = meta.getTables(null, "db1", null, null) + while (tables.next()) { + resultBuffer += Table( + tables.getString(TABLE_CATALOG), + tables.getString(TABLE_SCHEMA), + tables.getString(TABLE_NAME), + tables.getString(TABLE_TYPE)) + } + assert(resultBuffer.contains(Table("def", "db1", "test1", "BASE TABLE"))) + assert(resultBuffer.contains(Table("def", "db1", "test2", "BASE TABLE"))) + resultBuffer.clear() + + tables = meta.getTables(null, null, "test1", null) + while (tables.next()) { + resultBuffer += Table( + tables.getString(TABLE_CATALOG), + tables.getString(TABLE_SCHEMA), + tables.getString(TABLE_NAME), + tables.getString(TABLE_TYPE)) + } + assert(resultBuffer.contains(Table("def", "db1", "test1", "BASE TABLE"))) + assert(resultBuffer.contains(Table("def", "db2", "test1", "BASE TABLE"))) + resultBuffer.clear() + + tables = meta.getTables(null, "db%", "test1", null) + while (tables.next()) { + resultBuffer += Table( + tables.getString(TABLE_CATALOG), + tables.getString(TABLE_SCHEMA), + tables.getString(TABLE_NAME), + tables.getString(TABLE_TYPE)) + } + assert(resultBuffer.contains(Table("def", "db1", "test1", "BASE TABLE"))) + assert(resultBuffer.contains(Table("def", "db2", "test1", "BASE TABLE"))) + resultBuffer.clear() + + tables = meta.getTables(null, "db2", "test%", null) + while (tables.next()) { + resultBuffer += Table( + tables.getString(TABLE_CATALOG), + tables.getString(TABLE_SCHEMA), + tables.getString(TABLE_NAME), + tables.getString(TABLE_TYPE)) + } + assert(resultBuffer.contains(Table("def", "db2", "test1", "BASE TABLE"))) + assert(resultBuffer.contains(Table("def", "db2", "test2", "BASE TABLE"))) + resultBuffer.clear() + + tables = meta.getTables(null, "fake_db", "test1", null) + assert(!tables.next()) + + tables = meta.getTables(null, null, null, Array("VIEW")) + while (tables.next()) { + val table = Table( + tables.getString(TABLE_CATALOG), + tables.getString(TABLE_SCHEMA), + tables.getString(TABLE_NAME), + tables.getString(TABLE_TYPE)) + assert(table == Table("def", "db1", "view1", "VIEW")) + } + + tables = meta.getTables(null, null, null, Array("VIEW", "BASE TABLE")) + while (tables.next()) { + resultBuffer += Table( + tables.getString(TABLE_CATALOG), + tables.getString(TABLE_SCHEMA), + tables.getString(TABLE_NAME), + tables.getString(TABLE_TYPE)) + } + assert(resultBuffer.contains(Table("def", "db1", "test1", "BASE TABLE"))) + assert(resultBuffer.contains(Table("def", "db1", "test2", "BASE TABLE"))) + assert(resultBuffer.contains(Table("def", "db2", "test1", "BASE TABLE"))) + assert(resultBuffer.contains(Table("def", "db2", "test2", "BASE TABLE"))) + assert(resultBuffer.contains(Table("def", "db1", "view1", "VIEW"))) + resultBuffer.clear() + + statement.execute("drop view db1.view1") + statement.execute("drop table db1.test1") + statement.execute("drop table db1.test2") + statement.execute("drop table db2.test1") + statement.execute("drop table db2.test2") + statement.execute("drop database db1") + statement.execute("drop database db2") + } + } + + test("starrocks - get columns") { + case class Column(tableSchema: String, tableName: String, columnName: String) + + def buildColumn(resultSet: ResultSet): Column = { + val schema = resultSet.getString(TABLE_SCHEMA) + val tableName = resultSet.getString(TABLE_NAME) + val columnName = resultSet.getString(COLUMN_NAME) + val column = Column(schema, tableName, columnName) + column + } + + withJdbcStatement() { statement => + val metadata = statement.getConnection.getMetaData + statement.execute("create database if not exists db1") + statement.execute("create table if not exists db1.test1" + + "(id bigint, str1 string, str2 string, age int)" + + "ENGINE=OLAP DISTRIBUTED BY HASH(`id`) BUCKETS 32 " + + "PROPERTIES ('replication_num' = '1')") + statement.execute("create table if not exists db1.test2" + + "(id bigint, str1 string, str2 string, age int)" + + "ENGINE=OLAP DISTRIBUTED BY HASH(`id`) BUCKETS 32 " + + "PROPERTIES ('replication_num' = '1')") + + statement.execute("create database if not exists db2") + + statement.execute("create table if not exists db2.test1" + + "(id bigint, str1 string, str2 string, age int)" + + "ENGINE=OLAP DISTRIBUTED BY HASH(`id`) BUCKETS 32 " + + "PROPERTIES ('replication_num' = '1')") + + val resultBuffer = ArrayBuffer[Column]() + val resultSet1 = metadata.getColumns(null, "db1", null, null) + while (resultSet1.next()) { + val column = buildColumn(resultSet1) + resultBuffer += column + } + + assert(resultBuffer.contains(Column("db1", "test1", "id"))) + assert(resultBuffer.contains(Column("db1", "test1", "str1"))) + assert(resultBuffer.contains(Column("db1", "test1", "str2"))) + assert(resultBuffer.contains(Column("db1", "test1", "age"))) + + assert(resultBuffer.contains(Column("db1", "test2", "id"))) + assert(resultBuffer.contains(Column("db1", "test2", "str1"))) + assert(resultBuffer.contains(Column("db1", "test2", "str2"))) + assert(resultBuffer.contains(Column("db1", "test2", "age"))) + + resultBuffer.clear() + + val resultSet2 = metadata.getColumns(null, null, "test1", null) + while (resultSet2.next()) { + val column = buildColumn(resultSet2) + resultBuffer += column + } + + assert(resultBuffer.contains(Column("db1", "test1", "id"))) + assert(resultBuffer.contains(Column("db1", "test1", "str1"))) + assert(resultBuffer.contains(Column("db1", "test1", "str2"))) + assert(resultBuffer.contains(Column("db1", "test1", "age"))) + + assert(resultBuffer.contains(Column("db2", "test1", "id"))) + assert(resultBuffer.contains(Column("db2", "test1", "str1"))) + assert(resultBuffer.contains(Column("db2", "test1", "str2"))) + assert(resultBuffer.contains(Column("db2", "test1", "age"))) + + resultBuffer.clear() + + val resultSet3 = metadata.getColumns(null, null, null, "age") + while (resultSet3.next()) { + val column = buildColumn(resultSet3) + resultBuffer += column + } + + assert(resultBuffer.contains(Column("db1", "test1", "age"))) + assert(resultBuffer.contains(Column("db1", "test2", "age"))) + assert(resultBuffer.contains(Column("db2", "test1", "age"))) + + resultBuffer.clear() + + val resultSet4 = metadata.getColumns(null, "d%1", "t%1", "str%") + while (resultSet4.next()) { + val column = buildColumn(resultSet4) + resultBuffer += column + } + + assert(resultBuffer.contains(Column("db1", "test1", "str1"))) + assert(resultBuffer.contains(Column("db1", "test1", "str2"))) + + resultBuffer.clear() + + val resultSet5 = metadata.getColumns(null, "d%1", "t%1", "fake") + assert(!resultSet5.next()) + + statement.execute("drop table db1.test1") + statement.execute("drop table db1.test2") + statement.execute("drop database db1") + statement.execute("drop table db2.test1") + statement.execute("drop database db2") + } + } +} diff --git a/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/starrocks/StarRocksOperationWithEngineSuite.scala b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/starrocks/StarRocksOperationWithEngineSuite.scala new file mode 100644 index 00000000000..acbc028f89d --- /dev/null +++ b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/starrocks/StarRocksOperationWithEngineSuite.scala @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.jdbc.starrocks + +import org.apache.kyuubi.config.KyuubiConf +import org.apache.kyuubi.engine.jdbc.connection.ConnectionProvider +import org.apache.kyuubi.operation.HiveJDBCTestHelper +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ + +class StarRocksOperationWithEngineSuite extends StarRocksOperationSuite with HiveJDBCTestHelper { + + override protected def jdbcUrl: String = jdbcConnectionUrl + + test("starrocks - test for Jdbc engine getInfo") { + val metaData = ConnectionProvider.create(kyuubiConf).getMetaData + + withSessionConf(Map(KyuubiConf.SERVER_INFO_PROVIDER.key -> "ENGINE"))()() { + withSessionHandle { (client, handle) => + val req = new TGetInfoReq() + req.setSessionHandle(handle) + req.setInfoType(TGetInfoType.CLI_DBMS_NAME) + assert(client.GetInfo(req).getInfoValue.getStringValue == metaData.getDatabaseProductName) + + val req2 = new TGetInfoReq() + req2.setSessionHandle(handle) + req2.setInfoType(TGetInfoType.CLI_DBMS_VER) + assert( + client.GetInfo(req2).getInfoValue.getStringValue == metaData.getDatabaseProductVersion) + + val req3 = new TGetInfoReq() + req3.setSessionHandle(handle) + req3.setInfoType(TGetInfoType.CLI_MAX_COLUMN_NAME_LEN) + assert(client.GetInfo(req3).getInfoValue.getLenValue == metaData.getMaxColumnNameLength) + + val req4 = new TGetInfoReq() + req4.setSessionHandle(handle) + req4.setInfoType(TGetInfoType.CLI_MAX_SCHEMA_NAME_LEN) + assert(client.GetInfo(req4).getInfoValue.getLenValue == metaData.getMaxSchemaNameLength) + + val req5 = new TGetInfoReq() + req5.setSessionHandle(handle) + req5.setInfoType(TGetInfoType.CLI_MAX_TABLE_NAME_LEN) + assert(client.GetInfo(req5).getInfoValue.getLenValue == metaData.getMaxTableNameLength) + } + } + } + + test("starrocks - JDBC ExecuteStatement operation should contain operationLog") { + withSessionHandle { (client, handle) => + val tExecuteStatementReq = new TExecuteStatementReq() + tExecuteStatementReq.setSessionHandle(handle) + tExecuteStatementReq.setStatement("SELECT 1") + val tExecuteStatementResp = client.ExecuteStatement(tExecuteStatementReq) + + val tFetchResultsReq = new TFetchResultsReq() + tFetchResultsReq.setOperationHandle(tExecuteStatementResp.getOperationHandle) + tFetchResultsReq.setFetchType(1) + tFetchResultsReq.setMaxRows(1) + + val tFetchResultsResp = client.FetchResults(tFetchResultsReq) + assert(tFetchResultsResp.getStatus.getStatusCode === TStatusCode.SUCCESS_STATUS) + } + } +} diff --git a/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/starrocks/StarRocksSessionSuite.scala b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/starrocks/StarRocksSessionSuite.scala new file mode 100644 index 00000000000..f1c11c96773 --- /dev/null +++ b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/starrocks/StarRocksSessionSuite.scala @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.jdbc.starrocks + +import org.apache.kyuubi.operation.HiveJDBCTestHelper + +class StarRocksSessionSuite extends WithStarRocksEngine with HiveJDBCTestHelper { + + test("starrocks - test session") { + withJdbcStatement() { statement => + val resultSet = statement.executeQuery( + "select '1' as id") + val metadata = resultSet.getMetaData + for (i <- 1 to metadata.getColumnCount) { + assert(metadata.getColumnName(i) == "id") + } + while (resultSet.next()) { + val id = resultSet.getObject(1) + assert(id == "1") + } + } + } + + override protected def jdbcUrl: String = jdbcConnectionUrl +} diff --git a/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/starrocks/StarRocksStatementSuite.scala b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/starrocks/StarRocksStatementSuite.scala new file mode 100644 index 00000000000..596701d7e59 --- /dev/null +++ b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/starrocks/StarRocksStatementSuite.scala @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.jdbc.starrocks + +import java.sql.{Date, Timestamp} + +import org.apache.kyuubi.operation.HiveJDBCTestHelper + +class StarRocksStatementSuite extends WithStarRocksEngine with HiveJDBCTestHelper { + + test("starrocks - test select") { + withJdbcStatement("test1") { statement => + statement.execute("create database if not exists db1") + statement.execute("use db1") + statement.execute( + """CREATE TABLE db1.test1(id bigint, name varchar(255), age int) + | ENGINE=OLAP + | DISTRIBUTED BY HASH(`id`) + | PROPERTIES ('replication_num' = '1', 'in_memory' = 'true') + |""".stripMargin) + statement.execute("insert into db1.test1 values(1, 'a', 11)") + + val resultSet1 = statement.executeQuery("select * from db1.test1") + while (resultSet1.next()) { + val id = resultSet1.getObject(1) + assert(id == 1) + val name = resultSet1.getObject(2) + assert(name == "a") + val age = resultSet1.getObject(3) + assert(age == 11) + } + } + } + + test("starrocks - test types") { + withJdbcStatement("test1") { statement => + statement.execute("create database if not exists db1") + statement.execute("use db1") + statement.execute( + """ CREATE TABLE db1.type_test( + | id bigint, + | tiny_col tinyint, + | smallint_col smallint, + | int_col int, + | bigint_col bigint, + | largeint_col largeint, + | decimal_col decimal(27, 9), + | date_col date, + | datetime_col datetime, + | char_col char, + | varchar_col varchar(255), + | string_col string, + | boolean_col boolean, + | double_col double, + | float_col float) + | ENGINE=OLAP + | DISTRIBUTED BY HASH(`id`) + | PROPERTIES ('replication_num' = '1', 'in_memory' = 'true') + |""".stripMargin) + statement.execute( + """ insert into db1.type_test + | (id, tiny_col, smallint_col, int_col, bigint_col, largeint_col, decimal_col, + | date_col, datetime_col, char_col, varchar_col, string_col, boolean_col, + | double_col, float_col) + | VALUES (1, 2, 3, 4, 5, 6, 7.7, + | '2022-05-08', '2022-05-08 17:47:45', 'a', 'Hello', 'Hello, Kyuubi', true, + | 8.8, 9.9) + |""".stripMargin) + val resultSet1 = statement.executeQuery("select * from db1.type_test") + while (resultSet1.next()) { + assert(resultSet1.getObject(1) == 1) + assert(resultSet1.getObject(2) == 2) + assert(resultSet1.getObject(3) == 3) + assert(resultSet1.getObject(4) == 4) + assert(resultSet1.getObject(5) == 5) + assert(resultSet1.getObject(6) == "6") + assert(resultSet1.getObject(7) == new java.math.BigDecimal("7.700000000")) + assert(resultSet1.getObject(8) == Date.valueOf("2022-05-08")) + assert(resultSet1.getObject(9) == Timestamp.valueOf("2022-05-08 17:47:45")) + assert(resultSet1.getObject(10) == "a") + assert(resultSet1.getObject(11) == "Hello") + assert(resultSet1.getObject(12) == "Hello, Kyuubi") + assert(resultSet1.getObject(13) == true) + assert(resultSet1.getObject(14) == 8.8) + assert(resultSet1.getObject(15) == 9.9) + } + } + } + + override protected def jdbcUrl: String = jdbcConnectionUrl +} diff --git a/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/starrocks/WithStarRocksContainer.scala b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/starrocks/WithStarRocksContainer.scala new file mode 100644 index 00000000000..9c229a636cb --- /dev/null +++ b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/starrocks/WithStarRocksContainer.scala @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.jdbc.starrocks + +import java.time.Duration + +import com.dimafeng.testcontainers.GenericContainer +import org.testcontainers.containers.wait.strategy.{Wait, WaitAllStrategy} +import org.testcontainers.containers.wait.strategy.Wait._ + +import org.apache.kyuubi.engine.jdbc.WithJdbcServerContainer + +trait WithStarRocksContainer extends WithJdbcServerContainer { + + private val starrocksDockerImage = "starrocks/allin1-ubuntu:3.1.6" + + private val STARROCKS_FE_MYSQL_PORT = 9030 + private val STARROCKS_FE_HTTP_PORT = 8030 + private val STARROCKS_BE_THRIFT_PORT = 9060 + private val STARROCKS_BE_HTTP_PORT = 8040 + private val STARROCKS_BE_HEARTBEAT_PORT = 9050 + private val ports = Seq( + STARROCKS_FE_MYSQL_PORT, + STARROCKS_FE_HTTP_PORT, + STARROCKS_BE_THRIFT_PORT, + STARROCKS_BE_HTTP_PORT, + STARROCKS_BE_HEARTBEAT_PORT) + + override val containerDef: GenericContainer.Def[GenericContainer] = GenericContainer.Def( + dockerImage = starrocksDockerImage, + exposedPorts = ports, + waitStrategy = new WaitAllStrategy().withStartupTimeout(Duration.ofMinutes(10)) + .withStrategy(Wait.forListeningPorts(ports: _*)) + .withStrategy(forLogMessage(".*broker service already added into FE service.*", 1)) + .withStrategy( + forLogMessage(".*Enjoy the journal to StarRocks blazing-fast lake-house engine.*", 1))) + + protected def feJdbcUrl: String = withContainers { container => + val queryServerHost: String = container.host + val queryServerPort: Int = container.mappedPort(STARROCKS_FE_MYSQL_PORT) + s"jdbc:mysql://$queryServerHost:$queryServerPort" + } +} diff --git a/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/starrocks/WithStarRocksEngine.scala b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/starrocks/WithStarRocksEngine.scala new file mode 100644 index 00000000000..6423186c050 --- /dev/null +++ b/externals/kyuubi-jdbc-engine/src/test/scala/org/apache/kyuubi/engine/jdbc/starrocks/WithStarRocksEngine.scala @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.engine.jdbc.starrocks + +import org.apache.kyuubi.config.KyuubiConf._ +import org.apache.kyuubi.engine.jdbc.WithJdbcEngine +import org.apache.kyuubi.engine.jdbc.mysql.MySQL8ConnectionProvider + +trait WithStarRocksEngine extends WithJdbcEngine with WithStarRocksContainer { + + private val user = "root" + private val password = "" + + override def withKyuubiConf: Map[String, String] = Map( + ENGINE_SHARE_LEVEL.key -> "SERVER", + ENGINE_JDBC_CONNECTION_URL.key -> feJdbcUrl, + ENGINE_JDBC_CONNECTION_USER.key -> user, + ENGINE_JDBC_CONNECTION_PASSWORD.key -> password, + ENGINE_TYPE.key -> "jdbc", + ENGINE_JDBC_SHORT_NAME.key -> "starrocks", + ENGINE_JDBC_DRIVER_CLASS.key -> MySQL8ConnectionProvider.driverClass) +} diff --git a/externals/kyuubi-spark-sql-engine/pom.xml b/externals/kyuubi-spark-sql-engine/pom.xml index 555e41a44b6..4317b2ede37 100644 --- a/externals/kyuubi-spark-sql-engine/pom.xml +++ b/externals/kyuubi-spark-sql-engine/pom.xml @@ -238,9 +238,7 @@ io.perfmark:perfmark-api io.vertx:* net.jodah:failsafe - org.apache.hive:hive-service-rpc org.apache.kyuubi:* - org.apache.thrift:* org.checkerframework:checker-qual org.codehaus.mojo:animal-sniffer-annotations @@ -265,27 +263,6 @@ - - org.apache.hive.service.rpc.thrift - ${kyuubi.shade.packageName}.org.apache.hive.service.rpc.thrift - - org.apache.hive.service.rpc.thrift.** - - - - com.facebook.fb303 - ${kyuubi.shade.packageName}.com.facebook.fb303 - - com.facebook.fb303.** - - - - org.apache.thrift - ${kyuubi.shade.packageName}.org.apache.thrift - - org.apache.thrift.** - - io.etcd ${kyuubi.shade.packageName}.io.etcd diff --git a/externals/kyuubi-spark-sql-engine/src/main/resources/python/execute_python.py b/externals/kyuubi-spark-sql-engine/src/main/resources/python/execute_python.py index e6fe7f92bcf..6729092f75d 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/resources/python/execute_python.py +++ b/externals/kyuubi-spark-sql-engine/src/main/resources/python/execute_python.py @@ -16,6 +16,8 @@ # import ast +import datetime +import decimal import io import json @@ -23,6 +25,7 @@ import re import sys import traceback +import base64 from glob import glob if sys.version_info[0] < 3: @@ -70,6 +73,8 @@ global_dict = {} +MAGIC_ENABLED = os.environ.get("MAGIC_ENABLED") == "true" + class NormalNode(object): def __init__(self, code): @@ -94,6 +99,36 @@ def execute(self): raise ExecutionError(sys.exc_info()) +class UnknownMagic(Exception): + pass + + +class MagicNode(object): + def __init__(self, line): + parts = line[1:].split(" ", 1) + if len(parts) == 1: + self.magic, self.rest = parts[0], () + else: + self.magic, self.rest = parts[0], (parts[1],) + + def execute(self): + if not self.magic: + raise UnknownMagic("magic command not specified") + + try: + handler = magic_router[self.magic] + except KeyError: + raise UnknownMagic("unknown magic command '%s'" % self.magic) + + try: + return handler(*self.rest) + except ExecutionError as e: + raise e + except Exception: + exc_type, exc_value, tb = sys.exc_info() + raise ExecutionError((exc_type, exc_value, None)) + + class ExecutionError(Exception): def __init__(self, exc_info): self.exc_info = exc_info @@ -118,6 +153,14 @@ def parse_code_into_nodes(code): try: nodes.append(NormalNode(code)) except SyntaxError: + # It's possible we hit a syntax error because of a magic command. Split the code groups + # of 'normal code', and code that starts with a '%'. possibly magic code lines, and see + # if any of the lines. Remove lines until we find a node that parses, then check if the + # next line is a magic line. + + # Split the code into chunks of normal code, and possibly magic code, which starts with + # a '%'. + normal = [] chunks = [] for i, line in enumerate(code.rstrip().split("\n")): @@ -135,24 +178,22 @@ def parse_code_into_nodes(code): # Convert the chunks into AST nodes. Let exceptions propagate. for chunk in chunks: - # TODO: look back here when Jupyter and sparkmagic are supported - # if chunk.startswith('%'): - # nodes.append(MagicNode(chunk)) - - nodes.append(NormalNode(chunk)) + if MAGIC_ENABLED and chunk.startswith("%"): + nodes.append(MagicNode(chunk)) + else: + nodes.append(NormalNode(chunk)) return nodes def execute_reply(status, content): - msg = { + return { "msg_type": "execute_reply", "content": dict( content, status=status, ), } - return json.dumps(msg) def execute_reply_ok(data): @@ -211,6 +252,9 @@ def execute_request(content): try: for node in nodes: result = node.execute() + except UnknownMagic: + exc_type, exc_value, tb = sys.exc_info() + return execute_reply_error(exc_type, exc_value, None) except ExecutionError as e: return execute_reply_error(*e.exc_info) @@ -239,6 +283,171 @@ def execute_request(content): return execute_reply_ok(result) +def magic_table_convert(value): + try: + converter = magic_table_types[type(value)] + except KeyError: + converter = magic_table_types[str] + + return converter(value) + + +def magic_table_convert_seq(items): + last_item_type = None + converted_items = [] + + for item in items: + item_type, item = magic_table_convert(item) + + if last_item_type is None: + last_item_type = item_type + elif last_item_type != item_type: + raise ValueError("value has inconsistent types") + + converted_items.append(item) + + return "ARRAY_TYPE", converted_items + + +def magic_table_convert_map(m): + last_key_type = None + last_value_type = None + converted_items = {} + + for key, value in m: + key_type, key = magic_table_convert(key) + value_type, value = magic_table_convert(value) + + if last_key_type is None: + last_key_type = key_type + elif last_value_type != value_type: + raise ValueError("value has inconsistent types") + + if last_value_type is None: + last_value_type = value_type + elif last_value_type != value_type: + raise ValueError("value has inconsistent types") + + converted_items[key] = value + + return "MAP_TYPE", converted_items + + +magic_table_types = { + type(None): lambda x: ("NULL_TYPE", x), + bool: lambda x: ("BOOLEAN_TYPE", x), + int: lambda x: ("INT_TYPE", x), + float: lambda x: ("DOUBLE_TYPE", x), + str: lambda x: ("STRING_TYPE", str(x)), + datetime.date: lambda x: ("DATE_TYPE", str(x)), + datetime.datetime: lambda x: ("TIMESTAMP_TYPE", str(x)), + decimal.Decimal: lambda x: ("DECIMAL_TYPE", str(x)), + tuple: magic_table_convert_seq, + list: magic_table_convert_seq, + dict: magic_table_convert_map, +} + + +def magic_table(name): + try: + value = global_dict[name] + except KeyError: + exc_type, exc_value, tb = sys.exc_info() + raise ExecutionError((exc_type, exc_value, None)) + + if not isinstance(value, (list, tuple)): + value = [value] + + headers = {} + data = [] + + for row in value: + cols = [] + data.append(cols) + + if "Row" == row.__class__.__name__: + row = row.asDict() + + if not isinstance(row, (list, tuple, dict)): + row = [row] + + if isinstance(row, (list, tuple)): + iterator = enumerate(row) + else: + iterator = sorted(row.items()) + + for name, col in iterator: + col_type, col = magic_table_convert(col) + + try: + header = headers[name] + except KeyError: + header = { + "name": str(name), + "type": col_type, + } + headers[name] = header + else: + # Reject columns that have a different type. (allow none value) + if col_type != "NULL_TYPE" and header["type"] != col_type: + if header["type"] == "NULL_TYPE": + header["type"] = col_type + else: + exc_type = Exception + exc_value = Exception("table rows have different types") + raise ExecutionError((exc_type, exc_value, None)) + + cols.append(col) + + headers = [v for k, v in sorted(headers.items())] + + return { + "application/vnd.livy.table.v1+json": { + "headers": headers, + "data": data, + } + } + + +def magic_json(name): + try: + value = global_dict[name] + except KeyError: + exc_type, exc_value, tb = sys.exc_info() + raise ExecutionError((exc_type, exc_value, None)) + + return { + "application/json": value, + } + + +def magic_matplot(name): + try: + value = global_dict[name] + fig = value.gcf() + imgdata = io.BytesIO() + fig.savefig(imgdata, format="png") + imgdata.seek(0) + encode = base64.b64encode(imgdata.getvalue()) + if sys.version >= "3": + encode = encode.decode() + + except: + exc_type, exc_value, tb = sys.exc_info() + raise ExecutionError((exc_type, exc_value, None)) + + return { + "image/png": encode, + } + + +magic_router = { + "table": magic_table, + "json": magic_json, + "matplot": magic_matplot, +} + + # get or create spark session spark_session = kyuubi_util.get_spark_session( os.environ.get("KYUUBI_SPARK_SESSION_UUID") @@ -278,6 +487,22 @@ def main(): break result = execute_request(content) + + try: + result = json.dumps(result) + except ValueError: + result = json.dumps( + { + "msg_type": "inspect_reply", + "content": { + "status": "error", + "ename": "ValueError", + "evalue": "cannot json-ify %s" % response, + "traceback": [], + }, + } + ) + print(result, file=sys_stdout) sys_stdout.flush() clearOutputs() diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/KyuubiSparkUtil.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/KyuubiSparkUtil.scala index b9fb9325999..2e33d8ce6db 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/KyuubiSparkUtil.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/KyuubiSparkUtil.scala @@ -26,6 +26,7 @@ import org.apache.spark.sql.SparkSession import org.apache.spark.util.kvstore.KVIndex import org.apache.kyuubi.Logging +import org.apache.kyuubi.config.ConfigEntry import org.apache.kyuubi.util.SemanticVersion object KyuubiSparkUtil extends Logging { @@ -98,4 +99,18 @@ object KyuubiSparkUtil extends Logging { // Given that we are on the Spark SQL engine side, the [[org.apache.spark.SPARK_VERSION]] can be // represented as the runtime version of the Spark SQL engine. lazy val SPARK_ENGINE_RUNTIME_VERSION: SemanticVersion = SemanticVersion(SPARK_VERSION) + + /** + * Get session level config value + * @param configEntry configEntry + * @param spark sparkSession + * @tparam T any type + * @return session level config value, if spark not set this config, + * default return kyuubi's config + */ + def getSessionConf[T](configEntry: ConfigEntry[T], spark: SparkSession): T = { + spark.conf.getOption(configEntry.key).map(configEntry.valueConverter).getOrElse { + SparkSQLEngine.kyuubiConf.get(configEntry) + } + } } diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/SparkSQLEngine.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/SparkSQLEngine.scala index ba84e1b1b3a..3dc771e6ccf 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/SparkSQLEngine.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/SparkSQLEngine.scala @@ -26,6 +26,7 @@ import scala.concurrent.duration.Duration import scala.util.control.NonFatal import com.google.common.annotations.VisibleForTesting +import org.apache.hadoop.fs.Path import org.apache.spark.{ui, SparkConf} import org.apache.spark.kyuubi.{SparkContextHelper, SparkSQLEngineEventListener, SparkSQLEngineListener} import org.apache.spark.kyuubi.SparkUtilsHelper.getLocalDir @@ -37,6 +38,7 @@ import org.apache.kyuubi.config.{KyuubiConf, KyuubiReservedKeys} import org.apache.kyuubi.config.KyuubiConf._ import org.apache.kyuubi.config.KyuubiReservedKeys.{KYUUBI_ENGINE_SUBMIT_TIME_KEY, KYUUBI_ENGINE_URL} import org.apache.kyuubi.engine.ShareLevel +import org.apache.kyuubi.engine.spark.KyuubiSparkUtil.engineId import org.apache.kyuubi.engine.spark.SparkSQLEngine.{countDownLatch, currentEngine} import org.apache.kyuubi.engine.spark.events.{EngineEvent, EngineEventsStore, SparkEventHandlerRegister} import org.apache.kyuubi.engine.spark.session.SparkSessionImpl @@ -46,6 +48,7 @@ import org.apache.kyuubi.ha.client.RetryPolicies import org.apache.kyuubi.service.Serverable import org.apache.kyuubi.session.SessionHandle import org.apache.kyuubi.util.{SignalRegister, ThreadUtils} +import org.apache.kyuubi.util.ThreadUtils.scheduleTolerableRunnableWithFixedDelay case class SparkSQLEngine(spark: SparkSession) extends Serverable("SparkSQLEngine") { @@ -57,6 +60,7 @@ case class SparkSQLEngine(spark: SparkSession) extends Serverable("SparkSQLEngin @volatile private var lifetimeTerminatingChecker: Option[ScheduledExecutorService] = None @volatile private var stopEngineExec: Option[ThreadPoolExecutor] = None + @volatile private var engineSavePath: Option[String] = None override def initialize(conf: KyuubiConf): Unit = { val listener = new SparkSQLEngineListener(this) @@ -86,6 +90,15 @@ case class SparkSQLEngine(spark: SparkSession) extends Serverable("SparkSQLEngin maxInitTimeout > 0) { startFastFailChecker(maxInitTimeout) } + + if (backendService.sessionManager.getConf.get(OPERATION_RESULT_SAVE_TO_FILE)) { + val savePath = backendService.sessionManager.getConf.get(OPERATION_RESULT_SAVE_TO_FILE_DIR) + engineSavePath = Some(s"$savePath/$engineId") + val path = new Path(engineSavePath.get) + val fs = path.getFileSystem(spark.sparkContext.hadoopConfiguration) + fs.mkdirs(path) + fs.deleteOnExit(path) + } } override def stop(): Unit = if (shutdown.compareAndSet(false, true)) { @@ -101,6 +114,10 @@ case class SparkSQLEngine(spark: SparkSession) extends Serverable("SparkSQLEngin exec, Duration(60, TimeUnit.SECONDS)) }) + engineSavePath.foreach { p => + val path = new Path(p) + path.getFileSystem(spark.sparkContext.hadoopConfiguration).delete(path, true) + } } def gracefulStop(): Unit = if (gracefulStopDeregistered.compareAndSet(false, true)) { @@ -167,7 +184,8 @@ case class SparkSQLEngine(spark: SparkSession) extends Serverable("SparkSQLEngin } lifetimeTerminatingChecker = Some(ThreadUtils.newDaemonSingleThreadScheduledExecutor("spark-engine-lifetime-checker")) - lifetimeTerminatingChecker.get.scheduleWithFixedDelay( + scheduleTolerableRunnableWithFixedDelay( + lifetimeTerminatingChecker.get, checkTask, interval, interval, @@ -288,7 +306,8 @@ object SparkSQLEngine extends Logging { KyuubiSparkUtil.initializeSparkSession( session, - kyuubiConf.get(ENGINE_INITIALIZE_SQL) ++ kyuubiConf.get(ENGINE_SESSION_INITIALIZE_SQL)) + kyuubiConf.get(ENGINE_SPARK_INITIALIZE_SQL) ++ kyuubiConf.get( + ENGINE_SESSION_SPARK_INITIALIZE_SQL)) session.sparkContext.setLocalProperty(KYUUBI_ENGINE_URL, KyuubiSparkUtil.engineUrl) session } @@ -361,7 +380,8 @@ object SparkSQLEngine extends Logging { // blocking main thread countDownLatch.await() } catch { - case e: KyuubiException => currentEngine match { + case e: KyuubiException => + currentEngine match { case Some(engine) => engine.stop() val event = EngineEvent(engine) @@ -370,16 +390,21 @@ object SparkSQLEngine extends Logging { error(event, e) case _ => error("Current SparkSQLEngine is not created.") } + throw e } } catch { case i: InterruptedException if !sparkSessionCreated.get => - error( + val msg = s"The Engine main thread was interrupted, possibly due to `createSpark` timeout." + s" The `${ENGINE_INIT_TIMEOUT.key}` is ($initTimeout ms) " + - s" and submitted at $submitTime.", - i) - case t: Throwable => error(s"Failed to instantiate SparkSession: ${t.getMessage}", t) + s" and submitted at $submitTime." + error(msg, i) + throw new InterruptedException(msg) + case e: KyuubiException => throw e + case t: Throwable => + error(s"Failed to instantiate SparkSession: ${t.getMessage}", t) + throw t } finally { if (spark != null) { spark.stop() diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/SparkTBinaryFrontendService.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/SparkTBinaryFrontendService.scala index c2563b32bce..7ca2e8fbed2 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/SparkTBinaryFrontendService.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/SparkTBinaryFrontendService.scala @@ -23,7 +23,6 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.io.Text import org.apache.hadoop.security.{Credentials, UserGroupInformation} import org.apache.hadoop.security.token.{Token, TokenIdentifier} -import org.apache.hive.service.rpc.thrift.{TOpenSessionReq, TOpenSessionResp, TRenewDelegationTokenReq, TRenewDelegationTokenResp} import org.apache.spark.SparkContext import org.apache.spark.kyuubi.SparkContextHelper @@ -33,6 +32,7 @@ import org.apache.kyuubi.config.KyuubiReservedKeys._ import org.apache.kyuubi.ha.client.{EngineServiceDiscovery, ServiceDiscovery} import org.apache.kyuubi.service.{Serverable, Service, TBinaryFrontendService} import org.apache.kyuubi.service.TFrontendService._ +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TOpenSessionReq, TOpenSessionResp, TRenewDelegationTokenReq, TRenewDelegationTokenResp} import org.apache.kyuubi.util.KyuubiHadoopUtils import org.apache.kyuubi.util.reflect.DynConstructors diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/ExecutePython.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/ExecutePython.scala index badd835301a..f60b1d4c899 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/ExecutePython.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/ExecutePython.scala @@ -28,8 +28,6 @@ import javax.ws.rs.core.UriBuilder import scala.collection.JavaConverters._ -import com.fasterxml.jackson.databind.ObjectMapper -import com.fasterxml.jackson.module.scala.DefaultScalaModule import org.apache.commons.lang3.StringUtils import org.apache.spark.SparkFiles import org.apache.spark.api.python.KyuubiPythonGatewayServer @@ -37,9 +35,11 @@ import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.types.StructType import org.apache.kyuubi.{KyuubiSQLException, Logging, Utils} -import org.apache.kyuubi.config.KyuubiConf.{ENGINE_SPARK_PYTHON_ENV_ARCHIVE, ENGINE_SPARK_PYTHON_ENV_ARCHIVE_EXEC_PATH, ENGINE_SPARK_PYTHON_HOME_ARCHIVE} +import org.apache.kyuubi.config.KyuubiConf.{ENGINE_SPARK_PYTHON_ENV_ARCHIVE, ENGINE_SPARK_PYTHON_ENV_ARCHIVE_EXEC_PATH, ENGINE_SPARK_PYTHON_HOME_ARCHIVE, ENGINE_SPARK_PYTHON_MAGIC_ENABLED} +import org.apache.kyuubi.config.KyuubiConf.EngineSparkOutputMode.{AUTO, EngineSparkOutputMode, NOTEBOOK} import org.apache.kyuubi.config.KyuubiReservedKeys.{KYUUBI_SESSION_USER_KEY, KYUUBI_STATEMENT_ID_KEY} import org.apache.kyuubi.engine.spark.KyuubiSparkUtil._ +import org.apache.kyuubi.engine.spark.util.JsonUtils import org.apache.kyuubi.operation.{ArrayFetchIterator, OperationHandle, OperationState} import org.apache.kyuubi.operation.log.OperationLog import org.apache.kyuubi.session.Session @@ -87,7 +87,7 @@ class ExecutePython( val response = worker.runCode(statement) val status = response.map(_.content.status).getOrElse("UNKNOWN_STATUS") if (PythonResponse.OK_STATUS.equalsIgnoreCase(status)) { - val output = response.map(_.content.getOutput()).getOrElse("") + val output = response.map(_.content.getOutput(outputMode)).getOrElse("") val ename = response.map(_.content.getEname()).getOrElse("") val evalue = response.map(_.content.getEvalue()).getOrElse("") val traceback = response.map(_.content.getTraceback()).getOrElse(Seq.empty) @@ -95,7 +95,8 @@ class ExecutePython( new ArrayFetchIterator[Row](Array(Row(output, status, ename, evalue, traceback))) setState(OperationState.FINISHED) } else { - throw KyuubiSQLException(s"Interpret error:\n$statement\n $response") + throw KyuubiSQLException(s"Interpret error:\n" + + s"${JsonUtils.toPrettyJson(Map("code" -> statement, "response" -> response.orNull))}") } } } catch { @@ -200,12 +201,12 @@ case class SessionPythonWorker( throw KyuubiSQLException("Python worker process has been exited, please check the error log" + " and re-create the session to run python code.") } - val input = ExecutePython.toJson(Map("code" -> code, "cmd" -> "run_code")) + val input = JsonUtils.toJson(Map("code" -> code, "cmd" -> "run_code")) // scalastyle:off println stdin.println(input) // scalastyle:on stdin.flush() - val pythonResponse = Option(stdout.readLine()).map(ExecutePython.fromJson[PythonResponse](_)) + val pythonResponse = Option(stdout.readLine()).map(JsonUtils.fromJson[PythonResponse](_)) // throw exception if internal python code fail if (internal && !pythonResponse.map(_.content.status).contains(PythonResponse.OK_STATUS)) { throw KyuubiSQLException(s"Internal python code $code failure: $pythonResponse") @@ -214,7 +215,7 @@ case class SessionPythonWorker( } def close(): Unit = { - val exitCmd = ExecutePython.toJson(Map("cmd" -> "exit_worker")) + val exitCmd = JsonUtils.toJson(Map("cmd" -> "exit_worker")) // scalastyle:off println stdin.println(exitCmd) // scalastyle:on @@ -233,6 +234,7 @@ object ExecutePython extends Logging { final val PY4J_REGEX = "py4j-[\\S]*.zip$".r final val PY4J_PATH = "PY4J_PATH" final val IS_PYTHON_APP_KEY = "spark.yarn.isPython" + final val MAGIC_ENABLED = "MAGIC_ENABLED" private val isPythonGatewayStart = new AtomicBoolean(false) private val kyuubiPythonPath = Utils.createTempDir() @@ -280,6 +282,7 @@ object ExecutePython extends Logging { } env.put("KYUUBI_SPARK_SESSION_UUID", sessionId) env.put("PYTHON_GATEWAY_CONNECTION_INFO", KyuubiPythonGatewayServer.CONNECTION_FILE_PATH) + env.put(MAGIC_ENABLED, getSessionConf(ENGINE_SPARK_PYTHON_MAGIC_ENABLED, spark).toString) logger.info( s""" |launch python worker command: ${builder.command().asScala.mkString(" ")} @@ -295,10 +298,8 @@ object ExecutePython extends Logging { } def getSparkPythonExecFromArchive(spark: SparkSession, session: Session): Option[String] = { - val pythonEnvArchive = spark.conf.getOption(ENGINE_SPARK_PYTHON_ENV_ARCHIVE.key) - .orElse(session.sessionManager.getConf.get(ENGINE_SPARK_PYTHON_ENV_ARCHIVE)) - val pythonEnvExecPath = spark.conf.getOption(ENGINE_SPARK_PYTHON_ENV_ARCHIVE_EXEC_PATH.key) - .getOrElse(session.sessionManager.getConf.get(ENGINE_SPARK_PYTHON_ENV_ARCHIVE_EXEC_PATH)) + val pythonEnvArchive = getSessionConf(ENGINE_SPARK_PYTHON_ENV_ARCHIVE, spark) + val pythonEnvExecPath = getSessionConf(ENGINE_SPARK_PYTHON_ENV_ARCHIVE_EXEC_PATH, spark) pythonEnvArchive.map { archive => var uri = new URI(archive) @@ -311,8 +312,7 @@ object ExecutePython extends Logging { } def getSparkPythonHomeFromArchive(spark: SparkSession, session: Session): Option[String] = { - val pythonHomeArchive = spark.conf.getOption(ENGINE_SPARK_PYTHON_HOME_ARCHIVE.key) - .orElse(session.sessionManager.getConf.get(ENGINE_SPARK_PYTHON_HOME_ARCHIVE)) + val pythonHomeArchive = getSessionConf(ENGINE_SPARK_PYTHON_HOME_ARCHIVE, spark) pythonHomeArchive.map { archive => var uri = new URI(archive) @@ -388,19 +388,6 @@ object ExecutePython extends Logging { sink.close() file } - - val mapper: ObjectMapper = new ObjectMapper().registerModule(DefaultScalaModule) - def toJson[T](obj: T): String = { - mapper.writeValueAsString(obj) - } - def fromJson[T](json: String, clz: Class[T]): T = { - mapper.readValue(json, clz) - } - - def fromJson[T](json: String)(implicit m: Manifest[T]): T = { - mapper.readValue(json, m.runtimeClass).asInstanceOf[T] - } - } case class PythonResponse( @@ -412,15 +399,28 @@ object PythonResponse { } case class PythonResponseContent( - data: Map[String, String], + data: Map[String, Object], ename: String, evalue: String, traceback: Seq[String], status: String) { - def getOutput(): String = { - Option(data) - .map(_.getOrElse("text/plain", "")) - .getOrElse("") + def getOutput(outputMode: EngineSparkOutputMode): String = { + if (data == null) return "" + + outputMode match { + case AUTO => + // If data does not contains field other than `test/plain`, keep backward compatibility, + // otherwise, return all the data. + if (data.filterNot(_._1 == "text/plain").isEmpty) { + data.get("text/plain").map { + case str: String => str + case obj => JsonUtils.toJson(obj) + }.getOrElse("") + } else { + JsonUtils.toJson(data) + } + case NOTEBOOK => JsonUtils.toJson(data) + } } def getEname(): String = { Option(ename).getOrElse("") diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/ExecuteScala.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/ExecuteScala.scala index 691c4fb32d3..092e6e8241c 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/ExecuteScala.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/ExecuteScala.scala @@ -31,6 +31,7 @@ import org.apache.spark.sql.types.StructType import org.apache.kyuubi.KyuubiSQLException import org.apache.kyuubi.engine.spark.KyuubiSparkUtil._ import org.apache.kyuubi.engine.spark.repl.KyuubiSparkILoop +import org.apache.kyuubi.engine.spark.util.JsonUtils import org.apache.kyuubi.operation.{ArrayFetchIterator, OperationHandle, OperationState} import org.apache.kyuubi.operation.log.OperationLog import org.apache.kyuubi.session.Session @@ -119,7 +120,8 @@ class ExecuteScala( } } case Error => - throw KyuubiSQLException(s"Interpret error:\n$statement\n ${repl.getOutput}") + throw KyuubiSQLException(s"Interpret error:\n" + + s"${JsonUtils.toPrettyJson(Map("code" -> statement, "response" -> repl.getOutput))}") case Incomplete => throw KyuubiSQLException(s"Incomplete code:\n$statement") } diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/ExecuteStatement.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/ExecuteStatement.scala index 17d8a741269..8b47e2075a0 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/ExecuteStatement.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/ExecuteStatement.scala @@ -19,14 +19,16 @@ package org.apache.kyuubi.engine.spark.operation import java.util.concurrent.RejectedExecutionException +import scala.Array._ import scala.collection.JavaConverters._ +import org.apache.hadoop.fs.Path import org.apache.spark.sql.DataFrame import org.apache.spark.sql.kyuubi.SparkDatasetHelper._ import org.apache.spark.sql.types._ import org.apache.kyuubi.{KyuubiSQLException, Logging} -import org.apache.kyuubi.config.KyuubiConf.OPERATION_RESULT_MAX_ROWS +import org.apache.kyuubi.config.KyuubiConf.{OPERATION_RESULT_MAX_ROWS, OPERATION_RESULT_SAVE_TO_FILE, OPERATION_RESULT_SAVE_TO_FILE_DIR, OPERATION_RESULT_SAVE_TO_FILE_MINSIZE} import org.apache.kyuubi.engine.spark.KyuubiSparkUtil._ import org.apache.kyuubi.engine.spark.session.SparkSessionImpl import org.apache.kyuubi.operation.{ArrayFetchIterator, FetchIterator, IterableFetchIterator, OperationHandle, OperationState} @@ -46,6 +48,8 @@ class ExecuteStatement( override def getOperationLog: Option[OperationLog] = Option(operationLog) override protected def supportProgress: Boolean = true + private var fetchOrcStatement: Option[FetchOrcStatement] = None + private var saveFileName: Option[String] = None override protected def resultSchema: StructType = { if (result == null || result.schema.isEmpty) { new StructType().add("Result", "string") @@ -64,6 +68,15 @@ class ExecuteStatement( OperationLog.removeCurrentOperationLog() } + override def close(): Unit = { + super.close() + fetchOrcStatement.foreach(_.close()) + saveFileName.foreach { p => + val path = new Path(p) + path.getFileSystem(spark.sparkContext.hadoopConfiguration).delete(path, true) + } + } + protected def incrementalCollectResult(resultDF: DataFrame): Iterator[Any] = { resultDF.toLocalIterator().asScala } @@ -148,8 +161,7 @@ class ExecuteStatement( s"__kyuubi_operation_result_arrow_timestampAsString__=$timestampAsString") private def collectAsIterator(resultDF: DataFrame): FetchIterator[_] = { - val resultMaxRows = spark.conf.getOption(OPERATION_RESULT_MAX_ROWS.key).map(_.toInt) - .getOrElse(session.sessionManager.getConf.get(OPERATION_RESULT_MAX_ROWS)) + val resultMaxRows: Int = getSessionConf(OPERATION_RESULT_MAX_ROWS, spark) if (incrementalCollect) { if (resultMaxRows > 0) { warn(s"Ignore ${OPERATION_RESULT_MAX_ROWS.key} on incremental collect mode.") @@ -159,6 +171,31 @@ class ExecuteStatement( override def iterator: Iterator[Any] = incrementalCollectResult(resultDF) }) } else { + val resultSaveEnabled = getSessionConf(OPERATION_RESULT_SAVE_TO_FILE, spark) + lazy val resultSaveThreshold = getSessionConf(OPERATION_RESULT_SAVE_TO_FILE_MINSIZE, spark) + if (hasResultSet && resultSaveEnabled && shouldSaveResultToFs( + resultMaxRows, + resultSaveThreshold, + result)) { + val sessionId = session.handle.identifier.toString + val savePath = session.sessionManager.getConf.get(OPERATION_RESULT_SAVE_TO_FILE_DIR) + saveFileName = Some(s"$savePath/$engineId/$sessionId/$statementId") + // Rename all col name to avoid duplicate columns + val colName = range(0, result.schema.size).map(x => "col" + x) + + val codec = if (SPARK_ENGINE_RUNTIME_VERSION >= "3.2") "zstd" else "zlib" + // df.write will introduce an extra shuffle for the outermost limit, and hurt performance + if (resultMaxRows > 0) { + result.toDF(colName: _*).limit(resultMaxRows).write + .option("compression", codec).format("orc").save(saveFileName.get) + } else { + result.toDF(colName: _*).write + .option("compression", codec).format("orc").save(saveFileName.get) + } + info(s"Save result to $saveFileName") + fetchOrcStatement = Some(new FetchOrcStatement(spark)) + return fetchOrcStatement.get.getIterator(saveFileName.get, resultSchema) + } val internalArray = if (resultMaxRows <= 0) { info("Execute in full collect mode") fullCollectResult(resultDF) diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/FetchOrcStatement.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/FetchOrcStatement.scala new file mode 100644 index 00000000000..861539b95b2 --- /dev/null +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/FetchOrcStatement.scala @@ -0,0 +1,151 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.engine.spark.operation + +import scala.Array._ +import scala.collection.mutable.ListBuffer + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{LocatedFileStatus, Path} +import org.apache.hadoop.mapreduce.{JobID, TaskAttemptID, TaskID, TaskType} +import org.apache.hadoop.mapreduce.lib.input.FileSplit +import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl +import org.apache.orc.mapred.OrcStruct +import org.apache.orc.mapreduce.OrcInputFormat +import org.apache.spark.sql.{Row, SparkSession} +import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} +import org.apache.spark.sql.catalyst.expressions.AttributeReference +import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection +import org.apache.spark.sql.execution.datasources.RecordReaderIterator +import org.apache.spark.sql.execution.datasources.orc.OrcDeserializer +import org.apache.spark.sql.types.StructType + +import org.apache.kyuubi.KyuubiException +import org.apache.kyuubi.engine.spark.KyuubiSparkUtil.SPARK_ENGINE_RUNTIME_VERSION +import org.apache.kyuubi.operation.{FetchIterator, IterableFetchIterator} +import org.apache.kyuubi.util.reflect.DynConstructors + +class FetchOrcStatement(spark: SparkSession) { + + var orcIter: OrcFileIterator = _ + def getIterator(path: String, orcSchema: StructType): FetchIterator[Row] = { + val conf = spark.sparkContext.hadoopConfiguration + val savePath = new Path(path) + val fsIterator = savePath.getFileSystem(conf).listFiles(savePath, false) + val list = new ListBuffer[LocatedFileStatus] + while (fsIterator.hasNext) { + val file = fsIterator.next() + if (file.getPath.getName.endsWith(".orc") && file.getLen > 0) { + list += file + } + } + val toRowConverter: InternalRow => Row = { + CatalystTypeConverters.createToScalaConverter(orcSchema) + .asInstanceOf[InternalRow => Row] + } + val colId = range(0, orcSchema.size) + val fullSchema = orcSchema.map(f => + AttributeReference(f.name, f.dataType, f.nullable, f.metadata)()) + val unsafeProjection = GenerateUnsafeProjection.generate(fullSchema, fullSchema) + val deserializer = getOrcDeserializer(orcSchema, colId) + orcIter = new OrcFileIterator(list) + val iterRow = orcIter.map(value => + unsafeProjection(deserializer.deserialize(value))) + .map(value => toRowConverter(value)) + new IterableFetchIterator[Row](iterRow.toIterable) + } + + def close(): Unit = { + orcIter.close() + } + + private def getOrcDeserializer(orcSchema: StructType, colId: Array[Int]): OrcDeserializer = { + try { + if (SPARK_ENGINE_RUNTIME_VERSION >= "3.2") { + // SPARK-34535 changed the constructor signature of OrcDeserializer + DynConstructors.builder() + .impl(classOf[OrcDeserializer], classOf[StructType], classOf[Array[Int]]) + .build[OrcDeserializer]() + .newInstance( + orcSchema, + colId) + } else { + DynConstructors.builder() + .impl( + classOf[OrcDeserializer], + classOf[StructType], + classOf[StructType], + classOf[Array[Int]]) + .build[OrcDeserializer]() + .newInstance( + new StructType, + orcSchema, + colId) + } + } catch { + case e: Throwable => + throw new KyuubiException("Failed to create OrcDeserializer", e) + } + } +} + +class OrcFileIterator(fileList: ListBuffer[LocatedFileStatus]) extends Iterator[OrcStruct] { + + private val iters = fileList.map(x => getOrcFileIterator(x)) + + var idx = 0 + + override def hasNext: Boolean = { + val hasNext = iters(idx).hasNext + if (!hasNext) { + iters(idx).close() + idx += 1 + // skip empty file + while (idx < iters.size) { + if (iters(idx).hasNext) { + return true + } else { + iters(idx).close() + idx = idx + 1 + } + } + } + hasNext + } + + override def next(): OrcStruct = { + iters(idx).next() + } + + def close(): Unit = { + iters.foreach(_.close()) + } + + private def getOrcFileIterator(file: LocatedFileStatus): RecordReaderIterator[OrcStruct] = { + val orcRecordReader = { + val split = + new FileSplit(file.getPath, 0, file.getLen, Array.empty[String]) + val attemptId = new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0) + val hadoopAttemptContext = + new TaskAttemptContextImpl(new Configuration(), attemptId) + val oif = new OrcInputFormat[OrcStruct] + oif.createRecordReader(split, hadoopAttemptContext) + } + new RecordReaderIterator[OrcStruct](orcRecordReader) + } +} diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/GetTables.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/GetTables.scala index 980e4fdb173..75ce9492176 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/GetTables.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/GetTables.scala @@ -20,6 +20,7 @@ package org.apache.kyuubi.engine.spark.operation import org.apache.spark.sql.types.StructType import org.apache.kyuubi.config.KyuubiConf.OPERATION_GET_TABLES_IGNORE_TABLE_PROPERTIES +import org.apache.kyuubi.engine.spark.KyuubiSparkUtil.getSessionConf import org.apache.kyuubi.engine.spark.util.SparkCatalogUtils import org.apache.kyuubi.operation.IterableFetchIterator import org.apache.kyuubi.operation.meta.ResultSetSchemaConstant._ @@ -34,10 +35,7 @@ class GetTables( extends SparkOperation(session) { protected val ignoreTableProperties = - spark.conf.getOption(OPERATION_GET_TABLES_IGNORE_TABLE_PROPERTIES.key) match { - case Some(s) => s.toBoolean - case _ => session.sessionManager.getConf.get(OPERATION_GET_TABLES_IGNORE_TABLE_PROPERTIES) - } + getSessionConf(OPERATION_GET_TABLES_IGNORE_TABLE_PROPERTIES, spark) override def statement: String = { super.statement + diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/PlanOnlyStatement.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/PlanOnlyStatement.scala index 4f88083130a..f2a67047196 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/PlanOnlyStatement.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/PlanOnlyStatement.scala @@ -27,6 +27,7 @@ import org.apache.spark.sql.types.StructType import org.apache.kyuubi.KyuubiSQLException import org.apache.kyuubi.config.KyuubiConf.{LINEAGE_PARSER_PLUGIN_PROVIDER, OPERATION_PLAN_ONLY_EXCLUDES, OPERATION_PLAN_ONLY_OUT_STYLE} +import org.apache.kyuubi.engine.spark.KyuubiSparkUtil.getSessionConf import org.apache.kyuubi.operation.{AnalyzeMode, ArrayFetchIterator, ExecutionMode, IterableFetchIterator, JsonStyle, LineageMode, OperationHandle, OptimizeMode, OptimizeWithStatsMode, ParseMode, PhysicalMode, PlainStyle, PlanOnlyMode, PlanOnlyStyle, UnknownMode, UnknownStyle} import org.apache.kyuubi.operation.PlanOnlyMode.{notSupportedModeError, unknownModeError} import org.apache.kyuubi.operation.PlanOnlyStyle.{notSupportedStyleError, unknownStyleError} @@ -49,9 +50,7 @@ class PlanOnlyStatement( .getOrElse(session.sessionManager.getConf.get(OPERATION_PLAN_ONLY_EXCLUDES)) } - private val style = PlanOnlyStyle.fromString(spark.conf.get( - OPERATION_PLAN_ONLY_OUT_STYLE.key, - session.sessionManager.getConf.get(OPERATION_PLAN_ONLY_OUT_STYLE))) + private val style = PlanOnlyStyle.fromString(getSessionConf(OPERATION_PLAN_ONLY_OUT_STYLE, spark)) spark.conf.set(OPERATION_PLAN_ONLY_OUT_STYLE.key, style.name) override def getOperationLog: Option[OperationLog] = Option(operationLog) @@ -74,7 +73,6 @@ class PlanOnlyStatement( withLocalProperties { SQLConf.withExistingConf(spark.sessionState.conf) { val parsed = spark.sessionState.sqlParser.parsePlan(statement) - parsed match { case cmd if planExcludes.contains(cmd.getClass.getSimpleName) => result = spark.sql(statement) diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/SparkOperation.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/SparkOperation.scala index 1de360f0715..88ebc306b66 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/SparkOperation.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/SparkOperation.scala @@ -20,21 +20,20 @@ package org.apache.kyuubi.engine.spark.operation import java.io.IOException import java.time.ZoneId -import org.apache.hive.service.rpc.thrift.{TFetchResultsResp, TGetResultSetMetadataResp, TProgressUpdateResp, TRowSet} import org.apache.spark.kyuubi.{SparkProgressMonitor, SQLOperationListener} import org.apache.spark.kyuubi.SparkUtilsHelper.redact import org.apache.spark.sql.{DataFrame, Row, SparkSession} import org.apache.spark.sql.execution.SQLExecution -import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.types.{BinaryType, StructField, StructType} import org.apache.kyuubi.{KyuubiSQLException, Utils} import org.apache.kyuubi.config.KyuubiConf -import org.apache.kyuubi.config.KyuubiConf.{OPERATION_SPARK_LISTENER_ENABLED, SESSION_PROGRESS_ENABLE, SESSION_USER_SIGN_ENABLED} +import org.apache.kyuubi.config.KyuubiConf.{ARROW_BASED_ROWSET_TIMESTAMP_AS_STRING, ENGINE_SPARK_OUTPUT_MODE, EngineSparkOutputMode, OPERATION_SPARK_LISTENER_ENABLED, SESSION_PROGRESS_ENABLE, SESSION_USER_SIGN_ENABLED} import org.apache.kyuubi.config.KyuubiReservedKeys.{KYUUBI_SESSION_SIGN_PUBLICKEY, KYUUBI_SESSION_USER_KEY, KYUUBI_SESSION_USER_SIGN, KYUUBI_STATEMENT_ID_KEY} -import org.apache.kyuubi.engine.spark.KyuubiSparkUtil.SPARK_SCHEDULER_POOL_KEY +import org.apache.kyuubi.engine.spark.KyuubiSparkUtil.{getSessionConf, SPARK_SCHEDULER_POOL_KEY} import org.apache.kyuubi.engine.spark.events.SparkOperationEvent import org.apache.kyuubi.engine.spark.operation.SparkOperation.TIMEZONE_KEY -import org.apache.kyuubi.engine.spark.schema.{RowSet, SchemaHelper} +import org.apache.kyuubi.engine.spark.schema.{SchemaHelper, SparkArrowTRowSetGenerator, SparkTRowSetGenerator} import org.apache.kyuubi.engine.spark.session.SparkSessionImpl import org.apache.kyuubi.events.EventBus import org.apache.kyuubi.operation.{AbstractOperation, FetchIterator, OperationState, OperationStatus} @@ -42,6 +41,8 @@ import org.apache.kyuubi.operation.FetchOrientation._ import org.apache.kyuubi.operation.OperationState.OperationState import org.apache.kyuubi.operation.log.OperationLog import org.apache.kyuubi.session.Session +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TFetchResultsResp, TGetResultSetMetadataResp, TProgressUpdateResp, TRowSet} +import org.apache.kyuubi.util.ThriftUtils abstract class SparkOperation(session: Session) extends AbstractOperation(session) { @@ -63,11 +64,8 @@ abstract class SparkOperation(session: Session) override def redactedStatement: String = redact(spark.sessionState.conf.stringRedactionPattern, statement) - protected val operationSparkListenerEnabled = - spark.conf.getOption(OPERATION_SPARK_LISTENER_ENABLED.key) match { - case Some(s) => s.toBoolean - case _ => session.sessionManager.getConf.get(OPERATION_SPARK_LISTENER_ENABLED) - } + protected val operationSparkListenerEnabled: Boolean = + getSessionConf(OPERATION_SPARK_LISTENER_ENABLED, spark) protected val operationListener: Option[SQLOperationListener] = if (operationSparkListenerEnabled) { @@ -80,13 +78,13 @@ abstract class SparkOperation(session: Session) operationListener.foreach(spark.sparkContext.addSparkListener(_)) } - private val progressEnable = spark.conf.getOption(SESSION_PROGRESS_ENABLE.key) match { - case Some(s) => s.toBoolean - case _ => session.sessionManager.getConf.get(SESSION_PROGRESS_ENABLE) - } + private val progressEnable: Boolean = getSessionConf(SESSION_PROGRESS_ENABLE, spark) protected def supportProgress: Boolean = false + protected def outputMode: EngineSparkOutputMode.EngineSparkOutputMode = + EngineSparkOutputMode.withName(getSessionConf(ENGINE_SPARK_OUTPUT_MODE, spark)) + override def getStatus: OperationStatus = { if (progressEnable && supportProgress) { val progressMonitor = new SparkProgressMonitor(spark, statementId) @@ -113,9 +111,7 @@ abstract class SparkOperation(session: Session) protected val forceCancel = session.sessionManager.getConf.get(KyuubiConf.OPERATION_FORCE_CANCEL) - protected val schedulerPool = - spark.conf.getOption(KyuubiConf.OPERATION_SCHEDULER_POOL.key).orElse( - session.sessionManager.getConf.get(KyuubiConf.OPERATION_SCHEDULER_POOL)) + protected val schedulerPool = getSessionConf(KyuubiConf.OPERATION_SCHEDULER_POOL, spark) protected val isSessionUserSignEnabled: Boolean = spark.sparkContext.getConf.getBoolean( s"spark.${SESSION_USER_SIGN_ENABLED.key}", @@ -251,13 +247,16 @@ abstract class SparkOperation(session: Session) if (isArrowBasedOperation) { if (iter.hasNext) { val taken = iter.next().asInstanceOf[Array[Byte]] - RowSet.toTRowSet(taken, getProtocolVersion) + new SparkArrowTRowSetGenerator().toTRowSet( + Seq(taken), + new StructType().add(StructField(null, BinaryType)), + getProtocolVersion) } else { - RowSet.emptyTRowSet() + ThriftUtils.newEmptyRowSet } } else { val taken = iter.take(rowSetSize) - RowSet.toTRowSet( + new SparkTRowSetGenerator().toTRowSet( taken.toSeq.asInstanceOf[Seq[Row]], resultSchema, getProtocolVersion) @@ -279,7 +278,7 @@ abstract class SparkOperation(session: Session) protected def resultFormat: String = "thrift" protected def timestampAsString: Boolean = { - spark.conf.get("kyuubi.operation.result.arrow.timestampAsString", "false").toBoolean + spark.conf.get(ARROW_BASED_ROWSET_TIMESTAMP_AS_STRING.key, "false").toBoolean } protected def setSessionUserSign(): Unit = { diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/SparkSQLOperationManager.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/SparkSQLOperationManager.scala index ab082874630..cd365c62a6f 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/SparkSQLOperationManager.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/SparkSQLOperationManager.scala @@ -86,7 +86,7 @@ class SparkSQLOperationManager private (name: String) extends OperationManager(n val incrementalCollect = spark.conf.getOption(OPERATION_INCREMENTAL_COLLECT.key) .map(_.toBoolean).getOrElse(operationIncrementalCollectDefault) // TODO: respect the config of the operation ExecuteStatement, if it was set. - val resultFormat = spark.conf.get("kyuubi.operation.result.format", "thrift") + val resultFormat = spark.conf.get(OPERATION_RESULT_FORMAT.key, "thrift") resultFormat.toLowerCase match { case "arrow" => new ArrowBasedExecuteStatement( diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/schema/RowSet.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/schema/RowSet.scala index 4f935ce49f0..c5f32210891 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/schema/RowSet.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/schema/RowSet.scala @@ -17,231 +17,17 @@ package org.apache.kyuubi.engine.spark.schema -import java.nio.ByteBuffer - -import scala.collection.JavaConverters._ - -import org.apache.hive.service.rpc.thrift._ -import org.apache.spark.sql.Row import org.apache.spark.sql.execution.HiveResult +import org.apache.spark.sql.execution.HiveResult.TimeFormatters import org.apache.spark.sql.types._ -import org.apache.kyuubi.util.RowSetUtils._ - object RowSet { - def toHiveString(valueAndType: (Any, DataType), nested: Boolean = false): String = { - // compatible w/ Spark 3.1 and above - val timeFormatters = HiveResult.getTimeFormatters + def toHiveString( + valueAndType: (Any, DataType), + nested: Boolean = false, + timeFormatters: TimeFormatters): String = { HiveResult.toHiveString(valueAndType, nested, timeFormatters) } - def toTRowSet( - bytes: Array[Byte], - protocolVersion: TProtocolVersion): TRowSet = { - if (protocolVersion.getValue < TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6.getValue) { - throw new UnsupportedOperationException - } else { - toColumnBasedSet(bytes) - } - } - - def emptyTRowSet(): TRowSet = { - new TRowSet(0, new java.util.ArrayList[TRow](0)) - } - - def toColumnBasedSet(data: Array[Byte]): TRowSet = { - val tRowSet = new TRowSet(0, new java.util.ArrayList[TRow](1)) - val tColumn = toTColumn(data) - tRowSet.addToColumns(tColumn) - tRowSet - } - - def toTRowSet( - rows: Seq[Row], - schema: StructType, - protocolVersion: TProtocolVersion): TRowSet = { - if (protocolVersion.getValue < TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6.getValue) { - toRowBasedSet(rows, schema) - } else { - toColumnBasedSet(rows, schema) - } - } - - def toRowBasedSet(rows: Seq[Row], schema: StructType): TRowSet = { - val rowSize = rows.length - val tRows = new java.util.ArrayList[TRow](rowSize) - var i = 0 - while (i < rowSize) { - val row = rows(i) - val tRow = new TRow() - var j = 0 - val columnSize = row.length - while (j < columnSize) { - val columnValue = toTColumnValue(j, row, schema) - tRow.addToColVals(columnValue) - j += 1 - } - i += 1 - tRows.add(tRow) - } - new TRowSet(0, tRows) - } - - def toColumnBasedSet(rows: Seq[Row], schema: StructType): TRowSet = { - val rowSize = rows.length - val tRowSet = new TRowSet(0, new java.util.ArrayList[TRow](rowSize)) - var i = 0 - val columnSize = schema.length - while (i < columnSize) { - val field = schema(i) - val tColumn = toTColumn(rows, i, field.dataType) - tRowSet.addToColumns(tColumn) - i += 1 - } - tRowSet - } - - private def toTColumn(rows: Seq[Row], ordinal: Int, typ: DataType): TColumn = { - val nulls = new java.util.BitSet() - typ match { - case BooleanType => - val values = getOrSetAsNull[java.lang.Boolean](rows, ordinal, nulls, true) - TColumn.boolVal(new TBoolColumn(values, nulls)) - - case ByteType => - val values = getOrSetAsNull[java.lang.Byte](rows, ordinal, nulls, 0.toByte) - TColumn.byteVal(new TByteColumn(values, nulls)) - - case ShortType => - val values = getOrSetAsNull[java.lang.Short](rows, ordinal, nulls, 0.toShort) - TColumn.i16Val(new TI16Column(values, nulls)) - - case IntegerType => - val values = getOrSetAsNull[java.lang.Integer](rows, ordinal, nulls, 0) - TColumn.i32Val(new TI32Column(values, nulls)) - - case LongType => - val values = getOrSetAsNull[java.lang.Long](rows, ordinal, nulls, 0L) - TColumn.i64Val(new TI64Column(values, nulls)) - - case FloatType => - val values = getOrSetAsNull[java.lang.Float](rows, ordinal, nulls, 0.toFloat) - .asScala.map(n => java.lang.Double.valueOf(n.toString)).asJava - TColumn.doubleVal(new TDoubleColumn(values, nulls)) - - case DoubleType => - val values = getOrSetAsNull[java.lang.Double](rows, ordinal, nulls, 0.toDouble) - TColumn.doubleVal(new TDoubleColumn(values, nulls)) - - case StringType => - val values = getOrSetAsNull[java.lang.String](rows, ordinal, nulls, "") - TColumn.stringVal(new TStringColumn(values, nulls)) - - case BinaryType => - val values = getOrSetAsNull[Array[Byte]](rows, ordinal, nulls, Array()) - .asScala - .map(ByteBuffer.wrap) - .asJava - TColumn.binaryVal(new TBinaryColumn(values, nulls)) - - case _ => - var i = 0 - val rowSize = rows.length - val values = new java.util.ArrayList[String](rowSize) - while (i < rowSize) { - val row = rows(i) - nulls.set(i, row.isNullAt(ordinal)) - values.add(toHiveString(row.get(ordinal) -> typ)) - i += 1 - } - TColumn.stringVal(new TStringColumn(values, nulls)) - } - } - - private def getOrSetAsNull[T]( - rows: Seq[Row], - ordinal: Int, - nulls: java.util.BitSet, - defaultVal: T): java.util.List[T] = { - val size = rows.length - val ret = new java.util.ArrayList[T](size) - var idx = 0 - while (idx < size) { - val row = rows(idx) - val isNull = row.isNullAt(ordinal) - if (isNull) { - nulls.set(idx, true) - ret.add(idx, defaultVal) - } else { - ret.add(idx, row.getAs[T](ordinal)) - } - idx += 1 - } - ret - } - - private def toTColumnValue( - ordinal: Int, - row: Row, - types: StructType): TColumnValue = { - types(ordinal).dataType match { - case BooleanType => - val boolValue = new TBoolValue - if (!row.isNullAt(ordinal)) boolValue.setValue(row.getBoolean(ordinal)) - TColumnValue.boolVal(boolValue) - - case ByteType => - val byteValue = new TByteValue - if (!row.isNullAt(ordinal)) byteValue.setValue(row.getByte(ordinal)) - TColumnValue.byteVal(byteValue) - - case ShortType => - val tI16Value = new TI16Value - if (!row.isNullAt(ordinal)) tI16Value.setValue(row.getShort(ordinal)) - TColumnValue.i16Val(tI16Value) - - case IntegerType => - val tI32Value = new TI32Value - if (!row.isNullAt(ordinal)) tI32Value.setValue(row.getInt(ordinal)) - TColumnValue.i32Val(tI32Value) - - case LongType => - val tI64Value = new TI64Value - if (!row.isNullAt(ordinal)) tI64Value.setValue(row.getLong(ordinal)) - TColumnValue.i64Val(tI64Value) - - case FloatType => - val tDoubleValue = new TDoubleValue - if (!row.isNullAt(ordinal)) { - val doubleValue = java.lang.Double.valueOf(row.getFloat(ordinal).toString) - tDoubleValue.setValue(doubleValue) - } - TColumnValue.doubleVal(tDoubleValue) - - case DoubleType => - val tDoubleValue = new TDoubleValue - if (!row.isNullAt(ordinal)) tDoubleValue.setValue(row.getDouble(ordinal)) - TColumnValue.doubleVal(tDoubleValue) - - case StringType => - val tStringValue = new TStringValue - if (!row.isNullAt(ordinal)) tStringValue.setValue(row.getString(ordinal)) - TColumnValue.stringVal(tStringValue) - - case _ => - val tStrValue = new TStringValue - if (!row.isNullAt(ordinal)) { - tStrValue.setValue(toHiveString(row.get(ordinal) -> types(ordinal).dataType)) - } - TColumnValue.stringVal(tStrValue) - } - } - - private def toTColumn(data: Array[Byte]): TColumn = { - val values = new java.util.ArrayList[ByteBuffer](1) - values.add(ByteBuffer.wrap(data)) - val nulls = new java.util.BitSet() - TColumn.binaryVal(new TBinaryColumn(values, nulls)) - } } diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/schema/SchemaHelper.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/schema/SchemaHelper.scala index 8db46e2b7f4..3da5937015c 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/schema/SchemaHelper.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/schema/SchemaHelper.scala @@ -21,9 +21,10 @@ import java.util.Collections import scala.collection.JavaConverters._ -import org.apache.hive.service.rpc.thrift._ import org.apache.spark.sql.types._ +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ + object SchemaHelper { /** @@ -140,6 +141,8 @@ object SchemaHelper { case dt if Array(TIMESTAMP_NTZ, DAY_TIME_INTERVAL, YEAR_MONTH_INTERVAL) .contains(dt.getClass.getSimpleName) => Some(dt.defaultSize) + case dt: DecimalType => + Some(dt.precision) case dt @ (BooleanType | _: NumericType | DateType | TimestampType | CalendarIntervalType | NullType) => Some(dt.defaultSize) diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/schema/SparkArrowTRowSetGenerator.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/schema/SparkArrowTRowSetGenerator.scala new file mode 100644 index 00000000000..054df0dd653 --- /dev/null +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/schema/SparkArrowTRowSetGenerator.scala @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.engine.spark.schema + +import java.nio.ByteBuffer + +import org.apache.spark.sql.types._ + +import org.apache.kyuubi.engine.result.TRowSetGenerator +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ + +class SparkArrowTRowSetGenerator + extends TRowSetGenerator[StructType, Array[Byte], DataType] { + override def toColumnBasedSet(rows: Seq[Array[Byte]], schema: StructType): TRowSet = { + require(schema.length == 1, "ArrowRowSetGenerator accepts only one single byte array") + require(schema.head.dataType == BinaryType, "ArrowRowSetGenerator accepts only BinaryType") + + val tRowSet = new TRowSet(0, new java.util.ArrayList[TRow](1)) + val tColumn = toTColumn(rows, 1, schema.head.dataType) + tRowSet.addToColumns(tColumn) + tRowSet + } + + override def toTColumn(rows: Seq[Array[Byte]], ordinal: Int, typ: DataType): TColumn = { + require(rows.length == 1, "ArrowRowSetGenerator accepts only one single byte array") + typ match { + case BinaryType => + val values = new java.util.ArrayList[ByteBuffer](1) + values.add(ByteBuffer.wrap(rows.head)) + TColumn.binaryVal(new TBinaryColumn(values, ByteBuffer.wrap(Array[Byte]()))) + case _ => throw new IllegalArgumentException( + s"unsupported datatype $typ, ArrowRowSetGenerator accepts only BinaryType") + } + } + + override def toRowBasedSet(rows: Seq[Array[Byte]], schema: StructType): TRowSet = { + throw new UnsupportedOperationException + } + + override def getColumnSizeFromSchemaType(schema: StructType): Int = { + throw new UnsupportedOperationException + } + + override def getColumnType(schema: StructType, ordinal: Int): DataType = { + throw new UnsupportedOperationException + } + + override def isColumnNullAt(row: Array[Byte], ordinal: Int): Boolean = { + throw new UnsupportedOperationException + } + + override def getColumnAs[T](row: Array[Byte], ordinal: Int): T = { + throw new UnsupportedOperationException + } + + override def toTColumnValue(row: Array[Byte], ordinal: Int, types: StructType): TColumnValue = { + throw new UnsupportedOperationException + } + +} diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/schema/SparkTRowSetGenerator.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/schema/SparkTRowSetGenerator.scala new file mode 100644 index 00000000000..1d1b5ef6aab --- /dev/null +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/schema/SparkTRowSetGenerator.scala @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.engine.spark.schema + +import org.apache.spark.sql.Row +import org.apache.spark.sql.execution.HiveResult +import org.apache.spark.sql.types._ + +import org.apache.kyuubi.engine.result.TRowSetGenerator +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ + +class SparkTRowSetGenerator + extends TRowSetGenerator[StructType, Row, DataType] { + + // reused time formatters in single RowSet generation, see KYUUBI-5811 + private val tf = HiveResult.getTimeFormatters + + override def getColumnSizeFromSchemaType(schema: StructType): Int = schema.length + + override def getColumnType(schema: StructType, ordinal: Int): DataType = schema(ordinal).dataType + + override def isColumnNullAt(row: Row, ordinal: Int): Boolean = row.isNullAt(ordinal) + + override def getColumnAs[T](row: Row, ordinal: Int): T = row.getAs[T](ordinal) + + override def toTColumn(rows: Seq[Row], ordinal: Int, typ: DataType): TColumn = { + typ match { + case BooleanType => asBooleanTColumn(rows, ordinal) + case ByteType => asByteTColumn(rows, ordinal) + case ShortType => asShortTColumn(rows, ordinal) + case IntegerType => asIntegerTColumn(rows, ordinal) + case LongType => asLongTColumn(rows, ordinal) + case FloatType => asFloatTColumn(rows, ordinal) + case DoubleType => asDoubleTColumn(rows, ordinal) + case StringType => asStringTColumn(rows, ordinal) + case BinaryType => asByteArrayTColumn(rows, ordinal) + case _ => + val timeFormatters = tf + asStringTColumn( + rows, + ordinal, + "NULL", + (row, ordinal) => + RowSet.toHiveString( + getColumnAs[Any](row, ordinal) -> typ, + timeFormatters = timeFormatters)) + } + } + + override def toTColumnValue(row: Row, ordinal: Int, types: StructType): TColumnValue = { + getColumnType(types, ordinal) match { + case BooleanType => asBooleanTColumnValue(row, ordinal) + case ByteType => asByteTColumnValue(row, ordinal) + case ShortType => asShortTColumnValue(row, ordinal) + case IntegerType => asIntegerTColumnValue(row, ordinal) + case LongType => asLongTColumnValue(row, ordinal) + case FloatType => asFloatTColumnValue(row, ordinal) + case DoubleType => asDoubleTColumnValue(row, ordinal) + case StringType => asStringTColumnValue(row, ordinal) + case _ => asStringTColumnValue( + row, + ordinal, + rawValue => RowSet.toHiveString(rawValue -> types(ordinal).dataType, timeFormatters = tf)) + } + } + +} diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/session/SparkSQLSessionManager.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/session/SparkSQLSessionManager.scala index 79f38ce35a4..aab2d51068f 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/session/SparkSQLSessionManager.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/session/SparkSQLSessionManager.scala @@ -19,7 +19,7 @@ package org.apache.kyuubi.engine.spark.session import java.util.concurrent.{ScheduledExecutorService, TimeUnit} -import org.apache.hive.service.rpc.thrift.TProtocolVersion +import org.apache.hadoop.fs.Path import org.apache.spark.api.python.KyuubiPythonGatewayServer import org.apache.spark.sql.SparkSession @@ -29,9 +29,12 @@ import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_SESSION_HANDLE_KEY import org.apache.kyuubi.engine.ShareLevel import org.apache.kyuubi.engine.ShareLevel._ import org.apache.kyuubi.engine.spark.{KyuubiSparkUtil, SparkSQLEngine} +import org.apache.kyuubi.engine.spark.KyuubiSparkUtil.engineId import org.apache.kyuubi.engine.spark.operation.SparkSQLOperationManager import org.apache.kyuubi.session._ +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TProtocolVersion import org.apache.kyuubi.util.ThreadUtils +import org.apache.kyuubi.util.ThreadUtils.scheduleTolerableRunnableWithFixedDelay /** * A [[SessionManager]] constructed with [[SparkSession]] which give it the ability to talk with @@ -66,8 +69,9 @@ class SparkSQLSessionManager private (name: String, spark: SparkSession) if (!userIsolatedSparkSession) { userIsolatedSparkSessionThread = Some(ThreadUtils.newDaemonSingleThreadScheduledExecutor("user-isolated-cache-checker")) - userIsolatedSparkSessionThread.foreach { - _.scheduleWithFixedDelay( + userIsolatedSparkSessionThread.foreach { thread => + scheduleTolerableRunnableWithFixedDelay( + thread, () => { userIsolatedCacheLock.synchronized { val iter = userIsolatedCacheCount.entrySet().iterator() @@ -128,7 +132,9 @@ class SparkSQLSessionManager private (name: String, spark: SparkSession) private def newSparkSession(rootSparkSession: SparkSession): SparkSession = { val newSparkSession = rootSparkSession.newSession() - KyuubiSparkUtil.initializeSparkSession(newSparkSession, conf.get(ENGINE_SESSION_INITIALIZE_SQL)) + KyuubiSparkUtil.initializeSparkSession( + newSparkSession, + conf.get(ENGINE_SESSION_SPARK_INITIALIZE_SQL)) newSparkSession } @@ -180,6 +186,12 @@ class SparkSQLSessionManager private (name: String, spark: SparkSession) info("Session stopped due to shared level is Connection.") stopSession() } + if (conf.get(OPERATION_RESULT_SAVE_TO_FILE)) { + val path = new Path(s"${conf.get(OPERATION_RESULT_SAVE_TO_FILE_DIR)}/" + + s"$engineId/${sessionHandle.identifier}") + path.getFileSystem(spark.sparkContext.hadoopConfiguration).delete(path, true) + info(s"Delete session result file $path") + } } private def stopSession(): Unit = { diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/session/SparkSessionImpl.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/session/SparkSessionImpl.scala index 8d9012cbdc6..08bd09b4483 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/session/SparkSessionImpl.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/session/SparkSessionImpl.scala @@ -18,7 +18,6 @@ package org.apache.kyuubi.engine.spark.session import org.apache.commons.lang3.StringUtils -import org.apache.hive.service.rpc.thrift.{TGetInfoType, TGetInfoValue, TProtocolVersion} import org.apache.spark.sql.{AnalysisException, SparkSession} import org.apache.kyuubi.KyuubiSQLException @@ -30,6 +29,7 @@ import org.apache.kyuubi.engine.spark.util.SparkCatalogUtils import org.apache.kyuubi.events.EventBus import org.apache.kyuubi.operation.{Operation, OperationHandle} import org.apache.kyuubi.session._ +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TGetInfoType, TGetInfoValue, TProtocolVersion} class SparkSessionImpl( protocol: TProtocolVersion, diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/util/JsonUtils.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/util/JsonUtils.scala new file mode 100644 index 00000000000..192c6dbb40c --- /dev/null +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/util/JsonUtils.scala @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.engine.spark.util + +import com.fasterxml.jackson.databind.{DeserializationFeature, JsonNode, ObjectMapper} +import com.fasterxml.jackson.module.scala.DefaultScalaModule + +object JsonUtils { + val mapper: ObjectMapper = new ObjectMapper() + .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) + .registerModule(DefaultScalaModule) + + def toJson[T](obj: T): String = { + mapper.writeValueAsString(obj) + } + + def toPrettyJson[T](obj: T): String = { + mapper.writerWithDefaultPrettyPrinter().writeValueAsString(obj) + } + + def fromJson[T](json: String, clz: Class[T]): T = { + mapper.readValue(json, clz) + } + + def fromJson[T](json: String)(implicit m: Manifest[T]): T = { + mapper.readValue(json, m.runtimeClass).asInstanceOf[T] + } + + def readTree(content: String): JsonNode = { + mapper.readTree(content) + } +} diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/kyuubi/SQLOperationListener.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/kyuubi/SQLOperationListener.scala index 686cb1f359b..a7d409c7ca5 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/kyuubi/SQLOperationListener.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/kyuubi/SQLOperationListener.scala @@ -27,10 +27,9 @@ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd import org.apache.kyuubi.Logging -import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiConf.{ENGINE_SPARK_SHOW_PROGRESS, ENGINE_SPARK_SHOW_PROGRESS_TIME_FORMAT, ENGINE_SPARK_SHOW_PROGRESS_UPDATE_INTERVAL} import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_STATEMENT_ID_KEY -import org.apache.kyuubi.engine.spark.KyuubiSparkUtil.SPARK_SQL_EXECUTION_ID_KEY +import org.apache.kyuubi.engine.spark.KyuubiSparkUtil.{getSessionConf, SPARK_SQL_EXECUTION_ID_KEY} import org.apache.kyuubi.engine.spark.operation.ExecuteStatement import org.apache.kyuubi.operation.Operation import org.apache.kyuubi.operation.log.OperationLog @@ -50,15 +49,14 @@ class SQLOperationListener( private lazy val activeStages = new ConcurrentHashMap[SparkStageAttempt, SparkStageInfo]() private var executionId: Option[Long] = None - private val conf: KyuubiConf = operation.getSession.sessionManager.getConf private lazy val consoleProgressBar = - if (conf.get(ENGINE_SPARK_SHOW_PROGRESS)) { + if (getSessionConf(ENGINE_SPARK_SHOW_PROGRESS, spark)) { Some(new SparkConsoleProgressBar( operation, activeJobs, activeStages, - conf.get(ENGINE_SPARK_SHOW_PROGRESS_UPDATE_INTERVAL), - conf.get(ENGINE_SPARK_SHOW_PROGRESS_TIME_FORMAT))) + getSessionConf(ENGINE_SPARK_SHOW_PROGRESS_UPDATE_INTERVAL, spark), + getSessionConf(ENGINE_SPARK_SHOW_PROGRESS_TIME_FORMAT, spark))) } else { None } diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/kyuubi/SparkProgressMonitor.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/kyuubi/SparkProgressMonitor.scala index 1d9ef53eae9..80cf292755c 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/kyuubi/SparkProgressMonitor.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/kyuubi/SparkProgressMonitor.scala @@ -21,12 +21,12 @@ import java.util import scala.collection.JavaConverters._ import scala.collection.immutable.SortedMap -import org.apache.hive.service.rpc.thrift.TJobExecutionStatus import org.apache.spark.kyuubi.SparkProgressMonitor._ import org.apache.spark.sql.SparkSession import org.apache.spark.status.api.v1.StageStatus import org.apache.kyuubi.engine.spark.operation.progress.{SparkOperationProgressStatus, SparkStage, SparkStageProgress} +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TJobExecutionStatus class SparkProgressMonitor(spark: SparkSession, jobGroup: String) { diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/sql/kyuubi/SparkDatasetHelper.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/sql/kyuubi/SparkDatasetHelper.scala index c0f9d61c210..16f597cdb34 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/sql/kyuubi/SparkDatasetHelper.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/sql/kyuubi/SparkDatasetHelper.scala @@ -25,7 +25,9 @@ import org.apache.spark.network.util.{ByteUnit, JavaUtils} import org.apache.spark.rdd.RDD import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession} import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.plans.logical.statsEstimation.EstimationUtils import org.apache.spark.sql.execution.{CollectLimitExec, LocalTableScanExec, SparkPlan, SQLExecution} +import org.apache.spark.sql.execution.HiveResult import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec import org.apache.spark.sql.execution.arrow.KyuubiArrowConverters import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics} @@ -105,17 +107,31 @@ object SparkDatasetHelper extends Logging { val quotedCol = (name: String) => col(quoteIfNeeded(name)) // an udf to call `RowSet.toHiveString` on complex types(struct/array/map) and timestamp type. + // TODO: reuse the timeFormatters on greater scale if possible, + // recreating timeFormatters may cause performance issue, see [KYUUBI#5811] val toHiveStringUDF = udf[String, Row, String]((row, schemaDDL) => { val dt = DataType.fromDDL(schemaDDL) dt match { case StructType(Array(StructField(_, st: StructType, _, _))) => - RowSet.toHiveString((row, st), nested = true) + RowSet.toHiveString( + (row, st), + nested = true, + timeFormatters = HiveResult.getTimeFormatters) case StructType(Array(StructField(_, at: ArrayType, _, _))) => - RowSet.toHiveString((row.toSeq.head, at), nested = true) + RowSet.toHiveString( + (row.toSeq.head, at), + nested = true, + timeFormatters = HiveResult.getTimeFormatters) case StructType(Array(StructField(_, mt: MapType, _, _))) => - RowSet.toHiveString((row.toSeq.head, mt), nested = true) + RowSet.toHiveString( + (row.toSeq.head, mt), + nested = true, + timeFormatters = HiveResult.getTimeFormatters) case StructType(Array(StructField(_, tt: TimestampType, _, _))) => - RowSet.toHiveString((row.toSeq.head, tt), nested = true) + RowSet.toHiveString( + (row.toSeq.head, tt), + nested = true, + timeFormatters = HiveResult.getTimeFormatters) case _ => throw new UnsupportedOperationException } @@ -278,4 +294,32 @@ object SparkDatasetHelper extends Logging { val executionId = sc.getLocalProperty(SQLExecution.EXECUTION_ID_KEY) SQLMetrics.postDriverMetricUpdates(sc, executionId, metrics.values.toSeq) } + + def shouldSaveResultToFs(resultMaxRows: Int, minSize: Long, result: DataFrame): Boolean = { + if (isCommandExec(result.queryExecution.executedPlan.nodeName)) { + return false + } + lazy val limit = result.queryExecution.executedPlan match { + case collectLimit: CollectLimitExec => collectLimit.limit + case _ => resultMaxRows + } + lazy val stats = if (limit > 0) { + limit * EstimationUtils.getSizePerRow( + result.queryExecution.executedPlan.output) + } else { + result.queryExecution.optimizedPlan.stats.sizeInBytes + } + lazy val colSize = + if (result == null || result.schema.isEmpty) { + 0 + } else { + result.schema.size + } + minSize > 0 && colSize > 0 && stats >= minSize + } + + private def isCommandExec(nodeName: String): Boolean = { + nodeName == "org.apache.spark.sql.execution.command.ExecutedCommandExec" || + nodeName == "org.apache.spark.sql.execution.CommandResultExec" + } } diff --git a/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/IndividualSparkSuite.scala b/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/IndividualSparkSuite.scala index 8fca1d0ca2b..e924aa3de49 100644 --- a/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/IndividualSparkSuite.scala +++ b/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/IndividualSparkSuite.scala @@ -104,13 +104,17 @@ class SparkEngineSuites extends KyuubiFunSuite { withSystemProperty(Map( s"spark.$KYUUBI_ENGINE_SUBMIT_TIME_KEY" -> String.valueOf(submitTime), s"spark.${ENGINE_INIT_TIMEOUT.key}" -> String.valueOf(timeout), - s"spark.${ENGINE_INITIALIZE_SQL.key}" -> + s"spark.${ENGINE_SPARK_INITIALIZE_SQL.key}" -> "select 1 where java_method('java.lang.Thread', 'sleep', 60000L) is null")) { SparkSQLEngine.setupConf() SparkSQLEngine.currentEngine = None val logAppender = new LogAppender("test createSpark timeout") withLogAppender(logAppender) { - SparkSQLEngine.main(Array.empty) + try { + SparkSQLEngine.main(Array.empty) + } catch { + case e: Exception => error("", e) + } } assert(SparkSQLEngine.currentEngine.isEmpty) val errorMsg = s"The Engine main thread was interrupted, possibly due to `createSpark`" + diff --git a/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/events/handler/SparkJsonLoggingEventHandlerSuite.scala b/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/events/handler/SparkJsonLoggingEventHandlerSuite.scala index ddaa962193b..e7e6768e7af 100644 --- a/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/events/handler/SparkJsonLoggingEventHandlerSuite.scala +++ b/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/events/handler/SparkJsonLoggingEventHandlerSuite.scala @@ -22,7 +22,6 @@ import java.nio.file.Paths import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, FSDataInputStream, Path} -import org.apache.hive.service.rpc.thrift.TExecuteStatementReq import org.scalatest.time.SpanSugar._ import org.apache.kyuubi.Utils @@ -32,6 +31,7 @@ import org.apache.kyuubi.engine.spark.events.{EngineEvent, SessionEvent} import org.apache.kyuubi.events.EventLoggerType._ import org.apache.kyuubi.events.JsonProtocol import org.apache.kyuubi.operation.{HiveJDBCTestHelper, OperationHandle} +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TExecuteStatementReq class SparkJsonLoggingEventHandlerSuite extends WithSparkSQLEngine with HiveJDBCTestHelper { diff --git a/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/operation/SparkOperationProgressSuite.scala b/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/operation/SparkOperationProgressSuite.scala index a82443f41a1..def45d3873c 100644 --- a/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/operation/SparkOperationProgressSuite.scala +++ b/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/operation/SparkOperationProgressSuite.scala @@ -19,12 +19,12 @@ package org.apache.kyuubi.engine.spark.operation import scala.collection.JavaConverters._ -import org.apache.hive.service.rpc.thrift.{TExecuteStatementReq, TGetOperationStatusReq, TJobExecutionStatus} import org.scalatest.concurrent.PatienceConfiguration.Timeout import org.scalatest.time.SpanSugar.convertIntToGrainOfTime import org.apache.kyuubi.engine.spark.WithSparkSQLEngine import org.apache.kyuubi.operation.HiveJDBCTestHelper +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TExecuteStatementReq, TGetOperationStatusReq, TJobExecutionStatus} class SparkOperationProgressSuite extends WithSparkSQLEngine with HiveJDBCTestHelper { diff --git a/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/operation/SparkOperationSuite.scala b/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/operation/SparkOperationSuite.scala index adab0231d63..fb9873fd05f 100644 --- a/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/operation/SparkOperationSuite.scala +++ b/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/operation/SparkOperationSuite.scala @@ -25,7 +25,6 @@ import org.apache.hadoop.hive.thrift.{DelegationTokenIdentifier => HiveTokenIden import org.apache.hadoop.io.Text import org.apache.hadoop.security.{Credentials, UserGroupInformation} import org.apache.hadoop.security.token.{Token, TokenIdentifier} -import org.apache.hive.service.rpc.thrift._ import org.apache.spark.SPARK_VERSION import org.apache.spark.kyuubi.SparkContextHelper import org.apache.spark.sql.catalyst.analysis.FunctionRegistry @@ -38,6 +37,7 @@ import org.apache.kyuubi.engine.spark.util.SparkCatalogUtils import org.apache.kyuubi.jdbc.hive.KyuubiStatement import org.apache.kyuubi.operation.{HiveMetadataTests, SparkQueryTests} import org.apache.kyuubi.operation.meta.ResultSetSchemaConstant._ +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ import org.apache.kyuubi.util.KyuubiHadoopUtils import org.apache.kyuubi.util.SemanticVersion @@ -154,6 +154,7 @@ class SparkOperationSuite extends WithSparkSQLEngine with HiveMetadataTests with val colSize = rowSet.getInt(COLUMN_SIZE) schema(pos).dataType match { case StringType | BinaryType | _: ArrayType | _: MapType => assert(colSize === 0) + case d: DecimalType => assert(colSize === d.precision) case StructType(fields) if fields.length == 1 => assert(colSize === 0) case o => assert(colSize === o.defaultSize) } diff --git a/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/schema/RowSetSuite.scala b/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/schema/RowSetSuite.scala index 5d2ba4a0d11..228bdcaf2c0 100644 --- a/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/schema/RowSetSuite.scala +++ b/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/schema/RowSetSuite.scala @@ -24,12 +24,13 @@ import java.time.{Instant, LocalDate} import scala.collection.JavaConverters._ -import org.apache.hive.service.rpc.thrift.TProtocolVersion import org.apache.spark.sql.Row +import org.apache.spark.sql.execution.HiveResult import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.CalendarInterval import org.apache.kyuubi.KyuubiFunSuite +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TProtocolVersion class RowSetSuite extends KyuubiFunSuite { @@ -98,7 +99,7 @@ class RowSetSuite extends KyuubiFunSuite { private val rows: Seq[Row] = (0 to 10).map(genRow) ++ Seq(Row.fromSeq(Seq.fill(17)(null))) test("column based set") { - val tRowSet = RowSet.toColumnBasedSet(rows, schema) + val tRowSet = new SparkTRowSetGenerator().toColumnBasedSet(rows, schema) assert(tRowSet.getColumns.size() === schema.size) assert(tRowSet.getRowsSize === 0) @@ -165,14 +166,18 @@ class RowSetSuite extends KyuubiFunSuite { dateCol.getValues.asScala.zipWithIndex.foreach { case (b, 11) => assert(b === "NULL") case (b, i) => - assert(b === RowSet.toHiveString(Date.valueOf(s"2018-11-${i + 1}") -> DateType)) + assert(b === RowSet.toHiveString( + Date.valueOf(s"2018-11-${i + 1}") -> DateType, + timeFormatters = HiveResult.getTimeFormatters)) } val tsCol = cols.next().getStringVal tsCol.getValues.asScala.zipWithIndex.foreach { case (b, 11) => assert(b === "NULL") case (b, i) => assert(b === - RowSet.toHiveString(Timestamp.valueOf(s"2018-11-17 13:33:33.$i") -> TimestampType)) + RowSet.toHiveString( + Timestamp.valueOf(s"2018-11-17 13:33:33.$i") -> TimestampType, + timeFormatters = HiveResult.getTimeFormatters)) } val binCol = cols.next().getBinaryVal @@ -185,14 +190,16 @@ class RowSetSuite extends KyuubiFunSuite { arrCol.getValues.asScala.zipWithIndex.foreach { case (b, 11) => assert(b === "NULL") case (b, i) => assert(b === RowSet.toHiveString( - Array.fill(i)(java.lang.Double.valueOf(s"$i.$i")).toSeq -> ArrayType(DoubleType))) + Array.fill(i)(java.lang.Double.valueOf(s"$i.$i")).toSeq -> ArrayType(DoubleType), + timeFormatters = HiveResult.getTimeFormatters)) } val mapCol = cols.next().getStringVal mapCol.getValues.asScala.zipWithIndex.foreach { case (b, 11) => assert(b === "NULL") case (b, i) => assert(b === RowSet.toHiveString( - Map(i -> java.lang.Double.valueOf(s"$i.$i")) -> MapType(IntegerType, DoubleType))) + Map(i -> java.lang.Double.valueOf(s"$i.$i")) -> MapType(IntegerType, DoubleType), + timeFormatters = HiveResult.getTimeFormatters)) } val intervalCol = cols.next().getStringVal @@ -203,7 +210,7 @@ class RowSetSuite extends KyuubiFunSuite { } test("row based set") { - val tRowSet = RowSet.toRowBasedSet(rows, schema) + val tRowSet = new SparkTRowSetGenerator().toRowBasedSet(rows, schema) assert(tRowSet.getColumnCount === 0) assert(tRowSet.getRowsSize === rows.size) val iter = tRowSet.getRowsIterator @@ -241,7 +248,9 @@ class RowSetSuite extends KyuubiFunSuite { val r8 = iter.next().getColVals assert(r8.get(12).getStringVal.getValue === Array.fill(7)(7.7d).mkString("[", ",", "]")) assert(r8.get(13).getStringVal.getValue === - RowSet.toHiveString(Map(7 -> 7.7d) -> MapType(IntegerType, DoubleType))) + RowSet.toHiveString( + Map(7 -> 7.7d) -> MapType(IntegerType, DoubleType), + timeFormatters = HiveResult.getTimeFormatters)) val r9 = iter.next().getColVals assert(r9.get(14).getStringVal.getValue === new CalendarInterval(8, 8, 8).toString) @@ -249,7 +258,7 @@ class RowSetSuite extends KyuubiFunSuite { test("to row set") { TProtocolVersion.values().foreach { proto => - val set = RowSet.toTRowSet(rows, schema, proto) + val set = new SparkTRowSetGenerator().toTRowSet(rows, schema, proto) if (proto.getValue < TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6.getValue) { assert(!set.isSetColumns, proto.toString) assert(set.isSetRows, proto.toString) diff --git a/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/schema/SchemaHelperSuite.scala b/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/schema/SchemaHelperSuite.scala index 6bd0364f754..b2514e7dd73 100644 --- a/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/schema/SchemaHelperSuite.scala +++ b/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/schema/SchemaHelperSuite.scala @@ -21,11 +21,11 @@ import java.time.ZoneId import scala.collection.JavaConverters._ -import org.apache.hive.service.rpc.thrift.{TCLIServiceConstants, TTypeId} import org.apache.spark.sql.types._ import org.apache.kyuubi.KyuubiFunSuite import org.apache.kyuubi.engine.spark.schema.SchemaHelper._ +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TCLIServiceConstants, TTypeId} class SchemaHelperSuite extends KyuubiFunSuite { diff --git a/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/session/SingleSessionSuite.scala b/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/session/SingleSessionSuite.scala index 0f0e07411a4..82a85bfcf44 100644 --- a/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/session/SingleSessionSuite.scala +++ b/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/session/SingleSessionSuite.scala @@ -28,7 +28,7 @@ class SingleSessionSuite extends WithSparkSQLEngine with HiveJDBCTestHelper { ENGINE_SHARE_LEVEL.key -> "SERVER", ENGINE_SINGLE_SPARK_SESSION.key -> "true", ( - ENGINE_SESSION_INITIALIZE_SQL.key, + ENGINE_SESSION_SPARK_INITIALIZE_SQL.key, "CREATE DATABASE IF NOT EXISTS INIT_DB_SOLO;" + "CREATE TABLE IF NOT EXISTS INIT_DB_SOLO.test(a int) USING CSV;" + "INSERT INTO INIT_DB_SOLO.test VALUES (2);")) diff --git a/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/session/UserIsolatedSessionSuite.scala b/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/session/UserIsolatedSessionSuite.scala index 9d31e180f7e..ccfea6b89c4 100644 --- a/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/session/UserIsolatedSessionSuite.scala +++ b/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/session/UserIsolatedSessionSuite.scala @@ -17,11 +17,10 @@ package org.apache.kyuubi.engine.spark.session -import org.apache.hive.service.rpc.thrift.{TExecuteStatementReq, TFetchResultsReq, TOpenSessionReq} - import org.apache.kyuubi.config.KyuubiConf._ import org.apache.kyuubi.engine.spark.WithSparkSQLEngine import org.apache.kyuubi.operation.HiveJDBCTestHelper +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TExecuteStatementReq, TFetchResultsReq, TOpenSessionReq} class UserIsolatedSessionSuite extends WithSparkSQLEngine with HiveJDBCTestHelper { diff --git a/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/spark/kyuubi/SQLOperationListenerSuite.scala b/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/spark/kyuubi/SQLOperationListenerSuite.scala index f732f7c3846..d6987ad2e67 100644 --- a/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/spark/kyuubi/SQLOperationListenerSuite.scala +++ b/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/spark/kyuubi/SQLOperationListenerSuite.scala @@ -19,19 +19,17 @@ package org.apache.spark.kyuubi import scala.collection.JavaConverters.asScalaBufferConverter -import org.apache.hive.service.rpc.thrift.{TExecuteStatementReq, TFetchOrientation, TFetchResultsReq, TOperationHandle} import org.scalatest.time.SpanSugar._ import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiConf.OPERATION_SPARK_LISTENER_ENABLED import org.apache.kyuubi.engine.spark.WithSparkSQLEngine import org.apache.kyuubi.operation.HiveJDBCTestHelper +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TExecuteStatementReq, TFetchOrientation, TFetchResultsReq, TOperationHandle} class SQLOperationListenerSuite extends WithSparkSQLEngine with HiveJDBCTestHelper { - override def withKyuubiConf: Map[String, String] = Map( - KyuubiConf.ENGINE_SPARK_SHOW_PROGRESS.key -> "true", - KyuubiConf.ENGINE_SPARK_SHOW_PROGRESS_UPDATE_INTERVAL.key -> "200") + override def withKyuubiConf: Map[String, String] = Map.empty override protected def jdbcUrl: String = getJdbcUrl @@ -58,19 +56,23 @@ class SQLOperationListenerSuite extends WithSparkSQLEngine with HiveJDBCTestHelp } test("operation listener with progress job info") { - val sql = "SELECT java_method('java.lang.Thread', 'sleep', 10000l) FROM range(1, 3, 1, 2);" - withSessionHandle { (client, handle) => - val req = new TExecuteStatementReq() - req.setSessionHandle(handle) - req.setStatement(sql) - val tExecuteStatementResp = client.ExecuteStatement(req) - val opHandle = tExecuteStatementResp.getOperationHandle - val fetchResultsReq = new TFetchResultsReq(opHandle, TFetchOrientation.FETCH_NEXT, 1000) - fetchResultsReq.setFetchType(1.toShort) - eventually(timeout(90.seconds), interval(500.milliseconds)) { - val resultsResp = client.FetchResults(fetchResultsReq) - val logs = resultsResp.getResults.getColumns.get(0).getStringVal.getValues.asScala - assert(logs.exists(_.matches(".*\\[Job .* Stages\\] \\[Stage .*\\]"))) + withSessionConf(Map( + KyuubiConf.ENGINE_SPARK_SHOW_PROGRESS.key -> "true", + KyuubiConf.ENGINE_SPARK_SHOW_PROGRESS_UPDATE_INTERVAL.key -> "200"))()() { + val sql = "SELECT java_method('java.lang.Thread', 'sleep', 10000l) FROM range(1, 3, 1, 2);" + withSessionHandle { (client, handle) => + val req = new TExecuteStatementReq() + req.setSessionHandle(handle) + req.setStatement(sql) + val tExecuteStatementResp = client.ExecuteStatement(req) + val opHandle = tExecuteStatementResp.getOperationHandle + val fetchResultsReq = new TFetchResultsReq(opHandle, TFetchOrientation.FETCH_NEXT, 1000) + fetchResultsReq.setFetchType(1.toShort) + eventually(timeout(90.seconds), interval(500.milliseconds)) { + val resultsResp = client.FetchResults(fetchResultsReq) + val logs = resultsResp.getResults.getColumns.get(0).getStringVal.getValues.asScala + assert(logs.exists(_.matches(".*\\[Job .* Stages\\] \\[Stage .*\\]"))) + } } } } diff --git a/externals/kyuubi-trino-engine/src/main/scala/org/apache/kyuubi/engine/trino/operation/ExecuteStatement.scala b/externals/kyuubi-trino-engine/src/main/scala/org/apache/kyuubi/engine/trino/operation/ExecuteStatement.scala index 3e7cce80cdf..3de2ae59f42 100644 --- a/externals/kyuubi-trino-engine/src/main/scala/org/apache/kyuubi/engine/trino/operation/ExecuteStatement.scala +++ b/externals/kyuubi-trino-engine/src/main/scala/org/apache/kyuubi/engine/trino/operation/ExecuteStatement.scala @@ -19,17 +19,16 @@ package org.apache.kyuubi.engine.trino.operation import java.util.concurrent.RejectedExecutionException -import org.apache.hive.service.rpc.thrift.TFetchResultsResp - import org.apache.kyuubi.{KyuubiSQLException, Logging} import org.apache.kyuubi.engine.trino.TrinoStatement import org.apache.kyuubi.engine.trino.event.TrinoOperationEvent -import org.apache.kyuubi.engine.trino.schema.RowSet +import org.apache.kyuubi.engine.trino.schema.TrinoTRowSetGenerator import org.apache.kyuubi.events.EventBus import org.apache.kyuubi.operation.{ArrayFetchIterator, FetchIterator, OperationState} import org.apache.kyuubi.operation.FetchOrientation.{FETCH_FIRST, FETCH_NEXT, FETCH_PRIOR, FetchOrientation} import org.apache.kyuubi.operation.log.OperationLog import org.apache.kyuubi.session.Session +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TFetchResultsResp class ExecuteStatement( session: Session, @@ -97,7 +96,8 @@ class ExecuteStatement( throw KyuubiSQLException(s"Fetch orientation[$order] is not supported in $mode mode") } val taken = iter.take(rowSetSize) - val resultRowSet = RowSet.toTRowSet(taken.toList, schema, getProtocolVersion) + val resultRowSet = new TrinoTRowSetGenerator() + .toTRowSet(taken.toList, schema, getProtocolVersion) resultRowSet.setStartRowOffset(iter.getPosition) val fetchResultsResp = new TFetchResultsResp(OK_STATUS) fetchResultsResp.setResults(resultRowSet) diff --git a/externals/kyuubi-trino-engine/src/main/scala/org/apache/kyuubi/engine/trino/operation/TrinoOperation.scala b/externals/kyuubi-trino-engine/src/main/scala/org/apache/kyuubi/engine/trino/operation/TrinoOperation.scala index 11eaa1bc1d7..822f1726a3b 100644 --- a/externals/kyuubi-trino-engine/src/main/scala/org/apache/kyuubi/engine/trino/operation/TrinoOperation.scala +++ b/externals/kyuubi-trino-engine/src/main/scala/org/apache/kyuubi/engine/trino/operation/TrinoOperation.scala @@ -21,13 +21,11 @@ import java.io.IOException import io.trino.client.Column import io.trino.client.StatementClient -import org.apache.hive.service.rpc.thrift.{TFetchResultsResp, TGetResultSetMetadataResp} import org.apache.kyuubi.KyuubiSQLException import org.apache.kyuubi.Utils import org.apache.kyuubi.engine.trino.TrinoContext -import org.apache.kyuubi.engine.trino.schema.RowSet -import org.apache.kyuubi.engine.trino.schema.SchemaHelper +import org.apache.kyuubi.engine.trino.schema.{SchemaHelper, TrinoTRowSetGenerator} import org.apache.kyuubi.engine.trino.session.TrinoSessionImpl import org.apache.kyuubi.operation.AbstractOperation import org.apache.kyuubi.operation.FetchIterator @@ -35,6 +33,7 @@ import org.apache.kyuubi.operation.FetchOrientation.{FETCH_FIRST, FETCH_NEXT, FE import org.apache.kyuubi.operation.OperationState import org.apache.kyuubi.operation.log.OperationLog import org.apache.kyuubi.session.Session +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TFetchResultsResp, TGetResultSetMetadataResp} abstract class TrinoOperation(session: Session) extends AbstractOperation(session) { @@ -66,7 +65,8 @@ abstract class TrinoOperation(session: Session) extends AbstractOperation(sessio case FETCH_FIRST => iter.fetchAbsolute(0) } val taken = iter.take(rowSetSize) - val resultRowSet = RowSet.toTRowSet(taken.toList, schema, getProtocolVersion) + val resultRowSet = + new TrinoTRowSetGenerator().toTRowSet(taken.toSeq, schema, getProtocolVersion) resultRowSet.setStartRowOffset(iter.getPosition) val resp = new TFetchResultsResp(OK_STATUS) resp.setResults(resultRowSet) diff --git a/externals/kyuubi-trino-engine/src/main/scala/org/apache/kyuubi/engine/trino/schema/RowSet.scala b/externals/kyuubi-trino-engine/src/main/scala/org/apache/kyuubi/engine/trino/schema/RowSet.scala index 6e23a3e1f98..22e09f38138 100644 --- a/externals/kyuubi-trino-engine/src/main/scala/org/apache/kyuubi/engine/trino/schema/RowSet.scala +++ b/externals/kyuubi-trino-engine/src/main/scala/org/apache/kyuubi/engine/trino/schema/RowSet.scala @@ -17,233 +17,16 @@ package org.apache.kyuubi.engine.trino.schema -import java.nio.ByteBuffer import java.nio.charset.StandardCharsets -import java.util import scala.collection.JavaConverters._ import io.trino.client.ClientStandardTypes._ import io.trino.client.ClientTypeSignature -import io.trino.client.Column import io.trino.client.Row -import org.apache.hive.service.rpc.thrift.TBinaryColumn -import org.apache.hive.service.rpc.thrift.TBoolColumn -import org.apache.hive.service.rpc.thrift.TBoolValue -import org.apache.hive.service.rpc.thrift.TByteColumn -import org.apache.hive.service.rpc.thrift.TByteValue -import org.apache.hive.service.rpc.thrift.TColumn -import org.apache.hive.service.rpc.thrift.TColumnValue -import org.apache.hive.service.rpc.thrift.TDoubleColumn -import org.apache.hive.service.rpc.thrift.TDoubleValue -import org.apache.hive.service.rpc.thrift.TI16Column -import org.apache.hive.service.rpc.thrift.TI16Value -import org.apache.hive.service.rpc.thrift.TI32Column -import org.apache.hive.service.rpc.thrift.TI32Value -import org.apache.hive.service.rpc.thrift.TI64Column -import org.apache.hive.service.rpc.thrift.TI64Value -import org.apache.hive.service.rpc.thrift.TProtocolVersion -import org.apache.hive.service.rpc.thrift.TRow -import org.apache.hive.service.rpc.thrift.TRowSet -import org.apache.hive.service.rpc.thrift.TStringColumn -import org.apache.hive.service.rpc.thrift.TStringValue - -import org.apache.kyuubi.util.RowSetUtils.bitSetToBuffer object RowSet { - def toTRowSet( - rows: Seq[List[_]], - schema: List[Column], - protocolVersion: TProtocolVersion): TRowSet = { - if (protocolVersion.getValue < TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6.getValue) { - toRowBasedSet(rows, schema) - } else { - toColumnBasedSet(rows, schema) - } - } - - def toRowBasedSet(rows: Seq[List[_]], schema: List[Column]): TRowSet = { - val rowSize = rows.length - val tRows = new util.ArrayList[TRow](rowSize) - var i = 0 - while (i < rowSize) { - val row = rows(i) - val tRow = new TRow() - val columnSize = row.size - var j = 0 - while (j < columnSize) { - val columnValue = toTColumnValue(j, row, schema) - tRow.addToColVals(columnValue) - j += 1 - } - tRows.add(tRow) - i += 1 - } - new TRowSet(0, tRows) - } - - def toColumnBasedSet(rows: Seq[List[_]], schema: List[Column]): TRowSet = { - val size = rows.size - val tRowSet = new TRowSet(0, new java.util.ArrayList[TRow](size)) - val columnSize = schema.length - var i = 0 - while (i < columnSize) { - val field = schema(i) - val tColumn = toTColumn(rows, i, field.getTypeSignature) - tRowSet.addToColumns(tColumn) - i += 1 - } - tRowSet - } - - private def toTColumn( - rows: Seq[Seq[Any]], - ordinal: Int, - typ: ClientTypeSignature): TColumn = { - val nulls = new java.util.BitSet() - typ.getRawType match { - case BOOLEAN => - val values = getOrSetAsNull[java.lang.Boolean](rows, ordinal, nulls, true) - TColumn.boolVal(new TBoolColumn(values, nulls)) - - case TINYINT => - val values = getOrSetAsNull[java.lang.Byte](rows, ordinal, nulls, 0.toByte) - TColumn.byteVal(new TByteColumn(values, nulls)) - - case SMALLINT => - val values = getOrSetAsNull[java.lang.Short](rows, ordinal, nulls, 0.toShort) - TColumn.i16Val(new TI16Column(values, nulls)) - - case INTEGER => - val values = getOrSetAsNull[java.lang.Integer](rows, ordinal, nulls, 0) - TColumn.i32Val(new TI32Column(values, nulls)) - - case BIGINT => - val values = getOrSetAsNull[java.lang.Long](rows, ordinal, nulls, 0L) - TColumn.i64Val(new TI64Column(values, nulls)) - - case REAL => - val values = getOrSetAsNull[java.lang.Float](rows, ordinal, nulls, 0.toFloat) - .asScala.map(n => java.lang.Double.valueOf(n.toString)).asJava - TColumn.doubleVal(new TDoubleColumn(values, nulls)) - - case DOUBLE => - val values = getOrSetAsNull[java.lang.Double](rows, ordinal, nulls, 0.toDouble) - TColumn.doubleVal(new TDoubleColumn(values, nulls)) - - case VARCHAR => - val values = getOrSetAsNull[String](rows, ordinal, nulls, "") - TColumn.stringVal(new TStringColumn(values, nulls)) - - case VARBINARY => - val values = getOrSetAsNull[Array[Byte]](rows, ordinal, nulls, Array()) - .asScala - .map(ByteBuffer.wrap) - .asJava - TColumn.binaryVal(new TBinaryColumn(values, nulls)) - - case _ => - val rowSize = rows.length - val values = new util.ArrayList[String](rowSize) - var i = 0 - while (i < rowSize) { - val row = rows(i) - nulls.set(i, row(ordinal) == null) - val value = - if (row(ordinal) == null) { - "" - } else { - toHiveString(row(ordinal), typ) - } - values.add(value) - i += 1 - } - TColumn.stringVal(new TStringColumn(values, nulls)) - } - } - - private def getOrSetAsNull[T]( - rows: Seq[Seq[Any]], - ordinal: Int, - nulls: java.util.BitSet, - defaultVal: T): java.util.List[T] = { - val size = rows.length - val ret = new java.util.ArrayList[T](size) - var idx = 0 - while (idx < size) { - val row = rows(idx) - val isNull = row(ordinal) == null - if (isNull) { - nulls.set(idx, true) - ret.add(idx, defaultVal) - } else { - ret.add(idx, row(ordinal).asInstanceOf[T]) - } - idx += 1 - } - ret - } - - private def toTColumnValue( - ordinal: Int, - row: List[Any], - types: List[Column]): TColumnValue = { - - types(ordinal).getTypeSignature.getRawType match { - case BOOLEAN => - val boolValue = new TBoolValue - if (row(ordinal) != null) boolValue.setValue(row(ordinal).asInstanceOf[Boolean]) - TColumnValue.boolVal(boolValue) - - case TINYINT => - val byteValue = new TByteValue - if (row(ordinal) != null) byteValue.setValue(row(ordinal).asInstanceOf[Byte]) - TColumnValue.byteVal(byteValue) - - case SMALLINT => - val tI16Value = new TI16Value - if (row(ordinal) != null) tI16Value.setValue(row(ordinal).asInstanceOf[Short]) - TColumnValue.i16Val(tI16Value) - - case INTEGER => - val tI32Value = new TI32Value - if (row(ordinal) != null) tI32Value.setValue(row(ordinal).asInstanceOf[Int]) - TColumnValue.i32Val(tI32Value) - - case BIGINT => - val tI64Value = new TI64Value - if (row(ordinal) != null) tI64Value.setValue(row(ordinal).asInstanceOf[Long]) - TColumnValue.i64Val(tI64Value) - - case REAL => - val tDoubleValue = new TDoubleValue - if (row(ordinal) != null) { - val doubleValue = java.lang.Double.valueOf(row(ordinal).asInstanceOf[Float].toString) - tDoubleValue.setValue(doubleValue) - } - TColumnValue.doubleVal(tDoubleValue) - - case DOUBLE => - val tDoubleValue = new TDoubleValue - if (row(ordinal) != null) tDoubleValue.setValue(row(ordinal).asInstanceOf[Double]) - TColumnValue.doubleVal(tDoubleValue) - - case VARCHAR => - val tStringValue = new TStringValue - if (row(ordinal) != null) tStringValue.setValue(row(ordinal).asInstanceOf[String]) - TColumnValue.stringVal(tStringValue) - - case _ => - val tStrValue = new TStringValue - if (row(ordinal) != null) { - tStrValue.setValue( - toHiveString(row(ordinal), types(ordinal).getTypeSignature)) - } - TColumnValue.stringVal(tStrValue) - } - } - /** * A simpler impl of Trino's toHiveString */ diff --git a/externals/kyuubi-trino-engine/src/main/scala/org/apache/kyuubi/engine/trino/schema/SchemaHelper.scala b/externals/kyuubi-trino-engine/src/main/scala/org/apache/kyuubi/engine/trino/schema/SchemaHelper.scala index e89f5e8cd97..ad44445c30a 100644 --- a/externals/kyuubi-trino-engine/src/main/scala/org/apache/kyuubi/engine/trino/schema/SchemaHelper.scala +++ b/externals/kyuubi-trino-engine/src/main/scala/org/apache/kyuubi/engine/trino/schema/SchemaHelper.scala @@ -24,15 +24,16 @@ import scala.collection.JavaConverters._ import io.trino.client.ClientStandardTypes._ import io.trino.client.ClientTypeSignature import io.trino.client.Column -import org.apache.hive.service.rpc.thrift.TCLIServiceConstants -import org.apache.hive.service.rpc.thrift.TColumnDesc -import org.apache.hive.service.rpc.thrift.TPrimitiveTypeEntry -import org.apache.hive.service.rpc.thrift.TTableSchema -import org.apache.hive.service.rpc.thrift.TTypeDesc -import org.apache.hive.service.rpc.thrift.TTypeEntry -import org.apache.hive.service.rpc.thrift.TTypeId -import org.apache.hive.service.rpc.thrift.TTypeQualifiers -import org.apache.hive.service.rpc.thrift.TTypeQualifierValue + +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TCLIServiceConstants +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TColumnDesc +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TPrimitiveTypeEntry +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TTableSchema +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TTypeDesc +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TTypeEntry +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TTypeId +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TTypeQualifiers +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TTypeQualifierValue object SchemaHelper { diff --git a/externals/kyuubi-trino-engine/src/main/scala/org/apache/kyuubi/engine/trino/schema/TrinoTRowSetGenerator.scala b/externals/kyuubi-trino-engine/src/main/scala/org/apache/kyuubi/engine/trino/schema/TrinoTRowSetGenerator.scala new file mode 100644 index 00000000000..57d91b371f0 --- /dev/null +++ b/externals/kyuubi-trino-engine/src/main/scala/org/apache/kyuubi/engine/trino/schema/TrinoTRowSetGenerator.scala @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.engine.trino.schema + +import io.trino.client.{ClientTypeSignature, Column} +import io.trino.client.ClientStandardTypes._ + +import org.apache.kyuubi.engine.result.TRowSetGenerator +import org.apache.kyuubi.engine.trino.schema.RowSet.toHiveString +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ + +class TrinoTRowSetGenerator + extends TRowSetGenerator[Seq[Column], Seq[_], ClientTypeSignature] { + + override def getColumnSizeFromSchemaType(schema: Seq[Column]): Int = schema.length + + override def getColumnType(schema: Seq[Column], ordinal: Int): ClientTypeSignature = + schema(ordinal).getTypeSignature + + override def isColumnNullAt(row: Seq[_], ordinal: Int): Boolean = row(ordinal) == null + + override def getColumnAs[T](row: Seq[_], ordinal: Int): T = row(ordinal).asInstanceOf[T] + + override def toTColumn(rows: Seq[Seq[_]], ordinal: Int, typ: ClientTypeSignature): TColumn = { + typ.getRawType match { + case BOOLEAN => asBooleanTColumn(rows, ordinal) + case TINYINT => asByteTColumn(rows, ordinal) + case SMALLINT => asShortTColumn(rows, ordinal) + case INTEGER => asIntegerTColumn(rows, ordinal) + case BIGINT => asLongTColumn(rows, ordinal) + case REAL => asFloatTColumn(rows, ordinal) + case DOUBLE => asDoubleTColumn(rows, ordinal) + case VARCHAR => asStringTColumn(rows, ordinal) + case VARBINARY => asByteArrayTColumn(rows, ordinal) + case _ => + asStringTColumn( + rows, + ordinal, + convertFunc = (row, ordinal) => toHiveString(getColumnAs[Any](row, ordinal), typ)) + } + } + + override def toTColumnValue(row: Seq[_], ordinal: Int, types: Seq[Column]): TColumnValue = { + getColumnType(types, ordinal).getRawType match { + case BOOLEAN => asBooleanTColumnValue(row, ordinal) + case TINYINT => asByteTColumnValue(row, ordinal) + case SMALLINT => asShortTColumnValue(row, ordinal) + case INTEGER => asIntegerTColumnValue(row, ordinal) + case BIGINT => asLongTColumnValue(row, ordinal) + case REAL => asFloatTColumnValue(row, ordinal) + case DOUBLE => asDoubleTColumnValue(row, ordinal) + case VARCHAR => asStringTColumnValue(row, ordinal) + case _ => + asStringTColumnValue( + row, + ordinal, + rawValue => toHiveString(rawValue, types(ordinal).getTypeSignature)) + } + } + +} diff --git a/externals/kyuubi-trino-engine/src/main/scala/org/apache/kyuubi/engine/trino/session/TrinoSessionImpl.scala b/externals/kyuubi-trino-engine/src/main/scala/org/apache/kyuubi/engine/trino/session/TrinoSessionImpl.scala index 0b3ac01a9ef..950a0814b5d 100644 --- a/externals/kyuubi-trino-engine/src/main/scala/org/apache/kyuubi/engine/trino/session/TrinoSessionImpl.scala +++ b/externals/kyuubi-trino-engine/src/main/scala/org/apache/kyuubi/engine/trino/session/TrinoSessionImpl.scala @@ -28,7 +28,6 @@ import io.airlift.units.Duration import io.trino.client.ClientSession import io.trino.client.OkHttpUtil import okhttp3.OkHttpClient -import org.apache.hive.service.rpc.thrift.{TGetInfoType, TGetInfoValue, TProtocolVersion} import org.apache.kyuubi.KyuubiSQLException import org.apache.kyuubi.Utils.currentUser @@ -39,6 +38,7 @@ import org.apache.kyuubi.engine.trino.event.TrinoSessionEvent import org.apache.kyuubi.events.EventBus import org.apache.kyuubi.operation.{Operation, OperationHandle} import org.apache.kyuubi.session.{AbstractSession, SessionHandle, SessionManager, USE_CATALOG, USE_DATABASE} +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TGetInfoType, TGetInfoValue, TProtocolVersion} class TrinoSessionImpl( protocol: TProtocolVersion, diff --git a/externals/kyuubi-trino-engine/src/main/scala/org/apache/kyuubi/engine/trino/session/TrinoSessionManager.scala b/externals/kyuubi-trino-engine/src/main/scala/org/apache/kyuubi/engine/trino/session/TrinoSessionManager.scala index e18b8f75817..55aa2f3fa78 100644 --- a/externals/kyuubi-trino-engine/src/main/scala/org/apache/kyuubi/engine/trino/session/TrinoSessionManager.scala +++ b/externals/kyuubi-trino-engine/src/main/scala/org/apache/kyuubi/engine/trino/session/TrinoSessionManager.scala @@ -17,14 +17,13 @@ package org.apache.kyuubi.engine.trino.session -import org.apache.hive.service.rpc.thrift.TProtocolVersion - import org.apache.kyuubi.config.KyuubiConf.ENGINE_SHARE_LEVEL import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_SESSION_HANDLE_KEY import org.apache.kyuubi.engine.ShareLevel import org.apache.kyuubi.engine.trino.TrinoSqlEngine import org.apache.kyuubi.engine.trino.operation.TrinoOperationManager import org.apache.kyuubi.session.{Session, SessionHandle, SessionManager} +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TProtocolVersion class TrinoSessionManager extends SessionManager("TrinoSessionManager") { diff --git a/externals/kyuubi-trino-engine/src/test/scala/org/apache/kyuubi/engine/trino/operation/TrinoOperationSuite.scala b/externals/kyuubi-trino-engine/src/test/scala/org/apache/kyuubi/engine/trino/operation/TrinoOperationSuite.scala index 90939a3e4e0..c49c4965bfc 100644 --- a/externals/kyuubi-trino-engine/src/test/scala/org/apache/kyuubi/engine/trino/operation/TrinoOperationSuite.scala +++ b/externals/kyuubi-trino-engine/src/test/scala/org/apache/kyuubi/engine/trino/operation/TrinoOperationSuite.scala @@ -22,13 +22,13 @@ import scala.collection.mutable.ArrayBuffer import scala.collection.mutable.Set import io.trino.client.ClientStandardTypes._ -import org.apache.hive.service.rpc.thrift._ import org.apache.kyuubi.KyuubiSQLException import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiConf._ import org.apache.kyuubi.engine.trino.{TrinoQueryTests, TrinoStatement, WithTrinoEngine} import org.apache.kyuubi.operation.meta.ResultSetSchemaConstant._ +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ class TrinoOperationSuite extends WithTrinoEngine with TrinoQueryTests { override def withKyuubiConf: Map[String, String] = Map( diff --git a/externals/kyuubi-trino-engine/src/test/scala/org/apache/kyuubi/engine/trino/schema/RowSetSuite.scala b/externals/kyuubi-trino-engine/src/test/scala/org/apache/kyuubi/engine/trino/schema/RowSetSuite.scala index d6187bbf881..461c453ecd2 100644 --- a/externals/kyuubi-trino-engine/src/test/scala/org/apache/kyuubi/engine/trino/schema/RowSetSuite.scala +++ b/externals/kyuubi-trino-engine/src/test/scala/org/apache/kyuubi/engine/trino/schema/RowSetSuite.scala @@ -28,11 +28,11 @@ import io.trino.client.ClientStandardTypes._ import io.trino.client.ClientTypeSignature import io.trino.client.Column import io.trino.client.Row -import org.apache.hive.service.rpc.thrift.TProtocolVersion import org.apache.kyuubi.KyuubiFunSuite import org.apache.kyuubi.engine.trino.schema.RowSet.toHiveString import org.apache.kyuubi.engine.trino.util.TestUtils._ +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TProtocolVersion class RowSetSuite extends KyuubiFunSuite { @@ -126,7 +126,7 @@ class RowSetSuite extends KyuubiFunSuite { def uuidSuffix(value: Int): String = if (value > 9) value.toString else s"f$value" test("column based set") { - val tRowSet = RowSet.toColumnBasedSet(rows, schema) + val tRowSet = new TrinoTRowSetGenerator().toColumnBasedSet(rows, schema) assert(tRowSet.getColumns.size() === schema.size) assert(tRowSet.getRowsSize === 0) @@ -277,7 +277,7 @@ class RowSetSuite extends KyuubiFunSuite { } test("row based set") { - val tRowSet = RowSet.toRowBasedSet(rows, schema) + val tRowSet = new TrinoTRowSetGenerator().toRowBasedSet(rows, schema) assert(tRowSet.getColumnCount === 0) assert(tRowSet.getRowsSize === rows.size) val iter = tRowSet.getRowsIterator @@ -333,7 +333,7 @@ class RowSetSuite extends KyuubiFunSuite { test("to row set") { TProtocolVersion.values().foreach { proto => - val set = RowSet.toTRowSet(rows, schema, proto) + val set = new TrinoTRowSetGenerator().toTRowSet(rows, schema, proto) if (proto.getValue < TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6.getValue) { assert(!set.isSetColumns, proto.toString) assert(set.isSetRows, proto.toString) diff --git a/externals/kyuubi-trino-engine/src/test/scala/org/apache/kyuubi/engine/trino/schema/SchemaHelperSuite.scala b/externals/kyuubi-trino-engine/src/test/scala/org/apache/kyuubi/engine/trino/schema/SchemaHelperSuite.scala index 6f6bdc25fa4..451cc0573f8 100644 --- a/externals/kyuubi-trino-engine/src/test/scala/org/apache/kyuubi/engine/trino/schema/SchemaHelperSuite.scala +++ b/externals/kyuubi-trino-engine/src/test/scala/org/apache/kyuubi/engine/trino/schema/SchemaHelperSuite.scala @@ -21,12 +21,12 @@ import scala.collection.JavaConverters._ import io.trino.client.ClientStandardTypes._ import io.trino.client.Column -import org.apache.hive.service.rpc.thrift.TCLIServiceConstants -import org.apache.hive.service.rpc.thrift.TTypeId import org.apache.kyuubi.KyuubiFunSuite import org.apache.kyuubi.engine.trino.schema.SchemaHelper._ import org.apache.kyuubi.engine.trino.util.TestUtils._ +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TCLIServiceConstants +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TTypeId class SchemaHelperSuite extends KyuubiFunSuite { diff --git a/integration-tests/kyuubi-flink-it/src/test/scala/org/apache/kyuubi/it/flink/operation/FlinkOperationSuite.scala b/integration-tests/kyuubi-flink-it/src/test/scala/org/apache/kyuubi/it/flink/operation/FlinkOperationSuite.scala index 55476bfd003..8bd6ecf99ee 100644 --- a/integration-tests/kyuubi-flink-it/src/test/scala/org/apache/kyuubi/it/flink/operation/FlinkOperationSuite.scala +++ b/integration-tests/kyuubi-flink-it/src/test/scala/org/apache/kyuubi/it/flink/operation/FlinkOperationSuite.scala @@ -17,13 +17,12 @@ package org.apache.kyuubi.it.flink.operation -import org.apache.hive.service.rpc.thrift.{TGetInfoReq, TGetInfoType} - import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiConf._ import org.apache.kyuubi.it.flink.WithKyuubiServerAndFlinkMiniCluster import org.apache.kyuubi.operation.HiveJDBCTestHelper import org.apache.kyuubi.operation.meta.ResultSetSchemaConstant.TABLE_CAT +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TGetInfoReq, TGetInfoType} class FlinkOperationSuite extends WithKyuubiServerAndFlinkMiniCluster with HiveJDBCTestHelper { @@ -98,6 +97,8 @@ class FlinkOperationSuite extends WithKyuubiServerAndFlinkMiniCluster req.setSessionHandle(handle) req.setInfoType(TGetInfoType.CLI_DBMS_NAME) assert(client.GetInfo(req).getInfoValue.getStringValue === "Apache Flink") + req.setInfoType(TGetInfoType.CLI_ODBC_KEYWORDS) + assert(client.GetInfo(req).getInfoValue.getStringValue === "Unimplemented") } } } diff --git a/integration-tests/kyuubi-flink-it/src/test/scala/org/apache/kyuubi/it/flink/operation/FlinkOperationSuiteOnYarn.scala b/integration-tests/kyuubi-flink-it/src/test/scala/org/apache/kyuubi/it/flink/operation/FlinkOperationSuiteOnYarn.scala index ee6b9bb98ea..c48d91435f1 100644 --- a/integration-tests/kyuubi-flink-it/src/test/scala/org/apache/kyuubi/it/flink/operation/FlinkOperationSuiteOnYarn.scala +++ b/integration-tests/kyuubi-flink-it/src/test/scala/org/apache/kyuubi/it/flink/operation/FlinkOperationSuiteOnYarn.scala @@ -17,13 +17,12 @@ package org.apache.kyuubi.it.flink.operation -import org.apache.hive.service.rpc.thrift.{TGetInfoReq, TGetInfoType} - import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiConf._ import org.apache.kyuubi.it.flink.WithKyuubiServerAndYarnMiniCluster import org.apache.kyuubi.operation.HiveJDBCTestHelper import org.apache.kyuubi.operation.meta.ResultSetSchemaConstant.TABLE_CAT +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TGetInfoReq, TGetInfoType} class FlinkOperationSuiteOnYarn extends WithKyuubiServerAndYarnMiniCluster with HiveJDBCTestHelper { diff --git a/integration-tests/kyuubi-gluten-it/pom.xml b/integration-tests/kyuubi-gluten-it/pom.xml new file mode 100644 index 00000000000..ac49c286ade --- /dev/null +++ b/integration-tests/kyuubi-gluten-it/pom.xml @@ -0,0 +1,127 @@ + + + + 4.0.0 + + org.apache.kyuubi + integration-tests + 1.9.0-SNAPSHOT + ../pom.xml + + + kyuubi-gluten-it_${scala.binary.version} + Kyuubi Test Gluten IT + https://kyuubi.apache.org/ + + + 1.1.0-SNAPSHOT + 3.4.2 + 3.4 + + + + + org.apache.kyuubi + kyuubi-common_${scala.binary.version} + ${project.version} + + + + org.apache.kyuubi + kyuubi-common_${scala.binary.version} + ${project.version} + test-jar + test + + + + org.apache.kyuubi + kyuubi-spark-connector-common_${scala.binary.version} + ${project.version} + test-jar + test + + + + org.apache.kyuubi + kyuubi-spark-connector-tpcds_${scala.binary.version} + ${project.version} + + + + org.apache.kyuubi + kyuubi-spark-connector-tpch_${scala.binary.version} + ${project.version} + + + + org.apache.spark + spark-sql_${scala.binary.version} + provided + + + + org.apache.spark + spark-hive_${scala.binary.version} + test + + + + + target/scala-${scala.binary.version}/classes + target/scala-${scala.binary.version}/test-classes + + + + + gluten-spark-3.4 + + org.apache.kyuubi.tags.GlutenTest + 3.4.2 + 3.4 + + + + io.glutenproject + gluten-velox-bundle-spark3.4_2.12-ubuntu_22.04 + ${gluten.version} + system + ${project.basedir}/../../gluten/package/target/gluten-velox-bundle-spark3.4_2.12-ubuntu_22.04-${gluten.version}.jar + + + + + gluten-spark-3.3 + + org.apache.kyuubi.tags.GlutenTest + 3.3.1 + 3.3 + + + + io.glutenproject + gluten-velox-bundle-spark3.3_2.12-ubuntu_22.04 + ${gluten.version} + system + ${project.basedir}/../../gluten/package/target/gluten-velox-bundle-spark3.3_2.12-ubuntu_22.04-${gluten.version}.jar + + + + + diff --git a/integration-tests/kyuubi-gluten-it/src/test/resources/load-tpcds-tiny.sql b/integration-tests/kyuubi-gluten-it/src/test/resources/load-tpcds-tiny.sql new file mode 100644 index 00000000000..952a9cf3a37 --- /dev/null +++ b/integration-tests/kyuubi-gluten-it/src/test/resources/load-tpcds-tiny.sql @@ -0,0 +1,146 @@ +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +CREATE DATABASE IF NOT EXISTS spark_catalog.tpcds_tiny; + +USE spark_catalog.tpcds_tiny; + +-- +-- Name: catalog_sales; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS catalog_sales USING parquet PARTITIONED BY (cs_sold_date_sk) +AS SELECT * FROM tpcds.tiny.catalog_sales; + +-- +-- Name: catalog_returns; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS catalog_returns USING parquet PARTITIONED BY (cr_returned_date_sk) +AS SELECT * FROM tpcds.tiny.catalog_returns; + +-- +-- Name: inventory; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS inventory USING parquet PARTITIONED BY (inv_date_sk) +AS SELECT * FROM tpcds.tiny.inventory; + +-- +-- Name: store_sales; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS store_sales USING parquet PARTITIONED BY (ss_sold_date_sk) +AS SELECT * FROM tpcds.tiny.store_sales; + +-- +-- Name: store_returns; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS store_returns USING parquet PARTITIONED BY (sr_returned_date_sk) +AS SELECT * FROM tpcds.tiny.store_returns; + +-- +-- Name: web_sales; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS web_sales USING parquet PARTITIONED BY (ws_sold_date_sk) +AS SELECT * FROM tpcds.tiny.web_sales; + +-- +-- Name: web_returns; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS web_returns USING parquet PARTITIONED BY (wr_returned_date_sk) +AS SELECT * FROM tpcds.tiny.web_returns; + +-- +-- Name: call_center; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS call_center USING parquet AS SELECT * FROM tpcds.tiny.call_center; + +-- +-- Name: catalog_page; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS catalog_page USING parquet AS SELECT * FROM tpcds.tiny.catalog_page; + +-- +-- Name: customer; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS customer USING parquet AS SELECT * FROM tpcds.tiny.customer; + +-- +-- Name: customer_address; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS customer_address USING parquet AS SELECT * FROM tpcds.tiny.customer_address; + +-- +-- Name: customer_demographics; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS customer_demographics USING parquet AS SELECT * FROM tpcds.tiny.customer_demographics; + +-- +-- Name: date_dim; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS date_dim USING parquet AS SELECT * FROM tpcds.tiny.date_dim; + +-- +-- Name: household_demographics; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS household_demographics USING parquet AS SELECT * FROM tpcds.tiny.household_demographics; + +-- +-- Name: income_band; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS income_band USING parquet AS SELECT * FROM tpcds.tiny.income_band; + +-- +-- Name: item; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS item USING parquet AS SELECT * FROM tpcds.tiny.item; + +-- +-- Name: promotion; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS promotion USING parquet AS SELECT * FROM tpcds.tiny.promotion; + +-- +-- Name: reason; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS reason USING parquet AS SELECT * FROM tpcds.tiny.reason; + +-- +-- Name: ship_mode; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS ship_mode USING parquet AS SELECT * FROM tpcds.tiny.ship_mode; + +-- +-- Name: store; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS store USING parquet AS SELECT * FROM tpcds.tiny.store; + +-- +-- Name: time_dim; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS time_dim USING parquet AS SELECT * FROM tpcds.tiny.time_dim; + +-- +-- Name: warehouse; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS warehouse USING parquet AS SELECT * FROM tpcds.tiny.warehouse; + +-- +-- Name: web_page; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS web_page USING parquet AS SELECT * FROM tpcds.tiny.web_page; + +-- +-- Name: web_site; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS web_site USING parquet AS SELECT * FROM tpcds.tiny.web_site; diff --git a/integration-tests/kyuubi-gluten-it/src/test/resources/load-tpch-tiny.sql b/integration-tests/kyuubi-gluten-it/src/test/resources/load-tpch-tiny.sql new file mode 100644 index 00000000000..8f2228f549c --- /dev/null +++ b/integration-tests/kyuubi-gluten-it/src/test/resources/load-tpch-tiny.sql @@ -0,0 +1,59 @@ +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +CREATE DATABASE IF NOT EXISTS spark_catalog.tpch_tiny; + +USE spark_catalog.tpch_tiny; + +-- +-- Name: customer; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS customer USING parquet AS SELECT * FROM tpch.tiny.customer; + +-- +-- Name: orders; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS orders USING parquet AS SELECT * FROM tpch.tiny.orders; + +-- +-- Name: lineitem; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS lineitem USING parquet AS SELECT * FROM tpch.tiny.lineitem; + +-- +-- Name: part; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS part USING parquet AS SELECT * FROM tpch.tiny.part; + +-- +-- Name: partsupp; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS partsupp USING parquet AS SELECT * FROM tpch.tiny.partsupp; + +-- +-- Name: supplier; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS supplier USING parquet AS SELECT * FROM tpch.tiny.supplier; + +-- +-- Name: nation; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS nation USING parquet AS SELECT * FROM tpch.tiny.nation; + +-- +-- Name: region; Type: TABLE; Tablespace: +-- +CREATE TABLE IF NOT EXISTS region USING parquet AS SELECT * FROM tpch.tiny.region; diff --git a/integration-tests/kyuubi-gluten-it/src/test/resources/log4j2-test.xml b/integration-tests/kyuubi-gluten-it/src/test/resources/log4j2-test.xml new file mode 100644 index 00000000000..3110216c17c --- /dev/null +++ b/integration-tests/kyuubi-gluten-it/src/test/resources/log4j2-test.xml @@ -0,0 +1,43 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/integration-tests/kyuubi-gluten-it/src/test/scala/org/apache/kyuubi/it/gluten/GlutenSuite.scala b/integration-tests/kyuubi-gluten-it/src/test/scala/org/apache/kyuubi/it/gluten/GlutenSuite.scala new file mode 100644 index 00000000000..67e9a92b66b --- /dev/null +++ b/integration-tests/kyuubi-gluten-it/src/test/scala/org/apache/kyuubi/it/gluten/GlutenSuite.scala @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.it.gluten + +import org.apache.spark.SparkConf +import org.apache.spark.sql.SparkSession + +import org.apache.kyuubi.{GlutenSuiteMixin, KyuubiFunSuite} +import org.apache.kyuubi.spark.connector.common.LocalSparkSession.withSparkSession +import org.apache.kyuubi.tags.GlutenTest + +@GlutenTest +class GlutenSuite extends KyuubiFunSuite with GlutenSuiteMixin { + + lazy val sparkConf: SparkConf = { + val glutenConf = new SparkConf().setMaster("local[*]") + .set("spark.ui.enabled", "false") + extraConfigs.foreach { case (k, v) => glutenConf.set(k, v) } + glutenConf + } + + test("KYUUBI #5467:test gluten select") { + withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark => + val result = spark.sql("SELECT 1").head() + assert(result.get(0) == 1) + } + } + + test("KYUUBI #5467: test gluten plan") { + withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark => + val plan = spark.sql("explain SELECT 1").head().getString(0) + assert(plan.contains("VeloxColumnarToRowExec") && plan.contains( + "VeloxColumnarToRowExec") && plan.contains("RowToVeloxColumnar")) + } + } +} diff --git a/integration-tests/kyuubi-gluten-it/src/test/scala/org/apache/kyuubi/it/gluten/TPCUtils.scala b/integration-tests/kyuubi-gluten-it/src/test/scala/org/apache/kyuubi/it/gluten/TPCUtils.scala new file mode 100644 index 00000000000..667a237809e --- /dev/null +++ b/integration-tests/kyuubi-gluten-it/src/test/scala/org/apache/kyuubi/it/gluten/TPCUtils.scala @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.it.gluten + +import scala.io.{Codec, Source} + +import org.apache.kyuubi.Utils + +object TPCUtils { + def loadTPCFile(resourceFile: String): String = { + val in = Utils.getContextOrKyuubiClassLoader + .getResourceAsStream(resourceFile) + val str: String = Source.fromInputStream(in)(Codec.UTF8).mkString + in.close() + str + } +} diff --git a/integration-tests/kyuubi-gluten-it/src/test/scala/org/apache/kyuubi/it/gluten/tpcds/GlutenTPCDSQuerySuite.scala b/integration-tests/kyuubi-gluten-it/src/test/scala/org/apache/kyuubi/it/gluten/tpcds/GlutenTPCDSQuerySuite.scala new file mode 100644 index 00000000000..9110974a323 --- /dev/null +++ b/integration-tests/kyuubi-gluten-it/src/test/scala/org/apache/kyuubi/it/gluten/tpcds/GlutenTPCDSQuerySuite.scala @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.it.gluten.tpcds + +import scala.collection.JavaConverters._ + +import org.apache.spark.SparkConf +import org.apache.spark.sql.SparkSession +import org.scalatest.tags.Slow + +import org.apache.kyuubi.{GlutenSuiteMixin, KyuubiFunSuite} +import org.apache.kyuubi.it.gluten.TPCUtils.loadTPCFile +import org.apache.kyuubi.spark.connector.common.GoldenFileUtils.LICENSE_HEADER +import org.apache.kyuubi.spark.connector.common.LocalSparkSession.withSparkSession +import org.apache.kyuubi.spark.connector.tpcds.TPCDSCatalog +import org.apache.kyuubi.tags.GlutenTest + +@Slow +@GlutenTest +class GlutenTPCDSQuerySuite extends KyuubiFunSuite with GlutenSuiteMixin { + + val queries: Set[String] = (1 to 99).map(i => s"q$i").toSet - + ("q14", "q23", "q24", "q39") + + ("q14a", "q14b", "q23a", "q23b", "q24a", "q24b", "q39a", "q39b") - + // TODO:Fix gluten tpc-ds query test + ("q1", "q4", "q7", "q11", "q12", "q17", "q20", "q21", "q25", "q26", "q29", "q30", "q34", "q37", + "q39a", "q39b", "q40", "q43", "q46", "q49", "q56", "q58", "q59", "q60", "q68", "q73", "q74", + "q78", "q79", "q81", "q82", "q83", "q84", "q91", "q98") + lazy val sparkConf: SparkConf = { + val glutenConf = new SparkConf().setMaster("local[*]") + .set("spark.ui.enabled", "false") + .set("spark.sql.catalogImplementation", "in-memory") + .set("spark.sql.catalog.tpcds", classOf[TPCDSCatalog].getName) + .set("spark.sql.catalog.tpcds.useTableSchema_2_6", "true") + extraConfigs.foreach { case (k, v) => glutenConf.set(k, v) } + glutenConf + } + + test("KYUUBI #5467:gluten tpc-ds tiny query suite") { + val viewSuffix = "view" + withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark => + loadTPDSTINY(spark) + queries.map { queryName => + queryName -> loadTPCFile(s"kyuubi/tpcds_3.2/$queryName.sql") + }.foreach { case (name, sql) => + try { + val result = spark.sql(sql).collect() + val schema = spark.sql(sql).schema + val schemaDDL = LICENSE_HEADER + schema.toDDL + "\n" + spark.createDataFrame(result.toList.asJava, schema).createTempView(s"$name$viewSuffix") + val sumHashResult = LICENSE_HEADER + spark.sql( + s"select sum(hash(*)) from $name$viewSuffix").collect().head.get(0) + "\n" + val expectHash = loadTPCFile(s"kyuubi/tpcds_3.2/$name.output.hash") + val expectSchema = loadTPCFile(s"kyuubi/tpcds_3.2/$name.output.schema") + assert(schemaDDL == expectSchema) + assert(sumHashResult == expectHash) + } catch { + case cause: Throwable => + fail(name, cause) + } + } + } + } + + def loadTPDSTINY(sc: SparkSession): Unit = { + val queryContent: String = loadTPCFile("load-tpcds-tiny.sql") + queryContent.split(";\n").filterNot(_.trim.isEmpty).foreach { sql => + sc.sql(sql) + } + } +} diff --git a/integration-tests/kyuubi-gluten-it/src/test/scala/org/apache/kyuubi/it/gluten/tpch/GlutenTPCHQuerySuite.scala b/integration-tests/kyuubi-gluten-it/src/test/scala/org/apache/kyuubi/it/gluten/tpch/GlutenTPCHQuerySuite.scala new file mode 100644 index 00000000000..98b4e94489d --- /dev/null +++ b/integration-tests/kyuubi-gluten-it/src/test/scala/org/apache/kyuubi/it/gluten/tpch/GlutenTPCHQuerySuite.scala @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.it.gluten.tpch + +import scala.collection.JavaConverters._ + +import org.apache.spark.SparkConf +import org.apache.spark.sql.SparkSession +import org.scalatest.tags.Slow + +import org.apache.kyuubi.{GlutenSuiteMixin, KyuubiFunSuite} +import org.apache.kyuubi.it.gluten.TPCUtils.loadTPCFile +import org.apache.kyuubi.spark.connector.common.GoldenFileUtils.LICENSE_HEADER +import org.apache.kyuubi.spark.connector.common.LocalSparkSession.withSparkSession +import org.apache.kyuubi.spark.connector.tpch.TPCHCatalog +import org.apache.kyuubi.tags.GlutenTest + +@Slow +@GlutenTest +class GlutenTPCHQuerySuite extends KyuubiFunSuite with GlutenSuiteMixin { + // TODO: Fix the inconsistency in q9 results. + val queries: Set[String] = (1 to 22).map(i => s"q$i").toSet - "q9" + + lazy val sparkConf: SparkConf = { + val glutenConf = new SparkConf().setMaster("local[*]") + .set("spark.ui.enabled", "false") + .set("spark.sql.catalogImplementation", "in-memory") + .set("spark.sql.catalog.tpch", classOf[TPCHCatalog].getName) + extraConfigs.foreach { case (k, v) => glutenConf.set(k, v) } + glutenConf + } + + test("KYUUBI #5467:gluten tpc-h tiny query suite") { + val viewSuffix = "view" + withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { spark => + loadTPCHTINY(spark) + queries.map { queryName => + queryName -> loadTPCFile(s"kyuubi/tpch/$queryName.sql") + }.foreach { case (name, sql) => + val result = spark.sql(sql).collect() + val schema = spark.sql(sql).schema + val schemaDDL = LICENSE_HEADER + schema.toDDL + "\n" + spark.createDataFrame(result.toList.asJava, schema).createTempView(s"$name$viewSuffix") + val sumHashResult = LICENSE_HEADER + spark.sql( + s"select sum(hash(*)) from $name$viewSuffix").collect().head.get(0) + "\n" + val expectHash = loadTPCFile(s"kyuubi/tpch/$name.output.hash") + val expectSchema = loadTPCFile(s"kyuubi/tpch/$name.output.schema") + assert(schemaDDL == expectSchema, s"query $name schema not match") + assert(sumHashResult == expectHash, s"query $name result not match") + } + } + } + + def loadTPCHTINY(sc: SparkSession): Unit = { + val queryContent: String = loadTPCFile("load-tpch-tiny.sql") + queryContent.split(";\n").filterNot(_.trim.isEmpty).foreach { sql => + sc.sql(sql) + } + } +} diff --git a/integration-tests/kyuubi-hive-it/src/test/scala/org/apache/kyuubi/it/hive/operation/KyuubiOperationHiveEnginePerUserSuite.scala b/integration-tests/kyuubi-hive-it/src/test/scala/org/apache/kyuubi/it/hive/operation/KyuubiOperationHiveEnginePerUserSuite.scala index 07e2bc0f2c7..fd9e76bc3d4 100644 --- a/integration-tests/kyuubi-hive-it/src/test/scala/org/apache/kyuubi/it/hive/operation/KyuubiOperationHiveEnginePerUserSuite.scala +++ b/integration-tests/kyuubi-hive-it/src/test/scala/org/apache/kyuubi/it/hive/operation/KyuubiOperationHiveEnginePerUserSuite.scala @@ -17,11 +17,10 @@ package org.apache.kyuubi.it.hive.operation -import org.apache.hive.service.rpc.thrift.{TGetInfoReq, TGetInfoType} - import org.apache.kyuubi.{HiveEngineTests, Utils, WithKyuubiServer} import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiConf._ +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TGetInfoReq, TGetInfoType} class KyuubiOperationHiveEnginePerUserSuite extends WithKyuubiServer with HiveEngineTests { @@ -62,6 +61,20 @@ class KyuubiOperationHiveEnginePerUserSuite extends WithKyuubiServer with HiveEn } } + test("[KYUUBI #5865] Hive engine CLI_ODBC_KEYWORDS") { + withSessionConf(Map(KyuubiConf.SERVER_INFO_PROVIDER.key -> "ENGINE"))()() { + withSessionHandle { (client, handle) => + val req = new TGetInfoReq() + req.setSessionHandle(handle) + req.setInfoType(TGetInfoType.CLI_ODBC_KEYWORDS) + val value = client.GetInfo(req).getInfoValue.getStringValue + assert(value.contains("DATABASE") || value === "Unimplemented") + // excluded keywords + assert(!value.contains("ADD")) + } + } + } + test("kyuubi defined function - system_user, session_user") { withJdbcStatement("hive_engine_test") { statement => val rs = statement.executeQuery("SELECT system_user(), session_user()") diff --git a/integration-tests/kyuubi-jdbc-it/pom.xml b/integration-tests/kyuubi-jdbc-it/pom.xml index 95ffd2038c1..7921d94e217 100644 --- a/integration-tests/kyuubi-jdbc-it/pom.xml +++ b/integration-tests/kyuubi-jdbc-it/pom.xml @@ -78,6 +78,24 @@ testcontainers-scala-scalatest_${scala.binary.version} test + + + com.dimafeng + testcontainers-scala-mysql_${scala.binary.version} + test + + + + com.mysql + mysql-connector-j + test + + + + com.dimafeng + testcontainers-scala-postgresql_${scala.binary.version} + test + @@ -108,6 +126,13 @@ true ${project.build.directory} + + org.postgresql + postgresql + ${postgresql.version} + true + ${project.build.directory} + diff --git a/integration-tests/kyuubi-jdbc-it/src/test/scala/org/apache/kyuubi/it/jdbc/mysql/OperationWithServerSuite.scala b/integration-tests/kyuubi-jdbc-it/src/test/scala/org/apache/kyuubi/it/jdbc/mysql/OperationWithServerSuite.scala new file mode 100644 index 00000000000..263de3d1528 --- /dev/null +++ b/integration-tests/kyuubi-jdbc-it/src/test/scala/org/apache/kyuubi/it/jdbc/mysql/OperationWithServerSuite.scala @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.it.jdbc.mysql + +import org.apache.kyuubi.engine.jdbc.mysql.MySQLOperationSuite + +class OperationWithServerSuite extends MySQLOperationSuite + with WithKyuubiServerAndMySQLContainer { + + override protected def jdbcUrl: String = getJdbcUrl + +} diff --git a/integration-tests/kyuubi-jdbc-it/src/test/scala/org/apache/kyuubi/it/jdbc/mysql/WithKyuubiServerAndMySQLContainer.scala b/integration-tests/kyuubi-jdbc-it/src/test/scala/org/apache/kyuubi/it/jdbc/mysql/WithKyuubiServerAndMySQLContainer.scala new file mode 100644 index 00000000000..da94df8e799 --- /dev/null +++ b/integration-tests/kyuubi-jdbc-it/src/test/scala/org/apache/kyuubi/it/jdbc/mysql/WithKyuubiServerAndMySQLContainer.scala @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.it.jdbc.mysql + +import java.nio.file.{Files, Path, Paths} + +import org.apache.kyuubi.{Utils, WithKyuubiServer} +import org.apache.kyuubi.config.KyuubiConf +import org.apache.kyuubi.config.KyuubiConf.{ENGINE_JDBC_EXTRA_CLASSPATH, KYUUBI_ENGINE_ENV_PREFIX, KYUUBI_HOME} +import org.apache.kyuubi.engine.jdbc.mysql.WithMySQLEngine + +trait WithKyuubiServerAndMySQLContainer extends WithKyuubiServer with WithMySQLEngine { + + private val kyuubiHome: String = Utils + .getCodeSourceLocation(getClass).split("integration-tests").head + + private val mysqlJdbcConnectorPath: String = { + val keyword = "mysql-connector" + + val jarsDir = Paths.get(kyuubiHome) + .resolve("integration-tests") + .resolve("kyuubi-jdbc-it") + .resolve("target") + + Files.list(jarsDir) + .filter { p: Path => p.getFileName.toString contains keyword } + .findFirst + .orElseThrow { () => new IllegalStateException(s"Can not find $keyword in $jarsDir.") } + .toAbsolutePath + .toString + } + + override protected val conf: KyuubiConf = { + KyuubiConf() + .set(s"$KYUUBI_ENGINE_ENV_PREFIX.$KYUUBI_HOME", kyuubiHome) + .set(ENGINE_JDBC_EXTRA_CLASSPATH, mysqlJdbcConnectorPath) + } + + override def beforeAll(): Unit = { + val configs = withKyuubiConf + configs.foreach(config => conf.set(config._1, config._2)) + super.beforeAll() + } +} diff --git a/integration-tests/kyuubi-jdbc-it/src/test/scala/org/apache/kyuubi/it/jdbc/postgresql/OperationWithServerSuite.scala b/integration-tests/kyuubi-jdbc-it/src/test/scala/org/apache/kyuubi/it/jdbc/postgresql/OperationWithServerSuite.scala new file mode 100644 index 00000000000..41c31d38585 --- /dev/null +++ b/integration-tests/kyuubi-jdbc-it/src/test/scala/org/apache/kyuubi/it/jdbc/postgresql/OperationWithServerSuite.scala @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.it.jdbc.postgresql + +import org.apache.kyuubi.engine.jdbc.postgresql.PostgreSQLOperationSuite + +class OperationWithServerSuite extends PostgreSQLOperationSuite + with WithKyuubiServerAndPostgreSQLContainer { + + override protected def jdbcUrl: String = getJdbcUrl + +} diff --git a/integration-tests/kyuubi-jdbc-it/src/test/scala/org/apache/kyuubi/it/jdbc/postgresql/SessionWithServerSuite.scala b/integration-tests/kyuubi-jdbc-it/src/test/scala/org/apache/kyuubi/it/jdbc/postgresql/SessionWithServerSuite.scala new file mode 100644 index 00000000000..79f34c79a47 --- /dev/null +++ b/integration-tests/kyuubi-jdbc-it/src/test/scala/org/apache/kyuubi/it/jdbc/postgresql/SessionWithServerSuite.scala @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.it.jdbc.postgresql + +import org.apache.kyuubi.engine.jdbc.postgresql.SessionSuite + +class SessionWithServerSuite extends SessionSuite + with WithKyuubiServerAndPostgreSQLContainer { + + override protected def jdbcUrl: String = getJdbcUrl + +} diff --git a/integration-tests/kyuubi-jdbc-it/src/test/scala/org/apache/kyuubi/it/jdbc/postgresql/StatementWithServerSuite.scala b/integration-tests/kyuubi-jdbc-it/src/test/scala/org/apache/kyuubi/it/jdbc/postgresql/StatementWithServerSuite.scala new file mode 100644 index 00000000000..1c309371f74 --- /dev/null +++ b/integration-tests/kyuubi-jdbc-it/src/test/scala/org/apache/kyuubi/it/jdbc/postgresql/StatementWithServerSuite.scala @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.it.jdbc.postgresql + +import org.apache.kyuubi.engine.jdbc.postgresql.StatementSuite + +class StatementWithServerSuite extends StatementSuite + with WithKyuubiServerAndPostgreSQLContainer { + + override protected def jdbcUrl: String = getJdbcUrl + +} diff --git a/integration-tests/kyuubi-jdbc-it/src/test/scala/org/apache/kyuubi/it/jdbc/postgresql/WithKyuubiServerAndPostgreSQLContainer.scala b/integration-tests/kyuubi-jdbc-it/src/test/scala/org/apache/kyuubi/it/jdbc/postgresql/WithKyuubiServerAndPostgreSQLContainer.scala new file mode 100644 index 00000000000..2e75d516a61 --- /dev/null +++ b/integration-tests/kyuubi-jdbc-it/src/test/scala/org/apache/kyuubi/it/jdbc/postgresql/WithKyuubiServerAndPostgreSQLContainer.scala @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.it.jdbc.postgresql + +import java.nio.file.{Files, Path, Paths} +import java.time.Duration + +import org.apache.kyuubi.{Utils, WithKyuubiServer} +import org.apache.kyuubi.config.KyuubiConf +import org.apache.kyuubi.config.KyuubiConf.{ENGINE_IDLE_TIMEOUT, ENGINE_JDBC_EXTRA_CLASSPATH, KYUUBI_ENGINE_ENV_PREFIX, KYUUBI_HOME} +import org.apache.kyuubi.engine.jdbc.postgresql.WithPostgreSQLEngine + +trait WithKyuubiServerAndPostgreSQLContainer extends WithKyuubiServer with WithPostgreSQLEngine { + + private val kyuubiHome: String = Utils + .getCodeSourceLocation(getClass).split("integration-tests").head + + private val postgresqlJdbcConnectorPath: String = { + val keyword = "postgresql" + + val jarsDir = Paths.get(kyuubiHome) + .resolve("integration-tests") + .resolve("kyuubi-jdbc-it") + .resolve("target") + + Files.list(jarsDir) + .filter { p: Path => p.getFileName.toString contains keyword } + .findFirst + .orElseThrow { () => new IllegalStateException(s"Can not find $keyword in $jarsDir.") } + .toAbsolutePath + .toString + } + + override protected val conf: KyuubiConf = { + KyuubiConf() + .set(s"$KYUUBI_ENGINE_ENV_PREFIX.$KYUUBI_HOME", kyuubiHome) + .set(ENGINE_JDBC_EXTRA_CLASSPATH, postgresqlJdbcConnectorPath) + .set(ENGINE_IDLE_TIMEOUT, Duration.ofMinutes(1).toMillis) + } + + override def beforeAll(): Unit = { + val configs = withKyuubiConf + configs.foreach(config => conf.set(config._1, config._2)) + super.beforeAll() + } +} diff --git a/integration-tests/kyuubi-kubernetes-it/src/test/scala/org/apache/kyuubi/kubernetes/test/deployment/KyuubiOnKubernetesTestsSuite.scala b/integration-tests/kyuubi-kubernetes-it/src/test/scala/org/apache/kyuubi/kubernetes/test/deployment/KyuubiOnKubernetesTestsSuite.scala index 95e15e6ebdd..9d47ab99815 100644 --- a/integration-tests/kyuubi-kubernetes-it/src/test/scala/org/apache/kyuubi/kubernetes/test/deployment/KyuubiOnKubernetesTestsSuite.scala +++ b/integration-tests/kyuubi-kubernetes-it/src/test/scala/org/apache/kyuubi/kubernetes/test/deployment/KyuubiOnKubernetesTestsSuite.scala @@ -55,7 +55,7 @@ class KyuubiOnKubernetesWithSparkTestsBase extends WithKyuubiServerOnKubernetes Map( "spark.master" -> s"k8s://$miniKubeApiMaster", // We should update spark docker image in ./github/workflows/master.yml at the same time - "spark.kubernetes.container.image" -> "apache/spark:3.4.1", + "spark.kubernetes.container.image" -> "apache/spark:3.4.2", "spark.kubernetes.container.image.pullPolicy" -> "IfNotPresent", "spark.executor.memory" -> "512M", "spark.driver.memory" -> "1024M", diff --git a/integration-tests/kyuubi-kubernetes-it/src/test/scala/org/apache/kyuubi/kubernetes/test/spark/SparkOnKubernetesTestsSuite.scala b/integration-tests/kyuubi-kubernetes-it/src/test/scala/org/apache/kyuubi/kubernetes/test/spark/SparkOnKubernetesTestsSuite.scala index 09532efe3d1..ea804575ecb 100644 --- a/integration-tests/kyuubi-kubernetes-it/src/test/scala/org/apache/kyuubi/kubernetes/test/spark/SparkOnKubernetesTestsSuite.scala +++ b/integration-tests/kyuubi-kubernetes-it/src/test/scala/org/apache/kyuubi/kubernetes/test/spark/SparkOnKubernetesTestsSuite.scala @@ -50,7 +50,7 @@ abstract class SparkOnKubernetesSuiteBase // TODO Support more Spark version // Spark official docker image: https://hub.docker.com/r/apache/spark/tags KyuubiConf().set("spark.master", s"k8s://$apiServerAddress") - .set("spark.kubernetes.container.image", "apache/spark:3.4.1") + .set("spark.kubernetes.container.image", "apache/spark:3.4.2") .set("spark.kubernetes.container.image.pullPolicy", "IfNotPresent") .set("spark.executor.instances", "1") .set("spark.executor.memory", "512M") diff --git a/integration-tests/pom.xml b/integration-tests/pom.xml index 35d0b4f9ea7..d28f391b4c2 100644 --- a/integration-tests/pom.xml +++ b/integration-tests/pom.xml @@ -31,6 +31,7 @@ kyuubi-flink-it + kyuubi-gluten-it kyuubi-hive-it kyuubi-trino-it kyuubi-jdbc-it diff --git a/kyuubi-assembly/pom.xml b/kyuubi-assembly/pom.xml index 4fa0d9a0fd3..a853ac7f5e6 100644 --- a/kyuubi-assembly/pom.xml +++ b/kyuubi-assembly/pom.xml @@ -70,7 +70,7 @@ org.apache.kyuubi - ${kyuubi-shaded-zookeeper.artifacts} + ${kyuubi-relocated-zookeeper.artifacts} diff --git a/kyuubi-common/pom.xml b/kyuubi-common/pom.xml index 0d5c491b51c..c9d32b148ff 100644 --- a/kyuubi-common/pom.xml +++ b/kyuubi-common/pom.xml @@ -99,18 +99,8 @@ - org.apache.thrift - libfb303 - - - - org.apache.thrift - libthrift - - - - org.apache.hive - hive-service-rpc + org.apache.kyuubi + kyuubi-relocated-hive-service-rpc diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/KyuubiSQLException.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/KyuubiSQLException.scala index 570ee6d3873..42579fb962f 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/KyuubiSQLException.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/KyuubiSQLException.scala @@ -23,9 +23,8 @@ import java.sql.SQLException import scala.annotation.tailrec import scala.collection.JavaConverters._ -import org.apache.hive.service.rpc.thrift.{TStatus, TStatusCode} - import org.apache.kyuubi.Utils.stringifyException +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TStatus, TStatusCode} import org.apache.kyuubi.util.reflect.DynConstructors /** diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/Utils.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/Utils.scala index accfca4c98f..896ed9df29d 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/Utils.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/Utils.scala @@ -40,6 +40,7 @@ import org.apache.hadoop.util.ShutdownHookManager import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.internal.Tests.IS_TESTING +import org.apache.kyuubi.util.command.CommandLineUtils._ object Utils extends Logging { @@ -325,7 +326,7 @@ object Utils extends Logging { require(args.length % 2 == 0, s"Illegal size of arguments.") for (i <- args.indices by 2) { require( - args(i) == "--conf", + args(i) == CONF, s"Unrecognized main arguments prefix ${args(i)}," + s"the argument format is '--conf k=v'.") @@ -336,25 +337,24 @@ object Utils extends Logging { } } - val REDACTION_REPLACEMENT_TEXT = "*********(redacted)" - - private val PATTERN_FOR_KEY_VALUE_ARG = "(.+?)=(.+)".r - - def redactCommandLineArgs(conf: KyuubiConf, commands: Array[String]): Array[String] = { - val redactionPattern = conf.get(SERVER_SECRET_REDACTION_PATTERN) - var nextKV = false - commands.map { - case PATTERN_FOR_KEY_VALUE_ARG(key, value) if nextKV => - val (_, newValue) = redact(redactionPattern, Seq((key, value))).head - nextKV = false - s"$key=$newValue" - - case cmd if cmd == "--conf" => - nextKV = true - cmd - - case cmd => - cmd + def redactCommandLineArgs(conf: KyuubiConf, commands: Iterable[String]): Iterable[String] = { + conf.get(SERVER_SECRET_REDACTION_PATTERN) match { + case Some(redactionPattern) => + var nextKV = false + commands.map { + case PATTERN_FOR_KEY_VALUE_ARG(key, value) if nextKV => + val (_, newValue) = redact(redactionPattern, Seq((key, value))).head + nextKV = false + genKeyValuePair(key, newValue) + + case cmd if cmd == CONF => + nextKV = true + cmd + + case cmd => + cmd + } + case _ => commands } } diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/cli/Handle.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/cli/Handle.scala index dd1ccb8fd90..2e90b105ec1 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/cli/Handle.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/cli/Handle.scala @@ -20,7 +20,7 @@ package org.apache.kyuubi.cli import java.nio.ByteBuffer import java.util.UUID -import org.apache.hive.service.rpc.thrift.THandleIdentifier +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.THandleIdentifier private[kyuubi] object Handle { final private val SECRET_ID = UUID.fromString("c2ee5b97-3ea0-41fc-ac16-9bd708ed8f38") diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala index a5c0aee0a32..784b82cddeb 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala @@ -1231,6 +1231,54 @@ object KyuubiConf { .checkValue(_ > 0, "must be positive number") .createWithDefault(Duration.ofMinutes(5).toMillis) + val KUBERNETES_SPARK_CLEANUP_TERMINATED_DRIVER_POD_KIND_CHECK_INTERVAL: ConfigEntry[Long] = + buildConf("kyuubi.kubernetes.spark.cleanupTerminatedDriverPod.checkInterval") + .doc("Kyuubi server use guava cache as the cleanup trigger with time-based eviction, " + + "but the eviction would not happened until any get/put operation happened. " + + "This option schedule a daemon thread evict cache periodically.") + .version("1.8.1") + .timeConf + .createWithDefaultString("PT1M") + + val KUBERNETES_SPARK_CLEANUP_TERMINATED_DRIVER_POD_KIND: ConfigEntry[String] = + buildConf("kyuubi.kubernetes.spark.cleanupTerminatedDriverPod.kind") + .doc("Kyuubi server will delete the spark driver pod after " + + s"the application terminates for ${KUBERNETES_TERMINATED_APPLICATION_RETAIN_PERIOD.key}. " + + "Available options are NONE, ALL, COMPLETED and " + + "default value is None which means none of the pod will be deleted") + .version("1.8.1") + .stringConf + .createWithDefault(KubernetesCleanupDriverPodStrategy.NONE.toString) + + object KubernetesCleanupDriverPodStrategy extends Enumeration { + type KubernetesCleanupDriverPodStrategy = Value + val NONE, ALL, COMPLETED = Value + } + + val KUBERNETES_APPLICATION_STATE_CONTAINER: ConfigEntry[String] = + buildConf("kyuubi.kubernetes.application.state.container") + .doc("The container name to retrieve the application state from.") + .version("1.8.1") + .stringConf + .createWithDefault("spark-kubernetes-driver") + + val KUBERNETES_APPLICATION_STATE_SOURCE: ConfigEntry[String] = + buildConf("kyuubi.kubernetes.application.state.source") + .doc("The source to retrieve the application state from. The valid values are " + + "pod and container. If the source is container and there is container inside the pod " + + s"with the name of ${KUBERNETES_APPLICATION_STATE_CONTAINER.key}, the application state " + + s"will be from the matched container state. " + + s"Otherwise, the application state will be from the pod state.") + .version("1.8.1") + .stringConf + .checkValues(KubernetesApplicationStateSource) + .createWithDefault(KubernetesApplicationStateSource.POD.toString) + + object KubernetesApplicationStateSource extends Enumeration { + type KubernetesApplicationStateSource = Value + val POD, CONTAINER = Value + } + // /////////////////////////////////////////////////////////////////////////////////////////////// // SQL Engine Configuration // // /////////////////////////////////////////////////////////////////////////////////////////////// @@ -1426,7 +1474,7 @@ object KyuubiConf { val ENGINE_ALIVE_MAX_FAILURES: ConfigEntry[Int] = buildConf("kyuubi.session.engine.alive.max.failures") .doc("The maximum number of failures allowed for the engine.") - .version("1.8.0") + .version("1.8.1") .intConf .checkValue(_ > 0, "Must be positive") .createWithDefault(3) @@ -1468,6 +1516,22 @@ object KyuubiConf { .timeConf .createWithDefault(Duration.ofSeconds(10).toMillis) + object EngineOpenOnFailure extends Enumeration { + type EngineOpenOnFailure = Value + val RETRY, DEREGISTER_IMMEDIATELY, DEREGISTER_AFTER_RETRY = Value + } + + val ENGINE_OPEN_ON_FAILURE: ConfigEntry[String] = + buildConf("kyuubi.session.engine.open.onFailure") + .doc("The behavior when opening engine failed:
      " + + s"
    • RETRY: retry to open engine for ${ENGINE_OPEN_MAX_ATTEMPTS.key} times.
    • " + + "
    • DEREGISTER_IMMEDIATELY: deregister the engine immediately.
    • " + + "
    • DEREGISTER_AFTER_RETRY: deregister the engine after retry to open engine for " + + s"${ENGINE_OPEN_MAX_ATTEMPTS.key} times.
    ") + .version("1.8.1") + .stringConf + .createWithDefault(EngineOpenOnFailure.RETRY.toString) + val ENGINE_INIT_TIMEOUT: ConfigEntry[Long] = buildConf("kyuubi.session.engine.initialize.timeout") .doc("Timeout for starting the background engine, e.g. SparkSQLEngine.") .version("1.0.0") @@ -1893,6 +1957,33 @@ object KyuubiConf { .intConf .createWithDefault(0) + val OPERATION_RESULT_SAVE_TO_FILE: ConfigEntry[Boolean] = + buildConf("kyuubi.operation.result.saveToFile.enabled") + .doc("The switch for Spark query result save to file.") + .version("1.9.0") + .booleanConf + .createWithDefault(false) + + val OPERATION_RESULT_SAVE_TO_FILE_DIR: ConfigEntry[String] = + buildConf("kyuubi.operation.result.saveToFile.dir") + .doc("The Spark query result save dir, it should be a public accessible to every engine." + + " Results are saved in ORC format, and the directory structure is" + + " `/OPERATION_RESULT_SAVE_TO_FILE_DIR/engineId/sessionId/statementId`." + + " Each query result will delete when query finished.") + .version("1.9.0") + .stringConf + .createWithDefault("/tmp/kyuubi/tmp_kyuubi_result") + + val OPERATION_RESULT_SAVE_TO_FILE_MINSIZE: ConfigEntry[Long] = + buildConf("kyuubi.operation.result.saveToFile.minSize") + .doc("The minSize of Spark result save to file, default value is 200 MB." + + "we use spark's `EstimationUtils#getSizePerRowestimate` to estimate" + + " the output size of the execution plan.") + .version("1.9.0") + .longConf + .checkValue(_ > 0, "must be positive value") + .createWithDefault(200 * 1024 * 1024) + val OPERATION_INCREMENTAL_COLLECT: ConfigEntry[Boolean] = buildConf("kyuubi.operation.incremental.collect") .internal @@ -1932,6 +2023,16 @@ object KyuubiConf { .stringConf .createWithDefault("server_operation_logs") + val PROXY_USER: OptionalConfigEntry[String] = + buildConf("kyuubi.session.proxy.user") + .doc("An alternative to hive.server2.proxy.user. " + + "The current behavior is consistent with hive.server2.proxy.user " + + "and now only takes effect in RESTFul API. " + + "When both parameters are set, kyuubi.session.proxy.user takes precedence.") + .version("1.9.0") + .stringConf + .createOptional + @deprecated("using kyuubi.engine.share.level instead", "1.2.0") val LEGACY_ENGINE_SHARE_LEVEL: ConfigEntry[String] = buildConf("kyuubi.session.engine.share.level") @@ -2015,7 +2116,7 @@ object KyuubiConf { " all the capacity of the Hive Server2." + "
  • JDBC: specify this engine type will launch a JDBC engine which can forward " + " queries to the database system through the certain JDBC driver, " + - " for now, it supports Doris and Phoenix.
  • " + + " for now, it supports Doris, MySQL, Phoenix, PostgreSQL and StarRocks." + "
  • CHAT: specify this engine type will launch a Chat engine.
  • " + "") .version("1.4.0") @@ -2091,6 +2192,13 @@ object KyuubiConf { .toSequence(";") .createWithDefault(Nil) + val ENGINE_SESSION_FLINK_INITIALIZE_SQL: ConfigEntry[Seq[String]] = + buildConf("kyuubi.session.engine.flink.initialize.sql") + .doc("The initialize sql for Flink session. " + + "It fallback to `kyuubi.engine.session.initialize.sql`") + .version("1.8.1") + .fallbackConf(ENGINE_SESSION_INITIALIZE_SQL) + val ENGINE_DEREGISTER_EXCEPTION_CLASSES: ConfigEntry[Set[String]] = buildConf("kyuubi.engine.deregister.exception.classes") .doc("A comma-separated list of exception classes. If there is any exception thrown," + @@ -2468,14 +2576,15 @@ object KyuubiConf { .checkValues(OperationLanguages) .createWithDefault(OperationLanguages.SQL.toString) - val SESSION_CONF_ADVISOR: OptionalConfigEntry[String] = + val SESSION_CONF_ADVISOR: OptionalConfigEntry[Seq[String]] = buildConf("kyuubi.session.conf.advisor") - .doc("A config advisor plugin for Kyuubi Server. This plugin can provide some custom " + + .doc("A config advisor plugin for Kyuubi Server. This plugin can provide a list of custom " + "configs for different users or session configs and overwrite the session configs before " + "opening a new session. This config value should be a subclass of " + "`org.apache.kyuubi.plugin.SessionConfAdvisor` which has a zero-arg constructor.") .version("1.5.0") .stringConf + .toSequence() .createOptional val GROUP_PROVIDER: ConfigEntry[String] = @@ -2533,6 +2642,13 @@ object KyuubiConf { .stringConf .createWithDefault("yyyy-MM-dd HH:mm:ss.SSS") + val ENGINE_SESSION_SPARK_INITIALIZE_SQL: ConfigEntry[Seq[String]] = + buildConf("kyuubi.session.engine.spark.initialize.sql") + .doc("The initialize sql for Spark session. " + + "It fallback to `kyuubi.engine.session.initialize.sql`") + .version("1.8.1") + .fallbackConf(ENGINE_SESSION_INITIALIZE_SQL) + val ENGINE_TRINO_MEMORY: ConfigEntry[String] = buildConf("kyuubi.engine.trino.memory") .doc("The heap memory for the Trino query engine") @@ -2607,6 +2723,12 @@ object KyuubiConf { .stringConf .createOptional + val ENGINE_FLINK_INITIALIZE_SQL: ConfigEntry[Seq[String]] = + buildConf("kyuubi.engine.flink.initialize.sql") + .doc("The initialize sql for Flink engine. It fallback to `kyuubi.engine.initialize.sql`.") + .version("1.8.1") + .fallbackConf(ENGINE_INITIALIZE_SQL) + val SERVER_LIMIT_CONNECTIONS_PER_USER: OptionalConfigEntry[Int] = buildConf("kyuubi.server.limit.connections.per.user") .doc("Maximum kyuubi server connections per user." + @@ -2717,7 +2839,9 @@ object KyuubiConf { val SERVER_ADMINISTRATORS: ConfigEntry[Set[String]] = buildConf("kyuubi.server.administrators") .doc("Comma-separated list of Kyuubi service administrators. " + - "We use this config to grant admin permission to any service accounts.") + "We use this config to grant admin permission to any service accounts when " + + s"security mechanism is enabled. Note, when ${AUTHENTICATION_METHOD.key} is " + + "configured to NOSASL or NONE, everyone is treated as administrator.") .version("1.8.0") .serverOnly .stringConf @@ -2777,9 +2901,31 @@ object KyuubiConf { val ENGINE_JDBC_CONNECTION_PROVIDER: OptionalConfigEntry[String] = buildConf("kyuubi.engine.jdbc.connection.provider") - .doc("The connection provider is used for getting a connection from the server") + .doc("A JDBC connection provider plugin for the Kyuubi Server " + + "to establish a connection to the JDBC URL." + + " The configuration value should be a subclass of " + + "`org.apache.kyuubi.engine.jdbc.connection.JdbcConnectionProvider`. " + + "Kyuubi provides the following built-in implementations: " + + "
  • doris: For establishing Doris connections.
  • " + + "
  • mysql: For establishing MySQL connections.
  • " + + "
  • phoenix: For establishing Phoenix connections.
  • " + + "
  • postgresql: For establishing PostgreSQL connections.
  • " + + "
  • starrocks: For establishing StarRocks connections.
  • ") .version("1.6.0") .stringConf + .transform { + case "Doris" | "doris" | "DorisConnectionProvider" => + "org.apache.kyuubi.engine.jdbc.doris.DorisConnectionProvider" + case "MySQL" | "mysql" | "MySQLConnectionProvider" => + "org.apache.kyuubi.engine.jdbc.mysql.MySQLConnectionProvider" + case "Phoenix" | "phoenix" | "PhoenixConnectionProvider" => + "org.apache.kyuubi.engine.jdbc.phoenix.PhoenixConnectionProvider" + case "PostgreSQL" | "postgresql" | "PostgreSQLConnectionProvider" => + "org.apache.kyuubi.engine.jdbc.postgresql.PostgreSQLConnectionProvider" + case "StarRocks" | "starrocks" | "StarRocksConnectionProvider" => + "org.apache.kyuubi.engine.jdbc.starrocks.StarRocksConnectionProvider" + case other => other + } .createOptional val ENGINE_JDBC_SHORT_NAME: OptionalConfigEntry[String] = @@ -2807,6 +2953,13 @@ object KyuubiConf { .toSequence(";") .createWithDefault(Nil) + val ENGINE_JDBC_FETCH_SIZE: ConfigEntry[Int] = + buildConf("kyuubi.engine.jdbc.fetch.size") + .doc("The fetch size of JDBC engine") + .version("1.9.0") + .intConf + .createWithDefault(1000) + val ENGINE_OPERATION_CONVERT_CATALOG_DATABASE_ENABLED: ConfigEntry[Boolean] = buildConf("kyuubi.engine.operation.convert.catalog.database.enabled") .doc("When set to true, The engine converts the JDBC methods of set/get Catalog " + @@ -2952,12 +3105,15 @@ object KyuubiConf { .doc("The provider for the Chat engine. Candidates:
      " + "
    • ECHO: simply replies a welcome message.
    • " + "
    • GPT: a.k.a ChatGPT, powered by OpenAI.
    • " + + "
    • ERNIE: ErnieBot, powered by Baidu.
    • " + "
    ") .version("1.8.0") .stringConf .transform { case "ECHO" | "echo" => "org.apache.kyuubi.engine.chat.provider.EchoProvider" case "GPT" | "gpt" | "ChatGPT" => "org.apache.kyuubi.engine.chat.provider.ChatGPTProvider" + case "ERNIE" | "ernie" | "ErnieBot" => + "org.apache.kyuubi.engine.chat.provider.ErnieBotProvider" case other => other } .createWithDefault("ECHO") @@ -2978,6 +3134,23 @@ object KyuubiConf { .stringConf .createWithDefault("gpt-3.5-turbo") + val ENGINE_ERNIE_BOT_ACCESS_TOKEN: OptionalConfigEntry[String] = + buildConf("kyuubi.engine.chat.ernie.token") + .doc("The token to access ernie bot open API, which could be got at " + + "https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Ilkkrb0i5") + .version("1.9.0") + .stringConf + .createOptional + + val ENGINE_ERNIE_BOT_MODEL: ConfigEntry[String] = + buildConf("kyuubi.engine.chat.ernie.model") + .doc("ID of the model used in ernie bot. " + + "Available models are completions_pro, ernie_bot_8k, completions and eb-instant" + + "[Model overview](https://cloud.baidu.com/doc/WENXINWORKSHOP/s/6lp69is2a).") + .version("1.9.0") + .stringConf + .createWithDefault("completions") + val ENGINE_CHAT_EXTRA_CLASSPATH: OptionalConfigEntry[String] = buildConf("kyuubi.engine.chat.extra.classpath") .doc("The extra classpath for the Chat engine, for configuring the location " + @@ -2993,6 +3166,13 @@ object KyuubiConf { .stringConf .createOptional + val ENGINE_ERNIE_BOT_HTTP_PROXY: OptionalConfigEntry[String] = + buildConf("kyuubi.engine.chat.ernie.http.proxy") + .doc("HTTP proxy url for API calling in ernie bot engine. e.g. http://127.0.0.1:1088") + .version("1.9.0") + .stringConf + .createOptional + val ENGINE_CHAT_GPT_HTTP_CONNECT_TIMEOUT: ConfigEntry[Long] = buildConf("kyuubi.engine.chat.gpt.http.connect.timeout") .doc("The timeout[ms] for establishing the connection with the Chat GPT server. " + @@ -3002,6 +3182,15 @@ object KyuubiConf { .checkValue(_ >= 0, "must be 0 or positive number") .createWithDefault(Duration.ofSeconds(120).toMillis) + val ENGINE_ERNIE_HTTP_CONNECT_TIMEOUT: ConfigEntry[Long] = + buildConf("kyuubi.engine.chat.ernie.http.connect.timeout") + .doc("The timeout[ms] for establishing the connection with the ernie bot server. " + + "A timeout value of zero is interpreted as an infinite timeout.") + .version("1.9.0") + .timeConf + .checkValue(_ >= 0, "must be 0 or positive number") + .createWithDefault(Duration.ofSeconds(120).toMillis) + val ENGINE_CHAT_GPT_HTTP_SOCKET_TIMEOUT: ConfigEntry[Long] = buildConf("kyuubi.engine.chat.gpt.http.socket.timeout") .doc("The timeout[ms] for waiting for data packets after Chat GPT server " + @@ -3011,6 +3200,15 @@ object KyuubiConf { .checkValue(_ >= 0, "must be 0 or positive number") .createWithDefault(Duration.ofSeconds(120).toMillis) + val ENGINE_ERNIE_HTTP_SOCKET_TIMEOUT: ConfigEntry[Long] = + buildConf("kyuubi.engine.chat.ernie.http.socket.timeout") + .doc("The timeout[ms] for waiting for data packets after ernie bot server " + + "connection is established. A timeout value of zero is interpreted as an infinite timeout.") + .version("1.9.0") + .timeConf + .checkValue(_ >= 0, "must be 0 or positive number") + .createWithDefault(Duration.ofSeconds(120).toMillis) + val ENGINE_JDBC_MEMORY: ConfigEntry[String] = buildConf("kyuubi.engine.jdbc.memory") .doc("The heap memory for the JDBC query engine") @@ -3067,6 +3265,29 @@ object KyuubiConf { .stringConf .createWithDefault("bin/python") + val ENGINE_SPARK_PYTHON_MAGIC_ENABLED: ConfigEntry[Boolean] = + buildConf("kyuubi.engine.spark.python.magic.enabled") + .internal + .doc("Whether to enable pyspark magic node, which is helpful for notebook." + + " See details in KYUUBI #5877") + .version("1.9.0") + .booleanConf + .createWithDefault(true) + + object EngineSparkOutputMode extends Enumeration { + type EngineSparkOutputMode = Value + val AUTO, NOTEBOOK = Value + } + + val ENGINE_SPARK_OUTPUT_MODE: ConfigEntry[String] = + buildConf("kyuubi.engine.spark.output.mode") + .doc("The output mode of Spark engine:
      " + + "
    • AUTO: For PySpark, the extracted `text/plain` from python response as output.
    • " + + "
    • NOTEBOOK: For PySpark, the original python response as output.
    ") + .version("1.9.0") + .stringConf + .createWithDefault(EngineSparkOutputMode.AUTO.toString) + val ENGINE_SPARK_REGISTER_ATTRIBUTES: ConfigEntry[Seq[String]] = buildConf("kyuubi.engine.spark.register.attributes") .internal @@ -3076,6 +3297,12 @@ object KyuubiConf { .toSequence() .createWithDefault(Seq("spark.driver.memory", "spark.executor.memory")) + val ENGINE_SPARK_INITIALIZE_SQL: ConfigEntry[Seq[String]] = + buildConf("kyuubi.engine.spark.initialize.sql") + .doc("The initialize sql for Spark engine. It fallback to `kyuubi.engine.initialize.sql`.") + .version("1.8.1") + .fallbackConf(ENGINE_INITIALIZE_SQL) + val ENGINE_HIVE_EVENT_LOGGERS: ConfigEntry[Seq[String]] = buildConf("kyuubi.engine.hive.event.loggers") .doc("A comma-separated list of engine history loggers, where engine/session/operation etc" + @@ -3150,4 +3377,24 @@ object KyuubiConf { .serverOnly .intConf .createOptional + + val KUBERNETES_FORCIBLY_REWRITE_DRIVER_POD_NAME: ConfigEntry[Boolean] = + buildConf("kyuubi.kubernetes.spark.forciblyRewriteDriverPodName.enabled") + .doc("Whether to forcibly rewrite Spark driver pod name with 'kyuubi--driver'. " + + "If disabled, Kyuubi will try to preserve the application name while satisfying K8s' " + + "pod name policy, but some vendors may have stricter pod name policies, thus the " + + "generated name may become illegal.") + .version("1.8.1") + .booleanConf + .createWithDefault(false) + + val KUBERNETES_FORCIBLY_REWRITE_EXEC_POD_NAME_PREFIX: ConfigEntry[Boolean] = + buildConf("kyuubi.kubernetes.spark.forciblyRewriteExecutorPodNamePrefix.enabled") + .doc("Whether to forcibly rewrite Spark executor pod name prefix with 'kyuubi-'. " + + "If disabled, Kyuubi will try to preserve the application name while satisfying K8s' " + + "pod name policy, but some vendors may have stricter Pod name policies, thus the " + + "generated name may become illegal.") + .version("1.8.1") + .booleanConf + .createWithDefault(false) } diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/engine/result/TColumnGenerator.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/engine/result/TColumnGenerator.scala new file mode 100644 index 00000000000..e2c8f1ea6e5 --- /dev/null +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/engine/result/TColumnGenerator.scala @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.engine.result +import java.lang.{Boolean => JBoolean, Byte => JByte, Double => JDouble, Float => JFloat, Long => JLong, Short => JShort} +import java.nio.ByteBuffer +import java.util.{ArrayList => JArrayList, BitSet => JBitSet, List => JList} + +import scala.collection.JavaConverters._ + +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ + +trait TColumnGenerator[RowT] extends TRowSetColumnGetter[RowT] { + protected def getColumnToList[T]( + rows: Seq[RowT], + ordinal: Int, + defaultVal: T, + convertFunc: (RowT, Int) => T = null): (JList[T], ByteBuffer) = { + val rowSize = rows.length + val ret = new JArrayList[T](rowSize) + val nulls = new JBitSet() + var idx = 0 + while (idx < rowSize) { + val row = rows(idx) + val isNull = isColumnNullAt(row, ordinal) + if (isNull) { + nulls.set(idx, true) + ret.add(defaultVal) + } else { + val value = Option(convertFunc) match { + case Some(f) => f(row, ordinal) + case _ => getColumnAs[T](row, ordinal) + } + ret.add(value) + } + idx += 1 + } + (ret, ByteBuffer.wrap(nulls.toByteArray)) + } + + def asBooleanTColumn(rows: Seq[RowT], ordinal: Int): TColumn = { + val (values, nulls) = getColumnToList[JBoolean](rows, ordinal, true) + TColumn.boolVal(new TBoolColumn(values, nulls)) + } + + def asByteTColumn(rows: Seq[RowT], ordinal: Int): TColumn = { + val (values, nulls) = getColumnToList[JByte](rows, ordinal, 0.toByte) + TColumn.byteVal(new TByteColumn(values, nulls)) + } + + def asShortTColumn(rows: Seq[RowT], ordinal: Int): TColumn = { + val (values, nulls) = getColumnToList[JShort](rows, ordinal, 0.toShort) + TColumn.i16Val(new TI16Column(values, nulls)) + } + + def asIntegerTColumn(rows: Seq[RowT], ordinal: Int): TColumn = { + val (values, nulls) = getColumnToList[Integer](rows, ordinal, 0) + TColumn.i32Val(new TI32Column(values, nulls)) + } + + def asLongTColumn(rows: Seq[RowT], ordinal: Int): TColumn = { + val (values, nulls) = getColumnToList[JLong](rows, ordinal, 0.toLong) + TColumn.i64Val(new TI64Column(values, nulls)) + } + + def asFloatTColumn(rows: Seq[RowT], ordinal: Int): TColumn = { + val (values, nulls) = getColumnToList[JFloat](rows, ordinal, 0.toFloat) + val doubleValues = values.asScala.map(f => JDouble.valueOf(f.toString)).asJava + TColumn.doubleVal(new TDoubleColumn(doubleValues, nulls)) + } + + def asDoubleTColumn(rows: Seq[RowT], ordinal: Int): TColumn = { + val (values, nulls) = getColumnToList[JDouble](rows, ordinal, 0.toDouble) + TColumn.doubleVal(new TDoubleColumn(values, nulls)) + } + + def asStringTColumn( + rows: Seq[RowT], + ordinal: Int, + defaultVal: String = "", + convertFunc: (RowT, Int) => String = null): TColumn = { + val (values, nulls) = getColumnToList[String](rows, ordinal, defaultVal, convertFunc) + TColumn.stringVal(new TStringColumn(values, nulls)) + } + + def asByteArrayTColumn(rows: Seq[RowT], ordinal: Int): TColumn = { + val (values, nulls) = getColumnToList[Array[Byte]](rows, ordinal, defaultVal = Array[Byte]()) + val byteBufferValues = values.asScala.map(ByteBuffer.wrap).asJava + TColumn.binaryVal(new TBinaryColumn(byteBufferValues, nulls)) + } +} diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/engine/result/TColumnValueGenerator.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/engine/result/TColumnValueGenerator.scala new file mode 100644 index 00000000000..0ff3a250df7 --- /dev/null +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/engine/result/TColumnValueGenerator.scala @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.engine.result + +import java.lang.{Boolean => JBoolean, Byte => JByte, Double => JDouble, Float => JFloat, Long => JLong, Short => JShort} + +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ + +trait TColumnValueGenerator[RowT] extends TRowSetColumnGetter[RowT] { + + def asBooleanTColumnValue(row: RowT, ordinal: Int): TColumnValue = { + val tValue = new TBoolValue + if (!isColumnNullAt(row, ordinal)) { + tValue.setValue(getColumnAs[JBoolean](row, ordinal)) + } + TColumnValue.boolVal(tValue) + } + + def asByteTColumnValue(row: RowT, ordinal: Int): TColumnValue = { + val tValue = new TByteValue + if (!isColumnNullAt(row, ordinal)) { + tValue.setValue(getColumnAs[JByte](row, ordinal)) + } + TColumnValue.byteVal(tValue) + } + + def asShortTColumnValue(row: RowT, ordinal: Int): TColumnValue = { + val tValue = new TI16Value + if (!isColumnNullAt(row, ordinal)) { + tValue.setValue(getColumnAs[JShort](row, ordinal)) + } + TColumnValue.i16Val(tValue) + } + + def asIntegerTColumnValue(row: RowT, ordinal: Int): TColumnValue = { + val tValue = new TI32Value + if (!isColumnNullAt(row, ordinal)) { + tValue.setValue(getColumnAs[Integer](row, ordinal)) + } + TColumnValue.i32Val(tValue) + } + + def asLongTColumnValue(row: RowT, ordinal: Int): TColumnValue = { + val tValue = new TI64Value + if (!isColumnNullAt(row, ordinal)) { + tValue.setValue(getColumnAs[JLong](row, ordinal)) + } + TColumnValue.i64Val(tValue) + } + + def asFloatTColumnValue(row: RowT, ordinal: Int): TColumnValue = { + val tValue = new TDoubleValue + if (!isColumnNullAt(row, ordinal)) { + tValue.setValue(getColumnAs[JFloat](row, ordinal).toDouble) + } + TColumnValue.doubleVal(tValue) + } + + def asDoubleTColumnValue(row: RowT, ordinal: Int): TColumnValue = { + val tValue = new TDoubleValue + if (!isColumnNullAt(row, ordinal)) { + tValue.setValue(getColumnAs[JDouble](row, ordinal)) + } + TColumnValue.doubleVal(tValue) + } + + def asStringTColumnValue( + row: RowT, + ordinal: Int, + convertFunc: Any => String = null): TColumnValue = { + val tValue = new TStringValue + if (!isColumnNullAt(row, ordinal)) { + val str = getColumnAs[Any](row, ordinal) match { + case strObj: String => strObj + case obj if convertFunc != null => convertFunc(obj) + case anyObj => String.valueOf(anyObj) + } + tValue.setValue(str) + } + TColumnValue.stringVal(tValue) + } +} diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/engine/result/TRowSetColumnGetter.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/engine/result/TRowSetColumnGetter.scala new file mode 100644 index 00000000000..3f6b6a16ada --- /dev/null +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/engine/result/TRowSetColumnGetter.scala @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.engine.result + +trait TRowSetColumnGetter[RowT] { + protected def isColumnNullAt(row: RowT, ordinal: Int): Boolean + + protected def getColumnAs[T](row: RowT, ordinal: Int): T +} diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/engine/result/TRowSetGenerator.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/engine/result/TRowSetGenerator.scala new file mode 100644 index 00000000000..096e45ad8a9 --- /dev/null +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/engine/result/TRowSetGenerator.scala @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.engine.result +import java.util.{ArrayList => JArrayList} + +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ + +trait TRowSetGenerator[SchemaT, RowT, ColumnT] + extends TColumnValueGenerator[RowT] with TColumnGenerator[RowT] { + + def getColumnSizeFromSchemaType(schema: SchemaT): Int + + def getColumnType(schema: SchemaT, ordinal: Int): ColumnT + + def toTColumn(rows: Seq[RowT], ordinal: Int, typ: ColumnT): TColumn + + def toTColumnValue(row: RowT, ordinal: Int, types: SchemaT): TColumnValue + + def toTRowSet(rows: Seq[RowT], schema: SchemaT, protocolVersion: TProtocolVersion): TRowSet = { + if (protocolVersion.getValue < TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6.getValue) { + toRowBasedSet(rows, schema) + } else { + toColumnBasedSet(rows, schema) + } + } + + def toRowBasedSet(rows: Seq[RowT], schema: SchemaT): TRowSet = { + val rowSize = rows.length + val tRows = new JArrayList[TRow](rowSize) + var i = 0 + while (i < rowSize) { + val row = rows(i) + var j = 0 + val columnSize = getColumnSizeFromSchemaType(schema) + val tColumnValues = new JArrayList[TColumnValue](columnSize) + while (j < columnSize) { + val columnValue = toTColumnValue(row, j, schema) + tColumnValues.add(columnValue) + j += 1 + } + i += 1 + val tRow = new TRow(tColumnValues) + tRows.add(tRow) + } + new TRowSet(0, tRows) + } + + def toColumnBasedSet(rows: Seq[RowT], schema: SchemaT): TRowSet = { + val rowSize = rows.length + val tRowSet = new TRowSet(0, new JArrayList[TRow](rowSize)) + var i = 0 + val columnSize = getColumnSizeFromSchemaType(schema) + val tColumns = new JArrayList[TColumn](columnSize) + while (i < columnSize) { + val tColumn = toTColumn(rows, i, getColumnType(schema, i)) + tColumns.add(tColumn) + i += 1 + } + tRowSet.setColumns(tColumns) + tRowSet + } +} diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/AbstractOperation.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/AbstractOperation.scala index 0a185b94266..05dd7fda907 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/AbstractOperation.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/AbstractOperation.scala @@ -17,13 +17,13 @@ package org.apache.kyuubi.operation +import java.io.IOException import java.util.concurrent.{Future, ScheduledExecutorService, TimeUnit} import java.util.concurrent.locks.ReentrantLock import scala.collection.JavaConverters._ import org.apache.commons.lang3.StringUtils -import org.apache.hive.service.rpc.thrift.{TFetchResultsResp, TGetResultSetMetadataResp, TProgressUpdateResp, TProtocolVersion, TStatus, TStatusCode} import org.apache.kyuubi.{KyuubiSQLException, Logging, Utils} import org.apache.kyuubi.config.KyuubiConf.OPERATION_IDLE_TIMEOUT @@ -31,6 +31,7 @@ import org.apache.kyuubi.operation.FetchOrientation.FetchOrientation import org.apache.kyuubi.operation.OperationState._ import org.apache.kyuubi.operation.log.OperationLog import org.apache.kyuubi.session.Session +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TFetchResultsResp, TGetResultSetMetadataResp, TProgressUpdateResp, TProtocolVersion, TStatus, TStatusCode} import org.apache.kyuubi.util.ThreadUtils abstract class AbstractOperation(session: Session) extends Operation with Logging { @@ -104,6 +105,7 @@ abstract class AbstractOperation(session: Session) extends Operation with Loggin this.operationException = opEx } + def getOperationJobProgress: TProgressUpdateResp = operationJobProgress def setOperationJobProgress(opJobProgress: TProgressUpdateResp): Unit = { this.operationJobProgress = opJobProgress } @@ -247,4 +249,19 @@ abstract class AbstractOperation(session: Session) extends Operation with Loggin ok.setInfoMessages(hints.asJava) ok } + + /** + * Close the OperationLog, after running the block + */ + def withClosingOperationLog[T](f: => T): T = { + try { + f + } finally { + try { + getOperationLog.foreach(_.close()) + } catch { + case e: IOException => error(e.getMessage, e) + } + } + } } diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/FetchOrientation.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/FetchOrientation.scala index b5136e91d20..71e9397072d 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/FetchOrientation.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/FetchOrientation.scala @@ -17,7 +17,7 @@ package org.apache.kyuubi.operation -import org.apache.hive.service.rpc.thrift.TFetchOrientation +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TFetchOrientation object FetchOrientation extends Enumeration { type FetchOrientation = Value diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/Operation.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/Operation.scala index c20a16f61d0..e216385180f 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/Operation.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/Operation.scala @@ -19,11 +19,10 @@ package org.apache.kyuubi.operation import java.util.concurrent.Future -import org.apache.hive.service.rpc.thrift.{TFetchResultsResp, TGetResultSetMetadataResp} - import org.apache.kyuubi.operation.FetchOrientation.FetchOrientation import org.apache.kyuubi.operation.log.OperationLog import org.apache.kyuubi.session.Session +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TFetchResultsResp, TGetResultSetMetadataResp} trait Operation { diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/OperationHandle.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/OperationHandle.scala index 419bdc9c471..9a93c549077 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/OperationHandle.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/OperationHandle.scala @@ -19,9 +19,8 @@ package org.apache.kyuubi.operation import java.util.UUID -import org.apache.hive.service.rpc.thrift.TOperationHandle - import org.apache.kyuubi.cli.Handle +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TOperationHandle case class OperationHandle(identifier: UUID) { diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/OperationManager.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/OperationManager.scala index 38dabcc1a89..0b19e68ffb9 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/OperationManager.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/OperationManager.scala @@ -19,8 +19,6 @@ package org.apache.kyuubi.operation import scala.collection.JavaConverters._ -import org.apache.hive.service.rpc.thrift._ - import org.apache.kyuubi.KyuubiSQLException import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiReservedKeys._ @@ -29,6 +27,7 @@ import org.apache.kyuubi.operation.OperationState._ import org.apache.kyuubi.operation.log.LogDivertAppender import org.apache.kyuubi.service.AbstractService import org.apache.kyuubi.session.Session +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ /** * The [[OperationManager]] manages all the operations during their lifecycle. diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/OperationState.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/OperationState.scala index 67a517a23ec..7d00a8cf5f3 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/OperationState.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/OperationState.scala @@ -19,10 +19,9 @@ package org.apache.kyuubi.operation import scala.language.implicitConversions -import org.apache.hive.service.rpc.thrift.TOperationState -import org.apache.hive.service.rpc.thrift.TOperationState._ - import org.apache.kyuubi.KyuubiSQLException +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TOperationState +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TOperationState._ object OperationState extends Enumeration { diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/OperationStatus.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/OperationStatus.scala index 9b139c9dc43..4ea82c32088 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/OperationStatus.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/OperationStatus.scala @@ -17,10 +17,9 @@ package org.apache.kyuubi.operation -import org.apache.hive.service.rpc.thrift.TProgressUpdateResp - import org.apache.kyuubi.KyuubiSQLException import org.apache.kyuubi.operation.OperationState.OperationState +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TProgressUpdateResp case class OperationStatus( state: OperationState, diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/log/OperationLog.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/log/OperationLog.scala index 2e133df28b8..b3bd46d35a4 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/log/OperationLog.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/log/OperationLog.scala @@ -26,12 +26,11 @@ import java.util.{ArrayList => JArrayList, List => JList} import scala.collection.JavaConverters._ import scala.collection.mutable.ListBuffer -import org.apache.hive.service.rpc.thrift.{TColumn, TRow, TRowSet, TStringColumn} - import org.apache.kyuubi.{KyuubiSQLException, Logging} import org.apache.kyuubi.operation.FetchOrientation.{FETCH_FIRST, FETCH_NEXT, FetchOrientation} import org.apache.kyuubi.operation.OperationHandle import org.apache.kyuubi.session.Session +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TColumn, TRow, TRowSet, TStringColumn} import org.apache.kyuubi.util.ThriftUtils object OperationLog extends Logging { @@ -233,8 +232,6 @@ class OperationLog(path: Path) { } def close(): Unit = synchronized { - if (!initialized) return - closeExtraReaders() trySafely { diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/AbstractBackendService.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/AbstractBackendService.scala index 443b353546e..0ecb6e38ffd 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/AbstractBackendService.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/AbstractBackendService.scala @@ -21,12 +21,11 @@ import java.util.concurrent.{ExecutionException, TimeoutException, TimeUnit} import scala.concurrent.CancellationException -import org.apache.hive.service.rpc.thrift._ - import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.operation.{OperationHandle, OperationStatus} import org.apache.kyuubi.operation.FetchOrientation.FetchOrientation import org.apache.kyuubi.session.SessionHandle +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ /** * A Shorthand for implementing [[BackendService]]s diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/BackendService.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/BackendService.scala index 85df9024cc4..0f2691a01e0 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/BackendService.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/BackendService.scala @@ -17,11 +17,10 @@ package org.apache.kyuubi.service -import org.apache.hive.service.rpc.thrift._ - import org.apache.kyuubi.operation.{OperationHandle, OperationStatus} import org.apache.kyuubi.operation.FetchOrientation.FetchOrientation import org.apache.kyuubi.session.{SessionHandle, SessionManager} +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ /** * A [[BackendService]] in Kyuubi architecture is responsible for talking to the SQL engine diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/TBinaryFrontendService.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/TBinaryFrontendService.scala index 2f441937476..19e2e31eafe 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/TBinaryFrontendService.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/TBinaryFrontendService.scala @@ -23,13 +23,12 @@ import java.util.Locale import java.util.concurrent.{SynchronousQueue, ThreadPoolExecutor, TimeUnit} import javax.net.ssl.{KeyManagerFactory, SSLServerSocket} -import org.apache.hive.service.rpc.thrift._ -import org.apache.thrift.protocol.TBinaryProtocol -import org.apache.thrift.server.{TServer, TThreadPoolServer} -import org.apache.thrift.transport.{TServerSocket, TSSLTransportFactory} - import org.apache.kyuubi.{KyuubiException, Logging} import org.apache.kyuubi.config.KyuubiConf +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ +import org.apache.kyuubi.shaded.thrift.protocol.TBinaryProtocol +import org.apache.kyuubi.shaded.thrift.server.{TServer, TThreadPoolServer} +import org.apache.kyuubi.shaded.thrift.transport.{TServerSocket, TSSLTransportFactory} import org.apache.kyuubi.util.NamedThreadFactory /** diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/TFrontendService.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/TFrontendService.scala index 7cc23779fee..a742993c5ad 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/TFrontendService.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/TFrontendService.scala @@ -24,18 +24,18 @@ import scala.collection.JavaConverters._ import scala.language.implicitConversions import org.apache.hadoop.conf.Configuration -import org.apache.hive.service.rpc.thrift._ -import org.apache.thrift.protocol.TProtocol -import org.apache.thrift.server.{ServerContext, TServerEventHandler} -import org.apache.thrift.transport.TTransport import org.apache.kyuubi.{KyuubiSQLException, Logging, Utils} import org.apache.kyuubi.Utils.stringifyException -import org.apache.kyuubi.config.KyuubiConf.{FRONTEND_ADVERTISED_HOST, FRONTEND_CONNECTION_URL_USE_HOSTNAME, SESSION_CLOSE_ON_DISCONNECT} +import org.apache.kyuubi.config.KyuubiConf.{FRONTEND_ADVERTISED_HOST, FRONTEND_CONNECTION_URL_USE_HOSTNAME, PROXY_USER, SESSION_CLOSE_ON_DISCONNECT} import org.apache.kyuubi.config.KyuubiReservedKeys._ import org.apache.kyuubi.operation.{FetchOrientation, OperationHandle} import org.apache.kyuubi.service.authentication.KyuubiAuthenticationFactory import org.apache.kyuubi.session.SessionHandle +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ +import org.apache.kyuubi.shaded.thrift.protocol.TProtocol +import org.apache.kyuubi.shaded.thrift.server.{ServerContext, TServerEventHandler} +import org.apache.kyuubi.shaded.thrift.transport.TTransport import org.apache.kyuubi.util.{KyuubiHadoopUtils, NamedThreadFactory} /** @@ -127,7 +127,8 @@ abstract class TFrontendService(name: String) sessionConf: java.util.Map[String, String], ipAddress: String, realUser: String): String = { - val proxyUser = sessionConf.get(KyuubiAuthenticationFactory.HS2_PROXY_USER) + val proxyUser = Option(sessionConf.get(PROXY_USER.key)) + .getOrElse(sessionConf.get(KyuubiAuthenticationFactory.HS2_PROXY_USER)) if (proxyUser == null) { realUser } else { diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/FEServiceProcessorFactory.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/FEServiceProcessorFactory.scala index 79180314521..ea6156c362c 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/FEServiceProcessorFactory.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/FEServiceProcessorFactory.scala @@ -17,9 +17,9 @@ package org.apache.kyuubi.service.authentication -import org.apache.hive.service.rpc.thrift.TCLIService.{Iface, Processor} -import org.apache.thrift.{TProcessor, TProcessorFactory} -import org.apache.thrift.transport.TTransport +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TCLIService.{Iface, Processor} +import org.apache.kyuubi.shaded.thrift.{TProcessor, TProcessorFactory} +import org.apache.kyuubi.shaded.thrift.transport.TTransport private[authentication] case class FEServiceProcessorFactory( saslServer: HadoopThriftAuthBridgeServer, diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/HadoopThriftAuthBridgeServer.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/HadoopThriftAuthBridgeServer.scala index 5f5c7000823..6c1dfa5daee 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/HadoopThriftAuthBridgeServer.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/HadoopThriftAuthBridgeServer.scala @@ -28,11 +28,11 @@ import org.apache.hadoop.fs.FileSystem import org.apache.hadoop.security.{SaslRpcServer, UserGroupInformation} import org.apache.hadoop.security.SaslRpcServer.AuthMethod import org.apache.hadoop.security.token.SecretManager.InvalidToken -import org.apache.thrift.{TException, TProcessor} -import org.apache.thrift.protocol.TProtocol -import org.apache.thrift.transport._ import org.apache.kyuubi.Logging +import org.apache.kyuubi.shaded.thrift.{TException, TProcessor} +import org.apache.kyuubi.shaded.thrift.protocol.TProtocol +import org.apache.kyuubi.shaded.thrift.transport._ class HadoopThriftAuthBridgeServer(secretMgr: KyuubiDelegationTokenManager) { import HadoopThriftAuthBridgeServer._ diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/KyuubiAuthenticationFactory.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/KyuubiAuthenticationFactory.scala index 1b62f6030e7..736f8e1e15e 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/KyuubiAuthenticationFactory.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/KyuubiAuthenticationFactory.scala @@ -25,22 +25,21 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.security.UserGroupInformation import org.apache.hadoop.security.authentication.util.KerberosName import org.apache.hadoop.security.authorize.ProxyUsers -import org.apache.hive.service.rpc.thrift.TCLIService.Iface -import org.apache.thrift.TProcessorFactory -import org.apache.thrift.transport.{TSaslServerTransport, TTransportException, TTransportFactory} import org.apache.kyuubi.{KyuubiSQLException, Logging} import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiConf._ import org.apache.kyuubi.service.authentication.AuthMethods.AuthMethod import org.apache.kyuubi.service.authentication.AuthTypes._ +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TCLIService.Iface +import org.apache.kyuubi.shaded.thrift.TProcessorFactory +import org.apache.kyuubi.shaded.thrift.transport.{TSaslServerTransport, TTransportException, TTransportFactory} class KyuubiAuthenticationFactory(conf: KyuubiConf, isServer: Boolean = true) extends Logging { - private val authTypes = conf.get(AUTHENTICATION_METHOD).map(AuthTypes.withName) - private val none = authTypes.contains(NONE) - private val noSasl = authTypes == Set(NOSASL) - private val kerberosEnabled = authTypes.contains(KERBEROS) + val authTypes: Set[AuthType] = conf.get(AUTHENTICATION_METHOD).map(AuthTypes.withName) + val noSaslEnabled: Boolean = authTypes == Set(NOSASL) + val kerberosEnabled: Boolean = authTypes.contains(KERBEROS) private val plainAuthTypeOpt = authTypes.filterNot(_.equals(KERBEROS)) .filterNot(_.equals(NOSASL)).headOption @@ -71,7 +70,7 @@ class KyuubiAuthenticationFactory(conf: KyuubiConf, isServer: Boolean = true) ex } def getTTransportFactory: TTransportFactory = { - if (noSasl) { + if (noSaslEnabled) { new TTransportFactory() } else { var transportFactory: TSaslServerTransport.Factory = null @@ -119,33 +118,8 @@ class KyuubiAuthenticationFactory(conf: KyuubiConf, isServer: Boolean = true) ex hadoopAuthServer.map(_.getRemoteAddress).map(_.getHostAddress) .orElse(Option(TSetIpAddressProcessor.getUserIpAddress)) } - - def isNoSaslEnabled: Boolean = { - noSasl - } - - def isKerberosEnabled: Boolean = { - kerberosEnabled - } - - def isPlainAuthEnabled: Boolean = { - plainAuthTypeOpt.isDefined - } - - def isNoneEnabled: Boolean = { - none - } - - def getValidPasswordAuthMethod: AuthMethod = { - debug(authTypes) - if (none) AuthMethods.NONE - else if (authTypes.contains(LDAP)) AuthMethods.LDAP - else if (authTypes.contains(JDBC)) AuthMethods.JDBC - else if (authTypes.contains(CUSTOM)) AuthMethods.CUSTOM - else throw new IllegalArgumentException("No valid Password Auth detected") - } } -object KyuubiAuthenticationFactory { +object KyuubiAuthenticationFactory extends Logging { val HS2_PROXY_USER = "hive.server2.proxy.user" @throws[KyuubiSQLException] @@ -177,4 +151,13 @@ object KyuubiAuthenticationFactory { e) } } + + def getValidPasswordAuthMethod(authTypes: Set[AuthType]): AuthMethod = { + if (authTypes == Set(NOSASL)) AuthMethods.NONE + else if (authTypes.contains(NONE)) AuthMethods.NONE + else if (authTypes.contains(LDAP)) AuthMethods.LDAP + else if (authTypes.contains(JDBC)) AuthMethods.JDBC + else if (authTypes.contains(CUSTOM)) AuthMethods.CUSTOM + else throw new IllegalArgumentException("No valid Password Auth detected") + } } diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/PlainSASLHelper.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/PlainSASLHelper.scala index 3959341ed5f..2d880a344e6 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/PlainSASLHelper.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/PlainSASLHelper.scala @@ -22,13 +22,12 @@ import java.util.Collections import javax.security.auth.callback.{Callback, CallbackHandler, NameCallback, PasswordCallback, UnsupportedCallbackException} import javax.security.sasl.AuthorizeCallback -import org.apache.hive.service.rpc.thrift.TCLIService.Iface -import org.apache.thrift.{TProcessor, TProcessorFactory} -import org.apache.thrift.transport.{TSaslClientTransport, TSaslServerTransport, TTransport, TTransportFactory} - import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.service.authentication.AuthMethods.AuthMethod import org.apache.kyuubi.service.authentication.PlainSASLServer.SaslPlainProvider +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TCLIService.Iface +import org.apache.kyuubi.shaded.thrift.{TProcessor, TProcessorFactory} +import org.apache.kyuubi.shaded.thrift.transport.{TSaslClientTransport, TSaslServerTransport, TTransport, TTransportFactory} object PlainSASLHelper { diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/TSetIpAddressProcessor.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/TSetIpAddressProcessor.scala index ebf82f26f44..6a890593642 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/TSetIpAddressProcessor.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/service/authentication/TSetIpAddressProcessor.scala @@ -17,12 +17,11 @@ package org.apache.kyuubi.service.authentication -import org.apache.hive.service.rpc.thrift.TCLIService.{Iface, Processor} -import org.apache.thrift.TException -import org.apache.thrift.protocol.TProtocol -import org.apache.thrift.transport.{TSaslClientTransport, TSaslServerTransport, TSocket, TTransport} - import org.apache.kyuubi.Logging +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TCLIService.{Iface, Processor} +import org.apache.kyuubi.shaded.thrift.TException +import org.apache.kyuubi.shaded.thrift.protocol.TProtocol +import org.apache.kyuubi.shaded.thrift.transport.{TSaslClientTransport, TSaslServerTransport, TSocket, TTransport} class TSetIpAddressProcessor[I <: Iface]( iface: Iface) extends Processor[Iface](iface) with Logging { diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/session/AbstractSession.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/session/AbstractSession.scala index a9e33f5a060..a00a12c1fb8 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/session/AbstractSession.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/session/AbstractSession.scala @@ -19,14 +19,13 @@ package org.apache.kyuubi.session import scala.collection.JavaConverters._ -import org.apache.hive.service.rpc.thrift._ - import org.apache.kyuubi.{KyuubiSQLException, Logging} import org.apache.kyuubi.config.KyuubiConf._ import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_CLIENT_IP_KEY import org.apache.kyuubi.operation.{Operation, OperationHandle} import org.apache.kyuubi.operation.FetchOrientation.FetchOrientation import org.apache.kyuubi.operation.log.OperationLog +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ abstract class AbstractSession( val protocol: TProtocolVersion, diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/session/Session.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/session/Session.scala index 2cdac9f3a78..c618c048093 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/session/Session.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/session/Session.scala @@ -17,10 +17,9 @@ package org.apache.kyuubi.session -import org.apache.hive.service.rpc.thrift.{TFetchResultsResp, TGetInfoType, TGetInfoValue, TGetResultSetMetadataResp, TProtocolVersion} - import org.apache.kyuubi.operation.FetchOrientation.FetchOrientation import org.apache.kyuubi.operation.OperationHandle +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TFetchResultsResp, TGetInfoType, TGetInfoValue, TGetResultSetMetadataResp, TProtocolVersion} trait Session { diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/session/SessionHandle.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/session/SessionHandle.scala index d66999defe9..53b976884c3 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/session/SessionHandle.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/session/SessionHandle.scala @@ -19,9 +19,8 @@ package org.apache.kyuubi.session import java.util.UUID -import org.apache.hive.service.rpc.thrift.TSessionHandle - import org.apache.kyuubi.cli.Handle +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TSessionHandle case class SessionHandle(identifier: UUID) { diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/session/SessionManager.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/session/SessionManager.scala index a83335102a8..5c71118f143 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/session/SessionManager.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/session/SessionManager.scala @@ -25,14 +25,14 @@ import scala.collection.JavaConverters._ import scala.concurrent.duration.Duration import scala.util.control.NonFatal -import org.apache.hive.service.rpc.thrift.TProtocolVersion - import org.apache.kyuubi.{KyuubiSQLException, Utils} import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiConf._ import org.apache.kyuubi.operation.OperationManager import org.apache.kyuubi.service.CompositeService +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TProtocolVersion import org.apache.kyuubi.util.ThreadUtils +import org.apache.kyuubi.util.ThreadUtils.scheduleTolerableRunnableWithFixedDelay /** * The [[SessionManager]] holds the all the connected [[Session]]s, provides us the APIs to @@ -90,7 +90,7 @@ abstract class SessionManager(name: String) extends CompositeService(name) { conf: Map[String, String]): Session protected def logSessionCountInfo(session: Session, action: String): Unit = { - info(s"${session.user}'s session with" + + info(s"${session.user}'s ${session.getClass.getSimpleName} with" + s" ${session.handle}${session.name.map("/" + _).getOrElse("")} is $action," + s" current opening sessions $getOpenSessionCount") } @@ -303,27 +303,33 @@ abstract class SessionManager(name: String) extends CompositeService(name) { val checkTask = new Runnable { override def run(): Unit = { + info(s"Checking sessions timeout, current count: $getOpenSessionCount") val current = System.currentTimeMillis if (!shutdown) { for (session <- handleToSession.values().asScala) { - if (session.lastAccessTime + session.sessionIdleTimeoutThreshold <= current && - session.getNoOperationTime > session.sessionIdleTimeoutThreshold) { - info(s"Closing session ${session.handle.identifier} that has been idle for more" + - s" than ${session.sessionIdleTimeoutThreshold} ms") - try { + try { + if (session.lastAccessTime + session.sessionIdleTimeoutThreshold <= current && + session.getNoOperationTime > session.sessionIdleTimeoutThreshold) { + info(s"Closing session ${session.handle.identifier} that has been idle for more" + + s" than ${session.sessionIdleTimeoutThreshold} ms") closeSession(session.handle) - } catch { - case NonFatal(e) => warn(s"Error closing idle session ${session.handle}", e) + } else { + session.closeExpiredOperations() } - } else { - session.closeExpiredOperations() + } catch { + case NonFatal(e) => warn(s"Error checking session ${session.handle} timeout", e) } } } } } - timeoutChecker.scheduleWithFixedDelay(checkTask, interval, interval, TimeUnit.MILLISECONDS) + scheduleTolerableRunnableWithFixedDelay( + timeoutChecker, + checkTask, + interval, + interval, + TimeUnit.MILLISECONDS) } private[kyuubi] def startTerminatingChecker(stop: () => Unit): Unit = if (!isServer) { @@ -341,7 +347,12 @@ abstract class SessionManager(name: String) extends CompositeService(name) { } } } - timeoutChecker.scheduleWithFixedDelay(checkTask, interval, interval, TimeUnit.MILLISECONDS) + scheduleTolerableRunnableWithFixedDelay( + timeoutChecker, + checkTask, + interval, + interval, + TimeUnit.MILLISECONDS) } } } diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/util/JdbcUtils.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/util/JdbcUtils.scala index 996589cb742..4951004b671 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/util/JdbcUtils.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/util/JdbcUtils.scala @@ -98,6 +98,12 @@ object JdbcUtils extends Logging { } } + def mapResultSet[R](rs: ResultSet)(rowMapper: ResultSet => R): Seq[R] = { + val builder = Seq.newBuilder[R] + while (rs.next()) builder += rowMapper(rs) + builder.result + } + def redactPassword(password: Option[String]): String = { password match { case Some(s) if StringUtils.isNotBlank(s) => s"${"*" * s.length}(length:${s.length})" diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/util/RowSetUtils.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/util/RowSetUtils.scala index f320fd90293..c79c2032740 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/util/RowSetUtils.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/util/RowSetUtils.scala @@ -17,15 +17,12 @@ package org.apache.kyuubi.util -import java.nio.ByteBuffer import java.time.{Instant, LocalDate, LocalDateTime, LocalTime, ZoneId} import java.time.chrono.IsoChronology import java.time.format.DateTimeFormatterBuilder import java.time.temporal.ChronoField import java.util.{Date, Locale} -import scala.language.implicitConversions - import org.apache.commons.lang3.time.FastDateFormat private[kyuubi] object RowSetUtils { @@ -77,8 +74,4 @@ private[kyuubi] object RowSetUtils { timeZone.map(timestampFormatter.withZone(_).format(i)) .getOrElse(timestampFormatter.format(i)) } - - implicit def bitSetToBuffer(bitSet: java.util.BitSet): ByteBuffer = { - ByteBuffer.wrap(bitSet.toByteArray) - } } diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/util/ThreadUtils.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/util/ThreadUtils.scala index 76d3f416f84..aeab37b6f1e 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/util/ThreadUtils.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/util/ThreadUtils.scala @@ -17,7 +17,7 @@ package org.apache.kyuubi.util -import java.util.concurrent.{Executors, ExecutorService, LinkedBlockingQueue, ScheduledExecutorService, ScheduledThreadPoolExecutor, ThreadPoolExecutor, TimeUnit} +import java.util.concurrent._ import scala.concurrent.Awaitable import scala.concurrent.duration.{Duration, FiniteDuration} @@ -109,4 +109,27 @@ object ThreadUtils extends Logging { thread.setUncaughtExceptionHandler(NamedThreadFactory.kyuubiUncaughtExceptionHandler) thread.start() } + + /** + * Schedule a runnable to the scheduled executor service. + * The exceptions thrown in the runnable will be caught and logged. + */ + def scheduleTolerableRunnableWithFixedDelay( + scheduler: ScheduledExecutorService, + runnable: Runnable, + initialDelay: Long, + delay: Long, + timeUnit: TimeUnit): Unit = { + scheduler.scheduleWithFixedDelay( + () => + try { + runnable.run() + } catch { + case t: Throwable => + error(s"Uncaught exception in thread ${Thread.currentThread().getName}", t) + }, + initialDelay, + delay, + timeUnit) + } } diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/util/ThriftUtils.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/util/ThriftUtils.scala index bfe0bd64bc2..dbfbbc37d07 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/util/ThriftUtils.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/util/ThriftUtils.scala @@ -17,9 +17,8 @@ package org.apache.kyuubi.util -import org.apache.hive.service.rpc.thrift.{TRow, TRowSet, TStatus, TStatusCode} - import org.apache.kyuubi.KyuubiSQLException +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TRow, TRowSet, TStatus, TStatusCode} object ThriftUtils { diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/GlutenSuiteMixin.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/GlutenSuiteMixin.scala new file mode 100644 index 00000000000..6095e163017 --- /dev/null +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/GlutenSuiteMixin.scala @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi + +trait GlutenSuiteMixin { + protected def extraJars: String = { + System.getProperty("java.class.path") + .split(":") + .filter(_.contains("gluten-velox-bundle-spark")).head + } + + protected def extraConfigs: Map[String, String] = Map( + "spark.plugins" -> "io.glutenproject.GlutenPlugin", + "spark.memory.offHeap.size" -> "4g", + "spark.memory.offHeap.enabled" -> "true", + "spark.shuffle.manager" -> "org.apache.spark.shuffle.sort.ColumnarShuffleManager", + "spark.jars" -> extraJars) +} diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/HiveEngineTests.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/HiveEngineTests.scala index 028f755f6c8..29df9072423 100644 --- a/kyuubi-common/src/test/scala/org/apache/kyuubi/HiveEngineTests.scala +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/HiveEngineTests.scala @@ -23,6 +23,7 @@ import org.apache.commons.lang3.{JavaVersion, SystemUtils} import org.apache.kyuubi.operation.HiveJDBCTestHelper import org.apache.kyuubi.operation.meta.ResultSetSchemaConstant._ +import org.apache.kyuubi.util.JdbcUtils /** * hive tests disabled for JAVA 11 @@ -229,14 +230,12 @@ trait HiveEngineTests extends HiveJDBCTestHelper { assume(SystemUtils.isJavaVersionAtMost(JavaVersion.JAVA_1_8)) withJdbcStatement() { statement => val resultSet = statement.getConnection.getMetaData.getTableTypes - val expected = Set("TABLE", "VIEW", "MATERIALIZED_VIEW") - var tableTypes = Set[String]() - while (resultSet.next()) { - assert(expected.contains(resultSet.getString(TABLE_TYPE))) - tableTypes += resultSet.getString(TABLE_TYPE) - } - assert(!resultSet.next()) - assert(expected.size === tableTypes.size) + val hive2_1Expected = Set("TABLE", "VIEW", "INDEX_TABLE") + val hive2_3Expected = Set("TABLE", "VIEW", "MATERIALIZED_VIEW", "INDEX_TABLE") + val hive3Expected = Set("TABLE", "VIEW", "MATERIALIZED_VIEW") + val tableTypes = JdbcUtils.mapResultSet(resultSet) { rs => rs.getString(TABLE_TYPE) }.toSet + assert(tableTypes === hive2_1Expected || tableTypes === hive2_3Expected || + tableTypes === hive3Expected) } } @@ -387,10 +386,12 @@ trait HiveEngineTests extends HiveJDBCTestHelper { assert(typeInfo.getInt(DATA_TYPE) === java.sql.Types.TIMESTAMP) typeInfo.next() - assert(typeInfo.getString(TYPE_NAME) === "TIMESTAMP WITH LOCAL TIME ZONE") - assert(typeInfo.getInt(DATA_TYPE) === java.sql.Types.OTHER) + // Hive3 supports TIMESTAMP WITH LOCAL TIME ZONE + if (typeInfo.getString(TYPE_NAME) == "TIMESTAMP WITH LOCAL TIME ZONE") { + assert(typeInfo.getInt(DATA_TYPE) === java.sql.Types.OTHER) + typeInfo.next() + } - typeInfo.next() assert(typeInfo.getString(TYPE_NAME) === "INTERVAL_YEAR_MONTH") assert(typeInfo.getInt(DATA_TYPE) === java.sql.Types.OTHER) diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/KyuubiSQLExceptionSuite.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/KyuubiSQLExceptionSuite.scala index 4a099c71adf..0b1d65cc0ac 100644 --- a/kyuubi-common/src/test/scala/org/apache/kyuubi/KyuubiSQLExceptionSuite.scala +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/KyuubiSQLExceptionSuite.scala @@ -19,7 +19,7 @@ package org.apache.kyuubi import java.lang.reflect.{InvocationTargetException, UndeclaredThrowableException} -import org.apache.hive.service.rpc.thrift.TStatusCode +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TStatusCode class KyuubiSQLExceptionSuite extends KyuubiFunSuite { diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/MarkdownUtils.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/MarkdownUtils.scala index 4dbe6ea6711..71dc05f6176 100644 --- a/kyuubi-common/src/test/scala/org/apache/kyuubi/MarkdownUtils.scala +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/MarkdownUtils.scala @@ -25,6 +25,8 @@ import com.vladsch.flexmark.profile.pegdown.PegdownOptionsAdapter import com.vladsch.flexmark.util.data.{MutableDataHolder, MutableDataSet} import com.vladsch.flexmark.util.sequence.SequenceUtils.EOL +import org.apache.kyuubi.util.GoldenFileUtils.getLicenceContent + class MarkdownBuilder { private val buffer = new ListBuffer[String] @@ -58,24 +60,8 @@ class MarkdownBuilder { * @return */ def licence(): MarkdownBuilder = { - this ++= """ - | - |""" + buffer.appendAll(getLicenceContent(header = "")) + this } /** diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/UtilsSuite.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/UtilsSuite.scala index 5973fc6e7a6..60bdd3d22a6 100644 --- a/kyuubi-common/src/test/scala/org/apache/kyuubi/UtilsSuite.scala +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/UtilsSuite.scala @@ -23,12 +23,13 @@ import java.nio.file.{Files, Paths} import java.security.PrivilegedExceptionAction import java.util.Properties -import scala.collection.mutable.ArrayBuffer +import scala.collection.mutable import org.apache.hadoop.security.UserGroupInformation import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiConf.SERVER_SECRET_REDACTION_PATTERN +import org.apache.kyuubi.util.command.CommandLineUtils._ class UtilsSuite extends KyuubiFunSuite { @@ -156,44 +157,40 @@ class UtilsSuite extends KyuubiFunSuite { val conf = new KyuubiConf() conf.set(SERVER_SECRET_REDACTION_PATTERN, "(?i)secret|password".r) - val buffer = new ArrayBuffer[String]() + val buffer = new mutable.ListBuffer[String]() buffer += "main" - buffer += "--conf" - buffer += "kyuubi.my.password=sensitive_value" - buffer += "--conf" - buffer += "kyuubi.regular.property1=regular_value" - buffer += "--conf" - buffer += "kyuubi.my.secret=sensitive_value" - buffer += "--conf" - buffer += "kyuubi.regular.property2=regular_value" + buffer ++= confKeyValue("kyuubi.my.password", "sensitive_value") + buffer ++= confKeyValue("kyuubi.regular.property1", "regular_value") + buffer ++= confKeyValue("kyuubi.my.secret", "sensitive_value") + buffer ++= confKeyValue("kyuubi.regular.property2", "regular_value") - val commands = buffer.toArray + val commands = buffer // Redact sensitive information val redactedCmdArgs = Utils.redactCommandLineArgs(conf, commands) - val expectBuffer = new ArrayBuffer[String]() + val expectBuffer = new mutable.ListBuffer[String]() expectBuffer += "main" expectBuffer += "--conf" - expectBuffer += "kyuubi.my.password=" + Utils.REDACTION_REPLACEMENT_TEXT + expectBuffer += "kyuubi.my.password=" + REDACTION_REPLACEMENT_TEXT expectBuffer += "--conf" expectBuffer += "kyuubi.regular.property1=regular_value" expectBuffer += "--conf" - expectBuffer += "kyuubi.my.secret=" + Utils.REDACTION_REPLACEMENT_TEXT + expectBuffer += "kyuubi.my.secret=" + REDACTION_REPLACEMENT_TEXT expectBuffer += "--conf" expectBuffer += "kyuubi.regular.property2=regular_value" - assert(expectBuffer.toArray === redactedCmdArgs) + assert(expectBuffer === redactedCmdArgs) } test("redact sensitive information") { val secretKeys = Some("my.password".r) assert(Utils.redact(secretKeys, Seq(("kyuubi.my.password", "12345"))) === - Seq(("kyuubi.my.password", Utils.REDACTION_REPLACEMENT_TEXT))) + Seq(("kyuubi.my.password", REDACTION_REPLACEMENT_TEXT))) assert(Utils.redact(secretKeys, Seq(("anything", "kyuubi.my.password=12345"))) === - Seq(("anything", Utils.REDACTION_REPLACEMENT_TEXT))) + Seq(("anything", REDACTION_REPLACEMENT_TEXT))) assert(Utils.redact(secretKeys, Seq((999, "kyuubi.my.password=12345"))) === - Seq((999, Utils.REDACTION_REPLACEMENT_TEXT))) + Seq((999, REDACTION_REPLACEMENT_TEXT))) // Do not redact when value type is not string assert(Utils.redact(secretKeys, Seq(("my.password", 12345))) === Seq(("my.password", 12345))) diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/FetchOrientationSuite.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/FetchOrientationSuite.scala index cfcd0b5c855..cfba4516cbb 100644 --- a/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/FetchOrientationSuite.scala +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/FetchOrientationSuite.scala @@ -17,9 +17,8 @@ package org.apache.kyuubi.operation -import org.apache.hive.service.rpc.thrift.TFetchOrientation - import org.apache.kyuubi.KyuubiFunSuite +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TFetchOrientation class FetchOrientationSuite extends KyuubiFunSuite { diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/HiveJDBCTestHelper.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/HiveJDBCTestHelper.scala index cbca415dc27..02cb9a00307 100644 --- a/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/HiveJDBCTestHelper.scala +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/HiveJDBCTestHelper.scala @@ -19,12 +19,12 @@ package org.apache.kyuubi.operation import java.sql.ResultSet -import org.apache.hive.service.rpc.thrift._ -import org.apache.hive.service.rpc.thrift.TCLIService.Iface -import org.apache.hive.service.rpc.thrift.TOperationState._ import org.scalatest.time.SpanSugar.convertIntToGrainOfTime import org.apache.kyuubi.Utils +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TCLIService.Iface +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TOperationState._ trait HiveJDBCTestHelper extends JDBCTestHelper { diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/NoopOperation.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/NoopOperation.scala index c369e00efd8..df34577e01c 100644 --- a/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/NoopOperation.scala +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/NoopOperation.scala @@ -21,12 +21,11 @@ import java.nio.ByteBuffer import scala.collection.JavaConverters._ -import org.apache.hive.service.rpc.thrift.{TColumn, TColumnDesc, TFetchResultsResp, TGetResultSetMetadataResp, TPrimitiveTypeEntry, TStringColumn, TTableSchema, TTypeDesc, TTypeEntry, TTypeId} - import org.apache.kyuubi.KyuubiSQLException import org.apache.kyuubi.operation.FetchOrientation.FetchOrientation import org.apache.kyuubi.operation.log.OperationLog import org.apache.kyuubi.session.Session +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TColumn, TColumnDesc, TFetchResultsResp, TGetResultSetMetadataResp, TPrimitiveTypeEntry, TStringColumn, TTableSchema, TTypeDesc, TTypeEntry, TTypeId} import org.apache.kyuubi.util.ThriftUtils class NoopOperation(session: Session, shouldFail: Boolean = false) diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/NoopOperationManager.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/NoopOperationManager.scala index 352aae905ed..08fa9dd7cf1 100644 --- a/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/NoopOperationManager.scala +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/NoopOperationManager.scala @@ -20,10 +20,9 @@ package org.apache.kyuubi.operation import java.nio.ByteBuffer import java.util -import org.apache.hive.service.rpc.thrift.{TColumn, TFetchResultsResp, TRow, TRowSet, TStatus, TStatusCode, TStringColumn} - import org.apache.kyuubi.operation.FetchOrientation.FetchOrientation import org.apache.kyuubi.session.Session +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TColumn, TFetchResultsResp, TRow, TRowSet, TStatus, TStatusCode, TStringColumn} class NoopOperationManager extends OperationManager("noop") { private val invalid = "invalid" diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/OperationStateSuite.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/OperationStateSuite.scala index 86c7e5e80a1..3052f7dcdee 100644 --- a/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/OperationStateSuite.scala +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/OperationStateSuite.scala @@ -17,13 +17,12 @@ package org.apache.kyuubi.operation -import org.apache.hive.service.rpc.thrift.{TOperationState, TProtocolVersion} -import org.apache.hive.service.rpc.thrift.TOperationState._ - import org.apache.kyuubi.{KyuubiFunSuite, KyuubiSQLException} import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.operation.OperationState._ import org.apache.kyuubi.session.NoopSessionManager +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TOperationState, TProtocolVersion} +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TOperationState._ class OperationStateSuite extends KyuubiFunSuite { test("toTOperationState") { diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/SparkDataTypeTests.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/SparkDataTypeTests.scala index 2709bc861f5..49f6b85d89f 100644 --- a/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/SparkDataTypeTests.scala +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/SparkDataTypeTests.scala @@ -18,6 +18,7 @@ package org.apache.kyuubi.operation import java.sql.{Date, Timestamp} +import java.util.Calendar import org.apache.kyuubi.util.SparkVersionUtil @@ -160,6 +161,23 @@ trait SparkDataTypeTests extends HiveJDBCTestHelper with SparkVersionUtil { } } + test("execute statement - select date with calendar") { + withJdbcStatement() { statement => + val resultSet = statement.executeQuery("SELECT DATE '2018-11-17' AS col") + assert(resultSet.next()) + assert(resultSet.getDate( + "col", + Calendar.getInstance()) === Date.valueOf("2018-11-17")) + assert(resultSet.getDate( + 1, + Calendar.getInstance()) === Date.valueOf("2018-11-17")) + val metaData = resultSet.getMetaData + assert(metaData.getColumnType(1) === java.sql.Types.DATE) + assert(metaData.getPrecision(1) === 10) + assert(metaData.getScale(1) === 0) + } + } + test("execute statement - select timestamp - second") { withJdbcStatement() { statement => val resultSet = statement.executeQuery( @@ -213,6 +231,26 @@ trait SparkDataTypeTests extends HiveJDBCTestHelper with SparkVersionUtil { } } + test("execute statement - select timestamp - second with calendar") { + withJdbcStatement() { statement => + val resultSet = statement.executeQuery( + "SELECT TIMESTAMP '2018-11-17 13:33:33' AS col") + assert(resultSet.next()) + assert(resultSet.getTimestamp( + "col", + Calendar.getInstance()) === Timestamp.valueOf( + "2018-11-17 13:33:33")) + assert(resultSet.getTimestamp( + 1, + Calendar.getInstance()) === Timestamp.valueOf( + "2018-11-17 13:33:33")) + val metaData = resultSet.getMetaData + assert(metaData.getColumnType(1) === java.sql.Types.TIMESTAMP) + assert(metaData.getPrecision(1) === 29) + assert(metaData.getScale(1) === 9) + } + } + test("execute statement - select daytime interval") { assume( resultFormat == "thrift" || diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/SparkQueryTests.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/SparkQueryTests.scala index 0ac56e3bcf0..b46b30402e8 100644 --- a/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/SparkQueryTests.scala +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/SparkQueryTests.scala @@ -24,10 +24,10 @@ import scala.collection.JavaConverters._ import org.apache.commons.io.FileUtils import org.apache.commons.lang3.StringUtils -import org.apache.hive.service.rpc.thrift.{TExecuteStatementReq, TFetchResultsReq, TGetResultSetMetadataReq, TOpenSessionReq, TStatusCode} import org.apache.kyuubi.{KYUUBI_VERSION, Utils} import org.apache.kyuubi.config.KyuubiConf +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TExecuteStatementReq, TFetchResultsReq, TGetResultSetMetadataReq, TOpenSessionReq, TStatusCode} trait SparkQueryTests extends SparkDataTypeTests with HiveJDBCTestHelper { diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/TClientTestUtils.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/TClientTestUtils.scala index d4b4ace88b2..7d2d6946467 100644 --- a/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/TClientTestUtils.scala +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/TClientTestUtils.scala @@ -22,15 +22,14 @@ import java.util.Base64 import scala.collection.JavaConverters._ -import org.apache.hive.service.rpc.thrift._ -import org.apache.hive.service.rpc.thrift.TCLIService.Iface -import org.apache.thrift.protocol.TBinaryProtocol -import org.apache.thrift.transport.TSocket - import org.apache.kyuubi.{Logging, Utils} import org.apache.kyuubi.config.KyuubiReservedKeys._ import org.apache.kyuubi.service.FrontendService import org.apache.kyuubi.service.authentication.PlainSASLHelper +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TCLIService.Iface +import org.apache.kyuubi.shaded.thrift.protocol.TBinaryProtocol +import org.apache.kyuubi.shaded.thrift.transport.TSocket object TClientTestUtils extends Logging { diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/log/OperationLogSuite.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/log/OperationLogSuite.scala index 570a8159bcf..d87ddec0b94 100644 --- a/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/log/OperationLogSuite.scala +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/operation/log/OperationLogSuite.scala @@ -23,12 +23,11 @@ import java.nio.file.{Files, Paths} import scala.collection.JavaConverters._ -import org.apache.hive.service.rpc.thrift.{TProtocolVersion, TRowSet} - import org.apache.kyuubi.{KyuubiFunSuite, KyuubiSQLException, Utils} import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.operation.{FetchOrientation, OperationHandle} import org.apache.kyuubi.session.NoopSessionManager +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TProtocolVersion, TRowSet} import org.apache.kyuubi.util.ThriftUtils class OperationLogSuite extends KyuubiFunSuite { diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/service/TFrontendServiceSuite.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/service/TFrontendServiceSuite.scala index 444bfe2cc3a..1fa6dc63a22 100644 --- a/kyuubi-common/src/test/scala/org/apache/kyuubi/service/TFrontendServiceSuite.scala +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/service/TFrontendServiceSuite.scala @@ -21,7 +21,6 @@ import java.time.Duration import scala.collection.JavaConverters._ -import org.apache.hive.service.rpc.thrift._ import org.scalatest.time._ import org.apache.kyuubi.{KyuubiFunSuite, KyuubiSQLException, Utils} @@ -30,6 +29,7 @@ import org.apache.kyuubi.config.KyuubiConf._ import org.apache.kyuubi.operation.{OperationHandle, TClientTestUtils} import org.apache.kyuubi.service.TFrontendService.FeServiceServerContext import org.apache.kyuubi.session.{AbstractSession, SessionHandle} +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ class TFrontendServiceSuite extends KyuubiFunSuite { diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/KyuubiAuthenticationFactorySuite.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/KyuubiAuthenticationFactorySuite.scala index 316c9b2dfdf..607c397d81f 100644 --- a/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/KyuubiAuthenticationFactorySuite.scala +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/KyuubiAuthenticationFactorySuite.scala @@ -20,11 +20,10 @@ package org.apache.kyuubi.service.authentication import java.security.Security import javax.security.auth.login.LoginException -import org.apache.thrift.transport.TSaslServerTransport - import org.apache.kyuubi.{KyuubiFunSuite, KyuubiSQLException} import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.service.authentication.PlainSASLServer.SaslPlainProvider +import org.apache.kyuubi.shaded.thrift.transport.TSaslServerTransport import org.apache.kyuubi.util.AssertionUtils._ import org.apache.kyuubi.util.KyuubiHadoopUtils diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/PlainSASLHelperSuite.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/PlainSASLHelperSuite.scala index d4290a2c6dd..2a55cc0d7ef 100644 --- a/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/PlainSASLHelperSuite.scala +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/service/authentication/PlainSASLHelperSuite.scala @@ -19,12 +19,11 @@ package org.apache.kyuubi.service.authentication import java.security.Security -import org.apache.thrift.transport.{TSaslServerTransport, TSocket} - import org.apache.kyuubi.{KYUUBI_VERSION, KyuubiFunSuite} import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.service.{NoopTBinaryFrontendServer, TBinaryFrontendService} import org.apache.kyuubi.service.authentication.PlainSASLServer.SaslPlainProvider +import org.apache.kyuubi.shaded.thrift.transport.{TSaslServerTransport, TSocket} import org.apache.kyuubi.util.SemanticVersion class PlainSASLHelperSuite extends KyuubiFunSuite { diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/session/NoopSessionImpl.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/session/NoopSessionImpl.scala index 91548a93e67..5c9e4802276 100644 --- a/kyuubi-common/src/test/scala/org/apache/kyuubi/session/NoopSessionImpl.scala +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/session/NoopSessionImpl.scala @@ -17,7 +17,7 @@ package org.apache.kyuubi.session -import org.apache.hive.service.rpc.thrift.TProtocolVersion +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TProtocolVersion class NoopSessionImpl( protocol: TProtocolVersion, diff --git a/kyuubi-common/src/test/scala/org/apache/kyuubi/session/NoopSessionManager.scala b/kyuubi-common/src/test/scala/org/apache/kyuubi/session/NoopSessionManager.scala index 3a4088ed2da..075f2c2c373 100644 --- a/kyuubi-common/src/test/scala/org/apache/kyuubi/session/NoopSessionManager.scala +++ b/kyuubi-common/src/test/scala/org/apache/kyuubi/session/NoopSessionManager.scala @@ -17,10 +17,9 @@ package org.apache.kyuubi.session -import org.apache.hive.service.rpc.thrift.TProtocolVersion - import org.apache.kyuubi.KyuubiSQLException import org.apache.kyuubi.operation.{NoopOperationManager, OperationManager} +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TProtocolVersion class NoopSessionManager extends SessionManager("noop") { override val operationManager: OperationManager = new NoopOperationManager() diff --git a/kyuubi-ctl/pom.xml b/kyuubi-ctl/pom.xml index c453cd3af95..ad08f6ae7bb 100644 --- a/kyuubi-ctl/pom.xml +++ b/kyuubi-ctl/pom.xml @@ -50,7 +50,7 @@ org.apache.kyuubi - ${kyuubi-shaded-zookeeper.artifacts} + ${kyuubi-relocated-zookeeper.artifacts} diff --git a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cli/AdminControlCliArguments.scala b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cli/AdminControlCliArguments.scala index e015525b3aa..5a45630c685 100644 --- a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cli/AdminControlCliArguments.scala +++ b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cli/AdminControlCliArguments.scala @@ -61,7 +61,6 @@ class AdminControlCliArguments(args: Seq[String], env: Map[String, String] = sys | type ${cliConfig.engineOpts.engineType} | sharelevel ${cliConfig.engineOpts.engineShareLevel} | sharesubdomain ${cliConfig.engineOpts.engineSubdomain} - | all ${cliConfig.engineOpts.all} """.stripMargin case ControlObject.SERVER => s"""Parsed arguments: diff --git a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/delete/DeleteBatchCommand.scala b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/delete/DeleteBatchCommand.scala index 3988620adb8..ee4a14d2666 100644 --- a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/delete/DeleteBatchCommand.scala +++ b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/delete/DeleteBatchCommand.scala @@ -36,7 +36,7 @@ class DeleteBatchCommand(cliConfig: CliConfig) extends Command[Batch](cliConfig) val batchRestApi: BatchRestApi = new BatchRestApi(kyuubiRestClient) val batchId = normalizedCliConfig.batchOpts.batchId - val result = batchRestApi.deleteBatch(batchId, normalizedCliConfig.commonOpts.hs2ProxyUser) + val result = batchRestApi.deleteBatch(batchId) info(JsonUtils.toJson(result)) diff --git a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/list/AdminListEngineCommand.scala b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/list/AdminListEngineCommand.scala index 96be5cc4744..acd6fe44416 100644 --- a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/list/AdminListEngineCommand.scala +++ b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/cmd/list/AdminListEngineCommand.scala @@ -38,8 +38,7 @@ class AdminListEngineCommand(cliConfig: CliConfig) normalizedCliConfig.engineOpts.engineType, normalizedCliConfig.engineOpts.engineShareLevel, normalizedCliConfig.engineOpts.engineSubdomain, - normalizedCliConfig.commonOpts.hs2ProxyUser, - normalizedCliConfig.engineOpts.all).asScala + normalizedCliConfig.commonOpts.hs2ProxyUser).asScala } } diff --git a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/opt/AdminCommandLine.scala b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/opt/AdminCommandLine.scala index c02826b6875..c7e367405e8 100644 --- a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/opt/AdminCommandLine.scala +++ b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/opt/AdminCommandLine.scala @@ -52,7 +52,7 @@ object AdminCommandLine extends CommonCommandLine { .text("\tDelete resources.") .action((_, c) => c.copy(action = ControlAction.DELETE)) .children( - deleteEngineCmd(builder).text("\tDelete the specified engine node for user."))) + engineCmd(builder).text("\tDelete the specified engine node for user."))) } @@ -64,7 +64,7 @@ object AdminCommandLine extends CommonCommandLine { .text("\tList information about resources.") .action((_, c) => c.copy(action = ControlAction.LIST)) .children( - listEngineCmd(builder).text("\tList the engine nodes"), + engineCmd(builder).text("\tList all the engine nodes for a user"), serverCmd(builder).text("\tList all the server nodes"))) } @@ -80,7 +80,7 @@ object AdminCommandLine extends CommonCommandLine { refreshConfigCmd(builder).text("\tRefresh the config with specified type."))) } - private def deleteEngineCmd(builder: OParserBuilder[CliConfig]): OParser[_, CliConfig] = { + private def engineCmd(builder: OParserBuilder[CliConfig]): OParser[_, CliConfig] = { import builder._ cmd("engine").action((_, c) => c.copy(resource = ControlObject.ENGINE)) .children( @@ -95,24 +95,6 @@ object AdminCommandLine extends CommonCommandLine { .text("The engine share level this engine belong to.")) } - private def listEngineCmd(builder: OParserBuilder[CliConfig]): OParser[_, CliConfig] = { - import builder._ - cmd("engine").action((_, c) => c.copy(resource = ControlObject.ENGINE)) - .children( - opt[String]("engine-type").abbr("et") - .action((v, c) => c.copy(engineOpts = c.engineOpts.copy(engineType = v))) - .text("The engine type this engine belong to."), - opt[String]("engine-subdomain").abbr("es") - .action((v, c) => c.copy(engineOpts = c.engineOpts.copy(engineSubdomain = v))) - .text("The engine subdomain this engine belong to."), - opt[String]("engine-share-level").abbr("esl") - .action((v, c) => c.copy(engineOpts = c.engineOpts.copy(engineShareLevel = v))) - .text("The engine share level this engine belong to."), - opt[String]("all").abbr("a") - .action((v, c) => c.copy(engineOpts = c.engineOpts.copy(all = v))) - .text("All the engine.")) - } - private def serverCmd(builder: OParserBuilder[CliConfig]): OParser[_, CliConfig] = { import builder._ cmd("server").action((_, c) => c.copy(resource = ControlObject.SERVER)) diff --git a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/opt/CliConfig.scala b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/opt/CliConfig.scala index 4ccae109c6a..7818f694a3f 100644 --- a/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/opt/CliConfig.scala +++ b/kyuubi-ctl/src/main/scala/org/apache/kyuubi/ctl/opt/CliConfig.scala @@ -77,7 +77,6 @@ case class EngineOpts( user: String = null, engineType: String = null, engineSubdomain: String = null, - engineShareLevel: String = null, - all: String = null) + engineShareLevel: String = null) case class AdminConfigOpts(configType: String = null) diff --git a/kyuubi-ctl/src/test/scala/org/apache/kyuubi/ctl/AdminControlCliArgumentsSuite.scala b/kyuubi-ctl/src/test/scala/org/apache/kyuubi/ctl/AdminControlCliArgumentsSuite.scala index ae7c0fa1b96..52a2796f463 100644 --- a/kyuubi-ctl/src/test/scala/org/apache/kyuubi/ctl/AdminControlCliArgumentsSuite.scala +++ b/kyuubi-ctl/src/test/scala/org/apache/kyuubi/ctl/AdminControlCliArgumentsSuite.scala @@ -158,14 +158,13 @@ class AdminControlCliArgumentsSuite extends KyuubiFunSuite with TestPrematureExi |Command: list [engine|server] | List information about resources. |Command: list engine [options] - | List the engine nodes + | List all the engine nodes for a user | -et, --engine-type | The engine type this engine belong to. | -es, --engine-subdomain | The engine subdomain this engine belong to. | -esl, --engine-share-level | The engine share level this engine belong to. - | -a, --all All the engine. |Command: list server | List all the server nodes | diff --git a/kyuubi-ctl/src/test/scala/org/apache/kyuubi/ctl/BatchCliArgumentsSuite.scala b/kyuubi-ctl/src/test/scala/org/apache/kyuubi/ctl/BatchCliArgumentsSuite.scala index bf8f101e00a..5987ac16338 100644 --- a/kyuubi-ctl/src/test/scala/org/apache/kyuubi/ctl/BatchCliArgumentsSuite.scala +++ b/kyuubi-ctl/src/test/scala/org/apache/kyuubi/ctl/BatchCliArgumentsSuite.scala @@ -119,18 +119,6 @@ class BatchCliArgumentsSuite extends KyuubiFunSuite with TestPrematureExit { } } - test("delete batch with hs2ProxyUser") { - val args = Array( - "delete", - "batch", - "f7fd702c-e54e-11ec-8fea-0242ac120002", - "--hs2ProxyUser", - "b_user") - val opArgs = new ControlCliArguments(args) - assert(opArgs.cliConfig.batchOpts.batchId == "f7fd702c-e54e-11ec-8fea-0242ac120002") - assert(opArgs.cliConfig.commonOpts.hs2ProxyUser == "b_user") - } - test("test list batch option") { val args = Array( "list", diff --git a/kyuubi-ctl/src/test/scala/org/apache/kyuubi/ctl/ControlCliSuite.scala b/kyuubi-ctl/src/test/scala/org/apache/kyuubi/ctl/ControlCliSuite.scala index 43a694a081a..b0319e497ed 100644 --- a/kyuubi-ctl/src/test/scala/org/apache/kyuubi/ctl/ControlCliSuite.scala +++ b/kyuubi-ctl/src/test/scala/org/apache/kyuubi/ctl/ControlCliSuite.scala @@ -190,9 +190,9 @@ class ControlCliSuite extends KyuubiFunSuite with TestPrematureExit { assert(children.size == 2) assert(children.head.startsWith( - s"serviceUri=localhost:10000;version=$KYUUBI_VERSION;sequence=")) + s"serverUri=localhost:10000;version=$KYUUBI_VERSION;sequence=")) assert(children.last.startsWith( - s"serviceUri=localhost:10001;version=$KYUUBI_VERSION;sequence=")) + s"serverUri=localhost:10001;version=$KYUUBI_VERSION;sequence=")) children.foreach { child => framework.delete(s"""$znodeRoot/$child""") } diff --git a/kyuubi-ha/pom.xml b/kyuubi-ha/pom.xml index 129f7a53dbb..f007f2c7064 100644 --- a/kyuubi-ha/pom.xml +++ b/kyuubi-ha/pom.xml @@ -39,7 +39,7 @@ org.apache.kyuubi - ${kyuubi-shaded-zookeeper.artifacts} + ${kyuubi-relocated-zookeeper.artifacts} diff --git a/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/client/etcd/EtcdDiscoveryClient.scala b/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/client/etcd/EtcdDiscoveryClient.scala index d979804f417..7edc7e8a310 100644 --- a/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/client/etcd/EtcdDiscoveryClient.scala +++ b/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/client/etcd/EtcdDiscoveryClient.scala @@ -335,7 +335,7 @@ class EtcdDiscoveryClient(conf: KyuubiConf) extends DiscoveryClient { val extraInfo = attributes.map(kv => kv._1 + "=" + kv._2).mkString(";", ";", "") val pathPrefix = DiscoveryPaths.makePath( namespace, - s"serviceUri=$instance;version=${version.getOrElse(KYUUBI_VERSION)}" + + s"serverUri=$instance;version=${version.getOrElse(KYUUBI_VERSION)}" + s"${extraInfo.stripSuffix(";")};${session}sequence=") val znode = instance diff --git a/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/client/zookeeper/ZookeeperDiscoveryClient.scala b/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/client/zookeeper/ZookeeperDiscoveryClient.scala index 2db7d89d649..a06087d3adf 100644 --- a/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/client/zookeeper/ZookeeperDiscoveryClient.scala +++ b/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/client/zookeeper/ZookeeperDiscoveryClient.scala @@ -361,7 +361,7 @@ class ZookeeperDiscoveryClient(conf: KyuubiConf) extends DiscoveryClient { val extraInfo = attributes.map(kv => kv._1 + "=" + kv._2).mkString(";", ";", "") val pathPrefix = ZKPaths.makePath( namespace, - s"serviceUri=$instance;version=${version.getOrElse(KYUUBI_VERSION)}" + + s"serverUri=$instance;version=${version.getOrElse(KYUUBI_VERSION)}" + s"${extraInfo.stripSuffix(";")};${session}sequence=") var localServiceNode: PersistentNode = null val createMode = diff --git a/kyuubi-ha/src/test/scala/org/apache/kyuubi/ha/client/DiscoveryClientTests.scala b/kyuubi-ha/src/test/scala/org/apache/kyuubi/ha/client/DiscoveryClientTests.scala index 9caf3864640..53c0586f5a6 100644 --- a/kyuubi-ha/src/test/scala/org/apache/kyuubi/ha/client/DiscoveryClientTests.scala +++ b/kyuubi-ha/src/test/scala/org/apache/kyuubi/ha/client/DiscoveryClientTests.scala @@ -60,7 +60,7 @@ trait DiscoveryClientTests extends KyuubiFunSuite { assert(discoveryClient.pathExists(basePath)) val children = discoveryClient.getChildren(basePath) assert(children.head === - s"serviceUri=${service.frontendServices.head.connectionUrl};" + + s"serverUri=${service.frontendServices.head.connectionUrl};" + s"version=$KYUUBI_VERSION;sequence=0000000000") children.foreach { child => @@ -107,7 +107,7 @@ trait DiscoveryClientTests extends KyuubiFunSuite { assert(discoveryClient.pathExists(basePath)) val children = discoveryClient.getChildren(basePath) assert(children.head === - s"serviceUri=${service.frontendServices.head.connectionUrl};" + + s"serverUri=${service.frontendServices.head.connectionUrl};" + s"version=$KYUUBI_VERSION;sequence=0000000000") children.foreach { child => diff --git a/kyuubi-ha/src/test/scala/org/apache/kyuubi/ha/client/zookeeper/ZookeeperDiscoveryClientSuite.scala b/kyuubi-ha/src/test/scala/org/apache/kyuubi/ha/client/zookeeper/ZookeeperDiscoveryClientSuite.scala index dd78e1fb8a0..34ed0559383 100644 --- a/kyuubi-ha/src/test/scala/org/apache/kyuubi/ha/client/zookeeper/ZookeeperDiscoveryClientSuite.scala +++ b/kyuubi-ha/src/test/scala/org/apache/kyuubi/ha/client/zookeeper/ZookeeperDiscoveryClientSuite.scala @@ -196,7 +196,7 @@ abstract class ZookeeperDiscoveryClientSuite extends DiscoveryClientTests assert(discoveryClient.pathExists(basePath)) val children = discoveryClient.getChildren(basePath) assert(children.head === - s"serviceUri=${service.frontendServices.head.connectionUrl};" + + s"serverUri=${service.frontendServices.head.connectionUrl};" + s"version=$KYUUBI_VERSION;sequence=0000000000") children.foreach { child => diff --git a/kyuubi-hive-beeline/src/main/java/org/apache/hive/beeline/KyuubiBeeLine.java b/kyuubi-hive-beeline/src/main/java/org/apache/hive/beeline/KyuubiBeeLine.java index 224cbb3ce11..c786da35f24 100644 --- a/kyuubi-hive-beeline/src/main/java/org/apache/hive/beeline/KyuubiBeeLine.java +++ b/kyuubi-hive-beeline/src/main/java/org/apache/hive/beeline/KyuubiBeeLine.java @@ -85,7 +85,7 @@ public KyuubiBeeLine(boolean isBeeLine) { @Override void usage() { super.usage(); - output("Usage: java \" + KyuubiBeeLine.class.getCanonicalName()"); + output("Usage: java " + KyuubiBeeLine.class.getCanonicalName()); output(" --python-mode Execute python code/script."); } diff --git a/kyuubi-hive-beeline/src/main/java/org/apache/hive/beeline/logs/KyuubiBeelineInPlaceUpdateStream.java b/kyuubi-hive-beeline/src/main/java/org/apache/hive/beeline/logs/KyuubiBeelineInPlaceUpdateStream.java index afe777f502f..e0087332e12 100644 --- a/kyuubi-hive-beeline/src/main/java/org/apache/hive/beeline/logs/KyuubiBeelineInPlaceUpdateStream.java +++ b/kyuubi-hive-beeline/src/main/java/org/apache/hive/beeline/logs/KyuubiBeelineInPlaceUpdateStream.java @@ -21,9 +21,9 @@ import java.util.List; import org.apache.hadoop.hive.common.log.InPlaceUpdate; import org.apache.hadoop.hive.common.log.ProgressMonitor; -import org.apache.hive.service.rpc.thrift.TJobExecutionStatus; -import org.apache.hive.service.rpc.thrift.TProgressUpdateResp; import org.apache.kyuubi.jdbc.hive.logs.InPlaceUpdateStream; +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TJobExecutionStatus; +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TProgressUpdateResp; public class KyuubiBeelineInPlaceUpdateStream implements InPlaceUpdateStream { private InPlaceUpdate inPlaceUpdate; diff --git a/kyuubi-hive-jdbc-shaded/pom.xml b/kyuubi-hive-jdbc-shaded/pom.xml index 174f199bead..ba2938a4d36 100644 --- a/kyuubi-hive-jdbc-shaded/pom.xml +++ b/kyuubi-hive-jdbc-shaded/pom.xml @@ -84,10 +84,6 @@ codegen ${kyuubi.shade.packageName}.codegen - - com.facebook - ${kyuubi.shade.packageName}.com.facebook - com.google ${kyuubi.shade.packageName}.com.google @@ -108,18 +104,10 @@ org.apache.commons ${kyuubi.shade.packageName}.org.apache.commons - - org.apache.hive - ${kyuubi.shade.packageName}.org.apache.hive - org.apache.http ${kyuubi.shade.packageName}.org.apache.http - - org.apache.thrift - ${kyuubi.shade.packageName}.org.apache.thrift - diff --git a/kyuubi-hive-jdbc/pom.xml b/kyuubi-hive-jdbc/pom.xml index aa5e7c161d5..1ec5e597fa0 100644 --- a/kyuubi-hive-jdbc/pom.xml +++ b/kyuubi-hive-jdbc/pom.xml @@ -71,21 +71,6 @@ jackson-datatype-jsr310 - - org.apache.thrift - libfb303 - - - - org.apache.thrift - libthrift - - - - org.apache.hive - hive-service-rpc - - commons-codec commons-codec @@ -114,7 +99,12 @@ org.apache.kyuubi - ${kyuubi-shaded-zookeeper.artifacts} + kyuubi-relocated-hive-service-rpc + + + + org.apache.kyuubi + ${kyuubi-relocated-zookeeper.artifacts} diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/JdbcColumn.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/JdbcColumn.java index 365fc1d3e27..a6c4a948b7b 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/JdbcColumn.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/JdbcColumn.java @@ -23,10 +23,10 @@ import java.sql.Date; import java.sql.SQLException; import java.sql.Timestamp; -import org.apache.hive.service.rpc.thrift.TTypeId; import org.apache.kyuubi.jdbc.hive.common.HiveIntervalDayTime; import org.apache.kyuubi.jdbc.hive.common.HiveIntervalYearMonth; import org.apache.kyuubi.jdbc.hive.common.TimestampTZ; +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TTypeId; /** Column metadata. */ public class JdbcColumn { diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/JdbcConnectionParams.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/JdbcConnectionParams.java index 71949b9dfea..c60f3489958 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/JdbcConnectionParams.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/JdbcConnectionParams.java @@ -33,6 +33,7 @@ public class JdbcConnectionParams { // Client param names: + static final String CLIENT_PROTOCOL_VERSION = "clientProtocolVersion"; // Retry setting static final String RETRIES = "retries"; @@ -48,6 +49,7 @@ public class JdbcConnectionParams { public static final String AUTH_KYUUBI_SERVER_PRINCIPAL = "kyuubiServerPrincipal"; public static final String AUTH_KYUUBI_CLIENT_PRINCIPAL = "kyuubiClientPrincipal"; public static final String AUTH_KYUUBI_CLIENT_KEYTAB = "kyuubiClientKeytab"; + public static final String AUTH_KYUUBI_CLIENT_TICKET_CACHE = "kyuubiClientTicketCache"; public static final String AUTH_PASSWD = "password"; public static final String AUTH_KERBEROS_AUTH_TYPE = "kerberosAuthType"; public static final String AUTH_KERBEROS_AUTH_TYPE_FROM_SUBJECT = "fromSubject"; @@ -114,6 +116,8 @@ public class JdbcConnectionParams { // Currently supports JKS keystore format static final String SSL_TRUST_STORE_TYPE = "JKS"; + static final String SSL_STORE_PASSWORD_PATH = "storePasswordPath"; + static final String HIVE_VAR_PREFIX = "hivevar:"; static final String HIVE_CONF_PREFIX = "hiveconf:"; private String host = null; diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiArrowBasedResultSet.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiArrowBasedResultSet.java index ef5008503aa..52f178a2254 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiArrowBasedResultSet.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiArrowBasedResultSet.java @@ -23,15 +23,16 @@ import java.math.MathContext; import java.nio.charset.StandardCharsets; import java.sql.*; +import java.util.Calendar; import java.util.List; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.hive.service.rpc.thrift.TTableSchema; -import org.apache.hive.service.rpc.thrift.TTypeId; import org.apache.kyuubi.jdbc.hive.adapter.SQLResultSet; import org.apache.kyuubi.jdbc.hive.arrow.ArrowColumnarBatchRow; import org.apache.kyuubi.jdbc.hive.arrow.ArrowUtils; +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TTableSchema; +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TTypeId; /** Data independent base class which implements the common part of all Kyuubi result sets. */ @SuppressWarnings("deprecation") @@ -198,6 +199,32 @@ public Date getDate(String columnName) throws SQLException { return getDate(findColumn(columnName)); } + @Override + public Date getDate(int columnIndex, Calendar cal) throws SQLException { + Date value = getDate(columnIndex); + if (value == null) { + return null; + } + try { + return parseDate(value, cal); + } catch (IllegalArgumentException e) { + throw new KyuubiSQLException("Cannot convert column " + columnIndex + " to date: " + e, e); + } + } + + @Override + public Date getDate(String columnLabel, Calendar cal) throws SQLException { + return this.getDate(findColumn(columnLabel), cal); + } + + private Date parseDate(Date value, Calendar cal) { + if (cal == null) { + cal = Calendar.getInstance(); + } + cal.setTime(value); + return new Date(cal.getTimeInMillis()); + } + @Override public double getDouble(int columnIndex) throws SQLException { try { @@ -406,6 +433,83 @@ public Timestamp getTimestamp(String columnName) throws SQLException { return getTimestamp(findColumn(columnName)); } + @Override + public Timestamp getTimestamp(int columnIndex, Calendar cal) throws SQLException { + Timestamp value = getTimestamp(columnIndex); + if (value == null) { + return null; + } + try { + return parseTimestamp(value, cal); + } catch (IllegalArgumentException e) { + throw new KyuubiSQLException( + "Cannot convert column " + columnIndex + " to timestamp: " + e, e); + } + } + + @Override + public Timestamp getTimestamp(String columnLabel, Calendar cal) throws SQLException { + return this.getTimestamp(findColumn(columnLabel), cal); + } + + private Timestamp parseTimestamp(Timestamp timestamp, Calendar cal) { + if (cal == null) { + cal = Calendar.getInstance(); + } + long v = timestamp.getTime(); + cal.setTimeInMillis(v); + timestamp = new Timestamp(cal.getTime().getTime()); + return timestamp; + } + + @Override + public Time getTime(int columnIndex) throws SQLException { + Object obj = getObject(columnIndex); + if (obj == null) { + return null; + } + if (obj instanceof Time) { + return (Time) obj; + } + if (obj instanceof String) { + return Time.valueOf((String) obj); + } + throw new KyuubiSQLException("Illegal conversion"); + } + + @Override + public Time getTime(String columnLabel) throws SQLException { + return getTime(findColumn(columnLabel)); + } + + @Override + public Time getTime(int columnIndex, Calendar cal) throws SQLException { + Time value = getTime(columnIndex); + if (value == null) { + return null; + } + try { + return parseTime(value, cal); + } catch (IllegalArgumentException e) { + throw new KyuubiSQLException("Cannot convert column " + columnIndex + " to time: " + e, e); + } + } + + @Override + public Time getTime(String columnLabel, Calendar cal) throws SQLException { + return this.getTime(findColumn(columnLabel), cal); + } + + private Time parseTime(Time date, Calendar cal) { + if (cal == null) { + cal = Calendar.getInstance(); + } + long v = date.getTime(); + cal.setTimeInMillis(v); + date = new Time(cal.getTime().getTime()); + return date; + } + @Override public int getType() throws SQLException { return ResultSet.TYPE_FORWARD_ONLY; diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiArrowQueryResultSet.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiArrowQueryResultSet.java index 54491b2d670..163322ccb32 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiArrowQueryResultSet.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiArrowQueryResultSet.java @@ -31,12 +31,12 @@ import org.apache.arrow.vector.ipc.ReadChannel; import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; import org.apache.arrow.vector.ipc.message.MessageSerializer; -import org.apache.hive.service.rpc.thrift.*; import org.apache.kyuubi.jdbc.hive.arrow.ArrowColumnVector; import org.apache.kyuubi.jdbc.hive.arrow.ArrowColumnarBatch; import org.apache.kyuubi.jdbc.hive.arrow.ArrowColumnarBatchRow; import org.apache.kyuubi.jdbc.hive.arrow.ArrowUtils; import org.apache.kyuubi.jdbc.hive.common.HiveDecimal; +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiBaseResultSet.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiBaseResultSet.java index a9d32e8cafb..cf47e104295 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiBaseResultSet.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiBaseResultSet.java @@ -23,13 +23,14 @@ import java.math.MathContext; import java.nio.charset.StandardCharsets; import java.sql.*; +import java.util.Calendar; import java.util.List; -import org.apache.hive.service.rpc.thrift.TTableSchema; -import org.apache.hive.service.rpc.thrift.TTypeId; import org.apache.kyuubi.jdbc.hive.adapter.SQLResultSet; import org.apache.kyuubi.jdbc.hive.common.HiveIntervalDayTime; import org.apache.kyuubi.jdbc.hive.common.HiveIntervalYearMonth; import org.apache.kyuubi.jdbc.hive.common.TimestampTZUtil; +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TTableSchema; +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TTypeId; /** Data independent base class which implements the common part of all Kyuubi result sets. */ @SuppressWarnings("deprecation") @@ -182,6 +183,32 @@ public Date getDate(String columnName) throws SQLException { return getDate(findColumn(columnName)); } + @Override + public Date getDate(int columnIndex, Calendar cal) throws SQLException { + Date value = getDate(columnIndex); + if (value == null) { + return null; + } + try { + return parseDate(value, cal); + } catch (IllegalArgumentException e) { + throw new KyuubiSQLException("Cannot convert column " + columnIndex + " to date: " + e, e); + } + } + + @Override + public Date getDate(String columnLabel, Calendar cal) throws SQLException { + return this.getDate(findColumn(columnLabel), cal); + } + + private Date parseDate(Date value, Calendar cal) { + if (cal == null) { + cal = Calendar.getInstance(); + } + cal.setTime(value); + return new Date(cal.getTimeInMillis()); + } + @Override public double getDouble(int columnIndex) throws SQLException { try { @@ -412,6 +439,83 @@ public Timestamp getTimestamp(String columnName) throws SQLException { return getTimestamp(findColumn(columnName)); } + @Override + public Timestamp getTimestamp(int columnIndex, Calendar cal) throws SQLException { + Timestamp value = getTimestamp(columnIndex); + if (value == null) { + return null; + } + try { + return parseTimestamp(value, cal); + } catch (IllegalArgumentException e) { + throw new KyuubiSQLException( + "Cannot convert column " + columnIndex + " to timestamp: " + e, e); + } + } + + @Override + public Timestamp getTimestamp(String columnLabel, Calendar cal) throws SQLException { + return this.getTimestamp(findColumn(columnLabel), cal); + } + + private Timestamp parseTimestamp(Timestamp timestamp, Calendar cal) { + if (cal == null) { + cal = Calendar.getInstance(); + } + long v = timestamp.getTime(); + cal.setTimeInMillis(v); + timestamp = new Timestamp(cal.getTime().getTime()); + return timestamp; + } + + @Override + public Time getTime(int columnIndex) throws SQLException { + Object obj = getObject(columnIndex); + if (obj == null) { + return null; + } + if (obj instanceof Time) { + return (Time) obj; + } + if (obj instanceof String) { + return Time.valueOf((String) obj); + } + throw new KyuubiSQLException("Illegal conversion"); + } + + @Override + public Time getTime(String columnLabel) throws SQLException { + return getTime(findColumn(columnLabel)); + } + + @Override + public Time getTime(int columnIndex, Calendar cal) throws SQLException { + Time value = getTime(columnIndex); + if (value == null) { + return null; + } + try { + return parseTime(value, cal); + } catch (IllegalArgumentException e) { + throw new KyuubiSQLException("Cannot convert column " + columnIndex + " to time: " + e, e); + } + } + + @Override + public Time getTime(String columnLabel, Calendar cal) throws SQLException { + return this.getTime(findColumn(columnLabel), cal); + } + + private Time parseTime(Time date, Calendar cal) { + if (cal == null) { + cal = Calendar.getInstance(); + } + long v = date.getTime(); + cal.setTimeInMillis(v); + date = new Time(cal.getTime().getTime()); + return date; + } + @Override public int getType() throws SQLException { return ResultSet.TYPE_FORWARD_ONLY; diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiConnection.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiConnection.java index c23985328ec..47de5f7480b 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiConnection.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiConnection.java @@ -42,7 +42,6 @@ import javax.security.sasl.Sasl; import org.apache.commons.lang3.ClassUtils; import org.apache.commons.lang3.StringUtils; -import org.apache.hive.service.rpc.thrift.*; import org.apache.http.HttpRequestInterceptor; import org.apache.http.HttpResponse; import org.apache.http.NoHttpResponseException; @@ -66,11 +65,12 @@ import org.apache.kyuubi.jdbc.hive.cli.RowSet; import org.apache.kyuubi.jdbc.hive.cli.RowSetFactory; import org.apache.kyuubi.jdbc.hive.logs.KyuubiLoggable; -import org.apache.thrift.TException; -import org.apache.thrift.protocol.TBinaryProtocol; -import org.apache.thrift.transport.THttpClient; -import org.apache.thrift.transport.TTransport; -import org.apache.thrift.transport.TTransportException; +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.*; +import org.apache.kyuubi.shaded.thrift.TException; +import org.apache.kyuubi.shaded.thrift.protocol.TBinaryProtocol; +import org.apache.kyuubi.shaded.thrift.transport.THttpClient; +import org.apache.kyuubi.shaded.thrift.transport.TTransport; +import org.apache.kyuubi.shaded.thrift.transport.TTransportException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -539,7 +539,8 @@ public long getRetryInterval() { if (useSsl) { String useTwoWaySSL = sessConfMap.get(USE_TWO_WAY_SSL); String sslTrustStorePath = sessConfMap.get(SSL_TRUST_STORE); - String sslTrustStorePassword = sessConfMap.get(SSL_TRUST_STORE_PASSWORD); + String sslTrustStorePassword = + Utils.getPassword(sessConfMap, JdbcConnectionParams.SSL_TRUST_STORE_PASSWORD); KeyStore sslTrustStore; SSLConnectionSocketFactory socketFactory; SSLContext sslContext; @@ -559,7 +560,8 @@ public long getRetryInterval() { // Pick trust store config from the given path sslTrustStore = KeyStore.getInstance(SSL_TRUST_STORE_TYPE); try (FileInputStream fis = new FileInputStream(sslTrustStorePath)) { - sslTrustStore.load(fis, sslTrustStorePassword.toCharArray()); + sslTrustStore.load( + fis, sslTrustStorePassword != null ? sslTrustStorePassword.toCharArray() : null); } sslContext = SSLContexts.custom().loadTrustMaterial(sslTrustStore, null).build(); socketFactory = @@ -590,7 +592,8 @@ private TTransport createUnderlyingTransport() throws TTransportException { if (isSslConnection()) { // get SSL socket String sslTrustStore = sessConfMap.get(SSL_TRUST_STORE); - String sslTrustStorePassword = sessConfMap.get(SSL_TRUST_STORE_PASSWORD); + String sslTrustStorePassword = + Utils.getPassword(sessConfMap, JdbcConnectionParams.SSL_TRUST_STORE_PASSWORD); if (sslTrustStore == null || sslTrustStore.isEmpty()) { transport = ThriftUtils.getSSLSocket(host, port, connectTimeout, socketTimeout); @@ -661,7 +664,8 @@ SSLConnectionSocketFactory getTwoWaySSLSocketFactory() throws SQLException { KeyManagerFactory keyManagerFactory = KeyManagerFactory.getInstance(SUNX509_ALGORITHM_STRING, SUNJSSE_ALGORITHM_STRING); String keyStorePath = sessConfMap.get(SSL_KEY_STORE); - String keyStorePassword = sessConfMap.get(SSL_KEY_STORE_PASSWORD); + String keyStorePassword = + Utils.getPassword(sessConfMap, JdbcConnectionParams.SSL_KEY_STORE_PASSWORD); KeyStore sslKeyStore = KeyStore.getInstance(SSL_KEY_STORE_TYPE); if (keyStorePath == null || keyStorePath.isEmpty()) { @@ -677,7 +681,8 @@ SSLConnectionSocketFactory getTwoWaySSLSocketFactory() throws SQLException { TrustManagerFactory trustManagerFactory = TrustManagerFactory.getInstance(SUNX509_ALGORITHM_STRING); String trustStorePath = sessConfMap.get(SSL_TRUST_STORE); - String trustStorePassword = sessConfMap.get(SSL_TRUST_STORE_PASSWORD); + String trustStorePassword = + Utils.getPassword(sessConfMap, JdbcConnectionParams.SSL_TRUST_STORE_PASSWORD); KeyStore sslTrustStore = KeyStore.getInstance(SSL_TRUST_STORE_TYPE); if (trustStorePath == null || trustStorePath.isEmpty()) { @@ -685,7 +690,8 @@ SSLConnectionSocketFactory getTwoWaySSLSocketFactory() throws SQLException { SSL_TRUST_STORE + " Not configured for 2 way SSL connection"); } try (FileInputStream fis = new FileInputStream(trustStorePath)) { - sslTrustStore.load(fis, trustStorePassword.toCharArray()); + sslTrustStore.load( + fis, trustStorePassword != null ? trustStorePassword.toCharArray() : null); } trustManagerFactory.init(sslTrustStore); SSLContext context = SSLContext.getInstance("TLS"); @@ -733,6 +739,18 @@ private void openSession() throws SQLException { if (sessVars.containsKey(HS2_PROXY_USER)) { openConf.put(HS2_PROXY_USER, sessVars.get(HS2_PROXY_USER)); } + String clientProtocolStr = + sessVars.getOrDefault( + CLIENT_PROTOCOL_VERSION, openReq.getClient_protocol().getValue() + ""); + TProtocolVersion clientProtocol = + TProtocolVersion.findByValue(Integer.parseInt(clientProtocolStr)); + if (clientProtocol == null) { + throw new IllegalArgumentException( + String.format( + "Unsupported Hive2 protocol version %s specified by session conf key %s", + clientProtocolStr, CLIENT_PROTOCOL_VERSION)); + } + openReq.setClient_protocol(clientProtocol); try { openConf.put("kyuubi.client.ipAddress", InetAddress.getLocalHost().getHostAddress()); } catch (UnknownHostException e) { @@ -870,7 +888,8 @@ private Subject createSubject() { AccessControlContext context = AccessController.getContext(); return Subject.getSubject(context); } else if (isTgtCacheAuthMode()) { - return KerberosAuthenticationManager.getTgtCacheAuthentication().getSubject(); + String ticketCache = sessConfMap.get(AUTH_KYUUBI_CLIENT_TICKET_CACHE); + return KerberosAuthenticationManager.getTgtCacheAuthentication(ticketCache).getSubject(); } else { // This should never happen throw new IllegalArgumentException("Unsupported auth mode"); diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiDatabaseMetaData.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiDatabaseMetaData.java index c6ab3a277c4..cb32bbd8d46 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiDatabaseMetaData.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiDatabaseMetaData.java @@ -17,7 +17,7 @@ package org.apache.kyuubi.jdbc.hive; -import static org.apache.hive.service.rpc.thrift.TTypeId.*; +import static org.apache.kyuubi.shaded.hive.service.rpc.thrift.TTypeId.*; import java.sql.Connection; import java.sql.DatabaseMetaData; @@ -28,10 +28,10 @@ import java.util.Comparator; import java.util.List; import java.util.jar.Attributes; -import org.apache.hive.service.rpc.thrift.*; import org.apache.kyuubi.jdbc.KyuubiHiveDriver; import org.apache.kyuubi.jdbc.hive.adapter.SQLDatabaseMetaData; -import org.apache.thrift.TException; +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.*; +import org.apache.kyuubi.shaded.thrift.TException; /** KyuubiDatabaseMetaData. */ public class KyuubiDatabaseMetaData implements SQLDatabaseMetaData { diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiMetaDataResultSet.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiMetaDataResultSet.java index 48fdaaa1a68..b8c865dd1ce 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiMetaDataResultSet.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiMetaDataResultSet.java @@ -21,7 +21,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; -import org.apache.hive.service.rpc.thrift.TTypeId; +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TTypeId; public abstract class KyuubiMetaDataResultSet extends KyuubiBaseResultSet { protected List data = Collections.emptyList(); diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiPreparedStatement.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiPreparedStatement.java index 1e53f940157..d8105ea07d3 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiPreparedStatement.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiPreparedStatement.java @@ -28,9 +28,9 @@ import java.text.MessageFormat; import java.util.HashMap; import java.util.Scanner; -import org.apache.hive.service.rpc.thrift.TCLIService; -import org.apache.hive.service.rpc.thrift.TSessionHandle; import org.apache.kyuubi.jdbc.hive.adapter.SQLPreparedStatement; +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TCLIService; +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TSessionHandle; /** KyuubiPreparedStatement. */ public class KyuubiPreparedStatement extends KyuubiStatement implements SQLPreparedStatement { diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiQueryResultSet.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiQueryResultSet.java index 242ec772021..81873f0dcb2 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiQueryResultSet.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiQueryResultSet.java @@ -22,11 +22,11 @@ import java.util.Iterator; import java.util.List; import java.util.concurrent.locks.ReentrantLock; -import org.apache.hive.service.rpc.thrift.*; import org.apache.kyuubi.jdbc.hive.cli.RowSet; import org.apache.kyuubi.jdbc.hive.cli.RowSetFactory; import org.apache.kyuubi.jdbc.hive.common.HiveDecimal; -import org.apache.thrift.TException; +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.*; +import org.apache.kyuubi.shaded.thrift.TException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiResultSetMetaData.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiResultSetMetaData.java index bec1ca7fd32..fa121d2871b 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiResultSetMetaData.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiResultSetMetaData.java @@ -20,8 +20,8 @@ import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.util.List; -import org.apache.hive.service.rpc.thrift.TTypeId; import org.apache.kyuubi.jdbc.hive.adapter.SQLResultSetMetaData; +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TTypeId; /** KyuubiResultSetMetaData. */ public class KyuubiResultSetMetaData implements SQLResultSetMetaData { diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiSQLException.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiSQLException.java index 7d26f807898..b5e6579379b 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiSQLException.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiSQLException.java @@ -20,7 +20,7 @@ import java.sql.SQLException; import java.util.ArrayList; import java.util.List; -import org.apache.hive.service.rpc.thrift.TStatus; +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TStatus; import org.apache.kyuubi.util.reflect.DynConstructors; public class KyuubiSQLException extends SQLException { diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiStatement.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiStatement.java index cbe32eca65e..346c8a964e1 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiStatement.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/KyuubiStatement.java @@ -21,14 +21,14 @@ import java.sql.*; import java.util.*; import org.apache.commons.lang3.StringUtils; -import org.apache.hive.service.rpc.thrift.*; import org.apache.kyuubi.jdbc.hive.adapter.SQLStatement; import org.apache.kyuubi.jdbc.hive.cli.FetchType; import org.apache.kyuubi.jdbc.hive.cli.RowSet; import org.apache.kyuubi.jdbc.hive.cli.RowSetFactory; import org.apache.kyuubi.jdbc.hive.logs.InPlaceUpdateStream; import org.apache.kyuubi.jdbc.hive.logs.KyuubiLoggable; -import org.apache.thrift.TException; +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.*; +import org.apache.kyuubi.shaded.thrift.TException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/Utils.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/Utils.java index d0167e3e490..2a0462aeda2 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/Utils.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/Utils.java @@ -28,8 +28,10 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.commons.lang3.StringUtils; -import org.apache.hive.service.rpc.thrift.TStatus; -import org.apache.hive.service.rpc.thrift.TStatusCode; +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TStatus; +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TStatusCode; +import org.apache.kyuubi.util.reflect.DynConstructors; +import org.apache.kyuubi.util.reflect.DynMethods; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -58,6 +60,11 @@ public class Utils { public static final String HIVE_SERVER2_RETRY_TRUE = "true"; public static final String HIVE_SERVER2_RETRY_FALSE = "false"; + public static final String HADOOP_CONFIGURATION_CLASS = "org.apache.hadoop.conf.Configuration"; + + public static final String HADOOP_SECURITY_CREDENTIAL_PATH = + "hadoop.security.credential.provider.path"; + public static final Pattern KYUUBI_OPERATION_HINT_PATTERN = Pattern.compile("^__kyuubi_operation_result_(.*)__=(.*)", Pattern.CASE_INSENSITIVE); @@ -201,7 +208,7 @@ public static JdbcConnectionParams extractURLComponents(String uri, Properties i uri = uri.replace(urlPrefix, urlPrefix + authorityFromClientJdbcURL); } connParams.setSuppliedURLAuthority(authorityFromClientJdbcURL); - uri = uri.replace(authorityFromClientJdbcURL, dummyAuthorityString); + uri = uri.replaceFirst(authorityFromClientJdbcURL, dummyAuthorityString); // Now parse the connection uri with dummy authority URI jdbcURI = URI.create(uri.substring(URI_JDBC_PREFIX.length())); @@ -292,6 +299,13 @@ public static JdbcConnectionParams extractURLComponents(String uri, Properties i } } } + if (!connParams.getSessionVars().containsKey(CLIENT_PROTOCOL_VERSION)) { + if (info.containsKey(CLIENT_PROTOCOL_VERSION)) { + connParams + .getSessionVars() + .put(CLIENT_PROTOCOL_VERSION, info.getProperty(CLIENT_PROTOCOL_VERSION)); + } + } // Extract user/password from JDBC connection properties if its not supplied // in the connection URL if (!connParams.getSessionVars().containsKey(AUTH_USER)) { @@ -563,4 +577,69 @@ public static synchronized String getVersion() { } return KYUUBI_CLIENT_VERSION; } + + /** + * Method to get the password from the credential provider + * + * @param configuration Hadoop configuration + * @param providerPath provider path + * @param key alias name + * @return password + */ + private static String getPasswordFromCredentialProvider( + Object configuration, String providerPath, String key) { + try { + if (providerPath != null) { + DynMethods.builder("set") + .impl(Class.forName(HADOOP_CONFIGURATION_CLASS), String.class, String.class) + .buildChecked() + .invoke(configuration, HADOOP_SECURITY_CREDENTIAL_PATH, providerPath); + + char[] password = + DynMethods.builder("getPassword") + .impl(Class.forName(HADOOP_CONFIGURATION_CLASS), String.class) + .buildChecked() + .invoke(configuration, key); + if (password != null) { + return new String(password); + } + } + } catch (ClassNotFoundException exception) { + throw new RuntimeException(exception); + } catch (NoSuchMethodException exception) { + LOG.warn("Could not retrieve password for " + key, exception); + throw new RuntimeException(exception); + } + return null; + } + + /** + * Method to get the password from the configuration map if available. Otherwise, get it from the + * Hadoop CredentialProvider if Hadoop classes are available + * + * @param confMap configuration map + * @param key param + * @return password + */ + public static String getPassword(Map confMap, String key) { + String password = confMap.get(key); + boolean hadoopCredentialProviderAvailable = false; + Object hadoopConfiguration = null; + if (password == null) { + try { + hadoopConfiguration = + DynConstructors.builder().impl(HADOOP_CONFIGURATION_CLASS).build().newInstance(); + hadoopCredentialProviderAvailable = true; + } catch (Exception exception) { + LOG.warn("Hadoop credential provider is unavailable", exception); + throw new RuntimeException(exception); + } + } + if (password == null && hadoopCredentialProviderAvailable) { + password = + getPasswordFromCredentialProvider( + hadoopConfiguration, confMap.get(JdbcConnectionParams.SSL_STORE_PASSWORD_PATH), key); + } + return password; + } } diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/adapter/SQLResultSet.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/adapter/SQLResultSet.java index 70c8ff4fe57..32f1c88bc9f 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/adapter/SQLResultSet.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/adapter/SQLResultSet.java @@ -22,7 +22,6 @@ import java.math.BigDecimal; import java.net.URL; import java.sql.*; -import java.util.Calendar; import java.util.Map; @SuppressWarnings("deprecation") @@ -436,36 +435,6 @@ default Array getArray(String columnLabel) throws SQLException { throw new SQLFeatureNotSupportedException("Method not supported"); } - @Override - default Date getDate(int columnIndex, Calendar cal) throws SQLException { - throw new SQLFeatureNotSupportedException("Method not supported"); - } - - @Override - default Date getDate(String columnLabel, Calendar cal) throws SQLException { - throw new SQLFeatureNotSupportedException("Method not supported"); - } - - @Override - default Time getTime(int columnIndex, Calendar cal) throws SQLException { - throw new SQLFeatureNotSupportedException("Method not supported"); - } - - @Override - default Time getTime(String columnLabel, Calendar cal) throws SQLException { - throw new SQLFeatureNotSupportedException("Method not supported"); - } - - @Override - default Timestamp getTimestamp(int columnIndex, Calendar cal) throws SQLException { - throw new SQLFeatureNotSupportedException("Method not supported"); - } - - @Override - default Timestamp getTimestamp(String columnLabel, Calendar cal) throws SQLException { - throw new SQLFeatureNotSupportedException("Method not supported"); - } - @Override default URL getURL(int columnIndex) throws SQLException { throw new SQLFeatureNotSupportedException("Method not supported"); diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/arrow/ArrowColumnarBatchRow.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/arrow/ArrowColumnarBatchRow.java index 373867069b4..15bcb7bf872 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/arrow/ArrowColumnarBatchRow.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/arrow/ArrowColumnarBatchRow.java @@ -21,10 +21,10 @@ import java.sql.Timestamp; import java.time.LocalDateTime; import org.apache.arrow.vector.util.DateUtility; -import org.apache.hive.service.rpc.thrift.TTypeId; import org.apache.kyuubi.jdbc.hive.common.DateUtils; import org.apache.kyuubi.jdbc.hive.common.HiveIntervalDayTime; import org.apache.kyuubi.jdbc.hive.common.HiveIntervalYearMonth; +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TTypeId; public class ArrowColumnarBatchRow { public int rowId; diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/arrow/ArrowUtils.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/arrow/ArrowUtils.java index 9a777d4c240..835df35fe70 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/arrow/ArrowUtils.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/arrow/ArrowUtils.java @@ -30,8 +30,8 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.hive.service.rpc.thrift.TTypeId; import org.apache.kyuubi.jdbc.hive.JdbcColumnAttributes; +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TTypeId; public class ArrowUtils { diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/auth/KerberosAuthentication.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/auth/KerberosAuthentication.java index a7683523f49..a137fbb9946 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/auth/KerberosAuthentication.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/auth/KerberosAuthentication.java @@ -37,6 +37,7 @@ import javax.security.auth.login.Configuration; import javax.security.auth.login.LoginContext; import javax.security.auth.login.LoginException; +import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -48,8 +49,8 @@ public class KerberosAuthentication { private KerberosPrincipal principal = null; private final Configuration configuration; - KerberosAuthentication() { - this.configuration = createLoginFromTgtCacheConfiguration(); + KerberosAuthentication(String ticketCache) { + this.configuration = createLoginFromTgtCacheConfiguration(ticketCache); } KerberosAuthentication(String principal, String keytabLocation) { @@ -96,14 +97,16 @@ private static KerberosPrincipal createKerberosPrincipal(String principal) { } } - private static Configuration createLoginFromTgtCacheConfiguration() { + private static Configuration createLoginFromTgtCacheConfiguration(String ticketCache) { ImmutableMap.Builder optionsBuilder = ImmutableMap.builder() .put("useTicketCache", "true") .put("renewTGT", "true"); - String ticketCache = System.getenv("KRB5CCNAME"); - if (ticketCache != null) { + if (StringUtils.isBlank(ticketCache)) { + ticketCache = System.getenv("KRB5CCNAME"); + } + if (StringUtils.isNotBlank(ticketCache)) { optionsBuilder.put("ticketCache", ticketCache); } return createConfiguration(optionsBuilder); diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/auth/KerberosAuthenticationManager.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/auth/KerberosAuthenticationManager.java index 92927985fde..3df9aa8366d 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/auth/KerberosAuthenticationManager.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/auth/KerberosAuthenticationManager.java @@ -27,9 +27,10 @@ public class KerberosAuthenticationManager { private static final Map KEYTAB_AUTHENTICATION_CACHE = new ConcurrentHashMap<>(); - public static synchronized CachingKerberosAuthentication getTgtCacheAuthentication() { + public static synchronized CachingKerberosAuthentication getTgtCacheAuthentication( + String ticketCache) { if (GLOBAL_TGT_CACHE_AUTHENTICATION == null) { - KerberosAuthentication tgtCacheAuth = new KerberosAuthentication(); + KerberosAuthentication tgtCacheAuth = new KerberosAuthentication(ticketCache); GLOBAL_TGT_CACHE_AUTHENTICATION = new CachingKerberosAuthentication(tgtCacheAuth); } return GLOBAL_TGT_CACHE_AUTHENTICATION; diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/auth/KerberosSaslHelper.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/auth/KerberosSaslHelper.java index 67ac6e1663e..e3fb6729365 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/auth/KerberosSaslHelper.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/auth/KerberosSaslHelper.java @@ -20,8 +20,8 @@ import java.util.Map; import javax.security.auth.Subject; import javax.security.sasl.SaslException; -import org.apache.thrift.transport.TSaslClientTransport; -import org.apache.thrift.transport.TTransport; +import org.apache.kyuubi.shaded.thrift.transport.TSaslClientTransport; +import org.apache.kyuubi.shaded.thrift.transport.TTransport; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/auth/PlainSaslHelper.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/auth/PlainSaslHelper.java index 62b4898e24c..43272e48da5 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/auth/PlainSaslHelper.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/auth/PlainSaslHelper.java @@ -20,8 +20,8 @@ import java.util.HashMap; import javax.security.auth.callback.*; import javax.security.sasl.SaslException; -import org.apache.thrift.transport.TSaslClientTransport; -import org.apache.thrift.transport.TTransport; +import org.apache.kyuubi.shaded.thrift.transport.TSaslClientTransport; +import org.apache.kyuubi.shaded.thrift.transport.TTransport; public final class PlainSaslHelper { diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/auth/TFilterTransport.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/auth/TFilterTransport.java index 1c7da82fe1e..6d462717b89 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/auth/TFilterTransport.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/auth/TFilterTransport.java @@ -17,8 +17,8 @@ package org.apache.kyuubi.jdbc.hive.auth; -import org.apache.thrift.transport.TTransport; -import org.apache.thrift.transport.TTransportException; +import org.apache.kyuubi.shaded.thrift.transport.TTransport; +import org.apache.kyuubi.shaded.thrift.transport.TTransportException; /** * Transport that simply wraps another transport. This is the equivalent of FilterInputStream for diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/auth/TSubjectTransport.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/auth/TSubjectTransport.java index c0785aeed67..be7e581eefe 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/auth/TSubjectTransport.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/auth/TSubjectTransport.java @@ -20,8 +20,8 @@ import java.security.PrivilegedActionException; import java.security.PrivilegedExceptionAction; import javax.security.auth.Subject; -import org.apache.thrift.transport.TTransport; -import org.apache.thrift.transport.TTransportException; +import org.apache.kyuubi.shaded.thrift.transport.TTransport; +import org.apache.kyuubi.shaded.thrift.transport.TTransportException; /** * This is used on the client side, where the API explicitly opens transport to the server using the diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/auth/ThriftUtils.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/auth/ThriftUtils.java index 24f2bf53abd..b76401d50ce 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/auth/ThriftUtils.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/auth/ThriftUtils.java @@ -19,10 +19,10 @@ import javax.net.ssl.SSLParameters; import javax.net.ssl.SSLSocket; -import org.apache.thrift.transport.TSSLTransportFactory; -import org.apache.thrift.transport.TSocket; -import org.apache.thrift.transport.TTransport; -import org.apache.thrift.transport.TTransportException; +import org.apache.kyuubi.shaded.thrift.transport.TSSLTransportFactory; +import org.apache.kyuubi.shaded.thrift.transport.TSocket; +import org.apache.kyuubi.shaded.thrift.transport.TTransport; +import org.apache.kyuubi.shaded.thrift.transport.TTransportException; /** * This class helps in some aspects of authentication. It creates the proper Thrift classes for the diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/cli/ColumnBasedSet.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/cli/ColumnBasedSet.java index 675f4b92d2c..c775f0491ca 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/cli/ColumnBasedSet.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/cli/ColumnBasedSet.java @@ -21,12 +21,12 @@ import java.util.ArrayList; import java.util.Iterator; import java.util.List; -import org.apache.hive.service.rpc.thrift.TColumn; -import org.apache.hive.service.rpc.thrift.TRowSet; -import org.apache.thrift.TException; -import org.apache.thrift.protocol.TCompactProtocol; -import org.apache.thrift.protocol.TProtocol; -import org.apache.thrift.transport.TIOStreamTransport; +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TColumn; +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TRowSet; +import org.apache.kyuubi.shaded.thrift.TException; +import org.apache.kyuubi.shaded.thrift.protocol.TCompactProtocol; +import org.apache.kyuubi.shaded.thrift.protocol.TProtocol; +import org.apache.kyuubi.shaded.thrift.transport.TIOStreamTransport; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/cli/ColumnBuffer.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/cli/ColumnBuffer.java index bd5124f9524..0d1bd445d44 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/cli/ColumnBuffer.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/cli/ColumnBuffer.java @@ -23,8 +23,8 @@ import java.util.Arrays; import java.util.BitSet; import java.util.List; -import org.apache.hive.service.rpc.thrift.TColumn; -import org.apache.hive.service.rpc.thrift.TTypeId; +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TColumn; +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TTypeId; /** ColumnBuffer */ public class ColumnBuffer extends AbstractList { diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/cli/ColumnValue.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/cli/ColumnValue.java index 291b791c81d..2efe35618fa 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/cli/ColumnValue.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/cli/ColumnValue.java @@ -17,7 +17,7 @@ package org.apache.kyuubi.jdbc.hive.cli; -import org.apache.hive.service.rpc.thrift.*; +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.*; /** Protocols before HIVE_CLI_SERVICE_PROTOCOL_V6 (used by RowBasedSet) */ public class ColumnValue { diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/cli/RowBasedSet.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/cli/RowBasedSet.java index 3e9c48428c5..8eacedd11c6 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/cli/RowBasedSet.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/cli/RowBasedSet.java @@ -20,9 +20,9 @@ import java.util.ArrayList; import java.util.Iterator; import java.util.List; -import org.apache.hive.service.rpc.thrift.TColumnValue; -import org.apache.hive.service.rpc.thrift.TRow; -import org.apache.hive.service.rpc.thrift.TRowSet; +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TColumnValue; +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TRow; +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TRowSet; /** RowBasedSet */ public class RowBasedSet implements RowSet { diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/cli/RowSetFactory.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/cli/RowSetFactory.java index 48b7e4ad894..0b3afbf35c6 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/cli/RowSetFactory.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/cli/RowSetFactory.java @@ -17,11 +17,11 @@ package org.apache.kyuubi.jdbc.hive.cli; -import static org.apache.hive.service.rpc.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6; +import static org.apache.kyuubi.shaded.hive.service.rpc.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6; -import org.apache.hive.service.rpc.thrift.TProtocolVersion; -import org.apache.hive.service.rpc.thrift.TRowSet; -import org.apache.thrift.TException; +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TProtocolVersion; +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TRowSet; +import org.apache.kyuubi.shaded.thrift.TException; public class RowSetFactory { diff --git a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/logs/InPlaceUpdateStream.java b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/logs/InPlaceUpdateStream.java index 8ca106e2146..266d88c26b1 100644 --- a/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/logs/InPlaceUpdateStream.java +++ b/kyuubi-hive-jdbc/src/main/java/org/apache/kyuubi/jdbc/hive/logs/InPlaceUpdateStream.java @@ -17,7 +17,7 @@ package org.apache.kyuubi.jdbc.hive.logs; -import org.apache.hive.service.rpc.thrift.TProgressUpdateResp; +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TProgressUpdateResp; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/kyuubi-hive-jdbc/src/test/java/org/apache/kyuubi/jdbc/hive/TestKyuubiPreparedStatement.java b/kyuubi-hive-jdbc/src/test/java/org/apache/kyuubi/jdbc/hive/TestKyuubiPreparedStatement.java index 522c209e7aa..df0c2d84b78 100644 --- a/kyuubi-hive-jdbc/src/test/java/org/apache/kyuubi/jdbc/hive/TestKyuubiPreparedStatement.java +++ b/kyuubi-hive-jdbc/src/test/java/org/apache/kyuubi/jdbc/hive/TestKyuubiPreparedStatement.java @@ -23,8 +23,8 @@ import static org.mockito.Mockito.when; import java.sql.SQLException; -import org.apache.hive.service.rpc.thrift.*; -import org.apache.hive.service.rpc.thrift.TCLIService.Iface; +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.*; +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TCLIService.Iface; import org.junit.Before; import org.junit.Test; import org.mockito.ArgumentCaptor; diff --git a/kyuubi-hive-jdbc/src/test/java/org/apache/kyuubi/jdbc/hive/UtilsTest.java b/kyuubi-hive-jdbc/src/test/java/org/apache/kyuubi/jdbc/hive/UtilsTest.java index b01957b3e43..fc4a55d9ff2 100644 --- a/kyuubi-hive-jdbc/src/test/java/org/apache/kyuubi/jdbc/hive/UtilsTest.java +++ b/kyuubi-hive-jdbc/src/test/java/org/apache/kyuubi/jdbc/hive/UtilsTest.java @@ -111,6 +111,17 @@ public static Collection data() throws UnsupportedEncodingException { StandardCharsets.UTF_8.toString()) .replaceAll("\\+", "%20") + "#k4=v4" + }, + { + "hostname", + "10018", + "catalog", + "db", + new ImmutableMap.Builder() + .put("k2", "v2") + .put("k3", "hostname:10018") + .build(), + "jdbc:hive2://hostname:10018/catalog/db;k1=v1?k2=v2;k3=hostname:10018" } }); } diff --git a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/AdminRestApi.java b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/AdminRestApi.java index 904ecb6c9d6..3e59d0c5b67 100644 --- a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/AdminRestApi.java +++ b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/AdminRestApi.java @@ -70,13 +70,12 @@ public String deleteEngine( } public List listEngines( - String engineType, String shareLevel, String subdomain, String hs2ProxyUser, String all) { + String engineType, String shareLevel, String subdomain, String hs2ProxyUser) { Map params = new HashMap<>(); params.put("type", engineType); params.put("sharelevel", shareLevel); params.put("subdomain", subdomain); params.put("hive.server2.proxy.user", hs2ProxyUser); - params.put("all", all); Engine[] result = this.getClient() .get(API_BASE_PATH + "/engine", params, Engine[].class, client.getAuthHeader()); diff --git a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/BatchRestApi.java b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/BatchRestApi.java index 7d113308df1..e6f9577b345 100644 --- a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/BatchRestApi.java +++ b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/BatchRestApi.java @@ -23,8 +23,11 @@ import org.apache.kyuubi.client.api.v1.dto.*; import org.apache.kyuubi.client.util.JsonUtils; import org.apache.kyuubi.client.util.VersionUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class BatchRestApi { + static final Logger LOG = LoggerFactory.getLogger(BatchRestApi.class); private KyuubiRestClient client; @@ -101,7 +104,15 @@ public OperationLog getBatchLocalLog(String batchId, int from, int size) { return this.getClient().get(path, params, OperationLog.class, client.getAuthHeader()); } + /** + * hs2ProxyUser for delete batch is deprecated since 1.8.1, please use {@link + * #deleteBatch(String)} instead. + */ + @Deprecated public CloseBatchResponse deleteBatch(String batchId, String hs2ProxyUser) { + LOG.warn( + "The method `deleteBatch(batchId, hs2ProxyUser)` is deprecated since 1.8.1, " + + "using `deleteBatch(batchId)` instead."); Map params = new HashMap<>(); params.put("hive.server2.proxy.user", hs2ProxyUser); @@ -109,6 +120,11 @@ public CloseBatchResponse deleteBatch(String batchId, String hs2ProxyUser) { return this.getClient().delete(path, params, CloseBatchResponse.class, client.getAuthHeader()); } + public CloseBatchResponse deleteBatch(String batchId) { + String path = String.format("%s/%s", API_BASE_PATH, batchId); + return this.getClient().delete(path, null, CloseBatchResponse.class, client.getAuthHeader()); + } + private IRestClient getClient() { return this.client.getHttpClient(); } diff --git a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/KyuubiOperationEvent.java b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/KyuubiOperationEvent.java index 13c40eecf78..ec583954216 100644 --- a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/KyuubiOperationEvent.java +++ b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/KyuubiOperationEvent.java @@ -51,6 +51,8 @@ public class KyuubiOperationEvent { private Map metrics; + private OperationProgress progress; + public KyuubiOperationEvent() {} public KyuubiOperationEvent( @@ -68,7 +70,8 @@ public KyuubiOperationEvent( String sessionUser, String sessionType, String kyuubiInstance, - Map metrics) { + Map metrics, + OperationProgress progress) { this.statementId = statementId; this.remoteId = remoteId; this.statement = statement; @@ -84,6 +87,7 @@ public KyuubiOperationEvent( this.sessionType = sessionType; this.kyuubiInstance = kyuubiInstance; this.metrics = metrics; + this.progress = progress; } public static KyuubiOperationEvent.KyuubiOperationEventBuilder builder() { @@ -121,6 +125,8 @@ public static class KyuubiOperationEventBuilder { private Map metrics; + private OperationProgress progress; + public KyuubiOperationEventBuilder() {} public KyuubiOperationEvent.KyuubiOperationEventBuilder statementId(final String statementId) { @@ -201,6 +207,12 @@ public KyuubiOperationEvent.KyuubiOperationEventBuilder metrics( return this; } + public KyuubiOperationEvent.KyuubiOperationEventBuilder progress( + final OperationProgress progress) { + this.progress = progress; + return this; + } + public KyuubiOperationEvent build() { return new KyuubiOperationEvent( statementId, @@ -217,7 +229,8 @@ public KyuubiOperationEvent build() { sessionUser, sessionType, kyuubiInstance, - metrics); + metrics, + progress); } } @@ -340,4 +353,12 @@ public Map getMetrics() { public void setMetrics(Map metrics) { this.metrics = metrics; } + + public OperationProgress getProgress() { + return progress; + } + + public void setProgress(OperationProgress progress) { + this.progress = progress; + } } diff --git a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/OperationData.java b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/OperationData.java index 70c2dd3f3a1..8e5bafc6e28 100644 --- a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/OperationData.java +++ b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/OperationData.java @@ -25,6 +25,7 @@ public class OperationData { private String identifier; + private String remoteId; private String statement; private String state; private Long createTime; @@ -36,11 +37,13 @@ public class OperationData { private String sessionType; private String kyuubiInstance; private Map metrics; + private OperationProgress progress; public OperationData() {} public OperationData( String identifier, + String remoteId, String statement, String state, Long createTime, @@ -51,8 +54,10 @@ public OperationData( String sessionUser, String sessionType, String kyuubiInstance, - Map metrics) { + Map metrics, + OperationProgress progress) { this.identifier = identifier; + this.remoteId = remoteId; this.statement = statement; this.state = state; this.createTime = createTime; @@ -64,6 +69,7 @@ public OperationData( this.sessionType = sessionType; this.kyuubiInstance = kyuubiInstance; this.metrics = metrics; + this.progress = progress; } public String getIdentifier() { @@ -74,6 +80,14 @@ public void setIdentifier(String identifier) { this.identifier = identifier; } + public String getRemoteId() { + return remoteId; + } + + public void setRemoteId(String remoteId) { + this.remoteId = remoteId; + } + public String getStatement() { return statement; } @@ -165,6 +179,14 @@ public void setMetrics(Map metrics) { this.metrics = metrics; } + public OperationProgress getProgress() { + return progress; + } + + public void setProgress(OperationProgress progress) { + this.progress = progress; + } + @Override public boolean equals(Object o) { if (this == o) return true; diff --git a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/OperationProgress.java b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/OperationProgress.java new file mode 100644 index 00000000000..8668f2f30f7 --- /dev/null +++ b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/OperationProgress.java @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.client.api.v1.dto; + +import java.util.Collections; +import java.util.List; +import java.util.Objects; +import org.apache.commons.lang3.builder.ReflectionToStringBuilder; +import org.apache.commons.lang3.builder.ToStringStyle; + +public class OperationProgress { + private List headerNames; + private List> rows; + private double progressedPercentage; + private String status; + private String footerSummary; + private long startTime; + + public OperationProgress() {} + + public OperationProgress( + List headerNames, + List> rows, + double progressedPercentage, + String status, + String footerSummary, + long startTime) { + this.headerNames = headerNames; + this.rows = rows; + this.progressedPercentage = progressedPercentage; + this.status = status; + this.footerSummary = footerSummary; + this.startTime = startTime; + } + + public List getHeaderNames() { + if (headerNames == null) { + return Collections.emptyList(); + } + return headerNames; + } + + public void setHeaderNames(List headerNames) { + this.headerNames = headerNames; + } + + public List> getRows() { + if (rows == null) { + return Collections.emptyList(); + } + return rows; + } + + public void setRows(List> rows) { + this.rows = rows; + } + + public double getProgressedPercentage() { + return progressedPercentage; + } + + public void setProgressedPercentage(double progressedPercentage) { + this.progressedPercentage = progressedPercentage; + } + + public String getStatus() { + return status; + } + + public void setStatus(String status) { + this.status = status; + } + + public String getFooterSummary() { + return footerSummary; + } + + public void setFooterSummary(String footerSummary) { + this.footerSummary = footerSummary; + } + + public long getStartTime() { + return startTime; + } + + public void setStartTime(long startTime) { + this.startTime = startTime; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + OperationProgress that = (OperationProgress) o; + return Double.compare(getProgressedPercentage(), that.getProgressedPercentage()) == 0 + && getStartTime() == that.getStartTime() + && Objects.equals(getHeaderNames(), that.getHeaderNames()) + && Objects.equals(getRows(), that.getRows()) + && Objects.equals(getStatus(), that.getStatus()) + && Objects.equals(getFooterSummary(), that.getFooterSummary()); + } + + @Override + public int hashCode() { + return Objects.hash( + getHeaderNames(), + getRows(), + getProgressedPercentage(), + getStatus(), + getFooterSummary(), + getStartTime()); + } + + @Override + public String toString() { + return ReflectionToStringBuilder.toString(this, ToStringStyle.JSON_STYLE); + } +} diff --git a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/SessionData.java b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/SessionData.java index ae7dfdec984..30a4eb51540 100644 --- a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/SessionData.java +++ b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/api/v1/dto/SessionData.java @@ -25,6 +25,7 @@ public class SessionData { private String identifier; + private String remoteId; private String user; private String ipAddr; private Map conf; @@ -40,6 +41,7 @@ public SessionData() {} public SessionData( String identifier, + String remoteId, String user, String ipAddr, Map conf, @@ -51,6 +53,7 @@ public SessionData( String kyuubiInstance, String engineId) { this.identifier = identifier; + this.remoteId = remoteId; this.user = user; this.ipAddr = ipAddr; this.conf = conf; @@ -71,6 +74,14 @@ public void setIdentifier(String identifier) { this.identifier = identifier; } + public String getRemoteId() { + return remoteId; + } + + public void setRemoteId(String remoteId) { + this.remoteId = remoteId; + } + public String getUser() { return user; } diff --git a/kyuubi-rest-client/src/test/java/org/apache/kyuubi/client/BatchRestClientTest.java b/kyuubi-rest-client/src/test/java/org/apache/kyuubi/client/BatchRestClientTest.java index 80fb1c4b95f..9715460ca5f 100644 --- a/kyuubi-rest-client/src/test/java/org/apache/kyuubi/client/BatchRestClientTest.java +++ b/kyuubi-rest-client/src/test/java/org/apache/kyuubi/client/BatchRestClientTest.java @@ -267,13 +267,13 @@ public void getOperationLogTest() { public void deleteBatchTest() { // test spnego auth BatchTestServlet.setAuthSchema(NEGOTIATE_AUTH); - CloseBatchResponse response = spnegoBatchRestApi.deleteBatch("71535", "b_test"); + CloseBatchResponse response = spnegoBatchRestApi.deleteBatch("71535"); assertTrue(response.isSuccess()); // test basic auth BatchTestServlet.setAuthSchema(BASIC_AUTH); BatchTestServlet.allowAnonymous(false); - response = basicBatchRestApi.deleteBatch("71535", "b_test"); + response = basicBatchRestApi.deleteBatch("71535"); assertTrue(response.isSuccess()); } } diff --git a/kyuubi-server/pom.xml b/kyuubi-server/pom.xml index 56155a27bec..17fd851d2ec 100644 --- a/kyuubi-server/pom.xml +++ b/kyuubi-server/pom.xml @@ -187,6 +187,21 @@ + + org.apache.thrift + libfb303 + + + + org.apache.thrift + libthrift + + + + org.apache.hive + hive-service-rpc + + commons-lang commons-lang diff --git a/kyuubi-server/src/main/resources/META-INF/services/org.apache.hadoop.security.token.TokenIdentifier b/kyuubi-server/src/main/resources/META-INF/services/org.apache.hadoop.security.token.TokenIdentifier index 3b1f088f90a..65e2965c025 100644 --- a/kyuubi-server/src/main/resources/META-INF/services/org.apache.hadoop.security.token.TokenIdentifier +++ b/kyuubi-server/src/main/resources/META-INF/services/org.apache.hadoop.security.token.TokenIdentifier @@ -6,7 +6,7 @@ # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, diff --git a/kyuubi-server/src/main/resources/META-INF/services/org.apache.kyuubi.credentials.HadoopDelegationTokenProvider b/kyuubi-server/src/main/resources/META-INF/services/org.apache.kyuubi.credentials.HadoopDelegationTokenProvider index 1d931c8c79d..95d6e1987fa 100644 --- a/kyuubi-server/src/main/resources/META-INF/services/org.apache.kyuubi.credentials.HadoopDelegationTokenProvider +++ b/kyuubi-server/src/main/resources/META-INF/services/org.apache.kyuubi.credentials.HadoopDelegationTokenProvider @@ -6,7 +6,7 @@ # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, diff --git a/kyuubi-server/src/main/resources/META-INF/services/org.apache.kyuubi.engine.ApplicationOperation b/kyuubi-server/src/main/resources/META-INF/services/org.apache.kyuubi.engine.ApplicationOperation index 712bd8f2e2f..b6df64bd9ba 100644 --- a/kyuubi-server/src/main/resources/META-INF/services/org.apache.kyuubi.engine.ApplicationOperation +++ b/kyuubi-server/src/main/resources/META-INF/services/org.apache.kyuubi.engine.ApplicationOperation @@ -6,7 +6,7 @@ # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, @@ -15,6 +15,6 @@ # limitations under the License. # -org.apache.kyuubi.engine.YarnApplicationOperation org.apache.kyuubi.engine.JpsApplicationOperation org.apache.kyuubi.engine.KubernetesApplicationOperation +org.apache.kyuubi.engine.YarnApplicationOperation diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/client/KyuubiSyncThriftClient.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/client/KyuubiSyncThriftClient.scala index ad7191c090c..0dc6692da43 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/client/KyuubiSyncThriftClient.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/client/KyuubiSyncThriftClient.scala @@ -26,9 +26,6 @@ import scala.concurrent.ExecutionException import scala.concurrent.duration.Duration import com.google.common.annotations.VisibleForTesting -import org.apache.hive.service.rpc.thrift._ -import org.apache.thrift.protocol.{TBinaryProtocol, TProtocol} -import org.apache.thrift.transport.TSocket import org.apache.kyuubi.{KyuubiSQLException, Logging, Utils} import org.apache.kyuubi.config.KyuubiConf @@ -38,7 +35,11 @@ import org.apache.kyuubi.operation.FetchOrientation import org.apache.kyuubi.operation.FetchOrientation.FetchOrientation import org.apache.kyuubi.service.authentication.PlainSASLHelper import org.apache.kyuubi.session.SessionHandle +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ +import org.apache.kyuubi.shaded.thrift.protocol.{TBinaryProtocol, TProtocol} +import org.apache.kyuubi.shaded.thrift.transport.TSocket import org.apache.kyuubi.util.{ThreadUtils, ThriftUtils} +import org.apache.kyuubi.util.ThreadUtils.scheduleTolerableRunnableWithFixedDelay class KyuubiSyncThriftClient private ( protocol: TProtocol, @@ -98,10 +99,11 @@ class KyuubiSyncThriftClient private ( remoteEngineBroken = false } catch { case e: Throwable => - warn(s"The engine[$engineId] alive probe fails", e) + val engineIdStr = engineId.getOrElse("") + warn(s"The engine[$engineIdStr] alive probe fails", e) val now = System.currentTimeMillis() if (now - engineLastAlive > engineAliveTimeout) { - error(s"Mark the engine[$engineId] not alive with no recent alive probe" + + error(s"Mark the engine[$engineIdStr] not alive with no recent alive probe" + s" success: ${now - engineLastAlive} ms exceeds timeout $engineAliveTimeout ms") remoteEngineBroken = true } @@ -125,7 +127,8 @@ class KyuubiSyncThriftClient private ( } } engineLastAlive = System.currentTimeMillis() - engineAliveThreadPool.scheduleWithFixedDelay( + scheduleTolerableRunnableWithFixedDelay( + engineAliveThreadPool, task, engineAliveProbeInterval, engineAliveProbeInterval, diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/credentials/HadoopCredentialsManager.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/credentials/HadoopCredentialsManager.scala index b51255b716f..92b201718b4 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/credentials/HadoopCredentialsManager.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/credentials/HadoopCredentialsManager.scala @@ -33,6 +33,7 @@ import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiConf._ import org.apache.kyuubi.service.AbstractService import org.apache.kyuubi.util.{KyuubiHadoopUtils, ThreadUtils} +import org.apache.kyuubi.util.ThreadUtils.scheduleTolerableRunnableWithFixedDelay import org.apache.kyuubi.util.reflect.ReflectUtils._ /** @@ -107,7 +108,7 @@ class HadoopCredentialsManager private (name: String) extends AbstractService(na s" Check your configuration to see if security is disabled or not." + s" If security is enabled, some configurations of ${provider.serviceName} " + s" might be missing, please check the configurations in " + - s" https://kyuubi.readthedocs.io/en/latest/security" + + s" https://kyuubi.readthedocs.io/en/master/security" + s"/hadoop_credentials_manager.html#required-security-configs") provider.close() } @@ -299,7 +300,8 @@ class HadoopCredentialsManager private (name: String) extends AbstractService(na } credentialsTimeoutChecker.foreach { executor => - executor.scheduleWithFixedDelay( + scheduleTolerableRunnableWithFixedDelay( + executor, checkTask, credentialsCheckInterval, credentialsCheckInterval, diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala index 6122a6f138f..2bd8554036e 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala @@ -307,6 +307,19 @@ private[kyuubi] class EngineRef( } } + /** + * Deregister the engine from engine space with the given host and port on connection failure. + * + * @param discoveryClient the zookeeper client to get or create engine instance + * @param hostPort the existing engine host and port + */ + def deregister(discoveryClient: DiscoveryClient, hostPort: (String, Int)): Unit = + tryWithLock(discoveryClient) { + if (discoveryClient.getServerHost(engineSpace) == Option(hostPort)) { + discoveryClient.delete(engineSpace) + } + } + def close(): Unit = { if (shareLevel == CONNECTION && builder != null) { try { diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/KubernetesApplicationAuditLogger.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/KubernetesApplicationAuditLogger.scala index 731b9d7b5ba..565c8a694e5 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/KubernetesApplicationAuditLogger.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/KubernetesApplicationAuditLogger.scala @@ -17,25 +17,40 @@ package org.apache.kyuubi.engine +import scala.collection.JavaConverters._ + import io.fabric8.kubernetes.api.model.Pod import org.apache.kyuubi.Logging -import org.apache.kyuubi.engine.KubernetesApplicationOperation.{toApplicationState, LABEL_KYUUBI_UNIQUE_KEY, SPARK_APP_ID_LABEL} +import org.apache.kyuubi.config.KyuubiConf.KubernetesApplicationStateSource.KubernetesApplicationStateSource +import org.apache.kyuubi.engine.KubernetesApplicationOperation.{toApplicationStateAndError, LABEL_KYUUBI_UNIQUE_KEY, SPARK_APP_ID_LABEL} object KubernetesApplicationAuditLogger extends Logging { final private val AUDIT_BUFFER = new ThreadLocal[StringBuilder]() { override protected def initialValue: StringBuilder = new StringBuilder() } - def audit(kubernetesInfo: KubernetesInfo, pod: Pod): Unit = { + def audit( + kubernetesInfo: KubernetesInfo, + pod: Pod, + appStateSource: KubernetesApplicationStateSource, + appStateContainer: String): Unit = { val sb = AUDIT_BUFFER.get() sb.setLength(0) sb.append(s"label=${pod.getMetadata.getLabels.get(LABEL_KYUUBI_UNIQUE_KEY)}").append("\t") sb.append(s"context=${kubernetesInfo.context.orNull}").append("\t") sb.append(s"namespace=${kubernetesInfo.namespace.orNull}").append("\t") sb.append(s"pod=${pod.getMetadata.getName}").append("\t") + sb.append(s"podState=${pod.getStatus.getPhase}").append("\t") + val containerStatuses = pod.getStatus.getContainerStatuses.asScala.map { containerState => + s"${containerState.getName}->${containerState.getState}" + }.mkString("[", ",", "]") + sb.append(s"containers=$containerStatuses").append("\t") sb.append(s"appId=${pod.getMetadata.getLabels.get(SPARK_APP_ID_LABEL)}").append("\t") - sb.append(s"appState=${toApplicationState(pod.getStatus.getPhase)}") + val (appState, appError) = + toApplicationStateAndError(pod, appStateSource, appStateContainer) + sb.append(s"appState=$appState").append("\t") + sb.append(s"appError='${appError.getOrElse("")}'") info(sb.toString()) } } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/KubernetesApplicationOperation.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/KubernetesApplicationOperation.scala index 16a0c29d149..6afe3257be9 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/KubernetesApplicationOperation.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/KubernetesApplicationOperation.scala @@ -18,22 +18,26 @@ package org.apache.kyuubi.engine import java.util.Locale -import java.util.concurrent.{ConcurrentHashMap, TimeUnit} +import java.util.concurrent.{ConcurrentHashMap, ScheduledExecutorService, TimeUnit} import scala.collection.JavaConverters._ +import scala.util.control.NonFatal import com.google.common.cache.{Cache, CacheBuilder, RemovalNotification} -import io.fabric8.kubernetes.api.model.Pod +import io.fabric8.kubernetes.api.model.{ContainerState, Pod} import io.fabric8.kubernetes.client.KubernetesClient import io.fabric8.kubernetes.client.informers.{ResourceEventHandler, SharedIndexInformer} import org.apache.kyuubi.{KyuubiException, Logging, Utils} import org.apache.kyuubi.config.KyuubiConf +import org.apache.kyuubi.config.KyuubiConf.{KubernetesApplicationStateSource, KubernetesCleanupDriverPodStrategy} +import org.apache.kyuubi.config.KyuubiConf.KubernetesApplicationStateSource.KubernetesApplicationStateSource +import org.apache.kyuubi.config.KyuubiConf.KubernetesCleanupDriverPodStrategy.{ALL, COMPLETED, NONE} import org.apache.kyuubi.engine.ApplicationState.{isTerminated, ApplicationState, FAILED, FINISHED, NOT_FOUND, PENDING, RUNNING, UNKNOWN} -import org.apache.kyuubi.engine.KubernetesApplicationOperation.{toApplicationState, toLabel, LABEL_KYUUBI_UNIQUE_KEY, SPARK_APP_ID_LABEL} -import org.apache.kyuubi.util.KubernetesUtils +import org.apache.kyuubi.util.{KubernetesUtils, ThreadUtils} class KubernetesApplicationOperation extends ApplicationOperation with Logging { + import KubernetesApplicationOperation._ private val kubernetesClients: ConcurrentHashMap[KubernetesInfo, KubernetesClient] = new ConcurrentHashMap[KubernetesInfo, KubernetesClient] @@ -48,12 +52,20 @@ class KubernetesApplicationOperation extends ApplicationOperation with Logging { private def allowedNamespaces: Set[String] = kyuubiConf.get(KyuubiConf.KUBERNETES_NAMESPACE_ALLOW_LIST) + private def appStateSource: KubernetesApplicationStateSource = + KubernetesApplicationStateSource.withName( + kyuubiConf.get(KyuubiConf.KUBERNETES_APPLICATION_STATE_SOURCE)) + private def appStateContainer: String = + kyuubiConf.get(KyuubiConf.KUBERNETES_APPLICATION_STATE_CONTAINER) + // key is kyuubi_unique_key - private val appInfoStore: ConcurrentHashMap[String, ApplicationInfo] = - new ConcurrentHashMap[String, ApplicationInfo] + private val appInfoStore: ConcurrentHashMap[String, (KubernetesInfo, ApplicationInfo)] = + new ConcurrentHashMap[String, (KubernetesInfo, ApplicationInfo)] // key is kyuubi_unique_key private var cleanupTerminatedAppInfoTrigger: Cache[String, ApplicationState] = _ + private var expireCleanUpTriggerCacheExecutor: ScheduledExecutorService = _ + private def getOrCreateKubernetesClient(kubernetesInfo: KubernetesInfo): KubernetesClient = { checkKubernetesInfo(kubernetesInfo) kubernetesClients.computeIfAbsent(kubernetesInfo, kInfo => buildKubernetesClient(kInfo)) @@ -98,15 +110,65 @@ class KubernetesApplicationOperation extends ApplicationOperation with Logging { submitTimeout = conf.get(KyuubiConf.ENGINE_KUBERNETES_SUBMIT_TIMEOUT) // Defer cleaning terminated application information val retainPeriod = conf.get(KyuubiConf.KUBERNETES_TERMINATED_APPLICATION_RETAIN_PERIOD) + val cleanupDriverPodStrategy = KubernetesCleanupDriverPodStrategy.withName( + conf.get(KyuubiConf.KUBERNETES_SPARK_CLEANUP_TERMINATED_DRIVER_POD_KIND)) + val cleanupDriverPodCheckInterval = conf.get( + KyuubiConf.KUBERNETES_SPARK_CLEANUP_TERMINATED_DRIVER_POD_KIND_CHECK_INTERVAL) cleanupTerminatedAppInfoTrigger = CacheBuilder.newBuilder() .expireAfterWrite(retainPeriod, TimeUnit.MILLISECONDS) .removalListener((notification: RemovalNotification[String, ApplicationState]) => { - Option(appInfoStore.remove(notification.getKey)).foreach { removed => - info(s"Remove terminated application ${removed.id} with " + - s"[${toLabel(notification.getKey)}, state: ${removed.state}]") + Option(appInfoStore.remove(notification.getKey)).foreach { case (kubernetesInfo, removed) => + val appLabel = notification.getKey + val shouldDelete = cleanupDriverPodStrategy match { + case NONE => false + case ALL => true + case COMPLETED => !ApplicationState.isFailed(notification.getValue) + } + if (shouldDelete) { + val podName = removed.name + try { + val kubernetesClient = getOrCreateKubernetesClient(kubernetesInfo) + val deleted = if (podName == null) { + !kubernetesClient.pods() + .withLabel(LABEL_KYUUBI_UNIQUE_KEY, appLabel) + .delete().isEmpty + } else { + !kubernetesClient.pods().withName(podName).delete().isEmpty + } + if (deleted) { + info(s"[$kubernetesInfo] Operation of delete pod $podName with" + + s" ${toLabel(appLabel)} is completed.") + } else { + warn(s"[$kubernetesInfo] Failed to delete pod $podName with ${toLabel(appLabel)}.") + } + } catch { + case NonFatal(e) => error( + s"[$kubernetesInfo] Failed to delete pod $podName with ${toLabel(appLabel)}", + e) + } + } + info(s"Remove terminated application $removed with ${toLabel(appLabel)}") } }) .build() + expireCleanUpTriggerCacheExecutor = ThreadUtils.newDaemonSingleThreadScheduledExecutor( + "pod-cleanup-trigger-thread") + ThreadUtils.scheduleTolerableRunnableWithFixedDelay( + expireCleanUpTriggerCacheExecutor, + () => { + try { + cleanupTerminatedAppInfoTrigger.asMap().asScala.foreach { + case (key, _) => + // do get to trigger cache eviction + cleanupTerminatedAppInfoTrigger.getIfPresent(key) + } + } catch { + case NonFatal(e) => error("Failed to evict clean up terminated app cache", e) + } + }, + cleanupDriverPodCheckInterval, + cleanupDriverPodCheckInterval, + TimeUnit.MILLISECONDS) } override def isSupported(appMgrInfo: ApplicationManagerInfo): Boolean = { @@ -127,7 +189,7 @@ class KubernetesApplicationOperation extends ApplicationOperation with Logging { debug(s"[$kubernetesInfo] Deleting application[${toLabel(tag)}]'s info from Kubernetes cluster") try { Option(appInfoStore.get(tag)) match { - case Some(info) => + case Some((_, info)) => debug(s"Application[${toLabel(tag)}] is in ${info.state} state") info.state match { case NOT_FOUND | FAILED | UNKNOWN => @@ -167,7 +229,8 @@ class KubernetesApplicationOperation extends ApplicationOperation with Logging { try { // need to initialize the kubernetes client if not exists getOrCreateKubernetesClient(appMgrInfo.kubernetesInfo) - val appInfo = appInfoStore.getOrDefault(tag, ApplicationInfo.NOT_FOUND) + val (_, appInfo) = + appInfoStore.getOrDefault(tag, appMgrInfo.kubernetesInfo -> ApplicationInfo.NOT_FOUND) (appInfo.state, submitTime) match { // Kyuubi should wait second if pod is not be created case (NOT_FOUND, Some(_submitTime)) => @@ -200,15 +263,15 @@ class KubernetesApplicationOperation extends ApplicationOperation with Logging { } enginePodInformers.clear() - kubernetesClients.asScala.foreach { case (_, client) => - Utils.tryLogNonFatalError(client.close()) - } - kubernetesClients.clear() - if (cleanupTerminatedAppInfoTrigger != null) { cleanupTerminatedAppInfoTrigger.cleanUp() cleanupTerminatedAppInfoTrigger = null } + + kubernetesClients.asScala.foreach { case (_, client) => + Utils.tryLogNonFatalError(client.close()) + } + kubernetesClients.clear() } private class SparkEnginePodEventHandler(kubernetesInfo: KubernetesInfo) @@ -216,27 +279,39 @@ class KubernetesApplicationOperation extends ApplicationOperation with Logging { override def onAdd(pod: Pod): Unit = { if (isSparkEnginePod(pod)) { - updateApplicationState(pod) - KubernetesApplicationAuditLogger.audit(kubernetesInfo, pod) + updateApplicationState(kubernetesInfo, pod) + KubernetesApplicationAuditLogger.audit( + kubernetesInfo, + pod, + appStateSource, + appStateContainer) } } override def onUpdate(oldPod: Pod, newPod: Pod): Unit = { if (isSparkEnginePod(newPod)) { - updateApplicationState(newPod) - val appState = toApplicationState(newPod.getStatus.getPhase) + updateApplicationState(kubernetesInfo, newPod) + val appState = toApplicationState(newPod, appStateSource, appStateContainer) if (isTerminated(appState)) { markApplicationTerminated(newPod) } - KubernetesApplicationAuditLogger.audit(kubernetesInfo, newPod) + KubernetesApplicationAuditLogger.audit( + kubernetesInfo, + newPod, + appStateSource, + appStateContainer) } } override def onDelete(pod: Pod, deletedFinalStateUnknown: Boolean): Unit = { if (isSparkEnginePod(pod)) { - updateApplicationState(pod) + updateApplicationState(kubernetesInfo, pod) markApplicationTerminated(pod) - KubernetesApplicationAuditLogger.audit(kubernetesInfo, pod) + KubernetesApplicationAuditLogger.audit( + kubernetesInfo, + pod, + appStateSource, + appStateContainer) } } } @@ -246,22 +321,25 @@ class KubernetesApplicationOperation extends ApplicationOperation with Logging { labels.containsKey(LABEL_KYUUBI_UNIQUE_KEY) && labels.containsKey(SPARK_APP_ID_LABEL) } - private def updateApplicationState(pod: Pod): Unit = { - val appState = toApplicationState(pod.getStatus.getPhase) + private def updateApplicationState(kubernetesInfo: KubernetesInfo, pod: Pod): Unit = { + val (appState, appError) = + toApplicationStateAndError(pod, appStateSource, appStateContainer) debug(s"Driver Informer changes pod: ${pod.getMetadata.getName} to state: $appState") appInfoStore.put( pod.getMetadata.getLabels.get(LABEL_KYUUBI_UNIQUE_KEY), - ApplicationInfo( + kubernetesInfo -> ApplicationInfo( id = pod.getMetadata.getLabels.get(SPARK_APP_ID_LABEL), name = pod.getMetadata.getName, state = appState, - error = Option(pod.getStatus.getReason))) + error = appError)) } private def markApplicationTerminated(pod: Pod): Unit = synchronized { val key = pod.getMetadata.getLabels.get(LABEL_KYUUBI_UNIQUE_KEY) if (cleanupTerminatedAppInfoTrigger.getIfPresent(key) == null) { - cleanupTerminatedAppInfoTrigger.put(key, toApplicationState(pod.getStatus.getPhase)) + cleanupTerminatedAppInfoTrigger.put( + key, + toApplicationState(pod, appStateSource, appStateContainer)) } } } @@ -274,16 +352,62 @@ object KubernetesApplicationOperation extends Logging { def toLabel(tag: String): String = s"label: $LABEL_KYUUBI_UNIQUE_KEY=$tag" - def toApplicationState(state: String): ApplicationState = state match { - // https://github.com/kubernetes/kubernetes/blob/master/pkg/apis/core/types.go#L2396 - // https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/ + def toApplicationState( + pod: Pod, + appStateSource: KubernetesApplicationStateSource, + appStateContainer: String): ApplicationState = { + toApplicationStateAndError(pod, appStateSource, appStateContainer)._1 + } + + def toApplicationStateAndError( + pod: Pod, + appStateSource: KubernetesApplicationStateSource, + appStateContainer: String): (ApplicationState, Option[String]) = { + val podName = pod.getMetadata.getName + val containerStateToBuildAppState = appStateSource match { + case KubernetesApplicationStateSource.CONTAINER => + pod.getStatus.getContainerStatuses.asScala + .find(cs => appStateContainer.equalsIgnoreCase(cs.getName)).map(_.getState) + case KubernetesApplicationStateSource.POD => None + } + val applicationState = containerStateToBuildAppState.map(containerStateToApplicationState) + .getOrElse(podStateToApplicationState(pod.getStatus.getPhase)) + val applicationError = containerStateToBuildAppState + .map(cs => containerStateToApplicationError(cs).map(r => s"$podName/$appStateContainer[$r]")) + .getOrElse(Option(pod.getStatus.getReason).map(r => s"$podName[$r]")) + applicationState -> applicationError + } + + def containerStateToApplicationState(containerState: ContainerState): ApplicationState = { + // https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#container-states + if (containerState.getWaiting != null) { + PENDING + } else if (containerState.getRunning != null) { + RUNNING + } else if (containerState.getTerminated == null) { + UNKNOWN + } else if (containerState.getTerminated.getExitCode == 0) { + FINISHED + } else { + FAILED + } + } + + def containerStateToApplicationError(containerState: ContainerState): Option[String] = { + // https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#container-states + Option(containerState.getWaiting).map(_.getReason) + .orElse(Option(containerState.getTerminated).map(_.getReason)) + } + + def podStateToApplicationState(podState: String): ApplicationState = podState match { + // https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-phase case "Pending" => PENDING case "Running" => RUNNING case "Succeeded" => FINISHED case "Failed" | "Error" => FAILED case "Unknown" => UNKNOWN case _ => - warn(s"The kubernetes driver pod state: $state is not supported, " + + warn(s"The spark driver pod state: $podState is not supported, " + "mark the application state as UNKNOWN.") UNKNOWN } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/ProcBuilder.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/ProcBuilder.scala index 84807a62d87..23196bf1ded 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/ProcBuilder.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/ProcBuilder.scala @@ -99,7 +99,7 @@ trait ProcBuilder { protected def proxyUser: String - protected val commands: Array[String] + protected val commands: Iterable[String] def conf: KyuubiConf @@ -142,7 +142,7 @@ trait ProcBuilder { } final lazy val processBuilder: ProcessBuilder = { - val pb = new ProcessBuilder(commands: _*) + val pb = new ProcessBuilder(commands.toStream.asJava) val envs = pb.environment() envs.putAll(env.asJava) @@ -287,10 +287,10 @@ trait ProcBuilder { override def toString: String = { if (commands == null) { - super.toString() + super.toString } else { Utils.redactCommandLineArgs(conf, commands).map { - case arg if arg.startsWith("--") => s"\\\n\t$arg" + case arg if arg.startsWith("-") => s"\\\n\t$arg" case arg => arg }.mkString(" ") } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/chat/ChatProcessBuilder.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/chat/ChatProcessBuilder.scala index 3e4a20de373..ddf88e14924 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/chat/ChatProcessBuilder.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/chat/ChatProcessBuilder.scala @@ -19,20 +19,18 @@ package org.apache.kyuubi.engine.chat import java.io.File import java.nio.file.{Files, Paths} -import java.util -import scala.collection.JavaConverters._ -import scala.collection.mutable.ArrayBuffer +import scala.collection.mutable import com.google.common.annotations.VisibleForTesting import org.apache.kyuubi.{Logging, SCALA_COMPILE_VERSION, Utils} -import org.apache.kyuubi.Utils.REDACTION_REPLACEMENT_TEXT import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiConf._ import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_SESSION_USER_KEY import org.apache.kyuubi.engine.ProcBuilder import org.apache.kyuubi.operation.log.OperationLog +import org.apache.kyuubi.util.command.CommandLineUtils._ class ChatProcessBuilder( override val proxyUser: String, @@ -59,8 +57,8 @@ class ChatProcessBuilder( */ override protected def mainClass: String = "org.apache.kyuubi.engine.chat.ChatEngine" - override protected val commands: Array[String] = { - val buffer = new ArrayBuffer[String]() + override protected val commands: Iterable[String] = { + val buffer = new mutable.ListBuffer[String]() buffer += executable val memory = conf.get(ENGINE_CHAT_MEMORY) @@ -69,8 +67,7 @@ class ChatProcessBuilder( val javaOptions = conf.get(ENGINE_CHAT_JAVA_OPTIONS) javaOptions.foreach(buffer += _) - buffer += "-cp" - val classpathEntries = new util.LinkedHashSet[String] + val classpathEntries = new mutable.LinkedHashSet[String] mainResource.foreach(classpathEntries.add) mainResource.foreach { path => val parent = Paths.get(path).getParent @@ -88,27 +85,25 @@ class ChatProcessBuilder( val extraCp = conf.get(ENGINE_CHAT_EXTRA_CLASSPATH) extraCp.foreach(classpathEntries.add) - buffer += classpathEntries.asScala.mkString(File.pathSeparator) + buffer ++= genClasspathOption(classpathEntries) + buffer += mainClass - buffer += "--conf" - buffer += s"$KYUUBI_SESSION_USER_KEY=$proxyUser" + buffer ++= confKeyValue(KYUUBI_SESSION_USER_KEY, proxyUser) - conf.getAll.foreach { case (k, v) => - buffer += "--conf" - buffer += s"$k=$v" - } - buffer.toArray + buffer ++= confKeyValues(conf.getAll) + + buffer } override def toString: String = { if (commands == null) { - super.toString() + super.toString } else { - Utils.redactCommandLineArgs(conf, commands).map { + redactConfValues( + Utils.redactCommandLineArgs(conf, commands), + Set(ENGINE_CHAT_GPT_API_KEY.key)).map { case arg if arg.startsWith("-") || arg == mainClass => s"\\\n\t$arg" - case arg if arg.contains(ENGINE_CHAT_GPT_API_KEY.key) => - s"${ENGINE_CHAT_GPT_API_KEY.key}=$REDACTION_REPLACEMENT_TEXT" case arg => arg }.mkString(" ") } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/flink/FlinkProcessBuilder.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/flink/FlinkProcessBuilder.scala index f43adfbc216..a1e8cdcd38b 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/flink/FlinkProcessBuilder.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/flink/FlinkProcessBuilder.scala @@ -20,8 +20,7 @@ package org.apache.kyuubi.engine.flink import java.io.{File, FilenameFilter} import java.nio.file.{Files, Paths} -import scala.collection.JavaConverters._ -import scala.collection.mutable.{ArrayBuffer, ListBuffer} +import scala.collection.mutable import com.google.common.annotations.VisibleForTesting @@ -32,6 +31,7 @@ import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_SESSION_USER_KEY import org.apache.kyuubi.engine.{ApplicationManagerInfo, KyuubiApplicationManager, ProcBuilder} import org.apache.kyuubi.engine.flink.FlinkProcessBuilder._ import org.apache.kyuubi.operation.log.OperationLog +import org.apache.kyuubi.util.command.CommandLineUtils._ /** * A builder to build flink sql engine progress. @@ -77,18 +77,18 @@ class FlinkProcessBuilder( ApplicationManagerInfo(clusterManager()) } - override protected val commands: Array[String] = { + override protected val commands: Iterable[String] = { KyuubiApplicationManager.tagApplication(engineRefId, shortName, clusterManager(), conf) // unset engine credentials because Flink doesn't support them at the moment conf.unset(KyuubiReservedKeys.KYUUBI_ENGINE_CREDENTIALS_KEY) // flink.execution.target are required in Kyuubi conf currently executionTarget match { case Some("yarn-application") => - val buffer = new ArrayBuffer[String]() + val buffer = new mutable.ListBuffer[String]() buffer += flinkExecutable buffer += "run-application" - val flinkExtraJars = new ListBuffer[String] + val flinkExtraJars = new mutable.ListBuffer[String] // locate flink sql jars val flinkSqlJars = Paths.get(flinkHome) .resolve("opt") @@ -134,19 +134,14 @@ class FlinkProcessBuilder( buffer += s"$mainClass" buffer += s"${mainResource.get}" - buffer += "--conf" - buffer += s"$KYUUBI_SESSION_USER_KEY=$proxyUser" - conf.getAll.foreach { case (k, v) => - if (k.startsWith("kyuubi.")) { - buffer += "--conf" - buffer += s"$k=$v" - } - } + buffer ++= confKeyValue(KYUUBI_SESSION_USER_KEY, proxyUser) + + buffer ++= confKeyValues(conf.getAll.filter(_._1.startsWith("kyuubi."))) - buffer.toArray + buffer case _ => - val buffer = new ArrayBuffer[String]() + val buffer = new mutable.ListBuffer[String]() buffer += executable val memory = conf.get(ENGINE_FLINK_MEMORY) @@ -156,8 +151,7 @@ class FlinkProcessBuilder( buffer += javaOptions.get } - buffer += "-cp" - val classpathEntries = new java.util.LinkedHashSet[String] + val classpathEntries = new mutable.LinkedHashSet[String] // flink engine runtime jar mainResource.foreach(classpathEntries.add) // flink sql jars @@ -201,17 +195,15 @@ class FlinkProcessBuilder( classpathEntries.add(s"$devHadoopJars${File.separator}*") } } - buffer += classpathEntries.asScala.mkString(File.pathSeparator) + buffer ++= genClasspathOption(classpathEntries) + buffer += mainClass - buffer += "--conf" - buffer += s"$KYUUBI_SESSION_USER_KEY=$proxyUser" + buffer ++= confKeyValue(KYUUBI_SESSION_USER_KEY, proxyUser) - conf.getAll.foreach { case (k, v) => - buffer += "--conf" - buffer += s"$k=$v" - } - buffer.toArray + buffer ++= confKeyValues(conf.getAll) + + buffer } } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/hive/HiveProcessBuilder.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/hive/HiveProcessBuilder.scala index 61fe55887ea..d8e4454b610 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/hive/HiveProcessBuilder.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/hive/HiveProcessBuilder.scala @@ -19,10 +19,8 @@ package org.apache.kyuubi.engine.hive import java.io.File import java.nio.file.{Files, Paths} -import java.util -import scala.collection.JavaConverters._ -import scala.collection.mutable.ArrayBuffer +import scala.collection.mutable import com.google.common.annotations.VisibleForTesting @@ -33,6 +31,7 @@ import org.apache.kyuubi.config.KyuubiReservedKeys.{KYUUBI_ENGINE_ID, KYUUBI_SES import org.apache.kyuubi.engine.{KyuubiApplicationManager, ProcBuilder} import org.apache.kyuubi.engine.hive.HiveProcessBuilder._ import org.apache.kyuubi.operation.log.OperationLog +import org.apache.kyuubi.util.command.CommandLineUtils._ class HiveProcessBuilder( override val proxyUser: String, @@ -52,9 +51,9 @@ class HiveProcessBuilder( override protected def mainClass: String = "org.apache.kyuubi.engine.hive.HiveSQLEngine" - override protected val commands: Array[String] = { + override protected val commands: Iterable[String] = { KyuubiApplicationManager.tagApplication(engineRefId, shortName, clusterManager(), conf) - val buffer = new ArrayBuffer[String]() + val buffer = new mutable.ListBuffer[String]() buffer += executable val memory = conf.get(ENGINE_HIVE_MEMORY) @@ -65,8 +64,7 @@ class HiveProcessBuilder( } // -Xmx5g // java options - buffer += "-cp" - val classpathEntries = new util.LinkedHashSet[String] + val classpathEntries = new mutable.LinkedHashSet[String] // hive engine runtime jar mainResource.foreach(classpathEntries.add) // classpath contains hive configurations, default to hive.home/conf @@ -101,22 +99,16 @@ class HiveProcessBuilder( classpathEntries.add(s"$devHadoopJars${File.separator}*") } } - buffer += classpathEntries.asScala.mkString(File.pathSeparator) + buffer ++= genClasspathOption(classpathEntries) buffer += mainClass - buffer += "--conf" - buffer += s"$KYUUBI_SESSION_USER_KEY=$proxyUser" - buffer += "--conf" - buffer += s"$KYUUBI_ENGINE_ID=$engineRefId" + buffer ++= confKeyValue(KYUUBI_SESSION_USER_KEY, proxyUser) + buffer ++= confKeyValue(KYUUBI_ENGINE_ID, engineRefId) - for ((k, v) <- conf.getAll) { - buffer += "--conf" - buffer += s"$k=$v" - } - buffer.toArray - } + buffer ++= confKeyValues(conf.getAll) - override def toString: String = Utils.redactCommandLineArgs(conf, commands).mkString("\n") + buffer + } override def shortName: String = "hive" } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/jdbc/JdbcProcessBuilder.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/jdbc/JdbcProcessBuilder.scala index 14ad53b20a8..2d08d510199 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/jdbc/JdbcProcessBuilder.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/jdbc/JdbcProcessBuilder.scala @@ -19,20 +19,18 @@ package org.apache.kyuubi.engine.jdbc import java.io.File import java.nio.file.Paths -import java.util -import scala.collection.JavaConverters._ -import scala.collection.mutable.ArrayBuffer +import scala.collection.mutable import com.google.common.annotations.VisibleForTesting import org.apache.kyuubi.{Logging, SCALA_COMPILE_VERSION, Utils} -import org.apache.kyuubi.Utils.REDACTION_REPLACEMENT_TEXT import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiConf.{ENGINE_JDBC_CONNECTION_PASSWORD, ENGINE_JDBC_CONNECTION_URL, ENGINE_JDBC_EXTRA_CLASSPATH, ENGINE_JDBC_JAVA_OPTIONS, ENGINE_JDBC_MEMORY} import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_SESSION_USER_KEY import org.apache.kyuubi.engine.ProcBuilder import org.apache.kyuubi.operation.log.OperationLog +import org.apache.kyuubi.util.command.CommandLineUtils._ class JdbcProcessBuilder( override val proxyUser: String, @@ -59,11 +57,11 @@ class JdbcProcessBuilder( */ override protected def mainClass: String = "org.apache.kyuubi.engine.jdbc.JdbcSQLEngine" - override protected val commands: Array[String] = { + override protected val commands: Iterable[String] = { require( conf.get(ENGINE_JDBC_CONNECTION_URL).nonEmpty, s"Jdbc server url can not be null! Please set ${ENGINE_JDBC_CONNECTION_URL.key}") - val buffer = new ArrayBuffer[String]() + val buffer = new mutable.ListBuffer[String]() buffer += executable val memory = conf.get(ENGINE_JDBC_MEMORY) @@ -72,8 +70,7 @@ class JdbcProcessBuilder( val javaOptions = conf.get(ENGINE_JDBC_JAVA_OPTIONS) javaOptions.foreach(buffer += _) - buffer += "-cp" - val classpathEntries = new util.LinkedHashSet[String] + val classpathEntries = new mutable.LinkedHashSet[String] mainResource.foreach(classpathEntries.add) mainResource.foreach { path => val parent = Paths.get(path).getParent @@ -91,28 +88,27 @@ class JdbcProcessBuilder( val extraCp = conf.get(ENGINE_JDBC_EXTRA_CLASSPATH) extraCp.foreach(classpathEntries.add) - buffer += classpathEntries.asScala.mkString(File.pathSeparator) + buffer ++= genClasspathOption(classpathEntries) + buffer += mainClass - buffer += "--conf" - buffer += s"$KYUUBI_SESSION_USER_KEY=$proxyUser" + buffer ++= confKeyValue(KYUUBI_SESSION_USER_KEY, proxyUser) - for ((k, v) <- conf.getAll) { - buffer += "--conf" - buffer += s"$k=$v" - } - buffer.toArray + buffer ++= confKeyValues(conf.getAll) + + buffer } override def toString: String = { if (commands == null) { - super.toString() + super.toString } else { - Utils.redactCommandLineArgs(conf, commands).map { - case arg if arg.contains(ENGINE_JDBC_CONNECTION_PASSWORD.key) => - s"${ENGINE_JDBC_CONNECTION_PASSWORD.key}=$REDACTION_REPLACEMENT_TEXT" + redactConfValues( + Utils.redactCommandLineArgs(conf, commands), + Set(ENGINE_JDBC_CONNECTION_PASSWORD.key)).map { + case arg if arg.startsWith("-") => s"\\\n\t$arg" case arg => arg - }.mkString("\n") + }.mkString(" ") } } } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkBatchProcessBuilder.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkBatchProcessBuilder.scala index ef159bb93ad..0167f95516d 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkBatchProcessBuilder.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkBatchProcessBuilder.scala @@ -17,11 +17,12 @@ package org.apache.kyuubi.engine.spark -import scala.collection.mutable.ArrayBuffer +import scala.collection.mutable import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.engine.KyuubiApplicationManager import org.apache.kyuubi.operation.log.OperationLog +import org.apache.kyuubi.util.command.CommandLineUtils._ class SparkBatchProcessBuilder( override val proxyUser: String, @@ -36,8 +37,8 @@ class SparkBatchProcessBuilder( extends SparkProcessBuilder(proxyUser, conf, batchId, extraEngineLog) { import SparkProcessBuilder._ - override protected lazy val commands: Array[String] = { - val buffer = new ArrayBuffer[String]() + override protected lazy val commands: Iterable[String] = { + val buffer = new mutable.ListBuffer[String]() buffer += executable Option(mainClass).foreach { cla => buffer += CLASS @@ -51,13 +52,11 @@ class SparkBatchProcessBuilder( // tag batch application KyuubiApplicationManager.tagApplication(batchId, "spark", clusterManager(), batchKyuubiConf) - (batchKyuubiConf.getAll ++ + val allConfigs = batchKyuubiConf.getAll ++ sparkAppNameConf() ++ engineLogPathConf() ++ - appendPodNameConf(batchConf)).foreach { case (k, v) => - buffer += CONF - buffer += s"${convertConfigKey(k)}=$v" - } + appendPodNameConf(batchConf) + buffer ++= confKeyValues(allConfigs) setupKerberos(buffer) @@ -66,7 +65,7 @@ class SparkBatchProcessBuilder( batchArgs.foreach { arg => buffer += arg } - buffer.toArray + buffer } private def sparkAppNameConf(): Map[String, String] = { diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala index afc96fb5ea0..972284f5c06 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala @@ -22,7 +22,6 @@ import java.nio.file.Paths import java.util.Locale import scala.collection.mutable -import scala.collection.mutable.ArrayBuffer import com.google.common.annotations.VisibleForTesting import org.apache.commons.lang3.StringUtils @@ -30,6 +29,7 @@ import org.apache.hadoop.security.UserGroupInformation import org.apache.kyuubi._ import org.apache.kyuubi.config.KyuubiConf +import org.apache.kyuubi.config.KyuubiConf._ import org.apache.kyuubi.engine.{ApplicationManagerInfo, KyuubiApplicationManager, ProcBuilder} import org.apache.kyuubi.engine.KubernetesApplicationOperation.{KUBERNETES_SERVICE_HOST, KUBERNETES_SERVICE_PORT} import org.apache.kyuubi.engine.ProcBuilder.KYUUBI_ENGINE_LOG_PATH_KEY @@ -37,6 +37,7 @@ import org.apache.kyuubi.ha.HighAvailabilityConf import org.apache.kyuubi.ha.client.AuthTypes import org.apache.kyuubi.operation.log.OperationLog import org.apache.kyuubi.util.{KubernetesUtils, Validator} +import org.apache.kyuubi.util.command.CommandLineUtils._ class SparkProcessBuilder( override val proxyUser: String, @@ -121,12 +122,12 @@ class SparkProcessBuilder( file.isDirectory && r.findFirstMatchIn(file.getName).isDefined } - override protected lazy val commands: Array[String] = { + override protected lazy val commands: Iterable[String] = { // complete `spark.master` if absent on kubernetes completeMasterUrl(conf) KyuubiApplicationManager.tagApplication(engineRefId, shortName, clusterManager(), conf) - val buffer = new ArrayBuffer[String]() + val buffer = new mutable.ListBuffer[String]() buffer += executable buffer += CLASS buffer += mainClass @@ -139,21 +140,21 @@ class SparkProcessBuilder( allConf = allConf ++ zkAuthKeytabFileConf(allConf) } // pass spark engine log path to spark conf - (allConf ++ engineLogPathConf ++ appendPodNameConf(allConf)).foreach { case (k, v) => - buffer += CONF - buffer += s"${convertConfigKey(k)}=$v" + (allConf ++ engineLogPathConf ++ extraYarnConf(allConf) ++ appendPodNameConf(allConf)).foreach { + case (k, v) => + buffer ++= confKeyValue(convertConfigKey(k), v) } setupKerberos(buffer) mainResource.foreach { r => buffer += r } - buffer.toArray + buffer } override protected def module: String = "kyuubi-spark-sql-engine" - protected def setupKerberos(buffer: ArrayBuffer[String]): Unit = { + protected def setupKerberos(buffer: mutable.Buffer[String]): Unit = { // if the keytab is specified, PROXY_USER is not supported tryKeytab() match { case None => @@ -229,17 +230,28 @@ class SparkProcessBuilder( kubernetesNamespace()) } + private val forciblyRewriteDriverPodName: Boolean = + conf.get(KUBERNETES_FORCIBLY_REWRITE_DRIVER_POD_NAME) + private val forciblyRewriteExecPodNamePrefix: Boolean = + conf.get(KUBERNETES_FORCIBLY_REWRITE_EXEC_POD_NAME_PREFIX) + def appendPodNameConf(conf: Map[String, String]): Map[String, String] = { val appName = conf.getOrElse(APP_KEY, "spark") val map = mutable.Map.newBuilder[String, String] if (clusterManager().exists(cm => cm.toLowerCase(Locale.ROOT).startsWith("k8s"))) { if (!conf.contains(KUBERNETES_EXECUTOR_POD_NAME_PREFIX)) { - val prefix = KubernetesUtils.generateExecutorPodNamePrefix(appName, engineRefId) + val prefix = KubernetesUtils.generateExecutorPodNamePrefix( + appName, + engineRefId, + forciblyRewriteExecPodNamePrefix) map += (KUBERNETES_EXECUTOR_POD_NAME_PREFIX -> prefix) } if (deployMode().exists(_.toLowerCase(Locale.ROOT) == "cluster")) { if (!conf.contains(KUBERNETES_DRIVER_POD_NAME)) { - val name = KubernetesUtils.generateDriverPodName(appName, engineRefId) + val name = KubernetesUtils.generateDriverPodName( + appName, + engineRefId, + forciblyRewriteDriverPodName) map += (KUBERNETES_DRIVER_POD_NAME -> name) } } @@ -247,6 +259,18 @@ class SparkProcessBuilder( map.result().toMap } + def extraYarnConf(conf: Map[String, String]): Map[String, String] = { + val map = mutable.Map.newBuilder[String, String] + if (clusterManager().exists(_.toLowerCase(Locale.ROOT).startsWith("yarn"))) { + if (!conf.contains(YARN_MAX_APP_ATTEMPTS_KEY)) { + // Set `spark.yarn.maxAppAttempts` to 1 to avoid invalid attempts. + // As mentioned in YARN-5617, it is improved after hadoop `2.8.2/2.9.0/3.0.0`. + map += (YARN_MAX_APP_ATTEMPTS_KEY -> "1") + } + } + map.result().toMap + } + override def clusterManager(): Option[String] = { conf.getOption(MASTER_KEY).orElse(defaultsConf.get(MASTER_KEY)) } @@ -274,13 +298,11 @@ class SparkProcessBuilder( override def validateConf: Unit = Validator.validateConf(conf) // For spark on kubernetes, spark pod using env SPARK_USER_NAME as current user - def setSparkUserName(userName: String, buffer: ArrayBuffer[String]): Unit = { + def setSparkUserName(userName: String, buffer: mutable.Buffer[String]): Unit = { clusterManager().foreach { cm => if (cm.toUpperCase.startsWith("K8S")) { - buffer += CONF - buffer += s"spark.kubernetes.driverEnv.SPARK_USER_NAME=$userName" - buffer += CONF - buffer += s"spark.executorEnv.SPARK_USER_NAME=$userName" + buffer ++= confKeyValue("spark.kubernetes.driverEnv.SPARK_USER_NAME", userName) + buffer ++= confKeyValue("spark.executorEnv.SPARK_USER_NAME", userName) } } } @@ -299,6 +321,7 @@ object SparkProcessBuilder { final val KUBERNETES_NAMESPACE_KEY = "spark.kubernetes.namespace" final val KUBERNETES_DRIVER_POD_NAME = "spark.kubernetes.driver.pod.name" final val KUBERNETES_EXECUTOR_POD_NAME_PREFIX = "spark.kubernetes.executor.podNamePrefix" + final val YARN_MAX_APP_ATTEMPTS_KEY = "spark.yarn.maxAppAttempts" final val INTERNAL_RESOURCE = "spark-internal" /** @@ -323,7 +346,6 @@ object SparkProcessBuilder { "spark.kubernetes.kerberos.krb5.path", "spark.kubernetes.file.upload.path") - final private[spark] val CONF = "--conf" final private[spark] val CLASS = "--class" final private[spark] val PROXY_USER = "--proxy-user" final private[spark] val SPARK_FILES = "spark.files" diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/trino/TrinoProcessBuilder.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/trino/TrinoProcessBuilder.scala index 041219dd0fb..96502fb9607 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/trino/TrinoProcessBuilder.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/trino/TrinoProcessBuilder.scala @@ -19,20 +19,18 @@ package org.apache.kyuubi.engine.trino import java.io.File import java.nio.file.Paths -import java.util -import scala.collection.JavaConverters._ -import scala.collection.mutable.ArrayBuffer +import scala.collection.mutable import com.google.common.annotations.VisibleForTesting import org.apache.kyuubi.{Logging, SCALA_COMPILE_VERSION, Utils} -import org.apache.kyuubi.Utils.REDACTION_REPLACEMENT_TEXT import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiConf._ import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_SESSION_USER_KEY import org.apache.kyuubi.engine.{KyuubiApplicationManager, ProcBuilder} import org.apache.kyuubi.operation.log.OperationLog +import org.apache.kyuubi.util.command.CommandLineUtils._ class TrinoProcessBuilder( override val proxyUser: String, @@ -50,7 +48,7 @@ class TrinoProcessBuilder( override protected def mainClass: String = "org.apache.kyuubi.engine.trino.TrinoSqlEngine" - override protected val commands: Array[String] = { + override protected val commands: Iterable[String] = { KyuubiApplicationManager.tagApplication(engineRefId, shortName, clusterManager(), conf) require( conf.get(ENGINE_TRINO_CONNECTION_URL).nonEmpty, @@ -58,7 +56,7 @@ class TrinoProcessBuilder( require( conf.get(ENGINE_TRINO_CONNECTION_CATALOG).nonEmpty, s"Trino default catalog can not be null! Please set ${ENGINE_TRINO_CONNECTION_CATALOG.key}") - val buffer = new ArrayBuffer[String]() + val buffer = new mutable.ListBuffer[String]() buffer += executable val memory = conf.get(ENGINE_TRINO_MEMORY) @@ -68,8 +66,7 @@ class TrinoProcessBuilder( buffer += javaOptions.get } - buffer += "-cp" - val classpathEntries = new util.LinkedHashSet[String] + val classpathEntries = new mutable.LinkedHashSet[String] // trino engine runtime jar mainResource.foreach(classpathEntries.add) @@ -90,38 +87,36 @@ class TrinoProcessBuilder( val extraCp = conf.get(ENGINE_TRINO_EXTRA_CLASSPATH) extraCp.foreach(classpathEntries.add) - buffer += classpathEntries.asScala.mkString(File.pathSeparator) + buffer ++= genClasspathOption(classpathEntries) + buffer += mainClass // TODO: How shall we deal with proxyUser, // user.name // kyuubi.session.user // or just leave it, because we can handle it at operation layer - buffer += "--conf" - buffer += s"$KYUUBI_SESSION_USER_KEY=$proxyUser" + buffer ++= confKeyValue(KYUUBI_SESSION_USER_KEY, proxyUser) - for ((k, v) <- conf.getAll) { - buffer += "--conf" - buffer += s"$k=$v" - } - buffer.toArray + buffer ++= confKeyValues(conf.getAll) + + buffer } override def shortName: String = "trino" override def toString: String = { if (commands == null) { - super.toString() + super.toString } else { - Utils.redactCommandLineArgs(conf, commands).map { - case arg if arg.contains(ENGINE_TRINO_CONNECTION_PASSWORD.key) => - s"${ENGINE_TRINO_CONNECTION_PASSWORD.key}=$REDACTION_REPLACEMENT_TEXT" - case arg if arg.contains(ENGINE_TRINO_CONNECTION_KEYSTORE_PASSWORD.key) => - s"${ENGINE_TRINO_CONNECTION_KEYSTORE_PASSWORD.key}=$REDACTION_REPLACEMENT_TEXT" - case arg if arg.contains(ENGINE_TRINO_CONNECTION_TRUSTSTORE_PASSWORD.key) => - s"${ENGINE_TRINO_CONNECTION_TRUSTSTORE_PASSWORD.key}=$REDACTION_REPLACEMENT_TEXT" + redactConfValues( + Utils.redactCommandLineArgs(conf, commands), + Set( + ENGINE_TRINO_CONNECTION_PASSWORD.key, + ENGINE_TRINO_CONNECTION_KEYSTORE_PASSWORD.key, + ENGINE_TRINO_CONNECTION_TRUSTSTORE_PASSWORD.key)).map { + case arg if arg.startsWith("-") => s"\\\n\t$arg" case arg => arg - }.mkString("\n") + }.mkString(" ") } } } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/BatchJobSubmission.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/BatchJobSubmission.scala index af6242ae1c7..276fe344600 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/BatchJobSubmission.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/BatchJobSubmission.scala @@ -17,14 +17,12 @@ package org.apache.kyuubi.operation -import java.io.IOException import java.nio.file.{Files, Paths} import java.util.Locale import java.util.concurrent.TimeUnit import com.codahale.metrics.MetricRegistry import com.google.common.annotations.VisibleForTesting -import org.apache.hive.service.rpc.thrift._ import org.apache.kyuubi.{KyuubiException, KyuubiSQLException, Utils} import org.apache.kyuubi.config.KyuubiConf @@ -37,6 +35,7 @@ import org.apache.kyuubi.operation.OperationState.{isTerminal, CANCELED, Operati import org.apache.kyuubi.operation.log.OperationLog import org.apache.kyuubi.server.metadata.api.Metadata import org.apache.kyuubi.session.KyuubiBatchSession +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ /** * The state of batch operation is special. In general, the lifecycle of state is: @@ -182,7 +181,7 @@ class BatchJobSubmission( OperationLog.removeCurrentOperationLog() } - override protected def runInternal(): Unit = session.handleSessionException { + override protected def runInternal(): Unit = { val asyncOperation: Runnable = () => { try { metadata match { @@ -336,14 +335,8 @@ class BatchJobSubmission( } } - override def close(): Unit = withLockRequired { + override def close(): Unit = withLockRequired(withClosingOperationLog { if (!isClosedOrCanceled) { - try { - getOperationLog.foreach(_.close()) - } catch { - case e: IOException => error(e.getMessage, e) - } - MetricsSystem.tracing(_.decCount(MetricRegistry.name(OPERATION_OPEN, opType))) // fast fail @@ -379,7 +372,7 @@ class BatchJobSubmission( } } } - } + }) override def cancel(): Unit = { throw new IllegalStateException("Use close instead.") diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/ExecuteStatement.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/ExecuteStatement.scala index 86bd3f8c84c..026d4be2ddb 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/ExecuteStatement.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/ExecuteStatement.scala @@ -20,8 +20,6 @@ package org.apache.kyuubi.operation import scala.collection.JavaConverters._ import com.codahale.metrics.MetricRegistry -import org.apache.hive.service.rpc.thrift.{TGetOperationStatusResp, TOperationState, TProtocolVersion} -import org.apache.hive.service.rpc.thrift.TOperationState._ import org.apache.kyuubi.KyuubiSQLException import org.apache.kyuubi.config.KyuubiConf @@ -30,6 +28,8 @@ import org.apache.kyuubi.metrics.{MetricsConstants, MetricsSystem} import org.apache.kyuubi.operation.FetchOrientation.FETCH_NEXT import org.apache.kyuubi.operation.log.OperationLog import org.apache.kyuubi.session.Session +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TGetOperationStatusResp, TOperationState, TProtocolVersion} +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TOperationState._ class ExecuteStatement( session: Session, diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/ExecutedCommandExec.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/ExecutedCommandExec.scala index 70b727e5e67..a59c2db7b77 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/ExecutedCommandExec.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/ExecutedCommandExec.scala @@ -17,11 +17,10 @@ package org.apache.kyuubi.operation -import org.apache.hive.service.rpc.thrift.{TFetchResultsResp, TGetResultSetMetadataResp} - import org.apache.kyuubi.operation.FetchOrientation.FetchOrientation import org.apache.kyuubi.operation.log.OperationLog import org.apache.kyuubi.session.KyuubiSessionImpl +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TFetchResultsResp, TGetResultSetMetadataResp} import org.apache.kyuubi.sql.plan.command.RunnableCommand import org.apache.kyuubi.sql.schema.SchemaHelper @@ -49,7 +48,7 @@ class ExecutedCommandExec( OperationLog.removeCurrentOperationLog() } - override protected def runInternal(): Unit = session.handleSessionException { + override protected def runInternal(): Unit = { val asyncOperation: Runnable = () => { setState(OperationState.RUNNING) try { diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/KyuubiApplicationOperation.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/KyuubiApplicationOperation.scala index 93929c59cce..1ef70f266f3 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/KyuubiApplicationOperation.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/KyuubiApplicationOperation.scala @@ -22,11 +22,10 @@ import java.util.{ArrayList => JArrayList} import scala.collection.JavaConverters._ -import org.apache.hive.service.rpc.thrift.{TColumn, TColumnDesc, TFetchResultsResp, TGetResultSetMetadataResp, TPrimitiveTypeEntry, TRow, TRowSet, TStringColumn, TTableSchema, TTypeDesc, TTypeEntry, TTypeId} - import org.apache.kyuubi.engine.ApplicationInfo import org.apache.kyuubi.operation.FetchOrientation.FetchOrientation import org.apache.kyuubi.session.Session +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TColumn, TColumnDesc, TFetchResultsResp, TGetResultSetMetadataResp, TPrimitiveTypeEntry, TRow, TRowSet, TStringColumn, TTableSchema, TTypeDesc, TTypeEntry, TTypeId} import org.apache.kyuubi.util.ThriftUtils abstract class KyuubiApplicationOperation(session: Session) extends KyuubiOperation(session) { diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/KyuubiOperation.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/KyuubiOperation.scala index 83e19cb6579..54a7c96029a 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/KyuubiOperation.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/KyuubiOperation.scala @@ -21,9 +21,6 @@ import java.io.IOException import com.codahale.metrics.MetricRegistry import org.apache.commons.lang3.StringUtils -import org.apache.hive.service.rpc.thrift._ -import org.apache.thrift.TException -import org.apache.thrift.transport.TTransportException import org.apache.kyuubi.{KyuubiSQLException, Utils} import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_OPERATION_HANDLE_KEY @@ -32,7 +29,10 @@ import org.apache.kyuubi.metrics.MetricsConstants.{OPERATION_FAIL, OPERATION_OPE import org.apache.kyuubi.metrics.MetricsSystem import org.apache.kyuubi.operation.FetchOrientation.FetchOrientation import org.apache.kyuubi.operation.OperationState.OperationState -import org.apache.kyuubi.session.{KyuubiSessionImpl, KyuubiSessionManager, Session} +import org.apache.kyuubi.session.{KyuubiSession, KyuubiSessionImpl, KyuubiSessionManager, Session} +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ +import org.apache.kyuubi.shaded.thrift.TException +import org.apache.kyuubi.shaded.thrift.transport.TTransportException import org.apache.kyuubi.util.ThriftUtils abstract class KyuubiOperation(session: Session) extends AbstractOperation(session) { @@ -100,6 +100,17 @@ abstract class KyuubiOperation(session: Session) extends AbstractOperation(sessi } } + override def run(): Unit = { + beforeRun() + try { + session.asInstanceOf[KyuubiSession].handleSessionException { + runInternal() + } + } finally { + afterRun() + } + } + override protected def beforeRun(): Unit = { setHasResultSet(true) setState(OperationState.RUNNING) diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/KyuubiOperationManager.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/KyuubiOperationManager.scala index 739c99cd78a..a248fe2a832 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/KyuubiOperationManager.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/KyuubiOperationManager.scala @@ -19,8 +19,6 @@ package org.apache.kyuubi.operation import java.util.concurrent.TimeUnit -import org.apache.hive.service.rpc.thrift.{TFetchResultsResp, TStatus, TStatusCode} - import org.apache.kyuubi.KyuubiSQLException import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiConf.OPERATION_QUERY_TIMEOUT @@ -29,6 +27,7 @@ import org.apache.kyuubi.metrics.MetricsSystem import org.apache.kyuubi.operation.FetchOrientation.FetchOrientation import org.apache.kyuubi.server.metadata.api.Metadata import org.apache.kyuubi.session.{KyuubiBatchSession, KyuubiSessionImpl, Session} +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TFetchResultsResp, TStatus, TStatusCode} import org.apache.kyuubi.sql.plan.command.RunnableCommand import org.apache.kyuubi.util.ThriftUtils diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/LaunchEngine.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/LaunchEngine.scala index 758dccb9d1b..cfbd2a0ca9a 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/LaunchEngine.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/LaunchEngine.scala @@ -53,7 +53,7 @@ class LaunchEngine(session: KyuubiSessionImpl, override val shouldRunAsync: Bool OperationLog.removeCurrentOperationLog() } - override protected def runInternal(): Unit = session.handleSessionException { + override protected def runInternal(): Unit = { val asyncOperation: Runnable = () => { setState(OperationState.RUNNING) try { diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/plugin/PluginLoader.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/plugin/PluginLoader.scala index da4c8e4a9d1..1bc80dc7da1 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/plugin/PluginLoader.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/plugin/PluginLoader.scala @@ -25,20 +25,22 @@ import org.apache.kyuubi.util.reflect.DynConstructors private[kyuubi] object PluginLoader { - def loadSessionConfAdvisor(conf: KyuubiConf): SessionConfAdvisor = { + def loadSessionConfAdvisor(conf: KyuubiConf): Seq[SessionConfAdvisor] = { val advisorClass = conf.get(KyuubiConf.SESSION_CONF_ADVISOR) if (advisorClass.isEmpty) { - return new DefaultSessionConfAdvisor() + return new DefaultSessionConfAdvisor() :: Nil } - - try { - DynConstructors.builder.impl(advisorClass.get).buildChecked[SessionConfAdvisor].newInstance() - } catch { - case _: ClassCastException => - throw new KyuubiException( - s"Class ${advisorClass.get} is not a child of '${classOf[SessionConfAdvisor].getName}'.") - case NonFatal(e) => - throw new IllegalArgumentException(s"Error while instantiating '${advisorClass.get}': ", e) + advisorClass.get.map { advisorClassName => + try { + DynConstructors.builder.impl(advisorClassName) + .buildChecked[SessionConfAdvisor].newInstance() + } catch { + case _: ClassCastException => + throw new KyuubiException( + s"Class $advisorClassName is not a child of '${classOf[SessionConfAdvisor].getName}'.") + case NonFatal(e) => + throw new IllegalArgumentException(s"Error while instantiating '$advisorClassName': ", e) + } } } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/BackendServiceMetric.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/BackendServiceMetric.scala index 9da4b78c036..4b4ab6f56c0 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/BackendServiceMetric.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/BackendServiceMetric.scala @@ -17,13 +17,12 @@ package org.apache.kyuubi.server -import org.apache.hive.service.rpc.thrift._ - import org.apache.kyuubi.metrics.{MetricsConstants, MetricsSystem} import org.apache.kyuubi.operation.{KyuubiOperation, OperationHandle, OperationStatus} import org.apache.kyuubi.operation.FetchOrientation.FetchOrientation import org.apache.kyuubi.service.BackendService import org.apache.kyuubi.session.SessionHandle +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ trait BackendServiceMetric extends BackendService { diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/KyuubiRestFrontendService.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/KyuubiRestFrontendService.scala index c5d44213c90..d738995130b 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/KyuubiRestFrontendService.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/KyuubiRestFrontendService.scala @@ -35,9 +35,10 @@ import org.apache.kyuubi.server.api.v1.ApiRootResource import org.apache.kyuubi.server.http.authentication.{AuthenticationFilter, KyuubiHttpAuthenticationFactory} import org.apache.kyuubi.server.ui.{JettyServer, JettyUtils} import org.apache.kyuubi.service.{AbstractFrontendService, Serverable, Service, ServiceUtils} -import org.apache.kyuubi.service.authentication.KyuubiAuthenticationFactory +import org.apache.kyuubi.service.authentication.{AuthMethods, AuthTypes, KyuubiAuthenticationFactory} import org.apache.kyuubi.session.{KyuubiSessionManager, SessionHandle} import org.apache.kyuubi.util.ThreadUtils +import org.apache.kyuubi.util.ThreadUtils.scheduleTolerableRunnableWithFixedDelay /** * A frontend service based on RESTful api via HTTP protocol. @@ -70,6 +71,17 @@ class KyuubiRestFrontendService(override val serverable: Serverable) private lazy val port: Int = conf.get(FRONTEND_REST_BIND_PORT) + private lazy val securityEnabled = { + val authTypes = conf.get(AUTHENTICATION_METHOD).map(AuthTypes.withName) + KyuubiAuthenticationFactory.getValidPasswordAuthMethod(authTypes) != AuthMethods.NONE + } + + private lazy val administrators: Set[String] = + conf.get(KyuubiConf.SERVER_ADMINISTRATORS) + Utils.currentUser + + def isAdministrator(userName: String): Boolean = + if (securityEnabled) administrators.contains(userName) else true + override def initialize(conf: KyuubiConf): Unit = synchronized { this.conf = conf server = JettyServer( @@ -131,7 +143,12 @@ class KyuubiRestFrontendService(override val serverable: Serverable) } } - batchChecker.scheduleWithFixedDelay(task, interval, interval, TimeUnit.MILLISECONDS) + scheduleTolerableRunnableWithFixedDelay( + batchChecker, + task, + interval, + interval, + TimeUnit.MILLISECONDS) } @VisibleForTesting @@ -208,9 +225,10 @@ class KyuubiRestFrontendService(override val serverable: Serverable) Option(AuthenticationFilter.getUserName).filter(_.nonEmpty).getOrElse("anonymous")) } - def getSessionUser(hs2ProxyUser: String): String = { - val sessionConf = Option(hs2ProxyUser).filter(_.nonEmpty).map(proxyUser => - Map(KyuubiAuthenticationFactory.HS2_PROXY_USER -> proxyUser)).getOrElse(Map()) + def getSessionUser(proxyUser: String): String = { + // Internally, we use kyuubi.session.proxy.user to unify the key as proxyUser + val sessionConf = Option(proxyUser).filter(_.nonEmpty).map(proxyUser => + Map(PROXY_USER.key -> proxyUser)).getOrElse(Map()) getSessionUser(sessionConf) } @@ -239,12 +257,13 @@ class KyuubiRestFrontendService(override val serverable: Serverable) if (sessionConf == null) { realUser } else { - sessionConf.get(KyuubiAuthenticationFactory.HS2_PROXY_USER).map { proxyUser => - if (!getConf.get(KyuubiConf.SERVER_ADMINISTRATORS).contains(realUser)) { - KyuubiAuthenticationFactory.verifyProxyAccess(realUser, proxyUser, ipAddress, hadoopConf) - } - proxyUser - }.getOrElse(realUser) + val proxyUser = sessionConf.getOrElse( + PROXY_USER.key, + sessionConf.getOrElse(KyuubiAuthenticationFactory.HS2_PROXY_USER, realUser)) + if (!proxyUser.equals(realUser) && !isAdministrator(realUser)) { + KyuubiAuthenticationFactory.verifyProxyAccess(realUser, proxyUser, ipAddress, hadoopConf) + } + proxyUser } } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/KyuubiTBinaryFrontendService.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/KyuubiTBinaryFrontendService.scala index ae388a7c42a..b46c1fec4e2 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/KyuubiTBinaryFrontendService.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/KyuubiTBinaryFrontendService.scala @@ -20,9 +20,6 @@ package org.apache.kyuubi.server import java.util.Base64 import org.apache.hadoop.conf.Configuration -import org.apache.hive.service.rpc.thrift._ -import org.apache.thrift.protocol.TProtocol -import org.apache.thrift.server.ServerContext import org.apache.kyuubi.KyuubiSQLException import org.apache.kyuubi.cli.Handle @@ -34,6 +31,9 @@ import org.apache.kyuubi.metrics.MetricsSystem import org.apache.kyuubi.service.{Serverable, Service, TBinaryFrontendService} import org.apache.kyuubi.service.TFrontendService.{CURRENT_SERVER_CONTEXT, FeServiceServerContext, OK_STATUS} import org.apache.kyuubi.session.KyuubiSessionImpl +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ +import org.apache.kyuubi.shaded.thrift.protocol.TProtocol +import org.apache.kyuubi.shaded.thrift.server.ServerContext final class KyuubiTBinaryFrontendService( override val serverable: Serverable) diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/KyuubiTHttpFrontendService.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/KyuubiTHttpFrontendService.scala index 79351118c50..ca8939d69a3 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/KyuubiTHttpFrontendService.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/KyuubiTHttpFrontendService.scala @@ -24,8 +24,6 @@ import javax.servlet.{ServletContextEvent, ServletContextListener} import org.apache.commons.lang3.SystemUtils import org.apache.hadoop.conf.Configuration -import org.apache.hive.service.rpc.thrift.{TCLIService, TOpenSessionReq} -import org.apache.thrift.protocol.TBinaryProtocol import org.eclipse.jetty.http.HttpMethod import org.eclipse.jetty.security.{ConstraintMapping, ConstraintSecurityHandler} import org.eclipse.jetty.server._ @@ -43,6 +41,9 @@ import org.apache.kyuubi.metrics.MetricsSystem import org.apache.kyuubi.server.http.ThriftHttpServlet import org.apache.kyuubi.server.http.util.SessionManager import org.apache.kyuubi.service.{Serverable, Service, ServiceUtils, TFrontendService} +import org.apache.kyuubi.service.authentication.KyuubiAuthenticationFactory +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TCLIService, TOpenSessionReq} +import org.apache.kyuubi.shaded.thrift.protocol.TBinaryProtocol import org.apache.kyuubi.util.NamedThreadFactory /** @@ -74,9 +75,9 @@ final class KyuubiTHttpFrontendService( */ override def initialize(conf: KyuubiConf): Unit = synchronized { this.conf = conf - if (authFactory.isKerberosEnabled) { + if (authFactory.kerberosEnabled) { try { - authFactory.getValidPasswordAuthMethod + KyuubiAuthenticationFactory.getValidPasswordAuthMethod(authFactory.authTypes) } catch { case _: IllegalArgumentException => throw new AuthenticationException("Kerberos is not supported for thrift http mode") diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/PeriodicGCService.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/PeriodicGCService.scala index a4035b689d5..4ec6f4c127e 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/PeriodicGCService.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/PeriodicGCService.scala @@ -22,6 +22,7 @@ import java.util.concurrent.TimeUnit import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.service.AbstractService import org.apache.kyuubi.util.ThreadUtils +import org.apache.kyuubi.util.ThreadUtils.scheduleTolerableRunnableWithFixedDelay class PeriodicGCService(name: String) extends AbstractService(name) { def this() = this(classOf[PeriodicGCService].getSimpleName) @@ -40,6 +41,11 @@ class PeriodicGCService(name: String) extends AbstractService(name) { private def startGcTrigger(): Unit = { val interval = conf.get(KyuubiConf.SERVER_PERIODIC_GC_INTERVAL) - gcTrigger.scheduleWithFixedDelay(() => System.gc(), interval, interval, TimeUnit.MILLISECONDS) + scheduleTolerableRunnableWithFixedDelay( + gcTrigger, + () => System.gc(), + interval, + interval, + TimeUnit.MILLISECONDS) } } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/ApiUtils.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/ApiUtils.scala index 5aaf4d7780f..49442160878 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/ApiUtils.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/ApiUtils.scala @@ -20,18 +20,41 @@ package org.apache.kyuubi.server.api import scala.collection.JavaConverters._ import org.apache.kyuubi.{Logging, Utils} -import org.apache.kyuubi.client.api.v1.dto.{OperationData, ServerData, SessionData} +import org.apache.kyuubi.client.api.v1.dto +import org.apache.kyuubi.client.api.v1.dto.{OperationData, OperationProgress, ServerData, SessionData} import org.apache.kyuubi.events.KyuubiOperationEvent import org.apache.kyuubi.ha.client.ServiceNodeInfo import org.apache.kyuubi.operation.KyuubiOperation import org.apache.kyuubi.session.KyuubiSession object ApiUtils extends Logging { + def sessionEvent(session: KyuubiSession): dto.KyuubiSessionEvent = { + session.getSessionEvent.map(event => + dto.KyuubiSessionEvent.builder() + .sessionId(event.sessionId) + .clientVersion(event.clientVersion) + .sessionType(event.sessionType) + .sessionName(event.sessionName) + .user(event.user) + .clientIp(event.clientIP) + .serverIp(event.serverIP) + .conf(event.conf.asJava) + .remoteSessionId(event.remoteSessionId) + .engineId(event.engineId) + .eventTime(event.eventTime) + .openedTime(event.openedTime) + .startTime(event.startTime) + .endTime(event.endTime) + .totalOperations(event.totalOperations) + .exception(event.exception.orNull) + .build()).orNull + } def sessionData(session: KyuubiSession): SessionData = { val sessionEvent = session.getSessionEvent new SessionData( session.handle.identifier.toString, + sessionEvent.map(_.remoteSessionId).getOrElse(""), session.user, session.ipAddress, session.conf.asJava, @@ -44,10 +67,45 @@ object ApiUtils extends Logging { sessionEvent.map(_.engineId).getOrElse("")) } + private def operationProgress(operation: KyuubiOperation): OperationProgress = { + Option(operation.getOperationJobProgress).map { jobProgress => + new OperationProgress( + jobProgress.getHeaderNames, + jobProgress.getRows, + jobProgress.getProgressedPercentage, + jobProgress.getStatus.toString, + jobProgress.getFooterSummary, + jobProgress.getStartTime) + }.orNull + } + + def operationEvent(operation: KyuubiOperation): dto.KyuubiOperationEvent = { + val opEvent = KyuubiOperationEvent(operation) + dto.KyuubiOperationEvent.builder() + .statementId(opEvent.statementId) + .remoteId(opEvent.remoteId) + .statement(opEvent.statement) + .shouldRunAsync(opEvent.shouldRunAsync) + .state(opEvent.state) + .eventTime(opEvent.eventTime) + .createTime(opEvent.createTime) + .startTime(opEvent.startTime) + .completeTime(opEvent.completeTime) + .exception(opEvent.exception.orNull) + .sessionId(opEvent.sessionId) + .sessionUser(opEvent.sessionUser) + .sessionType(opEvent.sessionType) + .kyuubiInstance(opEvent.kyuubiInstance) + .metrics(opEvent.metrics.asJava) + .progress(operationProgress(operation)) + .build() + } + def operationData(operation: KyuubiOperation): OperationData = { val opEvent = KyuubiOperationEvent(operation) new OperationData( opEvent.statementId, + opEvent.remoteId, opEvent.statement, opEvent.state, opEvent.createTime, @@ -58,7 +116,8 @@ object ApiUtils extends Logging { opEvent.sessionUser, opEvent.sessionType, operation.getSession.asInstanceOf[KyuubiSession].connectionUrl, - operation.metrics.asJava) + operation.metrics.asJava, + operationProgress(operation)) } def serverData(nodeInfo: ServiceNodeInfo): ServerData = { diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/AdminResource.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/AdminResource.scala index 3c6f2a19782..ca35a1b6b12 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/AdminResource.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/AdminResource.scala @@ -29,7 +29,7 @@ import io.swagger.v3.oas.annotations.responses.ApiResponse import io.swagger.v3.oas.annotations.tags.Tag import org.apache.commons.lang3.StringUtils -import org.apache.kyuubi.{KYUUBI_VERSION, Logging, Utils} +import org.apache.kyuubi.{KYUUBI_VERSION, Logging} import org.apache.kyuubi.client.api.v1.dto._ import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiConf._ @@ -40,13 +40,10 @@ import org.apache.kyuubi.operation.{KyuubiOperation, OperationHandle} import org.apache.kyuubi.server.KyuubiServer import org.apache.kyuubi.server.api.{ApiRequestContext, ApiUtils} import org.apache.kyuubi.session.{KyuubiSession, SessionHandle} -import org.apache.kyuubi.shaded.zookeeper.KeeperException.NoNodeException @Tag(name = "Admin") @Produces(Array(MediaType.APPLICATION_JSON)) private[v1] class AdminResource extends ApiRequestContext with Logging { - private lazy val administrators = fe.getConf.get(KyuubiConf.SERVER_ADMINISTRATORS) + - Utils.currentUser @ApiResponse( responseCode = "200", @@ -59,7 +56,7 @@ private[v1] class AdminResource extends ApiRequestContext with Logging { val userName = fe.getSessionUser(Map.empty[String, String]) val ipAddress = fe.getIpAddress info(s"Receive refresh Kyuubi server hadoop conf request from $userName/$ipAddress") - if (!isAdministrator(userName)) { + if (!fe.isAdministrator(userName)) { throw new NotAllowedException( s"$userName is not allowed to refresh the Kyuubi server hadoop conf") } @@ -78,7 +75,7 @@ private[v1] class AdminResource extends ApiRequestContext with Logging { val userName = fe.getSessionUser(Map.empty[String, String]) val ipAddress = fe.getIpAddress info(s"Receive refresh user defaults conf request from $userName/$ipAddress") - if (!isAdministrator(userName)) { + if (!fe.isAdministrator(userName)) { throw new NotAllowedException( s"$userName is not allowed to refresh the user defaults conf") } @@ -97,7 +94,7 @@ private[v1] class AdminResource extends ApiRequestContext with Logging { val userName = fe.getSessionUser(Map.empty[String, String]) val ipAddress = fe.getIpAddress info(s"Receive refresh kubernetes conf request from $userName/$ipAddress") - if (!isAdministrator(userName)) { + if (!fe.isAdministrator(userName)) { throw new NotAllowedException( s"$userName is not allowed to refresh the kubernetes conf") } @@ -116,7 +113,7 @@ private[v1] class AdminResource extends ApiRequestContext with Logging { val userName = fe.getSessionUser(Map.empty[String, String]) val ipAddress = fe.getIpAddress info(s"Receive refresh unlimited users request from $userName/$ipAddress") - if (!isAdministrator(userName)) { + if (!fe.isAdministrator(userName)) { throw new NotAllowedException( s"$userName is not allowed to refresh the unlimited users") } @@ -135,7 +132,7 @@ private[v1] class AdminResource extends ApiRequestContext with Logging { val userName = fe.getSessionUser(Map.empty[String, String]) val ipAddress = fe.getIpAddress info(s"Receive refresh deny users request from $userName/$ipAddress") - if (!isAdministrator(userName)) { + if (!fe.isAdministrator(userName)) { throw new NotAllowedException( s"$userName is not allowed to refresh the deny users") } @@ -156,7 +153,7 @@ private[v1] class AdminResource extends ApiRequestContext with Logging { val userName = fe.getSessionUser(Map.empty[String, String]) val ipAddress = fe.getIpAddress info(s"Received listing all live sessions request from $userName/$ipAddress") - if (!isAdministrator(userName)) { + if (!fe.isAdministrator(userName)) { throw new NotAllowedException( s"$userName is not allowed to list all live sessions") } @@ -178,7 +175,7 @@ private[v1] class AdminResource extends ApiRequestContext with Logging { val userName = fe.getSessionUser(Map.empty[String, String]) val ipAddress = fe.getIpAddress info(s"Received closing a session request from $userName/$ipAddress") - if (!isAdministrator(userName)) { + if (!fe.isAdministrator(userName)) { throw new NotAllowedException( s"$userName is not allowed to close the session $sessionHandleStr") } @@ -202,7 +199,7 @@ private[v1] class AdminResource extends ApiRequestContext with Logging { val userName = fe.getSessionUser(Map.empty[String, String]) val ipAddress = fe.getIpAddress info(s"Received listing all of the active operations request from $userName/$ipAddress") - if (!isAdministrator(userName)) { + if (!fe.isAdministrator(userName)) { throw new NotAllowedException( s"$userName is not allowed to list all the operations") } @@ -229,7 +226,7 @@ private[v1] class AdminResource extends ApiRequestContext with Logging { val userName = fe.getSessionUser(Map.empty[String, String]) val ipAddress = fe.getIpAddress info(s"Received close an operation request from $userName/$ipAddress") - if (!isAdministrator(userName)) { + if (!fe.isAdministrator(userName)) { throw new NotAllowedException( s"$userName is not allowed to close the operation $operationHandleStr") } @@ -248,14 +245,16 @@ private[v1] class AdminResource extends ApiRequestContext with Logging { @QueryParam("type") engineType: String, @QueryParam("sharelevel") shareLevel: String, @QueryParam("subdomain") subdomain: String, + @QueryParam("proxyUser") kyuubiProxyUser: String, @QueryParam("hive.server2.proxy.user") hs2ProxyUser: String): Response = { - val userName = if (isAdministrator(fe.getRealUser())) { - Option(hs2ProxyUser).getOrElse(fe.getRealUser()) + val activeProxyUser = Option(kyuubiProxyUser).getOrElse(hs2ProxyUser) + val userName = if (fe.isAdministrator(fe.getRealUser())) { + Option(activeProxyUser).getOrElse(fe.getRealUser()) } else { - fe.getSessionUser(hs2ProxyUser) + fe.getSessionUser(activeProxyUser) } - val engine = getEngine(userName, engineType, shareLevel, subdomain, "default") - val engineSpace = getEngineSpace(engine) + val engine = normalizeEngineInfo(userName, engineType, shareLevel, subdomain, "default") + val engineSpace = calculateEngineSpace(engine) withDiscoveryClient(fe.getConf) { discoveryClient => val engineNodes = discoveryClient.getChildren(engineSpace) @@ -286,86 +285,32 @@ private[v1] class AdminResource extends ApiRequestContext with Logging { @QueryParam("type") engineType: String, @QueryParam("sharelevel") shareLevel: String, @QueryParam("subdomain") subdomain: String, - @QueryParam("hive.server2.proxy.user") hs2ProxyUser: String, - @QueryParam("all") @DefaultValue("false") all: String): Seq[Engine] = { - if (all.toBoolean) { - val userName = fe.getSessionUser(Map.empty[String, String]) - val ipAddress = fe.getIpAddress - info(s"Received list all kyuubi engine request from $userName/$ipAddress") - if (!isAdministrator(userName)) { - throw new NotAllowedException( - s"$userName is not allowed to list all kyuubi engine") - } - val engines = ListBuffer[Engine]() - val engineSpace = fe.getConf.get(HA_NAMESPACE) - val finalShareLevel = Option(shareLevel).getOrElse(fe.getConf.get(ENGINE_SHARE_LEVEL)) - val finalEngineType = Option(engineType).getOrElse(fe.getConf.get(ENGINE_TYPE)) - withDiscoveryClient(fe.getConf) { discoveryClient => - val commonParent = s"/${engineSpace}_${KYUUBI_VERSION}_${finalShareLevel}_$finalEngineType" - info(s"Listing engine nodes for $commonParent") - try { - discoveryClient.getChildren(commonParent).map { - user => - val engine = getEngine(user, finalEngineType, finalShareLevel, "", "") - val engineSpace = getEngineSpace(engine) - discoveryClient.getChildren(engineSpace).map { child => - info(s"Listing engine nodes for $engineSpace/$child") - engines ++= discoveryClient.getServiceNodesInfo(s"$engineSpace/$child").map(node => - new Engine( - engine.getVersion, - engine.getUser, - engine.getEngineType, - engine.getSharelevel, - node.namespace.split("/").last, - node.instance, - node.namespace, - node.attributes.asJava)) - } - } - } catch { - case nne: NoNodeException => - error( - s"No such engine for engine type: $finalEngineType," + - s" share level: $finalShareLevel", - nne) - throw new NotFoundException( - s"No such engine for engine type: $finalEngineType, share level: $finalShareLevel") - } - } - return engines.toSeq - } - val userName = if (isAdministrator(fe.getRealUser())) { - Option(hs2ProxyUser).getOrElse(fe.getRealUser()) + @QueryParam("proxyUser") kyuubiProxyUser: String, + @QueryParam("hive.server2.proxy.user") hs2ProxyUser: String): Seq[Engine] = { + val activeProxyUser = Option(kyuubiProxyUser).getOrElse(hs2ProxyUser) + val userName = if (fe.isAdministrator(fe.getRealUser())) { + Option(activeProxyUser).getOrElse(fe.getRealUser()) } else { - fe.getSessionUser(hs2ProxyUser) + fe.getSessionUser(activeProxyUser) } - val engine = getEngine(userName, engineType, shareLevel, subdomain, "") - val engineSpace = getEngineSpace(engine) + val engine = normalizeEngineInfo(userName, engineType, shareLevel, subdomain, "") + val engineSpace = calculateEngineSpace(engine) val engineNodes = ListBuffer[ServiceNodeInfo]() - Option(subdomain).filter(_.nonEmpty) match { - case Some(_) => - withDiscoveryClient(fe.getConf) { discoveryClient => - info(s"Listing engine nodes for $engineSpace") + withDiscoveryClient(fe.getConf) { discoveryClient => + Option(subdomain).filter(_.nonEmpty) match { + case Some(_) => + info(s"Listing engine nodes under $engineSpace") engineNodes ++= discoveryClient.getServiceNodesInfo(engineSpace) - } - case None => - withDiscoveryClient(fe.getConf) { discoveryClient => - try { - discoveryClient.getChildren(engineSpace).map { child => - info(s"Listing engine nodes for $engineSpace/$child") - engineNodes ++= discoveryClient.getServiceNodesInfo(s"$engineSpace/$child") - } - } catch { - case nne: NoNodeException => - error( - s"No such engine for user: $userName, " + - s"engine type: $engineType, share level: $shareLevel, subdomain: $subdomain", - nne) - throw new NotFoundException(s"No such engine for user: $userName, " + - s"engine type: $engineType, share level: $shareLevel, subdomain: $subdomain") + case None if discoveryClient.pathNonExists(engineSpace) => + warn(s"Path $engineSpace does not exist. user: $userName, engine type: $engineType, " + + s"share level: $shareLevel, subdomain: $subdomain") + case None => + discoveryClient.getChildren(engineSpace).map { child => + info(s"Listing engine nodes under $engineSpace/$child") + engineNodes ++= discoveryClient.getServiceNodesInfo(s"$engineSpace/$child") } - } + } } engineNodes.map(node => new Engine( @@ -394,7 +339,7 @@ private[v1] class AdminResource extends ApiRequestContext with Logging { val userName = fe.getSessionUser(Map.empty[String, String]) val ipAddress = fe.getIpAddress info(s"Received list all live kyuubi servers request from $userName/$ipAddress") - if (!isAdministrator(userName)) { + if (!fe.isAdministrator(userName)) { throw new NotAllowedException( s"$userName is not allowed to list all live kyuubi servers") } @@ -409,7 +354,7 @@ private[v1] class AdminResource extends ApiRequestContext with Logging { servers.toSeq } - private def getEngine( + private def normalizeEngineInfo( userName: String, engineType: String, shareLevel: String, @@ -422,6 +367,7 @@ private[v1] class AdminResource extends ApiRequestContext with Logging { .foreach(_ => clonedConf.set(ENGINE_SHARE_LEVEL_SUBDOMAIN, Option(subdomain))) Option(shareLevel).filter(_.nonEmpty).foreach(clonedConf.set(ENGINE_SHARE_LEVEL, _)) + val serverSpace = clonedConf.get(HA_NAMESPACE) val normalizedEngineType = clonedConf.get(ENGINE_TYPE) val engineSubdomain = clonedConf.get(ENGINE_SHARE_LEVEL_SUBDOMAIN).getOrElse(subdomainDefault) val engineShareLevel = clonedConf.get(ENGINE_SHARE_LEVEL) @@ -433,22 +379,20 @@ private[v1] class AdminResource extends ApiRequestContext with Logging { engineShareLevel, engineSubdomain, null, - null, + serverSpace, Collections.emptyMap()) } - private def getEngineSpace(engine: Engine): String = { - val serverSpace = fe.getConf.get(HA_NAMESPACE) - val appUser = engine.getSharelevel match { + private def calculateEngineSpace(engine: Engine): String = { + val userOrGroup = engine.getSharelevel match { case "GROUP" => fe.sessionManager.groupProvider.primaryGroup(engine.getUser, fe.getConf.getAll.asJava) case _ => engine.getUser } - DiscoveryPaths.makePath( - s"${serverSpace}_${engine.getVersion}_${engine.getSharelevel}_${engine.getEngineType}", - appUser, - engine.getSubdomain) + val engineSpace = + s"${engine.getNamespace}_${engine.getVersion}_${engine.getSharelevel}_${engine.getEngineType}" + DiscoveryPaths.makePath(engineSpace, userOrGroup, engine.getSubdomain) } @ApiResponse( @@ -466,7 +410,7 @@ private[v1] class AdminResource extends ApiRequestContext with Logging { val userName = fe.getSessionUser(Map.empty[String, String]) val ipAddress = fe.getIpAddress info(s"Received counting batches request from $userName/$ipAddress") - if (!isAdministrator(userName)) { + if (!fe.isAdministrator(userName)) { throw new NotAllowedException( s"$userName is not allowed to count the batches") } @@ -475,8 +419,4 @@ private[v1] class AdminResource extends ApiRequestContext with Logging { .getOrElse(0) new Count(batchCount) } - - private def isAdministrator(userName: String): Boolean = { - administrators.contains(userName) - } } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/BatchesResource.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/BatchesResource.scala index c0a3b0ed905..4e3f8d20b03 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/BatchesResource.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/BatchesResource.scala @@ -23,7 +23,6 @@ import java.util.{Collections, Locale, UUID} import java.util.concurrent.ConcurrentHashMap import javax.ws.rs._ import javax.ws.rs.core.MediaType -import javax.ws.rs.core.Response.Status import scala.collection.JavaConverters._ import scala.util.{Failure, Success, Try} @@ -58,6 +57,8 @@ private[v1] class BatchesResource extends ApiRequestContext with Logging { fe.getConf.get(BATCH_INTERNAL_REST_CLIENT_SOCKET_TIMEOUT).toInt private lazy val internalConnectTimeout = fe.getConf.get(BATCH_INTERNAL_REST_CLIENT_CONNECT_TIMEOUT).toInt + private lazy val internalSecurityEnabled = + fe.getConf.get(ENGINE_SECURITY_ENABLED) private def batchV2Enabled(reqConf: Map[String, String]): Boolean = { KyuubiServer.kyuubiServer.getConf.get(BATCH_SUBMITTER_ENABLED) && @@ -67,7 +68,12 @@ private[v1] class BatchesResource extends ApiRequestContext with Logging { private def getInternalRestClient(kyuubiInstance: String): InternalRestClient = { internalRestClients.computeIfAbsent( kyuubiInstance, - k => new InternalRestClient(k, internalSocketTimeout, internalConnectTimeout)) + kyuubiInstance => + new InternalRestClient( + kyuubiInstance, + internalSocketTimeout, + internalConnectTimeout, + internalSecurityEnabled)) } private def sessionManager = fe.be.sessionManager.asInstanceOf[KyuubiSessionManager] @@ -212,6 +218,8 @@ private[v1] class BatchesResource extends ApiRequestContext with Logging { } request.setBatchType(request.getBatchType.toUpperCase(Locale.ROOT)) + val userName = fe.getSessionUser(request.getConf.asScala.toMap) + val ipAddress = fe.getIpAddress val userProvidedBatchId = request.getConf.asScala.get(KYUUBI_BATCH_ID_KEY) userProvidedBatchId.foreach { batchId => try UUID.fromString(batchId) @@ -227,8 +235,6 @@ private[v1] class BatchesResource extends ApiRequestContext with Logging { case Some(batch) => markDuplicated(batch) case None => - val userName = fe.getSessionUser(request.getConf.asScala.toMap) - val ipAddress = fe.getIpAddress val batchId = userProvidedBatchId.getOrElse(UUID.randomUUID().toString) request.setConf( (request.getConf.asScala ++ Map( @@ -441,18 +447,7 @@ private[v1] class BatchesResource extends ApiRequestContext with Logging { description = "close and cancel a batch session") @DELETE @Path("{batchId}") - def closeBatchSession( - @PathParam("batchId") batchId: String, - @QueryParam("hive.server2.proxy.user") hs2ProxyUser: String): CloseBatchResponse = { - - def checkPermission(operator: String, owner: String): Unit = { - if (operator != owner) { - throw new WebApplicationException( - s"$operator is not allowed to close the session belong to $owner", - Status.METHOD_NOT_ALLOWED) - } - } - + def closeBatchSession(@PathParam("batchId") batchId: String): CloseBatchResponse = { def forceKill( appMgrInfo: ApplicationManagerInfo, batchId: String, @@ -465,16 +460,14 @@ private[v1] class BatchesResource extends ApiRequestContext with Logging { } val sessionHandle = formatSessionHandle(batchId) - val userName = fe.getSessionUser(hs2ProxyUser) - sessionManager.getBatchSession(sessionHandle).map { batchSession => - checkPermission(userName, batchSession.user) + fe.getSessionUser(batchSession.user) sessionManager.closeSession(batchSession.handle) val (killed, msg) = batchSession.batchJobSubmissionOp.getKillMessage new CloseBatchResponse(killed, msg) }.getOrElse { sessionManager.getBatchMetadata(batchId).map { metadata => - checkPermission(userName, metadata.username) + fe.getSessionUser(metadata.username) if (OperationState.isTerminal(OperationState.withName(metadata.state))) { new CloseBatchResponse(false, s"The batch[$metadata] has been terminated.") } else if (batchV2Enabled(metadata.requestConf) && metadata.state == "INITIALIZED" && @@ -485,21 +478,21 @@ private[v1] class BatchesResource extends ApiRequestContext with Logging { } else if (batchV2Enabled(metadata.requestConf) && metadata.kyuubiInstance == null) { // code goes here indicates metadata is outdated, recursively calls itself to refresh // the metadata - closeBatchSession(batchId, hs2ProxyUser) + closeBatchSession(batchId) } else if (metadata.kyuubiInstance != fe.connectionUrl) { info(s"Redirecting delete batch[$batchId] to ${metadata.kyuubiInstance}") val internalRestClient = getInternalRestClient(metadata.kyuubiInstance) try { - internalRestClient.deleteBatch(userName, batchId) + internalRestClient.deleteBatch(metadata.username, batchId) } catch { case e: KyuubiRestException => error(s"Error redirecting delete batch[$batchId] to ${metadata.kyuubiInstance}", e) - val (killed, msg) = forceKill(metadata.appMgrInfo, batchId, userName) + val (killed, msg) = forceKill(metadata.appMgrInfo, batchId, metadata.username) new CloseBatchResponse(killed, if (killed) msg else Utils.stringifyException(e)) } } else { // should not happen, but handle this for safe warn(s"Something wrong on deleting batch[$batchId], try forcibly killing application") - val (killed, msg) = forceKill(metadata.appMgrInfo, batchId, userName) + val (killed, msg) = forceKill(metadata.appMgrInfo, batchId, metadata.username) new CloseBatchResponse(killed, msg) } }.getOrElse { diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/InternalRestClient.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/InternalRestClient.scala index 8b8a6151303..59d14dacd1e 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/InternalRestClient.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/InternalRestClient.scala @@ -33,10 +33,16 @@ import org.apache.kyuubi.service.authentication.InternalSecurityAccessor * @param socketTimeout the socket timeout for http client. * @param connectTimeout the connect timeout for http client. */ -class InternalRestClient(kyuubiInstance: String, socketTimeout: Int, connectTimeout: Int) { - require( - InternalSecurityAccessor.get() != null, - "Internal secure access across Kyuubi instances is not enabled") +class InternalRestClient( + kyuubiInstance: String, + socketTimeout: Int, + connectTimeout: Int, + securityEnabled: Boolean) { + if (securityEnabled) { + require( + InternalSecurityAccessor.get() != null, + "Internal secure access across Kyuubi instances is not enabled") + } private val internalBatchRestApi = new BatchRestApi(initKyuubiRestClient()) @@ -54,17 +60,19 @@ class InternalRestClient(kyuubiInstance: String, socketTimeout: Int, connectTime def deleteBatch(user: String, batchId: String): CloseBatchResponse = { withAuthUser(user) { - internalBatchRestApi.deleteBatch(batchId, null) + internalBatchRestApi.deleteBatch(batchId) } } private def initKyuubiRestClient(): KyuubiRestClient = { - KyuubiRestClient.builder(s"http://$kyuubiInstance") + val builder = KyuubiRestClient.builder(s"http://$kyuubiInstance") .apiVersion(KyuubiRestClient.ApiVersion.V1) .socketTimeout(socketTimeout) .connectionTimeout(connectTimeout) - .authHeaderGenerator(InternalRestClient.internalAuthHeaderGenerator) - .build() + if (securityEnabled) { + builder.authHeaderGenerator(InternalRestClient.internalAuthHeaderGenerator) + } + builder.build() } private def withAuthUser[T](user: String)(f: => T): T = { @@ -82,7 +90,7 @@ object InternalRestClient { override def initialValue(): String = null } - final val internalAuthHeaderGenerator = new AuthHeaderGenerator { + final lazy val internalAuthHeaderGenerator = new AuthHeaderGenerator { override def generateAuthHeader(): String = { val authUser = AUTH_USER.get() require(authUser != null, "The auth user shall be not null") diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/OperationsResource.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/OperationsResource.scala index fdde5bbc5b2..e7a15ab9293 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/OperationsResource.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/OperationsResource.scala @@ -26,13 +26,12 @@ import scala.util.control.NonFatal import io.swagger.v3.oas.annotations.media.{Content, Schema} import io.swagger.v3.oas.annotations.responses.ApiResponse import io.swagger.v3.oas.annotations.tags.Tag -import org.apache.hive.service.rpc.thrift._ import org.apache.kyuubi.{KyuubiSQLException, Logging} import org.apache.kyuubi.client.api.v1.dto._ -import org.apache.kyuubi.events.KyuubiOperationEvent import org.apache.kyuubi.operation.{FetchOrientation, KyuubiOperation, OperationHandle} import org.apache.kyuubi.server.api.{ApiRequestContext, ApiUtils} +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ @Tag(name = "Operation") @Produces(Array(MediaType.APPLICATION_JSON)) @@ -54,7 +53,7 @@ private[v1] class OperationsResource extends ApiRequestContext with Logging { try { val opHandle = OperationHandle(operationHandleStr) val operation = fe.be.sessionManager.operationManager.getOperation(opHandle) - KyuubiOperationEvent(operation.asInstanceOf[KyuubiOperation]) + ApiUtils.operationEvent(operation.asInstanceOf[KyuubiOperation]) } catch { case NonFatal(e) => val errorMsg = "Error getting an operation event" diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/SessionsResource.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/SessionsResource.scala index 10a55786798..928bb207a1e 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/SessionsResource.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/api/v1/SessionsResource.scala @@ -28,7 +28,6 @@ import io.swagger.v3.oas.annotations.media.{ArraySchema, Content, Schema} import io.swagger.v3.oas.annotations.responses.ApiResponse import io.swagger.v3.oas.annotations.tags.Tag import org.apache.commons.lang3.StringUtils -import org.apache.hive.service.rpc.thrift.{TGetInfoType, TProtocolVersion} import org.apache.kyuubi.Logging import org.apache.kyuubi.client.api.v1.dto @@ -37,6 +36,7 @@ import org.apache.kyuubi.config.KyuubiReservedKeys._ import org.apache.kyuubi.operation.{KyuubiOperation, OperationHandle} import org.apache.kyuubi.server.api.{ApiRequestContext, ApiUtils} import org.apache.kyuubi.session.{KyuubiSession, SessionHandle} +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TGetInfoType, TProtocolVersion} @Tag(name = "Session") @Produces(Array(MediaType.APPLICATION_JSON)) @@ -69,26 +69,7 @@ private[v1] class SessionsResource extends ApiRequestContext with Logging { @Path("{sessionHandle}") def sessionInfo(@PathParam("sessionHandle") sessionHandleStr: String): dto.KyuubiSessionEvent = { try { - sessionManager.getSession(sessionHandleStr) - .asInstanceOf[KyuubiSession].getSessionEvent.map(event => - dto.KyuubiSessionEvent.builder - .sessionId(event.sessionId) - .clientVersion(event.clientVersion) - .sessionType(event.sessionType) - .sessionName(event.sessionName) - .user(event.user) - .clientIp(event.clientIP) - .serverIp(event.serverIP) - .conf(event.conf.asJava) - .remoteSessionId(event.remoteSessionId) - .engineId(event.engineId) - .eventTime(event.eventTime) - .openedTime(event.openedTime) - .startTime(event.startTime) - .endTime(event.endTime) - .totalOperations(event.totalOperations) - .exception(event.exception.orNull) - .build).get + ApiUtils.sessionEvent(sessionManager.getSession(sessionHandleStr).asInstanceOf[KyuubiSession]) } catch { case NonFatal(e) => val errorMsg = s"Invalid $sessionHandleStr" diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/http/ThriftHttpServlet.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/http/ThriftHttpServlet.scala index bb9f1553d39..eb8fb2caa69 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/http/ThriftHttpServlet.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/http/ThriftHttpServlet.scala @@ -27,17 +27,17 @@ import javax.ws.rs.core.NewCookie import scala.collection.mutable import org.apache.hadoop.hive.shims.Utils -import org.apache.thrift.TProcessor -import org.apache.thrift.protocol.TProtocolFactory -import org.apache.thrift.server.TServlet import org.apache.kyuubi.Logging import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiConf.FRONTEND_PROXY_HTTP_CLIENT_IP_HEADER import org.apache.kyuubi.server.http.authentication.AuthenticationFilter -import org.apache.kyuubi.server.http.authentication.AuthenticationHandler.AUTHORIZATION_HEADER import org.apache.kyuubi.server.http.util.{CookieSigner, HttpAuthUtils, SessionManager} +import org.apache.kyuubi.server.http.util.HttpAuthUtils.AUTHORIZATION_HEADER import org.apache.kyuubi.service.authentication.KyuubiAuthenticationFactory +import org.apache.kyuubi.shaded.thrift.TProcessor +import org.apache.kyuubi.shaded.thrift.protocol.TProtocolFactory +import org.apache.kyuubi.shaded.thrift.server.TServlet class ThriftHttpServlet( processor: TProcessor, @@ -136,7 +136,7 @@ class ThriftHttpServlet( } else SessionManager.setForwardedAddresses(List.empty[String]) // Generate new cookie and add it to the response - if (requireNewCookie && !authFactory.isNoSaslEnabled) { + if (requireNewCookie && !authFactory.noSaslEnabled) { val cookieToken = HttpAuthUtils.createCookieToken(clientUserName) val hs2Cookie = createCookie(signer.signCookie(cookieToken)) if (isHttpOnlyCookie) response.setHeader("SET-COOKIE", getHttpOnlyCookieHeader(hs2Cookie)) diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/http/authentication/AuthenticationFilter.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/http/authentication/AuthenticationFilter.scala index 523d2490753..15b387607ea 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/http/authentication/AuthenticationFilter.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/http/authentication/AuthenticationFilter.scala @@ -27,12 +27,12 @@ import scala.collection.mutable import org.apache.kyuubi.Logging import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiConf.{AUTHENTICATION_METHOD, FRONTEND_PROXY_HTTP_CLIENT_IP_HEADER} +import org.apache.kyuubi.server.http.util.HttpAuthUtils.AUTHORIZATION_HEADER import org.apache.kyuubi.service.authentication.{AuthTypes, InternalSecurityAccessor} import org.apache.kyuubi.service.authentication.AuthTypes.{KERBEROS, NOSASL} class AuthenticationFilter(conf: KyuubiConf) extends Filter with Logging { import AuthenticationFilter._ - import AuthenticationHandler._ import AuthSchemes._ private[authentication] val authSchemeHandlers = diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/http/authentication/AuthenticationHandler.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/http/authentication/AuthenticationHandler.scala index bf2cb5bbecb..a0b3fb4ab37 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/http/authentication/AuthenticationHandler.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/http/authentication/AuthenticationHandler.scala @@ -20,13 +20,11 @@ package org.apache.kyuubi.server.http.authentication import javax.security.sasl.AuthenticationException import javax.servlet.http.{HttpServletRequest, HttpServletResponse} -import org.apache.hadoop.security.authentication.server.HttpConstants - import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.server.http.authentication.AuthSchemes.AuthScheme +import org.apache.kyuubi.server.http.util.HttpAuthUtils.AUTHORIZATION_HEADER trait AuthenticationHandler { - import AuthenticationHandler._ /** * HTTP header prefix used during the authentication sequence. @@ -103,23 +101,10 @@ trait AuthenticationHandler { authorization = authorization.stripPrefix(":").trim } // Authorization header must have a payload - if (authorization == null || authorization.isEmpty()) { + if (authorization == null || authorization.isEmpty) { throw new AuthenticationException( "Authorization header received from the client does not contain any data.") } authorization } } - -object AuthenticationHandler { - - /** - * HTTP header used by the SPNEGO server endpoint during an authentication sequence. - */ - final val WWW_AUTHENTICATE: String = HttpConstants.WWW_AUTHENTICATE_HEADER - - /** - * HTTP header used by the client endpoint during an authentication sequence. - */ - final val AUTHORIZATION_HEADER = "Authorization" -} diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/http/authentication/BasicAuthenticationHandler.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/http/authentication/BasicAuthenticationHandler.scala index 57ce2e60e8f..76560cabb55 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/http/authentication/BasicAuthenticationHandler.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/http/authentication/BasicAuthenticationHandler.scala @@ -24,12 +24,12 @@ import javax.servlet.http.{HttpServletRequest, HttpServletResponse} import org.apache.kyuubi.Logging import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.server.http.authentication.AuthSchemes.AuthScheme +import org.apache.kyuubi.server.http.util.HttpAuthUtils.{AUTHORIZATION_HEADER, WWW_AUTHENTICATE_HEADER} import org.apache.kyuubi.service.authentication.{AuthenticationProviderFactory, AuthMethods} import org.apache.kyuubi.service.authentication.AuthTypes._ class BasicAuthenticationHandler(basicAuthType: AuthType) extends AuthenticationHandler with Logging { - import AuthenticationHandler._ private var conf: KyuubiConf = _ private val allowAnonymous = basicAuthType == NOSASL || basicAuthType == NONE @@ -75,7 +75,7 @@ class BasicAuthenticationHandler(basicAuthType: AuthType) authUser = creds.take(1).headOption.filterNot(_.isEmpty).getOrElse("anonymous") } else { if (creds.size < 2 || creds(0).trim.isEmpty || creds(1).trim.isEmpty) { - response.setHeader(WWW_AUTHENTICATE, authScheme.toString) + response.setHeader(WWW_AUTHENTICATE_HEADER, authScheme.toString) response.setStatus(HttpServletResponse.SC_UNAUTHORIZED) } else { val Seq(user, password) = creds.toSeq.take(2) diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/http/authentication/KerberosAuthenticationHandler.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/http/authentication/KerberosAuthenticationHandler.scala index 04603f30a41..7220e3906eb 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/http/authentication/KerberosAuthenticationHandler.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/http/authentication/KerberosAuthenticationHandler.scala @@ -27,15 +27,15 @@ import javax.servlet.ServletException import javax.servlet.http.{HttpServletRequest, HttpServletResponse} import org.apache.hadoop.security.authentication.util.KerberosName +import org.apache.hadoop.security.authentication.util.KerberosUtil._ import org.ietf.jgss.{GSSContext, GSSCredential, GSSManager, Oid} import org.apache.kyuubi.Logging import org.apache.kyuubi.config.KyuubiConf +import org.apache.kyuubi.server.http.authentication.AuthSchemes.AuthScheme +import org.apache.kyuubi.server.http.util.HttpAuthUtils.{NEGOTIATE, WWW_AUTHENTICATE_HEADER} class KerberosAuthenticationHandler extends AuthenticationHandler with Logging { - import AuthenticationHandler._ - import AuthSchemes._ - import KerberosUtil._ private var gssManager: GSSManager = _ private var conf: KyuubiConf = _ @@ -143,7 +143,7 @@ class KerberosAuthenticationHandler extends AuthenticationHandler with Logging { val serverToken = gssContext.acceptSecContext(clientToken, 0, clientToken.length) if (serverToken != null && serverToken.nonEmpty) { val authenticate = Base64.getEncoder.encodeToString(serverToken) - response.setHeader(WWW_AUTHENTICATE, s"$NEGOTIATE $authenticate") + response.setHeader(WWW_AUTHENTICATE_HEADER, s"$NEGOTIATE $authenticate") } if (!gssContext.isEstablished) { response.setStatus(HttpServletResponse.SC_UNAUTHORIZED) diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/http/authentication/KyuubiInternalAuthenticationHandler.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/http/authentication/KyuubiInternalAuthenticationHandler.scala index 7af6389ccee..d910f4a8396 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/http/authentication/KyuubiInternalAuthenticationHandler.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/http/authentication/KyuubiInternalAuthenticationHandler.scala @@ -17,17 +17,17 @@ package org.apache.kyuubi.server.http.authentication -import java.nio.charset.Charset +import java.nio.charset.StandardCharsets import java.util.Base64 import javax.servlet.http.{HttpServletRequest, HttpServletResponse} import org.apache.kyuubi.Logging import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.server.http.authentication.AuthSchemes.AuthScheme +import org.apache.kyuubi.server.http.util.HttpAuthUtils.WWW_AUTHENTICATE_HEADER import org.apache.kyuubi.service.authentication.InternalSecurityAccessor class KyuubiInternalAuthenticationHandler extends AuthenticationHandler with Logging { - import AuthenticationHandler._ private var conf: KyuubiConf = _ override val authScheme: AuthScheme = AuthSchemes.KYUUBI_INTERNAL @@ -48,10 +48,10 @@ class KyuubiInternalAuthenticationHandler extends AuthenticationHandler with Log val authorization = getAuthorization(request) val inputToken = Option(authorization).map(a => Base64.getDecoder.decode(a.getBytes())) .getOrElse(Array.empty[Byte]) - val creds = new String(inputToken, Charset.forName("UTF-8")).split(":") + val creds = new String(inputToken, StandardCharsets.UTF_8).split(":") if (creds.size < 2 || creds(0).trim.isEmpty || creds(1).trim.isEmpty) { - response.setHeader(WWW_AUTHENTICATE, authScheme.toString) + response.setHeader(WWW_AUTHENTICATE_HEADER, authScheme.toString) response.setStatus(HttpServletResponse.SC_UNAUTHORIZED) } else { val Seq(user, password) = creds.toSeq.take(2) diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/http/util/HttpAuthUtils.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/http/util/HttpAuthUtils.scala index 7bb11747668..e840a307c47 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/http/util/HttpAuthUtils.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/http/util/HttpAuthUtils.scala @@ -17,19 +17,33 @@ package org.apache.kyuubi.server.http.util +import java.nio.charset.StandardCharsets import java.security.SecureRandom import java.util -import java.util.StringTokenizer +import java.util.{Base64, StringTokenizer} import scala.collection.mutable import org.apache.kyuubi.Logging object HttpAuthUtils extends Logging { - val WWW_AUTHENTICATE = "WWW-Authenticate" - val AUTHORIZATION = "Authorization" - val BASIC = "Basic" + // HTTP header used by the server endpoint during an authentication sequence. + val WWW_AUTHENTICATE_HEADER = "WWW-Authenticate" + // HTTP header used by the client endpoint during an authentication sequence. + val AUTHORIZATION_HEADER = "Authorization" + // HTTP header prefix used by the SPNEGO client/server endpoints during an + // authentication sequence. val NEGOTIATE = "Negotiate" + // HTTP header prefix used during the Basic authentication sequence. + val BASIC = "Basic" + // HTTP header prefix used during the Basic authentication sequence. + val DIGEST = "Digest" + + // RFC 7617: The 'Basic' HTTP Authentication Scheme + def basicAuthorizationHeader(userId: String, password: String = "none"): String = + "BASIC " + new String( + Base64.getEncoder.encode(s"$userId:$password".getBytes()), + StandardCharsets.UTF_8) private val COOKIE_ATTR_SEPARATOR = "&" private val COOKIE_CLIENT_USER_NAME = "cu" diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/MetadataManager.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/MetadataManager.scala index 1da9e1f3148..6dd0e76e0b3 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/MetadataManager.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/MetadataManager.scala @@ -32,6 +32,7 @@ import org.apache.kyuubi.server.metadata.api.{Metadata, MetadataFilter} import org.apache.kyuubi.service.AbstractService import org.apache.kyuubi.session.SessionType import org.apache.kyuubi.util.{ClassUtils, JdbcUtils, ThreadUtils} +import org.apache.kyuubi.util.ThreadUtils.scheduleTolerableRunnableWithFixedDelay class MetadataManager extends AbstractService("MetadataManager") { import MetadataManager._ @@ -209,7 +210,8 @@ class MetadataManager extends AbstractService("MetadataManager") { } } - metadataCleaner.scheduleWithFixedDelay( + scheduleTolerableRunnableWithFixedDelay( + metadataCleaner, cleanerTask, interval, interval, @@ -298,7 +300,9 @@ class MetadataManager extends AbstractService("MetadataManager") { } } } - requestsAsyncRetryTrigger.scheduleWithFixedDelay( + + scheduleTolerableRunnableWithFixedDelay( + requestsAsyncRetryTrigger, triggerTask, requestsRetryInterval, requestsRetryInterval, diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStore.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStore.scala index 9b1c89d779b..0a6d402296b 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStore.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStore.scala @@ -74,7 +74,7 @@ class JDBCMetadataStore(conf: KyuubiConf) extends MetadataStore with Logging { JDBCMetadataStoreConf.getMetadataStoreJDBCDataSourceProperties(conf) private val hikariConfig = new HikariConfig(datasourceProperties) hikariConfig.setDriverClassName(driverClass) - hikariConfig.setJdbcUrl(conf.get(METADATA_STORE_JDBC_URL)) + hikariConfig.setJdbcUrl(getMetadataStoreJdbcUrl(conf)) hikariConfig.setUsername(conf.get(METADATA_STORE_JDBC_USER)) hikariConfig.setPassword(conf.get(METADATA_STORE_JDBC_PASSWORD)) hikariConfig.setPoolName("jdbc-metadata-store-pool") diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStoreConf.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStoreConf.scala index 96a5539fb27..e2b06541ddc 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStoreConf.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStoreConf.scala @@ -19,12 +19,26 @@ package org.apache.kyuubi.server.metadata.jdbc import java.util.Properties +import org.apache.kyuubi.Utils import org.apache.kyuubi.config.{ConfigEntry, KyuubiConf, OptionalConfigEntry} import org.apache.kyuubi.config.KyuubiConf.buildConf object JDBCMetadataStoreConf { final val METADATA_STORE_JDBC_DATASOURCE_PREFIX = "kyuubi.metadata.store.jdbc.datasource" + def getMetadataStoreJdbcUrl(conf: KyuubiConf): String = { + val rawJdbcUrl = conf.get(METADATA_STORE_JDBC_URL) + if (rawJdbcUrl.contains("")) { + rawJdbcUrl.replace( + "", + sys.env.getOrElse( + "KYUUBI_HOME", + Utils.getCodeSourceLocation(getClass).split("kyuubi-server").head)) + } else { + rawJdbcUrl + } + } + /** Get metadata store jdbc datasource properties. */ def getMetadataStoreJDBCDataSourceProperties(conf: KyuubiConf): Properties = { val datasourceProperties = new Properties() @@ -70,14 +84,14 @@ object JDBCMetadataStoreConf { val METADATA_STORE_JDBC_URL: ConfigEntry[String] = buildConf("kyuubi.metadata.store.jdbc.url") - .doc("The JDBC url for server JDBC metadata store. By default, it is a SQLite" + - " database url, and the state information is not shared across kyuubi instances. To" + - " enable high availability for multiple kyuubi instances," + - " please specify a production JDBC url.") + .doc("The JDBC url for server JDBC metadata store. By default, it is a SQLite database " + + "url, and the state information is not shared across Kyuubi instances. To enable high " + + "availability for multiple kyuubi instances, please specify a production JDBC url. " + + "Note: this value support the variables substitution: ``.") .version("1.6.0") .serverOnly .stringConf - .createWithDefault("jdbc:sqlite:kyuubi_state_store.db") + .createWithDefault("jdbc:sqlite:/kyuubi_state_store.db") val METADATA_STORE_JDBC_USER: ConfigEntry[String] = buildConf("kyuubi.metadata.store.jdbc.user") diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/mysql/MySQLCommandHandler.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/mysql/MySQLCommandHandler.scala index 5f7a07f5875..ad91335bf9c 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/mysql/MySQLCommandHandler.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/mysql/MySQLCommandHandler.scala @@ -25,7 +25,6 @@ import scala.concurrent.{ExecutionContext, ExecutionContextExecutor, Future} import scala.util.{Failure, Success} import io.netty.channel.{ChannelHandlerContext, SimpleChannelInboundHandler} -import org.apache.hive.service.rpc.thrift.TProtocolVersion import org.apache.kyuubi.{KyuubiSQLException, Logging} import org.apache.kyuubi.config.KyuubiReservedKeys._ @@ -35,6 +34,7 @@ import org.apache.kyuubi.server.mysql.MySQLCommandHandler._ import org.apache.kyuubi.server.mysql.constant.MySQLCtxAttrKey._ import org.apache.kyuubi.service.BackendService import org.apache.kyuubi.session.SessionHandle +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TProtocolVersion object MySQLCommandHandler { val connIdCounter = new AtomicInteger diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/mysql/MySQLQueryResult.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/mysql/MySQLQueryResult.scala index 59371b923e9..b2bba52ca2f 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/mysql/MySQLQueryResult.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/mysql/MySQLQueryResult.scala @@ -19,9 +19,8 @@ package org.apache.kyuubi.server.mysql import scala.collection.JavaConverters._ -import org.apache.hive.service.rpc.thrift._ - import org.apache.kyuubi.server.mysql.constant.MySQLDataType +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ object MySQLQueryResult { diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/mysql/constant/MySQLDataType.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/mysql/constant/MySQLDataType.scala index a3b21fad84d..a1818bb486b 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/mysql/constant/MySQLDataType.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/mysql/constant/MySQLDataType.scala @@ -19,7 +19,7 @@ package org.apache.kyuubi.server.mysql.constant import java.sql.Types -import org.apache.hive.service.rpc.thrift.TTypeId +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TTypeId sealed abstract class MySQLDataType(val value: Int) diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/Query.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/Query.scala index dc9de4ae2e0..83c4d8281fb 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/Query.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/Query.scala @@ -30,7 +30,6 @@ import scala.collection.mutable import Slug.Context.{EXECUTING_QUERY, QUEUED_QUERY} import com.google.common.hash.Hashing import io.trino.client.QueryResults -import org.apache.hive.service.rpc.thrift.{TBoolValue, TColumnDesc, TColumnValue, TGetResultSetMetadataResp, TPrimitiveTypeEntry, TProtocolVersion, TRow, TRowSet, TTableSchema, TTypeDesc, TTypeEntry, TTypeId} import org.apache.kyuubi.operation.{FetchOrientation, OperationHandle, OperationState, OperationStatus} import org.apache.kyuubi.operation.OperationState.{FINISHED, INITIALIZED, OperationState, PENDING} @@ -38,6 +37,7 @@ import org.apache.kyuubi.server.trino.api.Query.KYUUBI_SESSION_ID import org.apache.kyuubi.service.BackendService import org.apache.kyuubi.service.TFrontendService.OK_STATUS import org.apache.kyuubi.session.SessionHandle +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TBoolValue, TColumnDesc, TColumnValue, TGetResultSetMetadataResp, TPrimitiveTypeEntry, TProtocolVersion, TRow, TRowSet, TTableSchema, TTypeDesc, TTypeEntry, TTypeId} case class Query( queryId: QueryId, diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/TrinoContext.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/TrinoContext.scala index 842f0ceec73..fd364321d0d 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/TrinoContext.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/trino/api/TrinoContext.scala @@ -28,11 +28,11 @@ import scala.collection.JavaConverters._ import com.google.common.collect.ImmutableList import io.trino.client.{ClientStandardTypes, ClientTypeSignature, ClientTypeSignatureParameter, Column, NamedClientTypeSignature, QueryError, QueryResults, RowFieldName, StatementStats, Warning} import io.trino.client.ProtocolHeaders.TRINO_HEADERS -import org.apache.hive.service.rpc.thrift.{TCLIServiceConstants, TGetResultSetMetadataResp, TRowSet, TTypeEntry, TTypeId} import org.apache.kyuubi.operation.OperationState.FINISHED import org.apache.kyuubi.operation.OperationStatus import org.apache.kyuubi.server.trino.api.Query.KYUUBI_SESSION_ID +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TCLIServiceConstants, TGetResultSetMetadataResp, TRowSet, TTypeEntry, TTypeId} // TODO: Support replace `preparedStatement` for Trino-jdbc /** diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiBatchSession.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiBatchSession.scala index 8489e6d307b..531bbc3af87 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiBatchSession.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiBatchSession.scala @@ -19,8 +19,6 @@ package org.apache.kyuubi.session import scala.collection.JavaConverters._ -import org.apache.hive.service.rpc.thrift.TProtocolVersion - import org.apache.kyuubi.client.util.BatchUtils._ import org.apache.kyuubi.config.{KyuubiConf, KyuubiReservedKeys} import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_BATCH_PRIORITY @@ -30,6 +28,7 @@ import org.apache.kyuubi.events.{EventBus, KyuubiSessionEvent} import org.apache.kyuubi.operation.OperationState import org.apache.kyuubi.server.metadata.api.Metadata import org.apache.kyuubi.session.SessionType.SessionType +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TProtocolVersion class KyuubiBatchSession( user: String, @@ -81,12 +80,12 @@ class KyuubiBatchSession( sessionConf.getBatchConf(batchType) ++ sessionManager.validateBatchConf(conf) val optimizedConf: Map[String, String] = { - val confOverlay = sessionManager.sessionConfAdvisor.getConfOverlay( + val confOverlay = sessionManager.sessionConfAdvisor.map(_.getConfOverlay( user, - normalizedConf.asJava) + normalizedConf.asJava).asScala).reduce(_ ++ _) if (confOverlay != null) { val overlayConf = new KyuubiConf(false) - confOverlay.asScala.foreach { case (k, v) => overlayConf.set(k, v) } + confOverlay.foreach { case (k, v) => overlayConf.set(k, v) } normalizedConf ++ overlayConf.getBatchConf(batchType) } else { warn(s"the server plugin return null value for user: $user, ignore it") diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiSession.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiSession.scala index a4c345af39c..19f4039876b 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiSession.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiSession.scala @@ -17,13 +17,13 @@ package org.apache.kyuubi.session import com.codahale.metrics.MetricRegistry -import org.apache.hive.service.rpc.thrift.TProtocolVersion import org.apache.kyuubi.config.KyuubiReservedKeys.{KYUUBI_SESSION_CONNECTION_URL_KEY, KYUUBI_SESSION_REAL_USER_KEY} import org.apache.kyuubi.events.{EventBus, KyuubiSessionEvent} import org.apache.kyuubi.metrics.MetricsConstants.{CONN_OPEN, CONN_TOTAL} import org.apache.kyuubi.metrics.MetricsSystem import org.apache.kyuubi.session.SessionType.SessionType +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TProtocolVersion abstract class KyuubiSession( protocol: TProtocolVersion, diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiSessionImpl.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiSessionImpl.scala index 6dd1810a8de..a5d160e0714 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiSessionImpl.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiSessionImpl.scala @@ -21,12 +21,11 @@ import java.util.Base64 import scala.collection.JavaConverters._ -import org.apache.hive.service.rpc.thrift._ - import org.apache.kyuubi.KyuubiSQLException import org.apache.kyuubi.client.KyuubiSyncThriftClient import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.config.KyuubiConf._ +import org.apache.kyuubi.config.KyuubiConf.EngineOpenOnFailure._ import org.apache.kyuubi.config.KyuubiReservedKeys.{KYUUBI_ENGINE_CREDENTIALS_KEY, KYUUBI_SESSION_HANDLE_KEY, KYUUBI_SESSION_SIGN_PUBLICKEY, KYUUBI_SESSION_USER_SIGN} import org.apache.kyuubi.engine.{EngineRef, KyuubiApplicationManager} import org.apache.kyuubi.events.{EventBus, KyuubiSessionEvent} @@ -35,6 +34,8 @@ import org.apache.kyuubi.operation.{Operation, OperationHandle} import org.apache.kyuubi.operation.log.OperationLog import org.apache.kyuubi.service.authentication.InternalSecurityAccessor import org.apache.kyuubi.session.SessionType.SessionType +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ +import org.apache.kyuubi.shaded.thrift.transport.TTransportException import org.apache.kyuubi.sql.parser.server.KyuubiParser import org.apache.kyuubi.sql.plan.command.RunnableCommand import org.apache.kyuubi.util.SignUtils @@ -53,11 +54,11 @@ class KyuubiSessionImpl( override val sessionType: SessionType = SessionType.INTERACTIVE private[kyuubi] val optimizedConf: Map[String, String] = { - val confOverlay = sessionManager.sessionConfAdvisor.getConfOverlay( + val confOverlay = sessionManager.sessionConfAdvisor.map(_.getConfOverlay( user, - normalizedConf.asJava) + normalizedConf.asJava).asScala).reduce(_ ++ _) if (confOverlay != null) { - normalizedConf ++ confOverlay.asScala + normalizedConf ++ confOverlay } else { warn(s"the server plugin return null value for user: $user, ignore it") normalizedConf @@ -99,12 +100,12 @@ class KyuubiSessionImpl( sessionManager.getConf) } - private var _client: KyuubiSyncThriftClient = _ + @volatile private var _client: KyuubiSyncThriftClient = _ def client: KyuubiSyncThriftClient = _client - private var _engineSessionHandle: SessionHandle = _ + @volatile private var _engineSessionHandle: SessionHandle = _ - private var openSessionError: Option[Throwable] = None + @volatile private var openSessionError: Option[Throwable] = None override def open(): Unit = handleSessionException { traceMetricsOnOpen() @@ -141,10 +142,21 @@ class KyuubiSessionImpl( val maxAttempts = sessionManager.getConf.get(ENGINE_OPEN_MAX_ATTEMPTS) val retryWait = sessionManager.getConf.get(ENGINE_OPEN_RETRY_WAIT) + val openOnFailure = + EngineOpenOnFailure.withName(sessionManager.getConf.get(ENGINE_OPEN_ON_FAILURE)) var attempt = 0 var shouldRetry = true while (attempt <= maxAttempts && shouldRetry) { val (host, port) = engine.getOrCreate(discoveryClient, extraEngineLog) + + def deregisterEngine(): Unit = + try { + engine.deregister(discoveryClient, (host, port)) + } catch { + case e: Throwable => + warn(s"Error on de-registering engine [${engine.engineSpace} $host:$port]", e) + } + try { val passwd = if (sessionManager.getConf.get(ENGINE_SECURITY_ENABLED)) { @@ -159,7 +171,7 @@ class KyuubiSessionImpl( s" with ${_engineSessionHandle}]") shouldRetry = false } catch { - case e: org.apache.thrift.transport.TTransportException + case e: TTransportException if attempt < maxAttempts && e.getCause.isInstanceOf[java.net.ConnectException] && e.getCause.getMessage.contains("Connection refused") => warn( @@ -167,6 +179,10 @@ class KyuubiSessionImpl( s" $attempt/$maxAttempts times, retrying", e.getCause) Thread.sleep(retryWait) + openOnFailure match { + case DEREGISTER_IMMEDIATELY => deregisterEngine() + case _ => + } shouldRetry = true case e: Throwable => error( @@ -174,6 +190,10 @@ class KyuubiSessionImpl( s" for $user session failed", e) openSessionError = Some(e) + openOnFailure match { + case DEREGISTER_IMMEDIATELY | DEREGISTER_AFTER_RETRY => deregisterEngine() + case _ => + } throw e } finally { attempt += 1 @@ -290,7 +310,7 @@ class KyuubiSessionImpl( private val engineAliveTimeout = sessionConf.get(KyuubiConf.ENGINE_ALIVE_TIMEOUT) private val aliveProbeEnabled = sessionConf.get(KyuubiConf.ENGINE_ALIVE_PROBE_ENABLED) private val engineAliveMaxFailCount = sessionConf.get(KyuubiConf.ENGINE_ALIVE_MAX_FAILURES) - private var engineAliveFailCount = 0 + @volatile private var engineAliveFailCount = 0 def checkEngineConnectionAlive(): Boolean = { try { diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiSessionManager.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiSessionManager.scala index 02a3ee32c7c..0696af74fa4 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiSessionManager.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiSessionManager.scala @@ -23,7 +23,6 @@ import scala.collection.JavaConverters._ import com.codahale.metrics.MetricRegistry import com.google.common.annotations.VisibleForTesting -import org.apache.hive.service.rpc.thrift.TProtocolVersion import org.apache.kyuubi.KyuubiSQLException import org.apache.kyuubi.client.api.v1.dto.{Batch, BatchRequest} @@ -39,8 +38,10 @@ import org.apache.kyuubi.operation.{KyuubiOperationManager, OperationState} import org.apache.kyuubi.plugin.{GroupProvider, PluginLoader, SessionConfAdvisor} import org.apache.kyuubi.server.metadata.{MetadataManager, MetadataRequestsRetryRef} import org.apache.kyuubi.server.metadata.api.{Metadata, MetadataFilter} +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TProtocolVersion import org.apache.kyuubi.sql.parser.server.KyuubiParser import org.apache.kyuubi.util.{SignUtils, ThreadUtils} +import org.apache.kyuubi.util.ThreadUtils.scheduleTolerableRunnableWithFixedDelay class KyuubiSessionManager private (name: String) extends SessionManager(name) { @@ -58,7 +59,7 @@ class KyuubiSessionManager private (name: String) extends SessionManager(name) { if (conf.isRESTEnabled) Some(new MetadataManager()) else None // lazy is required for plugins since the conf is null when this class initialization - lazy val sessionConfAdvisor: SessionConfAdvisor = PluginLoader.loadSessionConfAdvisor(conf) + lazy val sessionConfAdvisor: Seq[SessionConfAdvisor] = PluginLoader.loadSessionConfAdvisor(conf) lazy val groupProvider: GroupProvider = PluginLoader.loadGroupProvider(conf) private var limiter: Option[SessionLimiter] = None @@ -396,20 +397,22 @@ class KyuubiSessionManager private (name: String) extends SessionManager(name) { private def startEngineAliveChecker(): Unit = { val interval = conf.get(KyuubiConf.ENGINE_ALIVE_PROBE_INTERVAL) val checkTask: Runnable = () => { - allSessions().foreach { session => - if (!session.asInstanceOf[KyuubiSessionImpl].checkEngineConnectionAlive()) { + allSessions().foreach { + case session: KyuubiSessionImpl => try { - closeSession(session.handle) - logger.info(s"The session ${session.handle} has been closed " + - s"due to engine unresponsiveness (checked by the engine alive checker).") + if (!session.checkEngineConnectionAlive()) { + closeSession(session.handle) + logger.info(s"The session ${session.handle} has been closed " + + s"due to engine unresponsiveness (checked by the engine alive checker).") + } } catch { - case e: KyuubiSQLException => - warn(s"Error closing session ${session.handle}", e) + case e: Throwable => warn(s"Error closing session ${session.handle}", e) } - } + case _ => } } - engineConnectionAliveChecker.scheduleWithFixedDelay( + scheduleTolerableRunnableWithFixedDelay( + engineConnectionAliveChecker, checkTask, interval, interval, diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/sql/plan/command/DescribeSession.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/sql/plan/command/DescribeSession.scala index 934aac9a2f0..e1d77f296e7 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/sql/plan/command/DescribeSession.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/sql/plan/command/DescribeSession.scala @@ -19,10 +19,9 @@ package org.apache.kyuubi.sql.plan.command import scala.collection.mutable.ListBuffer -import org.apache.hive.service.rpc.thrift.TTypeId - import org.apache.kyuubi.operation.IterableFetchIterator import org.apache.kyuubi.session.KyuubiSession +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TTypeId import org.apache.kyuubi.sql.schema.{Column, Row, Schema} /** diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/sql/plan/command/RunnableCommand.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/sql/plan/command/RunnableCommand.scala index deda7d0061f..cdfb515bd3a 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/sql/plan/command/RunnableCommand.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/sql/plan/command/RunnableCommand.scala @@ -17,13 +17,12 @@ package org.apache.kyuubi.sql.plan.command -import org.apache.hive.service.rpc.thrift.{TProtocolVersion, TRowSet} - import org.apache.kyuubi.operation.FetchIterator import org.apache.kyuubi.operation.FetchOrientation.{FETCH_FIRST, FETCH_NEXT, FETCH_PRIOR, FetchOrientation} import org.apache.kyuubi.session.KyuubiSession +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TProtocolVersion, TRowSet} import org.apache.kyuubi.sql.plan.KyuubiTreeNode -import org.apache.kyuubi.sql.schema.{Row, RowSetHelper, Schema} +import org.apache.kyuubi.sql.schema.{Row, Schema, ServerTRowSetGenerator} trait RunnableCommand extends KyuubiTreeNode { @@ -45,7 +44,7 @@ trait RunnableCommand extends KyuubiTreeNode { case FETCH_FIRST => iter.fetchAbsolute(0) } val taken = iter.take(rowSetSize) - val resultRowSet = RowSetHelper.toTRowSet( + val resultRowSet = new ServerTRowSetGenerator().toTRowSet( taken.toList, resultSchema, protocolVersion) diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/sql/schema/Column.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/sql/schema/Column.scala index 5b71ffd44cf..0b27f5ce500 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/sql/schema/Column.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/sql/schema/Column.scala @@ -17,6 +17,6 @@ package org.apache.kyuubi.sql.schema -import org.apache.hive.service.rpc.thrift.TTypeId +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TTypeId case class Column(name: String, dataType: TTypeId, comment: Option[String] = None) diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/sql/schema/RowSetHelper.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/sql/schema/RowSetHelper.scala deleted file mode 100644 index d76efaa4b8b..00000000000 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/sql/schema/RowSetHelper.scala +++ /dev/null @@ -1,210 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.kyuubi.sql.schema - -import java.util - -import scala.collection.JavaConverters._ - -import org.apache.hive.service.rpc.thrift._ - -import org.apache.kyuubi.util.RowSetUtils._ - -object RowSetHelper { - - def toTRowSet( - rows: Seq[Row], - schema: Schema, - protocolVersion: TProtocolVersion): TRowSet = { - if (protocolVersion.getValue < TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6.getValue) { - toRowBasedSet(rows, schema) - } else { - toColumnBasedSet(rows, schema) - } - } - - def toRowBasedSet(rows: Seq[Row], schema: Schema): TRowSet = { - var i = 0 - val rowSize = rows.length - val tRows = new java.util.ArrayList[TRow](rowSize) - while (i < rowSize) { - val row = rows(i) - val tRow = new TRow() - var j = 0 - val columnSize = row.length - while (j < columnSize) { - val columnValue = toTColumnValue(j, row, schema) - tRow.addToColVals(columnValue) - j += 1 - } - i += 1 - tRows.add(tRow) - } - new TRowSet(0, tRows) - } - - private def toTColumnValue( - ordinal: Int, - row: Row, - types: Schema): TColumnValue = { - types(ordinal).dataType match { - case TTypeId.BOOLEAN_TYPE => - val boolValue = new TBoolValue - if (!row.isNullAt(ordinal)) boolValue.setValue(row.getBoolean(ordinal)) - TColumnValue.boolVal(boolValue) - - case TTypeId.BINARY_TYPE => - val byteValue = new TByteValue - if (!row.isNullAt(ordinal)) byteValue.setValue(row.getByte(ordinal)) - TColumnValue.byteVal(byteValue) - - case TTypeId.TINYINT_TYPE => - val tI16Value = new TI16Value - if (!row.isNullAt(ordinal)) tI16Value.setValue(row.getShort(ordinal)) - TColumnValue.i16Val(tI16Value) - - case TTypeId.INT_TYPE => - val tI32Value = new TI32Value - if (!row.isNullAt(ordinal)) tI32Value.setValue(row.getInt(ordinal)) - TColumnValue.i32Val(tI32Value) - - case TTypeId.BIGINT_TYPE => - val tI64Value = new TI64Value - if (!row.isNullAt(ordinal)) tI64Value.setValue(row.getLong(ordinal)) - TColumnValue.i64Val(tI64Value) - - case TTypeId.FLOAT_TYPE => - val tDoubleValue = new TDoubleValue - if (!row.isNullAt(ordinal)) { - val doubleValue = java.lang.Double.valueOf(row.getFloat(ordinal).toString) - tDoubleValue.setValue(doubleValue) - } - TColumnValue.doubleVal(tDoubleValue) - - case TTypeId.DOUBLE_TYPE => - val tDoubleValue = new TDoubleValue - if (!row.isNullAt(ordinal)) tDoubleValue.setValue(row.getDouble(ordinal)) - TColumnValue.doubleVal(tDoubleValue) - - case TTypeId.STRING_TYPE => - val tStringValue = new TStringValue - if (!row.isNullAt(ordinal)) tStringValue.setValue(row.getString(ordinal)) - TColumnValue.stringVal(tStringValue) - - case _ => - val tStrValue = new TStringValue - if (!row.isNullAt(ordinal)) { - tStrValue.setValue((row.get(ordinal), types(ordinal).dataType).toString()) - } - TColumnValue.stringVal(tStrValue) - } - } - - def toColumnBasedSet(rows: Seq[Row], schema: Schema): TRowSet = { - val rowSize = rows.length - val tRowSet = new TRowSet(0, new java.util.ArrayList[TRow](rowSize)) - var i = 0 - val columnSize = schema.length - while (i < columnSize) { - val field = schema(i) - val tColumn = toTColumn(rows, i, field.dataType) - tRowSet.addToColumns(tColumn) - i += 1 - } - tRowSet - } - - private def toTColumn(rows: Seq[Row], ordinal: Int, typ: TTypeId): TColumn = { - val nulls = new java.util.BitSet() - typ match { - case TTypeId.BOOLEAN_TYPE => - val values = getOrSetAsNull[java.lang.Boolean](rows, ordinal, nulls, true) - TColumn.boolVal(new TBoolColumn(values, nulls)) - - case TTypeId.BINARY_TYPE => - val values = getOrSetAsNull[java.lang.Byte](rows, ordinal, nulls, 0.toByte) - TColumn.byteVal(new TByteColumn(values, nulls)) - - case TTypeId.TINYINT_TYPE => - val values = getOrSetAsNull[java.lang.Short](rows, ordinal, nulls, 0.toShort) - TColumn.i16Val(new TI16Column(values, nulls)) - - case TTypeId.INT_TYPE => - val values = getOrSetAsNull[java.lang.Integer](rows, ordinal, nulls, 0) - TColumn.i32Val(new TI32Column(values, nulls)) - - case TTypeId.BIGINT_TYPE => - val values = getOrSetAsNull[java.lang.Long](rows, ordinal, nulls, 0L) - TColumn.i64Val(new TI64Column(values, nulls)) - - case TTypeId.FLOAT_TYPE => - val values = getOrSetAsNull[java.lang.Float](rows, ordinal, nulls, 0.toFloat) - .asScala.map(n => java.lang.Double.valueOf(n.toString)).asJava - TColumn.doubleVal(new TDoubleColumn(values, nulls)) - - case TTypeId.DOUBLE_TYPE => - val values = getOrSetAsNull[java.lang.Double](rows, ordinal, nulls, 0.toDouble) - TColumn.doubleVal(new TDoubleColumn(values, nulls)) - - case TTypeId.STRING_TYPE => - val values: util.List[String] = getOrSetAsNull[java.lang.String](rows, ordinal, nulls, "") - TColumn.stringVal(new TStringColumn(values, nulls)) - - case _ => - var i = 0 - val rowSize = rows.length - val values = new java.util.ArrayList[String](rowSize) - while (i < rowSize) { - val row = rows(i) - nulls.set(i, row.isNullAt(ordinal)) - val value = - if (row.isNullAt(ordinal)) { - "" - } else { - (row.get(ordinal), typ).toString() - } - values.add(value) - i += 1 - } - TColumn.stringVal(new TStringColumn(values, nulls)) - } - } - - private def getOrSetAsNull[T]( - rows: Seq[Row], - ordinal: Int, - nulls: java.util.BitSet, - defaultVal: T): java.util.List[T] = { - val size = rows.length - val ret = new java.util.ArrayList[T](size) - var idx = 0 - while (idx < size) { - val row = rows(idx) - val isNull = row.isNullAt(ordinal) - if (isNull) { - nulls.set(idx, true) - ret.add(idx, defaultVal) - } else { - ret.add(idx, row.getAs[T](ordinal)) - } - idx += 1 - } - ret - } - -} diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/sql/schema/SchemaHelper.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/sql/schema/SchemaHelper.scala index f9871ea9fb1..9ff356ccd6c 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/sql/schema/SchemaHelper.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/sql/schema/SchemaHelper.scala @@ -17,7 +17,7 @@ package org.apache.kyuubi.sql.schema -import org.apache.hive.service.rpc.thrift._ +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ object SchemaHelper { diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/sql/schema/ServerTRowSetGenerator.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/sql/schema/ServerTRowSetGenerator.scala new file mode 100644 index 00000000000..96294c6eb24 --- /dev/null +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/sql/schema/ServerTRowSetGenerator.scala @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.sql.schema + +import org.apache.kyuubi.engine.result.TRowSetGenerator +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TTypeId._ + +class ServerTRowSetGenerator + extends TRowSetGenerator[Schema, Row, TTypeId] { + + override def getColumnSizeFromSchemaType(schema: Schema): Int = schema.length + + override def getColumnType(schema: Schema, ordinal: Int): TTypeId = schema(ordinal).dataType + + override def isColumnNullAt(row: Row, ordinal: Int): Boolean = row.isNullAt(ordinal) + + override def getColumnAs[T](row: Row, ordinal: Int): T = row.getAs[T](ordinal) + + override def toTColumn(rows: Seq[Row], ordinal: Int, typ: TTypeId): TColumn = { + typ match { + case BOOLEAN_TYPE => asBooleanTColumn(rows, ordinal) + case BINARY_TYPE => asShortTColumn(rows, ordinal) + case TINYINT_TYPE => asShortTColumn(rows, ordinal) + case INT_TYPE => asIntegerTColumn(rows, ordinal) + case BIGINT_TYPE => asLongTColumn(rows, ordinal) + case FLOAT_TYPE => asFloatTColumn(rows, ordinal) + case DOUBLE_TYPE => asDoubleTColumn(rows, ordinal) + case STRING_TYPE => asStringTColumn(rows, ordinal) + case _ => + asStringTColumn( + rows, + ordinal, + convertFunc = (row, ordinal) => (row.get(ordinal), typ).toString()) + } + } + + override def toTColumnValue(row: Row, ordinal: Int, types: Schema): TColumnValue = { + getColumnType(types, ordinal) match { + case BOOLEAN_TYPE => asBooleanTColumnValue(row, ordinal) + case BINARY_TYPE => asByteTColumnValue(row, ordinal) + case TINYINT_TYPE => asShortTColumnValue(row, ordinal) + case INT_TYPE => asIntegerTColumnValue(row, ordinal) + case BIGINT_TYPE => asLongTColumnValue(row, ordinal) + case FLOAT_TYPE => asFloatTColumnValue(row, ordinal) + case DOUBLE_TYPE => asDoubleTColumnValue(row, ordinal) + case STRING_TYPE => asStringTColumnValue(row, ordinal) + case otherType => + asStringTColumnValue(row, ordinal, rawValue => (rawValue, otherType).toString()) + } + } + +} diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/util/KubernetesUtils.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/util/KubernetesUtils.scala index 9da3408a336..02b52f9266e 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/util/KubernetesUtils.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/util/KubernetesUtils.scala @@ -120,8 +120,8 @@ object KubernetesUtils extends Logging { opt2.foreach { _ => require(opt1.isEmpty, errMessage) } } - private def getResourceNamePrefix(appName: String, engineRefId: String): String = { - s"$appName-$engineRefId" + private def getResourceNamePrefix(appName: String, engineRefId: Option[String]): String = { + engineRefId.map(refId => s"$appName-$refId").getOrElse(appName) .trim .toLowerCase(Locale.ROOT) .replaceAll("[^a-z0-9\\-]", "-") @@ -130,21 +130,45 @@ object KubernetesUtils extends Logging { .replaceAll("^[0-9]", "x") } - def generateDriverPodName(appName: String, engineRefId: String): String = { - val resolvedResourceName = s"kyuubi-${getResourceNamePrefix(appName, engineRefId)}-driver" - if (resolvedResourceName.length <= DRIVER_POD_NAME_MAX_LENGTH) { - resolvedResourceName + def generateDriverPodName( + appName: String, + engineRefId: String, + forciblyRewrite: Boolean): String = { + val resourceNamePrefix = if (appName.contains(engineRefId)) { + getResourceNamePrefix(appName, None) + } else { + getResourceNamePrefix(appName, Some(engineRefId)) + } + val resolvedResourceName = if (resourceNamePrefix.startsWith("kyuubi-")) { + s"$resourceNamePrefix-driver" } else { + s"kyuubi-$resourceNamePrefix-driver" + } + if (forciblyRewrite || resolvedResourceName.length > DRIVER_POD_NAME_MAX_LENGTH) { s"kyuubi-$engineRefId-driver" + } else { + resolvedResourceName } } - def generateExecutorPodNamePrefix(appName: String, engineRefId: String): String = { - val resolvedResourceName = s"kyuubi-${getResourceNamePrefix(appName, engineRefId)}" - if (resolvedResourceName.length <= EXECUTOR_POD_NAME_PREFIX_MAX_LENGTH) { - resolvedResourceName + def generateExecutorPodNamePrefix( + appName: String, + engineRefId: String, + forciblyRewrite: Boolean): String = { + val resourceNamePrefix = if (appName.contains(engineRefId)) { + getResourceNamePrefix(appName, None) + } else { + getResourceNamePrefix(appName, Some(engineRefId)) + } + val resolvedResourceName = if (resourceNamePrefix.startsWith("kyuubi-")) { + s"$resourceNamePrefix" } else { + s"kyuubi-$resourceNamePrefix" + } + if (forciblyRewrite || resolvedResourceName.length > EXECUTOR_POD_NAME_PREFIX_MAX_LENGTH) { s"kyuubi-$engineRefId" + } else { + resolvedResourceName } } } diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/WithKyuubiServerOnYarn.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/WithKyuubiServerOnYarn.scala index 012f4df1608..5a674d98fd0 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/WithKyuubiServerOnYarn.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/WithKyuubiServerOnYarn.scala @@ -206,7 +206,7 @@ class KyuubiOperationYarnClusterSuite extends WithKyuubiServerOnYarn with HiveJD } val elapsedTime = System.currentTimeMillis() - startTime assert(elapsedTime < 60 * 1000) - assert(exception.getMessage contains "The engine application has been terminated.") + assert(exception.getMessage contains "Could not open client transport with JDBC Uri") } } } diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/config/AllKyuubiConfiguration.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/config/AllKyuubiConfiguration.scala index 8c0806ba3f5..75226a8b614 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/config/AllKyuubiConfiguration.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/config/AllKyuubiConfiguration.scala @@ -125,12 +125,12 @@ class AllKyuubiConfiguration extends KyuubiFunSuite { | override all settings in `$SPARK_HOME/conf/spark-defaults.conf`""" += "### Via JDBC Connection URL" += """ Setting them in the JDBC Connection URL - | supplies session-specific for each SQL engine. For example: - | ``` - |jdbc:hive2://localhost:10009/default;# - |spark.sql.shuffle.partitions=2;spark.executor.memory=5g - |``` - |""" += + | supplies session-specific for each SQL engine. For example:""" ++= + // scalastyle:off + """``` + |jdbc:hive2://localhost:10009/default;#spark.sql.shuffle.partitions=2;spark.executor.memory=5g + |```""" += + // scalastyle:on "" += "- **Runtime SQL Configuration**" += """ - For [Runtime SQL Configurations]( @@ -168,11 +168,14 @@ class AllKyuubiConfiguration extends KyuubiFunSuite { |```""" += """The below options in `kyuubi-defaults.conf` will set `parallelism.default: 2` | and `taskmanager.memory.process.size: 5g` into flink configurations.""" += - "### Via JDBC Connection URL" += - """Setting them in the JDBC Connection URL supplies session-specific - | for each SQL engine. For example: ```jdbc:hive2://localhost:10009/default; - |#parallelism.default=2;taskmanager.memory.process.size=5g``` - |""" += + "### Via JDBC Connection URL" ++= + "Setting them in the JDBC Connection URL supplies session-specific for each SQL engine." + + " For example:" ++= + // scalastyle:off + """``` + | jdbc:hive2://localhost:10009/default;#flink.parallelism.default=2;flink.taskmanager.memory.process.size=5g + |```""" += + // scalastyle:on "### Via SET Statements" += """Please refer to the Flink official online documentation for [SET Statements] |(https://nightlies.apache.org/flink/flink-docs-stable/docs/dev/table/sql/set/)""" @@ -199,10 +202,14 @@ class AllKyuubiConfiguration extends KyuubiFunSuite { """The below options in `kyuubi-defaults.conf` will set `query_max_stage_count: 500` | and `parse_decimal_literals_as_double: true` into trino session properties.""" += "### Via JDBC Connection URL" += - """Setting them in the JDBC Connection URL supplies session-specific - | for each SQL engine. For example: ```jdbc:hive2://localhost:10009/default; - |#trino.query_max_stage_count=500;trino.parse_decimal_literals_as_double=true``` + "Setting them in the JDBC Connection URL supplies session-specific for each SQL engine." + + " For example:" ++= + // scalastyle:off + """ ``` + | jdbc:hive2://localhost:10009/default;#trino.query_max_stage_count=500;trino.parse_decimal_literals_as_double=true + | ``` |""" += + // scalastyle:on "### Via SET Statements" += """Please refer to the Trino official online documentation for [SET Statements] |(https://trino.io/docs/current/sql/set-session.html)""" diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/JpsApplicationOperationSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/JpsApplicationOperationSuite.scala index a0914afcf0d..bdb0fa787fb 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/JpsApplicationOperationSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/JpsApplicationOperationSuite.scala @@ -83,6 +83,8 @@ class JpsApplicationOperationSuite extends KyuubiFunSuite { val desc1 = jps.getApplicationInfoByTag(ApplicationManagerInfo(None), id) assert(desc1.id != null) assert(desc1.name != null) + assert(!desc1.name.contains("org.apache.spark.launcher.Main")) + assert(desc1.name.contains("org.apache.spark.deploy.SparkSubmit")) assert(desc1.state == ApplicationState.RUNNING) val response = jps.killApplicationByTag(ApplicationManagerInfo(None), id) assert(response._1, response._2) diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/flink/FlinkProcessBuilderSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/flink/FlinkProcessBuilderSuite.scala index 26e355a87bd..84be010ed4b 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/flink/FlinkProcessBuilderSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/flink/FlinkProcessBuilderSuite.scala @@ -81,8 +81,11 @@ class FlinkProcessBuilderSuite extends KyuubiFunSuite { val actualCommands = builder.toString val classpathStr = constructClasspathStr(builder) val expectedCommands = - s"$javaPath -Xmx512m -agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=5005 " + - s"-cp $classpathStr $mainClassStr \\\\\\n\\t--conf kyuubi.session.user=vinoyang $confStr" + s"""$javaPath \\\\ + |\\t-Xmx512m \\\\ + |\\t-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=5005 \\\\ + |\\t-cp $classpathStr $mainClassStr \\\\ + |\\t--conf kyuubi.session.user=vinoyang $confStr""".stripMargin val regex = new Regex(expectedCommands) val matcher = regex.pattern.matcher(actualCommands) assert(matcher.matches()) @@ -90,19 +93,20 @@ class FlinkProcessBuilderSuite extends KyuubiFunSuite { private def matchActualAndExpectedApplicationMode(builder: FlinkProcessBuilder): Unit = { val actualCommands = builder.toString + // scalastyle:off line.size.limit val expectedCommands = - escapePaths(s"${builder.flinkExecutable} run-application ") + - s"-t yarn-application " + - s"-Dyarn.ship-files=.*\\/flink-sql-client.*jar;.*\\/flink-sql-gateway.*jar;$tempUdfJar" + - s";.*\\/hive-site\\.xml " + - s"-Dyarn\\.application\\.name=kyuubi_.* " + - s"-Dyarn\\.tags=KYUUBI " + - s"-Dcontainerized\\.master\\.env\\.FLINK_CONF_DIR=\\. " + - s"-Dcontainerized\\.master\\.env\\.HIVE_CONF_DIR=\\. " + - s"-Dexecution.target=yarn-application " + - s"-c org\\.apache\\.kyuubi\\.engine\\.flink\\.FlinkSQLEngine " + - s".*kyuubi-flink-sql-engine_.*jar" + - s"(?: \\\\\\n\\t--conf \\S+=\\S+)+" + escapePaths( + s"""${builder.flinkExecutable} run-application \\\\ + |\\t-t yarn-application \\\\ + |\\t-Dyarn.ship-files=.*flink-sql-client.*jar;.*flink-sql-gateway.*jar;$tempUdfJar;.*hive-site.xml \\\\ + |\\t-Dyarn.application.name=kyuubi_.* \\\\ + |\\t-Dyarn.tags=KYUUBI \\\\ + |\\t-Dcontainerized.master.env.FLINK_CONF_DIR=. \\\\ + |\\t-Dcontainerized.master.env.HIVE_CONF_DIR=. \\\\ + |\\t-Dexecution.target=yarn-application \\\\ + |\\t-c org.apache.kyuubi.engine.flink.FlinkSQLEngine .*kyuubi-flink-sql-engine_.*jar""".stripMargin + + "(?: \\\\\\n\\t--conf \\S+=\\S+)+") + // scalastyle:on line.size.limit val regex = new Regex(expectedCommands) val matcher = regex.pattern.matcher(actualCommands) assert(matcher.matches()) diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/hive/HiveProcessBuilderSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/hive/HiveProcessBuilderSuite.scala index bb9884dfa4b..a2f39633ca4 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/hive/HiveProcessBuilderSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/hive/HiveProcessBuilderSuite.scala @@ -30,18 +30,18 @@ class HiveProcessBuilderSuite extends KyuubiFunSuite { override def env: Map[String, String] = super.env + (HIVE_HADOOP_CLASSPATH_KEY -> "/hadoop") } val commands = builder.toString.split('\n') - assert(commands.head.endsWith("bin/java"), "wrong exec") - assert(builder.toString.contains("--conf\nkyuubi.session.user=kyuubi")) + assert(commands.head.contains("bin/java"), "wrong exec") + assert(builder.toString.contains("--conf kyuubi.session.user=kyuubi")) assert(commands.exists(ss => ss.contains("kyuubi-hive-sql-engine")), "wrong classpath") - assert(builder.toString.contains("--conf\nkyuubi.on=off")) + assert(builder.toString.contains("--conf kyuubi.on=off")) } test("default engine memory") { val conf = KyuubiConf() .set(ENGINE_HIVE_EXTRA_CLASSPATH, "/hadoop") val builder = new HiveProcessBuilder("kyuubi", conf) - val commands = builder.toString.split('\n') - assert(commands.contains("-Xmx1g")) + val command = builder.toString + assert(command.contains("-Xmx1g")) } test("set engine memory") { @@ -49,8 +49,8 @@ class HiveProcessBuilderSuite extends KyuubiFunSuite { .set(ENGINE_HIVE_MEMORY, "5g") .set(ENGINE_HIVE_EXTRA_CLASSPATH, "/hadoop") val builder = new HiveProcessBuilder("kyuubi", conf) - val commands = builder.toString.split('\n') - assert(commands.contains("-Xmx5g")) + val command = builder.toString + assert(command.contains("-Xmx5g")) } test("set engine java opts") { @@ -60,8 +60,8 @@ class HiveProcessBuilderSuite extends KyuubiFunSuite { ENGINE_HIVE_JAVA_OPTIONS, "-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=5005") val builder = new HiveProcessBuilder("kyuubi", conf) - val commands = builder.toString.split('\n') - assert(commands.contains("-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=5005")) + val command = builder.toString + assert(command.contains("-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=5005")) } test("set engine extra classpath") { diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/jdbc/JdbcProcessBuilderSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/jdbc/JdbcProcessBuilderSuite.scala index f85e363d39e..2be39d0f319 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/jdbc/JdbcProcessBuilderSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/jdbc/JdbcProcessBuilderSuite.scala @@ -27,13 +27,13 @@ class JdbcProcessBuilderSuite extends KyuubiFunSuite { .set(ENGINE_JDBC_CONNECTION_URL.key, "") .set(ENGINE_JDBC_CONNECTION_PASSWORD.key, "123456") val builder = new JdbcProcessBuilder("kyuubi", conf) - val commands = builder.toString.split("\n") - assert(commands.head.endsWith("bin/java"), "wrong exec") - assert(builder.toString.contains("--conf\nkyuubi.session.user=kyuubi")) - assert(commands.exists(ss => ss.contains("kyuubi-jdbc-engine")), "wrong classpath") - assert(builder.toString.contains("--conf\nkyuubi.on=off")) - assert(builder.toString.contains( - "--conf\nkyuubi.engine.jdbc.connection.password=*********(redacted)")) + val command = builder.toString + assert(command.contains("bin/java"), "wrong exec") + assert(command.contains("--conf kyuubi.session.user=kyuubi")) + assert(command.contains("kyuubi-jdbc-engine"), "wrong classpath") + assert(command.contains("--conf kyuubi.on=off")) + assert(command.contains( + "--conf kyuubi.engine.jdbc.connection.password=*********(redacted)")) } test("capture error from jdbc process builder") { @@ -47,8 +47,8 @@ class JdbcProcessBuilderSuite extends KyuubiFunSuite { val conf = KyuubiConf() .set(ENGINE_JDBC_CONNECTION_URL.key, "") val builder = new JdbcProcessBuilder("kyuubi", conf) - val commands = builder.toString.split("\n") - assert(commands.contains("-Xmx1g")) + val command = builder.toString + assert(command.contains("-Xmx1g")) } test("set engine memory") { @@ -56,8 +56,8 @@ class JdbcProcessBuilderSuite extends KyuubiFunSuite { .set(ENGINE_JDBC_MEMORY, "5g") .set(ENGINE_JDBC_CONNECTION_URL.key, "") val builder = new JdbcProcessBuilder("kyuubi", conf) - val commands = builder.toString.split("\n") - assert(commands.contains("-Xmx5g")) + val command = builder.toString + assert(command.contains("-Xmx5g")) } test("set engine java options") { @@ -67,8 +67,8 @@ class JdbcProcessBuilderSuite extends KyuubiFunSuite { ENGINE_JDBC_JAVA_OPTIONS, "-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=5005") val builder = new JdbcProcessBuilder("kyuubi", conf) - val commands = builder.toString.split("\n") - assert(commands.contains("-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=5005")) + val command = builder.toString + assert(command.contains("-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=5005")) } test("set extra classpath") { @@ -76,7 +76,7 @@ class JdbcProcessBuilderSuite extends KyuubiFunSuite { .set(ENGINE_JDBC_CONNECTION_URL.key, "") .set(ENGINE_JDBC_EXTRA_CLASSPATH, "/dummy_classpath/*") val builder = new JdbcProcessBuilder("kyuubi", conf) - val commands = builder.toString - assert(commands.contains("/dummy_classpath/*")) + val command = builder.toString + assert(command.contains("/dummy_classpath/*")) } } diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/spark/InitializeSQLSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/spark/InitializeSQLSuite.scala index 10d662467cf..e119d980266 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/spark/InitializeSQLSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/spark/InitializeSQLSuite.scala @@ -19,19 +19,19 @@ package org.apache.kyuubi.engine.spark import org.apache.kyuubi.WithKyuubiServer import org.apache.kyuubi.config.KyuubiConf -import org.apache.kyuubi.config.KyuubiConf.{ENGINE_INITIALIZE_SQL, ENGINE_SESSION_INITIALIZE_SQL} +import org.apache.kyuubi.config.KyuubiConf.{ENGINE_SESSION_SPARK_INITIALIZE_SQL, ENGINE_SPARK_INITIALIZE_SQL} import org.apache.kyuubi.operation.HiveJDBCTestHelper class InitializeSQLSuite extends WithKyuubiServer with HiveJDBCTestHelper { override protected val conf: KyuubiConf = { KyuubiConf() .set( - ENGINE_INITIALIZE_SQL.key, + ENGINE_SPARK_INITIALIZE_SQL.key, "CREATE DATABASE IF NOT EXISTS INIT_DB;" + "CREATE TABLE IF NOT EXISTS INIT_DB.test(a int) USING CSV;" + "INSERT OVERWRITE TABLE INIT_DB.test VALUES (1);") .set( - ENGINE_SESSION_INITIALIZE_SQL.key, + ENGINE_SESSION_SPARK_INITIALIZE_SQL.key, "CREATE DATABASE IF NOT EXISTS INIT_DB;" + "CREATE TABLE IF NOT EXISTS INIT_DB.test(a int) USING CSV;" + "INSERT INTO INIT_DB.test VALUES (2);") diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/spark/PySparkTests.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/spark/PySparkTests.scala index 16a7f728ea6..c723dcf4aa8 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/spark/PySparkTests.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/spark/PySparkTests.scala @@ -132,6 +132,61 @@ class PySparkTests extends WithKyuubiServer with HiveJDBCTestHelper { }) } + test("Support python magic syntax for python notebook") { + checkPythonRuntimeAndVersion() + withSessionConf()(Map(KyuubiConf.ENGINE_SPARK_PYTHON_MAGIC_ENABLED.key -> "true"))() { + withMultipleConnectionJdbcStatement()({ stmt => + val statement = stmt.asInstanceOf[KyuubiStatement] + statement.executePython("x = [[1, 'a'], [3, 'b']]") + + val resultSet1 = statement.executePython("%json x") + assert(resultSet1.next()) + val output1 = resultSet1.getString("output") + assert(output1 == "{\"application/json\":[[1,\"a\"],[3,\"b\"]]}") + + val resultSet2 = statement.executePython("%table x") + assert(resultSet2.next()) + val output2 = resultSet2.getString("output") + assert(output2 == "{\"application/vnd.livy.table.v1+json\":{" + + "\"headers\":[" + + "{\"name\":\"0\",\"type\":\"INT_TYPE\"},{\"name\":\"1\",\"type\":\"STRING_TYPE\"}" + + "]," + + "\"data\":[" + + "[1,\"a\"],[3,\"b\"]" + + "]}}") + + Seq("table", "json", "matplot").foreach { magic => + val e = intercept[KyuubiSQLException] { + statement.executePython(s"%$magic invalid_value") + }.getMessage + assert(e.contains("KeyError: 'invalid_value'")) + } + + statement.executePython("y = [[1, 2], [3, 'b']]") + var e = intercept[KyuubiSQLException] { + statement.executePython("%table y") + }.getMessage + assert(e.contains("table rows have different types")) + + e = intercept[KyuubiSQLException] { + statement.executePython("%magic_unknown") + }.getMessage + assert(e.contains("unknown magic command 'magic_unknown'")) + }) + } + + withSessionConf()(Map(KyuubiConf.ENGINE_SPARK_PYTHON_MAGIC_ENABLED.key -> "false"))() { + withMultipleConnectionJdbcStatement()({ stmt => + val statement = stmt.asInstanceOf[KyuubiStatement] + statement.executePython("x = [[1, 'a'], [3, 'b']]") + val e = intercept[KyuubiSQLException] { + statement.executePython("%json x") + }.getMessage + assert(e.contains("SyntaxError: invalid syntax")) + }) + } + } + private def runPySparkTest( pyCode: String, output: String): Unit = { diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilderSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilderSuite.scala index 408f42f6404..8cbbed5af40 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilderSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilderSuite.scala @@ -28,13 +28,14 @@ import org.scalatestplus.mockito.MockitoSugar import org.apache.kyuubi._ import org.apache.kyuubi.config.KyuubiConf -import org.apache.kyuubi.config.KyuubiConf.{ENGINE_LOG_TIMEOUT, ENGINE_SPARK_MAIN_RESOURCE} +import org.apache.kyuubi.config.KyuubiConf.{ENGINE_LOG_TIMEOUT, ENGINE_SPARK_MAIN_RESOURCE, KUBERNETES_FORCIBLY_REWRITE_DRIVER_POD_NAME, KUBERNETES_FORCIBLY_REWRITE_EXEC_POD_NAME_PREFIX} import org.apache.kyuubi.engine.ProcBuilder.KYUUBI_ENGINE_LOG_PATH_KEY import org.apache.kyuubi.engine.spark.SparkProcessBuilder._ import org.apache.kyuubi.ha.HighAvailabilityConf import org.apache.kyuubi.ha.client.AuthTypes import org.apache.kyuubi.service.ServiceUtils import org.apache.kyuubi.util.AssertionUtils._ +import org.apache.kyuubi.util.command.CommandLineUtils._ class SparkProcessBuilderSuite extends KerberizedTestHelper with MockitoSugar { private def conf = KyuubiConf().set("kyuubi.on", "off") @@ -336,6 +337,15 @@ class SparkProcessBuilderSuite extends KerberizedTestHelper with MockitoSugar { val conf4 = Map(APP_KEY -> chineseAppName) val driverPodName4 = processBuilder.appendPodNameConf(conf4).get(KUBERNETES_DRIVER_POD_NAME) assert(driverPodName4 === Some(s"kyuubi-test-$engineRefId-driver")) + val newProcessBuilder = new SparkProcessBuilder( + "kyuubi", + conf.set(MASTER_KEY, "k8s://internal").set(DEPLOY_MODE_KEY, "cluster").set( + KUBERNETES_FORCIBLY_REWRITE_DRIVER_POD_NAME, + true), + engineRefId) + val conf5 = Map(APP_KEY -> "test-forcibly-rewrite-app") + val driverPodName5 = newProcessBuilder.appendPodNameConf(conf5).get(KUBERNETES_DRIVER_POD_NAME) + assert(driverPodName5 === Some(s"kyuubi-$engineRefId-driver")) } test("[KYUUBI #5165] Test SparkProcessBuilder#appendExecutorPodPrefix") { @@ -363,6 +373,16 @@ class SparkProcessBuilderSuite extends KerberizedTestHelper with MockitoSugar { val execPodNamePrefix3 = processBuilder .appendPodNameConf(conf3).get(KUBERNETES_EXECUTOR_POD_NAME_PREFIX) assert(execPodNamePrefix3 === Some(s"kyuubi-$engineRefId")) + val newProcessBuilder = new SparkProcessBuilder( + "kyuubi", + conf.set(MASTER_KEY, "k8s://internal").set(DEPLOY_MODE_KEY, "cluster").set( + KUBERNETES_FORCIBLY_REWRITE_EXEC_POD_NAME_PREFIX, + true), + engineRefId) + val conf5 = Map(APP_KEY -> "test-forcibly-rewrite-app") + val execPodNamePrefix4 = newProcessBuilder + .appendPodNameConf(conf5).get(KUBERNETES_EXECUTOR_POD_NAME_PREFIX) + assert(execPodNamePrefix4 === Some(s"kyuubi-$engineRefId")) } test("extract spark core scala version") { @@ -404,9 +424,23 @@ class SparkProcessBuilderSuite extends KerberizedTestHelper with MockitoSugar { } } } + + test("default spark.yarn.maxAppAttempts conf in yarn mode") { + val conf1 = KyuubiConf(false) + conf1.set("spark.master", "k8s://test:12345") + val builder1 = new SparkProcessBuilder("", conf1) + val commands1 = builder1.toString.split(' ') + assert(!commands1.contains("spark.yarn.maxAppAttempts")) + + val conf2 = KyuubiConf(false) + conf2.set("spark.master", "yarn") + val builder2 = new SparkProcessBuilder("", conf2) + val commands2 = builder2.toString.split(' ') + assert(commands2.contains("spark.yarn.maxAppAttempts=1")) + } } class FakeSparkProcessBuilder(config: KyuubiConf) extends SparkProcessBuilder("fake", config) { - override protected lazy val commands: Array[String] = Array("ls") + override protected lazy val commands: Iterable[String] = Seq("ls") } diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/trino/TrinoProcessBuilderSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/trino/TrinoProcessBuilderSuite.scala index 2c37c41bc4b..a4dfad186a1 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/trino/TrinoProcessBuilderSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/trino/TrinoProcessBuilderSuite.scala @@ -30,11 +30,11 @@ class TrinoProcessBuilderSuite extends KyuubiFunSuite { .set(ENGINE_TRINO_CONNECTION_CATALOG, "dummy_catalog") val builder = new TrinoProcessBuilder("kyuubi", conf) val commands = builder.toString.split("\n") - assert(commands.head.endsWith("java")) - assert(builder.toString.contains(s"--conf\n${KYUUBI_SESSION_USER_KEY}=kyuubi")) - assert(builder.toString.contains(s"--conf\n${ENGINE_TRINO_CONNECTION_URL.key}=dummy_url")) + assert(commands.head.contains("java")) + assert(builder.toString.contains(s"--conf ${KYUUBI_SESSION_USER_KEY}=kyuubi")) + assert(builder.toString.contains(s"--conf ${ENGINE_TRINO_CONNECTION_URL.key}=dummy_url")) assert(builder.toString.contains( - s"--conf\n${ENGINE_TRINO_CONNECTION_CATALOG.key}=dummy_catalog")) + s"--conf ${ENGINE_TRINO_CONNECTION_CATALOG.key}=dummy_catalog")) } test("capture error from trino process builder") { @@ -49,8 +49,8 @@ class TrinoProcessBuilderSuite extends KyuubiFunSuite { .set(ENGINE_TRINO_CONNECTION_URL, "dummy_url") .set(ENGINE_TRINO_CONNECTION_CATALOG, "dummy_catalog") val builder = new TrinoProcessBuilder("kyuubi", conf) - val commands = builder.toString.split("\n") - assert(commands.contains("-Xmx1g")) + val command = builder.toString + assert(command.contains("-Xmx1g")) } test("set engine memory") { @@ -59,8 +59,8 @@ class TrinoProcessBuilderSuite extends KyuubiFunSuite { .set(ENGINE_TRINO_CONNECTION_CATALOG, "dummy_catalog") .set(ENGINE_TRINO_MEMORY, "5g") val builder = new TrinoProcessBuilder("kyuubi", conf) - val commands = builder.toString.split("\n") - assert(commands.contains("-Xmx5g")) + val command = builder.toString + assert(command.contains("-Xmx5g")) } test("set engine java options") { @@ -71,8 +71,8 @@ class TrinoProcessBuilderSuite extends KyuubiFunSuite { ENGINE_TRINO_JAVA_OPTIONS, "-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=5005") val builder = new TrinoProcessBuilder("kyuubi", conf) - val commands = builder.toString.split("\n") - assert(commands.contains("-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=5005")) + val command = builder.toString + assert(command.contains("-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=5005")) } test("set extra classpath") { diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/events/handler/ServerJsonLoggingEventHandlerSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/events/handler/ServerJsonLoggingEventHandlerSuite.scala index 1dc24aeec94..f78d68eaf71 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/events/handler/ServerJsonLoggingEventHandlerSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/events/handler/ServerJsonLoggingEventHandlerSuite.scala @@ -27,7 +27,6 @@ import scala.util.matching.Regex import com.fasterxml.jackson.databind.ObjectMapper import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} -import org.apache.hive.service.rpc.thrift.{TOpenSessionReq, TStatusCode} import org.scalatest.time.SpanSugar.convertIntToGrainOfTime import org.apache.kyuubi._ @@ -40,6 +39,7 @@ import org.apache.kyuubi.operation.OperationState._ import org.apache.kyuubi.server.KyuubiServer import org.apache.kyuubi.service.ServiceState import org.apache.kyuubi.session.{KyuubiSessionManager, SessionType} +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TOpenSessionReq, TStatusCode} class ServerJsonLoggingEventHandlerSuite extends WithKyuubiServer with HiveJDBCTestHelper with BatchTestHelper { diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/KyuubiIncrementCollectSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/KyuubiIncrementCollectSuite.scala index be7f0e80856..7e2890c42ee 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/KyuubiIncrementCollectSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/KyuubiIncrementCollectSuite.scala @@ -20,11 +20,11 @@ package org.apache.kyuubi.operation import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer -import org.apache.hive.service.rpc.thrift._ import org.scalatest.time.SpanSugar.convertIntToGrainOfTime import org.apache.kyuubi.WithKyuubiServer import org.apache.kyuubi.config.KyuubiConf +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ class KyuubiIncrementCollectSuite extends WithKyuubiServer with HiveJDBCTestHelper { diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/KyuubiOperationPerConnectionSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/KyuubiOperationPerConnectionSuite.scala index 97ab21998b9..1324c70d775 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/KyuubiOperationPerConnectionSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/KyuubiOperationPerConnectionSuite.scala @@ -23,7 +23,6 @@ import java.util.Properties import scala.collection.JavaConverters._ -import org.apache.hive.service.rpc.thrift._ import org.scalatest.time.SpanSugar.convertIntToGrainOfTime import org.apache.kyuubi.{KYUUBI_VERSION, WithKyuubiServer} @@ -35,6 +34,7 @@ import org.apache.kyuubi.jdbc.hive.{KyuubiConnection, KyuubiSQLException} import org.apache.kyuubi.metrics.{MetricsConstants, MetricsSystem} import org.apache.kyuubi.plugin.SessionConfAdvisor import org.apache.kyuubi.session.{KyuubiSessionImpl, KyuubiSessionManager, SessionHandle, SessionType} +import org.apache.kyuubi.shaded.hive.service.rpc.thrift._ /** * UT with Connection level engine shared cost much time, only run basic jdbc tests. diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/KyuubiOperationPerUserSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/KyuubiOperationPerUserSuite.scala index a67534164bd..de491e03f21 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/KyuubiOperationPerUserSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/KyuubiOperationPerUserSuite.scala @@ -20,7 +20,6 @@ package org.apache.kyuubi.operation import java.util.{Properties, UUID} import org.apache.hadoop.fs.{FileSystem, FileUtil, Path} -import org.apache.hive.service.rpc.thrift.{TExecuteStatementReq, TGetInfoReq, TGetInfoType, TStatusCode} import org.scalatest.time.SpanSugar._ import org.apache.kyuubi.{KYUUBI_VERSION, Utils, WithKyuubiServer, WithSimpleDFSService} @@ -30,6 +29,7 @@ import org.apache.kyuubi.jdbc.KyuubiHiveDriver import org.apache.kyuubi.jdbc.hive.{KyuubiConnection, KyuubiStatement} import org.apache.kyuubi.metrics.{MetricsConstants, MetricsSystem} import org.apache.kyuubi.session.{KyuubiSessionImpl, SessionHandle} +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TExecuteStatementReq, TGetInfoReq, TGetInfoType, TStatusCode} import org.apache.kyuubi.util.SemanticVersion import org.apache.kyuubi.zookeeper.ZookeeperConf diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/KyuubiRestAuthenticationSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/KyuubiRestAuthenticationSuite.scala index 089b756f54f..260264b6797 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/KyuubiRestAuthenticationSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/operation/KyuubiRestAuthenticationSuite.scala @@ -29,8 +29,8 @@ import org.apache.hadoop.security.UserGroupInformation import org.apache.kyuubi.RestClientTestHelper import org.apache.kyuubi.client.api.v1.dto.{SessionHandle, SessionOpenCount, SessionOpenRequest} import org.apache.kyuubi.config.KyuubiConf -import org.apache.kyuubi.server.http.authentication.AuthenticationHandler.AUTHORIZATION_HEADER import org.apache.kyuubi.server.http.authentication.AuthSchemes +import org.apache.kyuubi.server.http.util.HttpAuthUtils._ import org.apache.kyuubi.service.authentication.InternalSecurityAccessor import org.apache.kyuubi.session.KyuubiSession @@ -52,13 +52,10 @@ class KyuubiRestAuthenticationSuite extends RestClientTestHelper { } test("test with LDAP authorization") { - val encodeAuthorization = new String( - Base64.getEncoder.encode( - s"$ldapUser:$ldapUserPasswd".getBytes()), - "UTF-8") + val response = webTarget.path("api/v1/sessions/count") .request() - .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader(ldapUser, ldapUserPasswd)) .get() assert(HttpServletResponse.SC_OK == response.getStatus) @@ -67,13 +64,9 @@ class KyuubiRestAuthenticationSuite extends RestClientTestHelper { } test("test with CUSTOM authorization") { - val encodeAuthorization = new String( - Base64.getEncoder.encode( - s"$customUser:$customPasswd".getBytes()), - "UTF-8") val response = webTarget.path("api/v1/sessions/count") .request() - .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader(customUser, customPasswd)) .get() assert(HttpServletResponse.SC_FORBIDDEN == response.getStatus) @@ -170,7 +163,7 @@ class KyuubiRestAuthenticationSuite extends RestClientTestHelper { "UTF-8") var response = webTarget.path("api/v1/sessions/count") .request() - .header(AUTHORIZATION_HEADER, s"${AuthSchemes.KYUUBI_INTERNAL.toString} $encodeAuthorization") + .header(AUTHORIZATION_HEADER, s"${AuthSchemes.KYUUBI_INTERNAL} $encodeAuthorization") .get() assert(HttpServletResponse.SC_OK == response.getStatus) @@ -183,7 +176,7 @@ class KyuubiRestAuthenticationSuite extends RestClientTestHelper { "UTF-8") response = webTarget.path("api/v1/sessions/count") .request() - .header(AUTHORIZATION_HEADER, s"${AuthSchemes.KYUUBI_INTERNAL.toString} $badAuthorization") + .header(AUTHORIZATION_HEADER, s"${AuthSchemes.KYUUBI_INTERNAL} $badAuthorization") .get() assert(HttpServletResponse.SC_UNAUTHORIZED == response.getStatus) diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/plugin/PluginLoaderSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/plugin/PluginLoaderSuite.scala index e24b79c2cb5..bd7f78e2423 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/plugin/PluginLoaderSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/plugin/PluginLoaderSuite.scala @@ -27,15 +27,15 @@ class PluginLoaderSuite extends KyuubiFunSuite { test("SessionConfAdvisor - wrong class") { val conf = new KyuubiConf(false) - assert(PluginLoader.loadSessionConfAdvisor(conf).isInstanceOf[DefaultSessionConfAdvisor]) + assert(PluginLoader.loadSessionConfAdvisor(conf).head.isInstanceOf[DefaultSessionConfAdvisor]) - conf.set(KyuubiConf.SESSION_CONF_ADVISOR, classOf[InvalidSessionConfAdvisor].getName) + conf.set(KyuubiConf.SESSION_CONF_ADVISOR, Seq(classOf[InvalidSessionConfAdvisor].getName)) val msg1 = intercept[KyuubiException] { PluginLoader.loadSessionConfAdvisor(conf) }.getMessage assert(msg1.contains(s"is not a child of '${classOf[SessionConfAdvisor].getName}'")) - conf.set(KyuubiConf.SESSION_CONF_ADVISOR, "non.exists") + conf.set(KyuubiConf.SESSION_CONF_ADVISOR, Seq("non.exists")) val msg2 = intercept[IllegalArgumentException] { PluginLoader.loadSessionConfAdvisor(conf) }.getMessage @@ -44,27 +44,46 @@ class PluginLoaderSuite extends KyuubiFunSuite { test("FileSessionConfAdvisor") { val conf = new KyuubiConf(false) - conf.set(KyuubiConf.SESSION_CONF_ADVISOR, classOf[FileSessionConfAdvisor].getName) + conf.set(KyuubiConf.SESSION_CONF_ADVISOR, Seq(classOf[FileSessionConfAdvisor].getName)) val advisor = PluginLoader.loadSessionConfAdvisor(conf) - val emptyConfig = advisor.getConfOverlay("chris", conf.getAll.asJava) + val emptyConfig = + advisor.map(_.getConfOverlay("chris", conf.getAll.asJava).asScala).reduce(_ ++ _).asJava assert(emptyConfig.isEmpty) conf.set(KyuubiConf.SESSION_CONF_PROFILE, "non.exists") - val nonExistsConfig = advisor.getConfOverlay("chris", conf.getAll.asJava) + val nonExistsConfig = + advisor.map(_.getConfOverlay("chris", conf.getAll.asJava).asScala).reduce(_ ++ _).asJava assert(nonExistsConfig.isEmpty) conf.set(KyuubiConf.SESSION_CONF_PROFILE, "cluster-a") - val clusterAConf = advisor.getConfOverlay("chris", conf.getAll.asJava) + val clusterAConf = + advisor.map(_.getConfOverlay("chris", conf.getAll.asJava).asScala).reduce(_ ++ _).asJava assert(clusterAConf.get("kyuubi.ha.namespace") == "kyuubi-ns-a") assert(clusterAConf.get("kyuubi.zk.ha.namespace") == null) assert(clusterAConf.size() == 5) - val clusterAConfFromCache = advisor.getConfOverlay("chris", conf.getAll.asJava) + val clusterAConfFromCache = + advisor.map(_.getConfOverlay("chris", conf.getAll.asJava).asScala).reduce(_ ++ _).asJava assert(clusterAConfFromCache.get("kyuubi.ha.namespace") == "kyuubi-ns-a") assert(clusterAConfFromCache.get("kyuubi.zk.ha.namespace") == null) assert(clusterAConfFromCache.size() == 5) } + test("SessionConfAdvisor - multi class") { + val conf = new KyuubiConf(false) + conf.set( + KyuubiConf.SESSION_CONF_ADVISOR, + Seq(classOf[FileSessionConfAdvisor].getName, classOf[TestSessionConfAdvisor].getName)) + val advisor = PluginLoader.loadSessionConfAdvisor(conf) + conf.set(KyuubiConf.SESSION_CONF_PROFILE, "cluster-a") + val clusterAConf = + advisor.map(_.getConfOverlay("chris", conf.getAll.asJava).asScala).reduce(_ ++ _).asJava + assert(clusterAConf.get("kyuubi.ha.namespace") == "kyuubi-ns-a") + assert(clusterAConf.get("kyuubi.zk.ha.namespace") == null) + assert(clusterAConf.get("spark.k3") == "v3") + assert(clusterAConf.size() == 7) + } + test("GroupProvider - wrong class") { val conf = new KyuubiConf(false) conf.set(KyuubiConf.GROUP_PROVIDER, "hadoop") @@ -99,3 +118,11 @@ class PluginLoaderSuite extends KyuubiFunSuite { class InvalidSessionConfAdvisor class InvalidGroupProvider + +class TestSessionConfAdvisor extends SessionConfAdvisor { + override def getConfOverlay( + user: String, + sessionConf: java.util.Map[String, String]): java.util.Map[String, String] = { + Map("spark.k3" -> "v3", "spark.k4" -> "v4").asJava + } +} diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/KyuubiTBinaryFrontendServiceSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/KyuubiTBinaryFrontendServiceSuite.scala index 5c54cbbb4b7..9b41fb067c3 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/KyuubiTBinaryFrontendServiceSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/KyuubiTBinaryFrontendServiceSuite.scala @@ -19,13 +19,13 @@ package org.apache.kyuubi.server import scala.collection.JavaConverters._ -import org.apache.hive.service.rpc.thrift.{TOpenSessionReq, TSessionHandle} import org.scalatest.time.SpanSugar.convertIntToGrainOfTime import org.apache.kyuubi.{KyuubiFunSuite, Utils, WithKyuubiServer} import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.metrics.{MetricsConstants, MetricsSystem} import org.apache.kyuubi.operation.TClientTestUtils +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.{TOpenSessionReq, TSessionHandle} class KyuubiTBinaryFrontendServiceSuite extends WithKyuubiServer with KyuubiFunSuite { diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/api/v1/AdminResourceSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/api/v1/AdminResourceSuite.scala index ea87e3ea0d8..0951d82727c 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/api/v1/AdminResourceSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/api/v1/AdminResourceSuite.scala @@ -17,22 +17,21 @@ package org.apache.kyuubi.server.api.v1 -import java.nio.charset.StandardCharsets import java.time.Duration -import java.util.{Base64, UUID} +import java.util.UUID import javax.ws.rs.client.Entity -import javax.ws.rs.core.{GenericType, MediaType} +import javax.ws.rs.core.{GenericType, MediaType, Response} import scala.collection.JavaConverters._ -import org.apache.hive.service.rpc.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V2 import org.mockito.Mockito.lenient import org.scalatest.time.SpanSugar.convertIntToGrainOfTime import org.scalatestplus.mockito.MockitoSugar.mock import org.apache.kyuubi.{KYUUBI_VERSION, KyuubiFunSuite, RestFrontendTestHelper, Utils} -import org.apache.kyuubi.client.api.v1.dto.{Engine, OperationData, ServerData, SessionData, SessionHandle, SessionOpenRequest} +import org.apache.kyuubi.client.api.v1.dto._ import org.apache.kyuubi.config.KyuubiConf +import org.apache.kyuubi.config.KyuubiConf._ import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_SESSION_CONNECTION_URL_KEY import org.apache.kyuubi.engine.{ApplicationManagerInfo, ApplicationState, EngineRef, KyuubiApplicationManager} import org.apache.kyuubi.engine.EngineType.SPARK_SQL @@ -42,22 +41,20 @@ import org.apache.kyuubi.ha.client.{DiscoveryPaths, ServiceDiscovery} import org.apache.kyuubi.ha.client.DiscoveryClientProvider.withDiscoveryClient import org.apache.kyuubi.plugin.PluginLoader import org.apache.kyuubi.server.KyuubiRestFrontendService -import org.apache.kyuubi.server.http.authentication.AuthenticationHandler.AUTHORIZATION_HEADER +import org.apache.kyuubi.server.http.util.HttpAuthUtils +import org.apache.kyuubi.server.http.util.HttpAuthUtils.AUTHORIZATION_HEADER +import org.apache.kyuubi.service.authentication.AnonymousAuthenticationProviderImpl +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V2 class AdminResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { private val engineMgr = new KyuubiApplicationManager() override protected lazy val conf: KyuubiConf = KyuubiConf() - .set(KyuubiConf.SERVER_ADMINISTRATORS, Set("admin001")) - .set(KyuubiConf.ENGINE_IDLE_TIMEOUT, Duration.ofMinutes(3).toMillis) - - private val encodeAuthorization: String = { - new String( - Base64.getEncoder.encode( - s"${Utils.currentUser}:".getBytes()), - StandardCharsets.UTF_8) - } + .set(AUTHENTICATION_METHOD, Set("CUSTOM")) + .set(AUTHENTICATION_CUSTOM_CLASS, classOf[AnonymousAuthenticationProviderImpl].getName) + .set(SERVER_ADMINISTRATORS, Set("admin001")) + .set(ENGINE_IDLE_TIMEOUT, Duration.ofMinutes(3).toMillis) override def beforeAll(): Unit = { super.beforeAll() @@ -74,70 +71,64 @@ class AdminResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { var response = webTarget.path("api/v1/admin/refresh/hadoop_conf") .request() .post(null) - assert(405 == response.getStatus) + assert(response.getStatus === 401) response = webTarget.path("api/v1/admin/refresh/hadoop_conf") .request() - .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") + .header(AUTHORIZATION_HEADER, HttpAuthUtils.basicAuthorizationHeader(Utils.currentUser)) .post(null) - assert(200 == response.getStatus) + assert(response.getStatus === 200) - val admin001AuthHeader = new String( - Base64.getEncoder.encode("admin001".getBytes()), - StandardCharsets.UTF_8) response = webTarget.path("api/v1/admin/refresh/hadoop_conf") .request() - .header(AUTHORIZATION_HEADER, s"BASIC $admin001AuthHeader") + .header(AUTHORIZATION_HEADER, HttpAuthUtils.basicAuthorizationHeader("admin001")) .post(null) - assert(200 == response.getStatus) + assert(response.getStatus === 200) - val admin002AuthHeader = new String( - Base64.getEncoder.encode("admin002".getBytes()), - StandardCharsets.UTF_8) response = webTarget.path("api/v1/admin/refresh/hadoop_conf") .request() - .header(AUTHORIZATION_HEADER, s"BASIC $admin002AuthHeader") + .header(AUTHORIZATION_HEADER, HttpAuthUtils.basicAuthorizationHeader("admin002")) .post(null) - assert(405 == response.getStatus) + assert(response.getStatus === 405) } test("refresh user defaults config of the kyuubi server") { var response = webTarget.path("api/v1/admin/refresh/user_defaults_conf") .request() .post(null) - assert(405 == response.getStatus) + assert(response.getStatus === 401) response = webTarget.path("api/v1/admin/refresh/user_defaults_conf") .request() - .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") + .header(AUTHORIZATION_HEADER, HttpAuthUtils.basicAuthorizationHeader(Utils.currentUser)) .post(null) - assert(200 == response.getStatus) + assert(response.getStatus === 200) } test("refresh unlimited users of the kyuubi server") { var response = webTarget.path("api/v1/admin/refresh/unlimited_users") .request() .post(null) - assert(405 == response.getStatus) + assert(response.getStatus === 401) response = webTarget.path("api/v1/admin/refresh/unlimited_users") .request() - .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") + .header(AUTHORIZATION_HEADER, HttpAuthUtils.basicAuthorizationHeader(Utils.currentUser)) .post(null) - assert(200 == response.getStatus) + assert(response.getStatus === 200) } test("refresh deny users of the kyuubi server") { var response = webTarget.path("api/v1/admin/refresh/deny_users") .request() .post(null) - assert(405 == response.getStatus) + assert(response.getStatus === 401) response = webTarget.path("api/v1/admin/refresh/deny_users") .request() - .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") + .header(AUTHORIZATION_HEADER, HttpAuthUtils.basicAuthorizationHeader(Utils.currentUser)) .post(null) - assert(200 == response.getStatus) + assert(response.getStatus === 200) } test("list/close sessions") { @@ -145,13 +136,15 @@ class AdminResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { var response = webTarget.path("api/v1/sessions") .request(MediaType.APPLICATION_JSON_TYPE) + .header(AUTHORIZATION_HEADER, HttpAuthUtils.basicAuthorizationHeader(Utils.currentUser)) .post(Entity.entity(requestObj, MediaType.APPLICATION_JSON_TYPE)) + assert(response.getStatus === 200) // get session list var response2 = webTarget.path("api/v1/admin/sessions").request() - .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") + .header(AUTHORIZATION_HEADER, HttpAuthUtils.basicAuthorizationHeader(Utils.currentUser)) .get() - assert(200 == response2.getStatus) + assert(response2.getStatus === 200) val sessions1 = response2.readEntity(new GenericType[Seq[SessionData]]() {}) assert(sessions1.nonEmpty) assert(sessions1.head.getConf.get(KYUUBI_SESSION_CONNECTION_URL_KEY) === fe.connectionUrl) @@ -159,13 +152,13 @@ class AdminResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { // close an opened session val sessionHandle = response.readEntity(classOf[SessionHandle]).getIdentifier response = webTarget.path(s"api/v1/admin/sessions/$sessionHandle").request() - .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") + .header(AUTHORIZATION_HEADER, HttpAuthUtils.basicAuthorizationHeader(Utils.currentUser)) .delete() - assert(200 == response.getStatus) + assert(response.getStatus === 200) // get session list again response2 = webTarget.path("api/v1/admin/sessions").request() - .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") + .header(AUTHORIZATION_HEADER, HttpAuthUtils.basicAuthorizationHeader(Utils.currentUser)) .get() assert(200 == response2.getStatus) val sessions2 = response2.readEntity(classOf[Seq[SessionData]]) @@ -205,26 +198,26 @@ class AdminResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { var response = webTarget.path("api/v1/admin/sessions") .queryParam("users", "admin") .request() - .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") + .header(AUTHORIZATION_HEADER, HttpAuthUtils.basicAuthorizationHeader(Utils.currentUser)) .get() var sessions = response.readEntity(classOf[Seq[SessionData]]) - assert(200 == response.getStatus) + assert(response.getStatus === 200) assert(sessions.size == 2) response = webTarget.path("api/v1/admin/sessions") .queryParam("users", "test_user_1,test_user_2") .request() - .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") + .header(AUTHORIZATION_HEADER, HttpAuthUtils.basicAuthorizationHeader(Utils.currentUser)) .get() sessions = response.readEntity(classOf[Seq[SessionData]]) - assert(200 == response.getStatus) + assert(response.getStatus === 200) assert(sessions.size == 2) // list operations response = webTarget.path("api/v1/admin/operations") .queryParam("users", "test_user_1,test_user_2") .request() - .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") + .header(AUTHORIZATION_HEADER, HttpAuthUtils.basicAuthorizationHeader(Utils.currentUser)) .get() var operations = response.readEntity(classOf[Seq[OperationData]]) assert(operations.size == 2) @@ -232,10 +225,10 @@ class AdminResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { response = webTarget.path("api/v1/admin/operations") .queryParam("sessionHandle", sessionHandle.identifier) .request() - .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") + .header(AUTHORIZATION_HEADER, HttpAuthUtils.basicAuthorizationHeader(Utils.currentUser)) .get() operations = response.readEntity(classOf[Seq[OperationData]]) - assert(200 == response.getStatus) + assert(response.getStatus === 200) assert(operations.size == 1) } @@ -250,22 +243,22 @@ class AdminResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { // list operations var response = webTarget.path("api/v1/admin/operations").request() - .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") + .header(AUTHORIZATION_HEADER, HttpAuthUtils.basicAuthorizationHeader(Utils.currentUser)) .get() - assert(200 == response.getStatus) + assert(response.getStatus === 200) var operations = response.readEntity(new GenericType[Seq[OperationData]]() {}) assert(operations.nonEmpty) assert(operations.map(op => op.getIdentifier).contains(operation.identifier.toString)) // close operation response = webTarget.path(s"api/v1/admin/operations/${operation.identifier}").request() - .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") + .header(AUTHORIZATION_HEADER, HttpAuthUtils.basicAuthorizationHeader(Utils.currentUser)) .delete() - assert(200 == response.getStatus) + assert(response.getStatus === 200) // list again response = webTarget.path("api/v1/admin/operations").request() - .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") + .header(AUTHORIZATION_HEADER, HttpAuthUtils.basicAuthorizationHeader(Utils.currentUser)) .get() operations = response.readEntity(new GenericType[Seq[OperationData]]() {}) assert(!operations.map(op => op.getIdentifier).contains(operation.identifier.toString)) @@ -297,10 +290,10 @@ class AdminResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { .queryParam("sharelevel", "USER") .queryParam("type", "spark_sql") .request(MediaType.APPLICATION_JSON_TYPE) - .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") + .header(AUTHORIZATION_HEADER, HttpAuthUtils.basicAuthorizationHeader(Utils.currentUser)) .delete() - assert(200 == response.getStatus) + assert(response.getStatus === 200) assert(client.pathExists(engineSpace)) eventually(timeout(5.seconds), interval(100.milliseconds)) { assert(client.getChildren(engineSpace).isEmpty, s"refId same with $id?") @@ -343,10 +336,10 @@ class AdminResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { .queryParam("sharelevel", "GROUP") .queryParam("type", "spark_sql") .request(MediaType.APPLICATION_JSON_TYPE) - .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") + .header(AUTHORIZATION_HEADER, HttpAuthUtils.basicAuthorizationHeader(Utils.currentUser)) .delete() - assert(200 == response.getStatus) + assert(response.getStatus === 200) assert(client.pathExists(engineSpace)) eventually(timeout(5.seconds), interval(100.milliseconds)) { assert(client.getChildren(engineSpace).isEmpty, s"refId same with $id?") @@ -387,10 +380,73 @@ class AdminResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { .queryParam("type", "spark_sql") .queryParam("subdomain", id) .request(MediaType.APPLICATION_JSON_TYPE) - .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") + .header(AUTHORIZATION_HEADER, HttpAuthUtils.basicAuthorizationHeader(Utils.currentUser)) .delete() - assert(200 == response.getStatus) + assert(response.getStatus === 200) + } + } + + test("delete engine - user share level & proxyUser") { + val normalUser = "kyuubi" + + val id = UUID.randomUUID().toString + conf.set(KyuubiConf.ENGINE_SHARE_LEVEL, USER.toString) + conf.set(KyuubiConf.ENGINE_TYPE, SPARK_SQL.toString) + conf.set(KyuubiConf.FRONTEND_THRIFT_BINARY_BIND_PORT, 0) + conf.set(HighAvailabilityConf.HA_NAMESPACE, "kyuubi_test") + conf.set(KyuubiConf.GROUP_PROVIDER, "hadoop") + + // In EngineRef, when use hive.server2.proxy.user or kyuubi.session.proxy.user + // the user is the proxyUser, and in our test it is normalUser + val engine = + new EngineRef(conf.clone, user = normalUser, PluginLoader.loadGroupProvider(conf), id, null) + + // so as the firstChild in engineSpace we use normalUser + val engineSpace = DiscoveryPaths.makePath( + s"kyuubi_test_${KYUUBI_VERSION}_USER_SPARK_SQL", + normalUser, + "default") + + withDiscoveryClient(conf) { client => + engine.getOrCreate(client) + + assert(client.pathExists(engineSpace)) + assert(client.getChildren(engineSpace).size == 1) + + def runDeleteEngine( + kyuubiProxyUser: Option[String], + hs2ProxyUser: Option[String]): Response = { + var internalWebTarget = webTarget.path("api/v1/admin/engine") + .queryParam("sharelevel", "USER") + .queryParam("type", "SPARK_SQL") + + kyuubiProxyUser.map(username => + internalWebTarget = internalWebTarget.queryParam("proxyUser", username)) + hs2ProxyUser.map(username => + internalWebTarget = internalWebTarget.queryParam("hive.server2.proxy.user", username)) + + internalWebTarget.request(MediaType.APPLICATION_JSON_TYPE) + .header(AUTHORIZATION_HEADER, HttpAuthUtils.basicAuthorizationHeader("anonymous")) + .delete() + } + + // use proxyUser + val deleteEngineResponse1 = runDeleteEngine(Option(normalUser), None) + assert(deleteEngineResponse1.getStatus === 405) + val errorMessage = s"Failed to validate proxy privilege of anonymous for $normalUser" + assert(deleteEngineResponse1.readEntity(classOf[String]).contains(errorMessage)) + + // it should be the same behavior as hive.server2.proxy.user + val deleteEngineResponse2 = runDeleteEngine(None, Option(normalUser)) + assert(deleteEngineResponse2.getStatus === 405) + assert(deleteEngineResponse2.readEntity(classOf[String]).contains(errorMessage)) + + // when both set, proxyUser takes precedence + val deleteEngineResponse3 = + runDeleteEngine(Option(normalUser), Option(s"${normalUser}HiveServer2")) + assert(deleteEngineResponse3.getStatus === 405) + assert(deleteEngineResponse3.readEntity(classOf[String]).contains(errorMessage)) } } @@ -419,10 +475,10 @@ class AdminResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { val response = webTarget.path("api/v1/admin/engine") .queryParam("type", "spark_sql") .request(MediaType.APPLICATION_JSON_TYPE) - .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") + .header(AUTHORIZATION_HEADER, HttpAuthUtils.basicAuthorizationHeader(Utils.currentUser)) .get - assert(200 == response.getStatus) + assert(response.getStatus === 200) val engines = response.readEntity(new GenericType[Seq[Engine]]() {}) assert(engines.size == 1) assert(engines(0).getEngineType == "SPARK_SQL") @@ -465,10 +521,10 @@ class AdminResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { val response = webTarget.path("api/v1/admin/engine") .queryParam("type", "spark_sql") .request(MediaType.APPLICATION_JSON_TYPE) - .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") + .header(AUTHORIZATION_HEADER, HttpAuthUtils.basicAuthorizationHeader(Utils.currentUser)) .get - assert(200 == response.getStatus) + assert(response.getStatus === 200) val engines = response.readEntity(new GenericType[Seq[Engine]]() {}) assert(engines.size == 1) assert(engines(0).getEngineType == "SPARK_SQL") @@ -524,9 +580,9 @@ class AdminResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { val response = webTarget.path("api/v1/admin/engine") .queryParam("type", "spark_sql") .request(MediaType.APPLICATION_JSON_TYPE) - .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") + .header(AUTHORIZATION_HEADER, HttpAuthUtils.basicAuthorizationHeader(Utils.currentUser)) .get - assert(200 == response.getStatus) + assert(response.getStatus === 200) val result = response.readEntity(new GenericType[Seq[Engine]]() {}) assert(result.size == 2) @@ -534,7 +590,7 @@ class AdminResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { .queryParam("type", "spark_sql") .queryParam("subdomain", id1) .request(MediaType.APPLICATION_JSON_TYPE) - .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") + .header(AUTHORIZATION_HEADER, HttpAuthUtils.basicAuthorizationHeader(Utils.currentUser)) .get assert(200 == response1.getStatus) val result1 = response1.readEntity(new GenericType[Seq[Engine]]() {}) @@ -552,53 +608,25 @@ class AdminResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { } } - test("list server") { - // Mock Kyuubi Server - val serverDiscovery = mock[ServiceDiscovery] - lenient.when(serverDiscovery.fe).thenReturn(fe) - val namespace = conf.get(HighAvailabilityConf.HA_NAMESPACE) - withDiscoveryClient(conf) { client => - client.registerService(conf, namespace, serverDiscovery) - - val response = webTarget.path("api/v1/admin/server") - .request() - .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") - .get - - assert(200 == response.getStatus) - val result = response.readEntity(new GenericType[Seq[ServerData]]() {}) - assert(result.size == 1) - val testServer = result.head - val restFrontendService = fe.asInstanceOf[KyuubiRestFrontendService] - - assert(namespace.equals(testServer.getNamespace.replaceFirst("/", ""))) - assert(restFrontendService.host.equals(testServer.getHost)) - assert(restFrontendService.connectionUrl.equals(testServer.getInstance())) - assert(!testServer.getAttributes.isEmpty) - val attributes = testServer.getAttributes - assert(attributes.containsKey("serviceUri") && - attributes.get("serviceUri").equals(fe.connectionUrl)) - assert(attributes.containsKey("version")) - assert(attributes.containsKey("sequence")) - assert("Running".equals(testServer.getStatus)) - } - } + test("list engine - user share level & proxyUser") { + val normalUser = "kyuubi" - test("list all engine - user share level") { val id = UUID.randomUUID().toString conf.set(KyuubiConf.ENGINE_SHARE_LEVEL, USER.toString) conf.set(KyuubiConf.ENGINE_TYPE, SPARK_SQL.toString) conf.set(KyuubiConf.FRONTEND_THRIFT_BINARY_BIND_PORT, 0) conf.set(HighAvailabilityConf.HA_NAMESPACE, "kyuubi_test") - conf.set(KyuubiConf.ENGINE_IDLE_TIMEOUT, 180000L) conf.set(KyuubiConf.GROUP_PROVIDER, "hadoop") + // In EngineRef, when use hive.server2.proxy.user or kyuubi.session.proxy.user + // the user is the proxyUser, and in our test it is normalUser val engine = - new EngineRef(conf.clone, Utils.currentUser, PluginLoader.loadGroupProvider(conf), id, null) + new EngineRef(conf.clone, user = normalUser, PluginLoader.loadGroupProvider(conf), id, null) + // so as the firstChild in engineSpace we use normalUser val engineSpace = DiscoveryPaths.makePath( s"kyuubi_test_${KYUUBI_VERSION}_USER_SPARK_SQL", - Utils.currentUser, + normalUser, "") withDiscoveryClient(conf) { client => @@ -607,131 +635,71 @@ class AdminResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { assert(client.pathExists(engineSpace)) assert(client.getChildren(engineSpace).size == 1) - val response = webTarget.path("api/v1/admin/engine") - .queryParam("all", "true") - .request(MediaType.APPLICATION_JSON_TYPE) - .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") - .get - - assert(200 == response.getStatus) - val engines = response.readEntity(new GenericType[Seq[Engine]]() {}) - assert(engines.size == 1) - assert(engines(0).getEngineType == "SPARK_SQL") - assert(engines(0).getSharelevel == "USER") - assert(engines(0).getSubdomain == "default") - - // kill the engine application - engineMgr.killApplication(ApplicationManagerInfo(None), id) - eventually(timeout(30.seconds), interval(100.milliseconds)) { - assert(engineMgr.getApplicationInfo(ApplicationManagerInfo(None), id).exists( - _.state == ApplicationState.NOT_FOUND)) + def runListEngine(kyuubiProxyUser: Option[String], hs2ProxyUser: Option[String]): Response = { + var internalWebTarget = webTarget.path("api/v1/admin/engine") + .queryParam("sharelevel", "USER") + .queryParam("type", "SPARK_SQL") + + kyuubiProxyUser.map { username => + internalWebTarget = internalWebTarget.queryParam("proxyUser", username) + } + hs2ProxyUser.map { username => + internalWebTarget = internalWebTarget.queryParam("hive.server2.proxy.user", username) + } + + internalWebTarget.request(MediaType.APPLICATION_JSON_TYPE) + .header(AUTHORIZATION_HEADER, HttpAuthUtils.basicAuthorizationHeader("anonymous")) + .get } - } - } - - test("list all engines - group share level") { - val id = UUID.randomUUID().toString - conf.set(KyuubiConf.ENGINE_SHARE_LEVEL, GROUP.toString) - conf.set(KyuubiConf.ENGINE_TYPE, SPARK_SQL.toString) - conf.set(KyuubiConf.FRONTEND_THRIFT_BINARY_BIND_PORT, 0) - conf.set(HighAvailabilityConf.HA_NAMESPACE, "kyuubi_test") - conf.set(KyuubiConf.ENGINE_IDLE_TIMEOUT, 180000L) - conf.set(KyuubiConf.GROUP_PROVIDER, "hadoop") - - val engine = - new EngineRef(conf.clone, Utils.currentUser, PluginLoader.loadGroupProvider(conf), id, null) - - val engineSpace = DiscoveryPaths.makePath( - s"kyuubi_test_${KYUUBI_VERSION}_GROUP_SPARK_SQL", - fe.asInstanceOf[KyuubiRestFrontendService].sessionManager.groupProvider.primaryGroup( - Utils.currentUser, - null), - "") - withDiscoveryClient(conf) { client => - engine.getOrCreate(client) - - assert(client.pathExists(engineSpace)) - assert(client.getChildren(engineSpace).size == 1) - - val response = webTarget.path("api/v1/admin/engine") - .queryParam("all", "true") - .request(MediaType.APPLICATION_JSON_TYPE) - .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") - .get - - assert(200 == response.getStatus) - val engines = response.readEntity(new GenericType[Seq[Engine]]() {}) - assert(engines.size == 1) - assert(engines(0).getEngineType == "SPARK_SQL") - assert(engines(0).getSharelevel == "GROUP") - assert(engines(0).getSubdomain == "default") - - // kill the engine application - engineMgr.killApplication(ApplicationManagerInfo(None), id) - eventually(timeout(30.seconds), interval(100.milliseconds)) { - assert(engineMgr.getApplicationInfo(ApplicationManagerInfo(None), id).exists( - _.state == ApplicationState.NOT_FOUND)) - } + // use proxyUser + val listEngineResponse1 = runListEngine(Option(normalUser), None) + assert(listEngineResponse1.getStatus === 405) + val errorMessage = s"Failed to validate proxy privilege of anonymous for $normalUser" + assert(listEngineResponse1.readEntity(classOf[String]).contains(errorMessage)) + + // it should be the same behavior as hive.server2.proxy.user + val listEngineResponse2 = runListEngine(None, Option(normalUser)) + assert(listEngineResponse2.getStatus === 405) + assert(listEngineResponse2.readEntity(classOf[String]).contains(errorMessage)) + + // when both set, proxyUser takes precedence + val listEngineResponse3 = + runListEngine(Option(normalUser), Option(s"${normalUser}HiveServer2")) + assert(listEngineResponse3.getStatus === 405) + assert(listEngineResponse3.readEntity(classOf[String]).contains(errorMessage)) } } - test("list all engines - connection share level") { - conf.set(KyuubiConf.ENGINE_SHARE_LEVEL, CONNECTION.toString) - conf.set(KyuubiConf.ENGINE_TYPE, SPARK_SQL.toString) - conf.set(KyuubiConf.FRONTEND_THRIFT_BINARY_BIND_PORT, 0) - conf.set(HighAvailabilityConf.HA_NAMESPACE, "kyuubi_test") - conf.set(KyuubiConf.ENGINE_IDLE_TIMEOUT, 180000L) - conf.set(KyuubiConf.GROUP_PROVIDER, "hadoop") - - val engineSpace = DiscoveryPaths.makePath( - s"kyuubi_test_${KYUUBI_VERSION}_CONNECTION_SPARK_SQL", - Utils.currentUser, - "") - - val id1 = UUID.randomUUID().toString - val engine1 = - new EngineRef(conf.clone, Utils.currentUser, PluginLoader.loadGroupProvider(conf), id1, null) - val engineSpace1 = DiscoveryPaths.makePath( - s"kyuubi_test_${KYUUBI_VERSION}_CONNECTION_SPARK_SQL", - Utils.currentUser, - id1) - - val id2 = UUID.randomUUID().toString - val engine2 = - new EngineRef(conf.clone, Utils.currentUser, PluginLoader.loadGroupProvider(conf), id2, null) - val engineSpace2 = DiscoveryPaths.makePath( - s"kyuubi_test_${KYUUBI_VERSION}_CONNECTION_SPARK_SQL", - Utils.currentUser, - id2) - + test("list server") { + // Mock Kyuubi Server + val serverDiscovery = mock[ServiceDiscovery] + lenient.when(serverDiscovery.fe).thenReturn(fe) + val namespace = conf.get(HighAvailabilityConf.HA_NAMESPACE) withDiscoveryClient(conf) { client => - engine1.getOrCreate(client) - engine2.getOrCreate(client) - - assert(client.pathExists(engineSpace)) - assert(client.getChildren(engineSpace).size == 2) - assert(client.pathExists(engineSpace1)) - assert(client.pathExists(engineSpace2)) + client.registerService(conf, namespace, serverDiscovery) - val response = webTarget.path("api/v1/admin/engine") - .queryParam("all", "true") - .request(MediaType.APPLICATION_JSON_TYPE) - .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") + val response = webTarget.path("api/v1/admin/server") + .request() + .header(AUTHORIZATION_HEADER, HttpAuthUtils.basicAuthorizationHeader(Utils.currentUser)) .get - assert(200 == response.getStatus) - val result = response.readEntity(new GenericType[Seq[Engine]]() {}) - assert(result.size == 2) - // kill the engine application - engineMgr.killApplication(ApplicationManagerInfo(None), id1) - engineMgr.killApplication(ApplicationManagerInfo(None), id2) - eventually(timeout(30.seconds), interval(100.milliseconds)) { - assert(engineMgr.getApplicationInfo(ApplicationManagerInfo(None), id1) - .exists(_.state == ApplicationState.NOT_FOUND)) - assert(engineMgr.getApplicationInfo(ApplicationManagerInfo(None), id2) - .exists(_.state == ApplicationState.NOT_FOUND)) - } + assert(response.getStatus === 200) + val result = response.readEntity(new GenericType[Seq[ServerData]]() {}) + assert(result.size == 1) + val testServer = result.head + val restFrontendService = fe.asInstanceOf[KyuubiRestFrontendService] + + assert(namespace.equals(testServer.getNamespace.replaceFirst("/", ""))) + assert(restFrontendService.host.equals(testServer.getHost)) + assert(restFrontendService.connectionUrl.equals(testServer.getInstance())) + assert(!testServer.getAttributes.isEmpty) + val attributes = testServer.getAttributes + assert(attributes.containsKey("serverUri") && + attributes.get("serverUri").equals(fe.connectionUrl)) + assert(attributes.containsKey("version")) + assert(attributes.containsKey("sequence")) + assert("Running".equals(testServer.getStatus)) } } } diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/api/v1/BatchesResourceSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/api/v1/BatchesResourceSuite.scala index 7270f68d6b7..6ae2bd04063 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/api/v1/BatchesResourceSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/api/v1/BatchesResourceSuite.scala @@ -19,15 +19,15 @@ package org.apache.kyuubi.server.api.v1 import java.net.InetAddress import java.nio.file.Paths -import java.util.{Base64, UUID} +import java.util.UUID import javax.ws.rs.client.Entity import javax.ws.rs.core.{MediaType, Response} import scala.collection.JavaConverters._ +import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import scala.concurrent.duration.DurationInt -import org.apache.hive.service.rpc.thrift.TProtocolVersion import org.glassfish.jersey.media.multipart.FormDataMultiPart import org.glassfish.jersey.media.multipart.file.FileDataBodyPart @@ -43,10 +43,11 @@ import org.apache.kyuubi.metrics.{MetricsConstants, MetricsSystem} import org.apache.kyuubi.operation.{BatchJobSubmission, OperationState} import org.apache.kyuubi.operation.OperationState.OperationState import org.apache.kyuubi.server.{KyuubiBatchService, KyuubiRestFrontendService} -import org.apache.kyuubi.server.http.authentication.AuthenticationHandler.AUTHORIZATION_HEADER +import org.apache.kyuubi.server.http.util.HttpAuthUtils.{basicAuthorizationHeader, AUTHORIZATION_HEADER} import org.apache.kyuubi.server.metadata.api.{Metadata, MetadataFilter} -import org.apache.kyuubi.service.authentication.{InternalSecurityAccessor, KyuubiAuthenticationFactory} +import org.apache.kyuubi.service.authentication.{AnonymousAuthenticationProviderImpl, KyuubiAuthenticationFactory} import org.apache.kyuubi.session.{KyuubiBatchSession, KyuubiSessionManager, SessionHandle, SessionType} +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TProtocolVersion class BatchesV1ResourceSuite extends BatchesResourceSuiteBase { override def batchVersion: String = "1" @@ -58,8 +59,8 @@ class BatchesV2ResourceSuite extends BatchesResourceSuiteBase { override def batchVersion: String = "2" override def customConf: Map[String, String] = Map( - KyuubiConf.METADATA_REQUEST_ASYNC_RETRY_ENABLED.key -> "false", - KyuubiConf.BATCH_SUBMITTER_ENABLED.key -> "true") + METADATA_REQUEST_ASYNC_RETRY_ENABLED.key -> "false", + BATCH_SUBMITTER_ENABLED.key -> "true") override def afterEach(): Unit = { val sessionManager = fe.be.sessionManager.asInstanceOf[KyuubiSessionManager] @@ -82,23 +83,17 @@ abstract class BatchesResourceSuiteBase extends KyuubiFunSuite def customConf: Map[String, String] override protected lazy val conf: KyuubiConf = { + val testResourceDir = Paths.get(sparkBatchTestResource.get).getParent val kyuubiConf = KyuubiConf() - .set(KyuubiConf.ENGINE_SECURITY_ENABLED, true) - .set(KyuubiConf.ENGINE_SECURITY_SECRET_PROVIDER, "simple") - .set(KyuubiConf.SIMPLE_SECURITY_SECRET_PROVIDER_PROVIDER_SECRET, "ENGINE____SECRET") - .set(KyuubiConf.BATCH_IMPL_VERSION, batchVersion) - .set( - KyuubiConf.SESSION_LOCAL_DIR_ALLOW_LIST, - Set(Paths.get(sparkBatchTestResource.get).getParent.toString)) + .set(AUTHENTICATION_METHOD, Set("CUSTOM")) + .set(AUTHENTICATION_CUSTOM_CLASS, classOf[AnonymousAuthenticationProviderImpl].getName) + .set(SERVER_ADMINISTRATORS, Set("admin")) + .set(BATCH_IMPL_VERSION, batchVersion) + .set(SESSION_LOCAL_DIR_ALLOW_LIST, Set(testResourceDir.toString)) customConf.foreach { case (k, v) => kyuubiConf.set(k, v) } kyuubiConf } - override def beforeAll(): Unit = { - super.beforeAll() - InternalSecurityAccessor.initialize(conf, true) - } - override def afterEach(): Unit = { val sessionManager = fe.be.sessionManager.asInstanceOf[KyuubiSessionManager] sessionManager.allSessions().foreach { session => @@ -115,6 +110,7 @@ abstract class BatchesResourceSuiteBase extends KyuubiFunSuite val response = webTarget.path("api/v1/batches") .request(MediaType.APPLICATION_JSON_TYPE) + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader("anonymous")) .post(Entity.entity(requestObj, MediaType.APPLICATION_JSON_TYPE)) assert(response.getStatus === 200) var batch = response.readEntity(classOf[Batch]) @@ -138,6 +134,7 @@ abstract class BatchesResourceSuiteBase extends KyuubiFunSuite val proxyUserRequest = requestObj val proxyUserResponse = webTarget.path("api/v1/batches") .request(MediaType.APPLICATION_JSON_TYPE) + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader("anonymous")) .post(Entity.entity(proxyUserRequest, MediaType.APPLICATION_JSON_TYPE)) assert(proxyUserResponse.getStatus === 405) var errorMessage = "Failed to validate proxy privilege of anonymous for root" @@ -145,6 +142,7 @@ abstract class BatchesResourceSuiteBase extends KyuubiFunSuite var getBatchResponse = webTarget.path(s"api/v1/batches/${batch.getId}") .request(MediaType.APPLICATION_JSON_TYPE) + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader("anonymous")) .get() assert(getBatchResponse.getStatus === 200) batch = getBatchResponse.readEntity(classOf[Batch]) @@ -169,6 +167,7 @@ abstract class BatchesResourceSuiteBase extends KyuubiFunSuite // invalid batchId getBatchResponse = webTarget.path(s"api/v1/batches/invalidBatchId") .request(MediaType.APPLICATION_JSON_TYPE) + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader("anonymous")) .get() assert(getBatchResponse.getStatus === 404) @@ -180,6 +179,7 @@ abstract class BatchesResourceSuiteBase extends KyuubiFunSuite .queryParam("from", "0") .queryParam("size", "1") .request(MediaType.APPLICATION_JSON_TYPE) + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader("anonymous")) .get() log = logResponse.readEntity(classOf[OperationLog]) assert(log.getRowCount === 1) @@ -193,6 +193,7 @@ abstract class BatchesResourceSuiteBase extends KyuubiFunSuite .queryParam("from", "-1") .queryParam("size", "100") .request(MediaType.APPLICATION_JSON_TYPE) + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader("anonymous")) .get() log = logResponse.readEntity(classOf[OperationLog]) if (log.getRowCount > 0) { @@ -206,40 +207,32 @@ abstract class BatchesResourceSuiteBase extends KyuubiFunSuite } // invalid user name - val encodeAuthorization = - new String(Base64.getEncoder.encode(batch.getId.getBytes()), "UTF-8") var deleteBatchResponse = webTarget.path(s"api/v1/batches/${batch.getId}") .request(MediaType.APPLICATION_JSON_TYPE) - .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader(batch.getId)) .delete() assert(deleteBatchResponse.getStatus === 405) - errorMessage = s"${batch.getId} is not allowed to close the session belong to anonymous" + errorMessage = s"Failed to validate proxy privilege of ${batch.getId} for anonymous" assert(deleteBatchResponse.readEntity(classOf[String]).contains(errorMessage)) // invalid batchId deleteBatchResponse = webTarget.path(s"api/v1/batches/notValidUUID") .request(MediaType.APPLICATION_JSON_TYPE) + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader("anonymous")) .delete() assert(deleteBatchResponse.getStatus === 404) // non-existed batch session deleteBatchResponse = webTarget.path(s"api/v1/batches/${UUID.randomUUID().toString}") .request(MediaType.APPLICATION_JSON_TYPE) + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader("anonymous")) .delete() assert(deleteBatchResponse.getStatus === 404) - // invalid proxy user - deleteBatchResponse = webTarget.path(s"api/v1/batches/${batch.getId}") - .queryParam("hive.server2.proxy.user", "invalidProxy") - .request(MediaType.APPLICATION_JSON_TYPE) - .delete() - assert(deleteBatchResponse.getStatus === 405) - errorMessage = "Failed to validate proxy privilege of anonymous for invalidProxy" - assert(deleteBatchResponse.readEntity(classOf[String]).contains(errorMessage)) - // check close batch session deleteBatchResponse = webTarget.path(s"api/v1/batches/${batch.getId}") .request(MediaType.APPLICATION_JSON_TYPE) + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader("anonymous")) .delete() assert(deleteBatchResponse.getStatus === 200) val closeBatchResponse = deleteBatchResponse.readEntity(classOf[CloseBatchResponse]) @@ -247,6 +240,7 @@ abstract class BatchesResourceSuiteBase extends KyuubiFunSuite // check state after close batch session getBatchResponse = webTarget.path(s"api/v1/batches/${batch.getId}") .request(MediaType.APPLICATION_JSON_TYPE) + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader("anonymous")) .get() assert(getBatchResponse.getStatus === 200) batch = getBatchResponse.readEntity(classOf[Batch]) @@ -260,6 +254,7 @@ abstract class BatchesResourceSuiteBase extends KyuubiFunSuite // close the closed batch session deleteBatchResponse = webTarget.path(s"api/v1/batches/${batch.getId}") .request(MediaType.APPLICATION_JSON_TYPE) + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader("anonymous")) .delete() assert(deleteBatchResponse.getStatus === 200) assert(!deleteBatchResponse.readEntity(classOf[CloseBatchResponse]).isSuccess) @@ -275,6 +270,7 @@ abstract class BatchesResourceSuiteBase extends KyuubiFunSuite val response = webTarget.path("api/v1/batches") .request(MediaType.APPLICATION_JSON) + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader("anonymous")) .post(Entity.entity(multipart, MediaType.MULTIPART_FORM_DATA)) assert(response.getStatus === 200) val batch = response.readEntity(classOf[Batch]) @@ -297,6 +293,7 @@ abstract class BatchesResourceSuiteBase extends KyuubiFunSuite eventually(timeout(5.seconds), interval(200.millis)) { val resp = webTarget.path(s"api/v1/batches/${batch.getId}") .request(MediaType.APPLICATION_JSON_TYPE) + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader("anonymous")) .get() val batchState = resp.readEntity(classOf[Batch]).getState assert(batchState === "PENDING" || batchState === "RUNNING") @@ -304,6 +301,7 @@ abstract class BatchesResourceSuiteBase extends KyuubiFunSuite webTarget.path(s"api/v1/batches/${batch.getId}") .request(MediaType.APPLICATION_JSON_TYPE) + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader("anonymous")) .delete() eventually(timeout(5.seconds), interval(200.millis)) { assert(KyuubiApplicationManager.uploadWorkDir.toFile.listFiles().isEmpty) @@ -318,6 +316,7 @@ abstract class BatchesResourceSuiteBase extends KyuubiFunSuite val resp1 = webTarget.path("api/v1/batches") .request(MediaType.APPLICATION_JSON_TYPE) + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader("anonymous")) .post(Entity.entity(reqObj, MediaType.APPLICATION_JSON_TYPE)) assert(resp1.getStatus === 200) val batch1 = resp1.readEntity(classOf[Batch]) @@ -325,6 +324,7 @@ abstract class BatchesResourceSuiteBase extends KyuubiFunSuite val resp2 = webTarget.path("api/v1/batches") .request(MediaType.APPLICATION_JSON_TYPE) + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader("anonymous")) .post(Entity.entity(reqObj, MediaType.APPLICATION_JSON_TYPE)) assert(resp2.getStatus === 200) val batch2 = resp2.readEntity(classOf[Batch]) @@ -348,6 +348,7 @@ abstract class BatchesResourceSuiteBase extends KyuubiFunSuite .queryParam("from", "0") .queryParam("size", "2") .request(MediaType.APPLICATION_JSON_TYPE) + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader("anonymous")) .get() assert(response.getStatus === 200) @@ -402,6 +403,7 @@ abstract class BatchesResourceSuiteBase extends KyuubiFunSuite .queryParam("from", "0") .queryParam("size", "2") .request(MediaType.APPLICATION_JSON_TYPE) + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader("anonymous")) .get() assert(response2.getStatus === 200) @@ -414,6 +416,7 @@ abstract class BatchesResourceSuiteBase extends KyuubiFunSuite .queryParam("from", "2") .queryParam("size", "2") .request(MediaType.APPLICATION_JSON_TYPE) + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader("anonymous")) .get() assert(response3.getStatus === 200) @@ -426,6 +429,7 @@ abstract class BatchesResourceSuiteBase extends KyuubiFunSuite .queryParam("from", "3") .queryParam("size", "2") .request(MediaType.APPLICATION_JSON_TYPE) + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader("anonymous")) .get() assert(response4.getStatus === 200) @@ -437,6 +441,7 @@ abstract class BatchesResourceSuiteBase extends KyuubiFunSuite .queryParam("from", "2") .queryParam("size", "2") .request(MediaType.APPLICATION_JSON_TYPE) + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader("anonymous")) .get() assert(response5.getStatus === 200) @@ -449,6 +454,7 @@ abstract class BatchesResourceSuiteBase extends KyuubiFunSuite .queryParam("from", "2") .queryParam("size", "2") .request(MediaType.APPLICATION_JSON_TYPE) + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader("anonymous")) .get() assert(response6.getStatus === 200) @@ -463,6 +469,7 @@ abstract class BatchesResourceSuiteBase extends KyuubiFunSuite .queryParam("from", "2") .queryParam("size", "2") .request(MediaType.APPLICATION_JSON_TYPE) + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader("anonymous")) .get() assert(response7.getStatus === 500) } @@ -493,6 +500,7 @@ abstract class BatchesResourceSuiteBase extends KyuubiFunSuite "resource is a required parameter")).foreach { case (req, msg) => val response = webTarget.path("api/v1/batches") .request(MediaType.APPLICATION_JSON_TYPE) + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader("anonymous")) .post(Entity.entity(req, MediaType.APPLICATION_JSON_TYPE)) assert(response.getStatus === 500) assert(response.readEntity(classOf[String]).contains(msg)) @@ -506,6 +514,7 @@ abstract class BatchesResourceSuiteBase extends KyuubiFunSuite "Invalid batchId: 3ea7ddbe-0c35-45da-85ad-3186770181a7")).foreach { case (batchId, msg) => val response = webTarget.path(s"api/v1/batches/$batchId") .request(MediaType.APPLICATION_JSON_TYPE) + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader("anonymous")) .get assert(response.getStatus === 404) assert(response.readEntity(classOf[String]).contains(msg)) @@ -622,6 +631,7 @@ abstract class BatchesResourceSuiteBase extends KyuubiFunSuite .queryParam("from", "0") .queryParam("size", "1") .request(MediaType.APPLICATION_JSON_TYPE) + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader("anonymous")) .get() batchVersion match { case "1" => @@ -640,6 +650,7 @@ abstract class BatchesResourceSuiteBase extends KyuubiFunSuite .queryParam("from", "0") .queryParam("size", "1") .request(MediaType.APPLICATION_JSON_TYPE) + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader("anonymous")) .get() assert(logResponse.getStatus === 404) assert(logResponse.readEntity(classOf[String]).contains("Invalid batchId")) @@ -654,6 +665,7 @@ abstract class BatchesResourceSuiteBase extends KyuubiFunSuite .queryParam("from", "0") .queryParam("size", "1") .request(MediaType.APPLICATION_JSON_TYPE) + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader("anonymous")) .get() assert(logResponse.getStatus === 500) assert(logResponse.readEntity(classOf[String]).contains( @@ -676,13 +688,10 @@ abstract class BatchesResourceSuiteBase extends KyuubiFunSuite engineType = "SPARK") sessionManager.insertMetadata(metadata) - val encodeAuthorization = - new String(Base64.getEncoder.encode("kyuubi".getBytes()), "UTF-8") - // delete the batch in the same kyuubi instance but not found in-memory var deleteResp = webTarget.path(s"api/v1/batches/${metadata.identifier}") .request(MediaType.APPLICATION_JSON_TYPE) - .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader("kyuubi")) .delete() assert(deleteResp.getStatus === 200) assert(!deleteResp.readEntity(classOf[CloseBatchResponse]).isSuccess) @@ -690,7 +699,7 @@ abstract class BatchesResourceSuiteBase extends KyuubiFunSuite // delete batch that is not existing deleteResp = webTarget.path(s"api/v1/batches/${UUID.randomUUID.toString}") .request(MediaType.APPLICATION_JSON_TYPE) - .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader("kyuubi")) .delete() assert(deleteResp.getStatus === 404) assert(deleteResp.readEntity(classOf[String]).contains("Invalid batchId:")) @@ -703,7 +712,7 @@ abstract class BatchesResourceSuiteBase extends KyuubiFunSuite // delete batch that need make redirection deleteResp = webTarget.path(s"api/v1/batches/${metadata2.identifier}") .request(MediaType.APPLICATION_JSON_TYPE) - .header(AUTHORIZATION_HEADER, s"BASIC $encodeAuthorization") + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader("kyuubi")) .delete() assert(deleteResp.getStatus === 200) assert(deleteResp.readEntity(classOf[CloseBatchResponse]).getMsg.contains( @@ -718,6 +727,7 @@ abstract class BatchesResourceSuiteBase extends KyuubiFunSuite val response = webTarget.path("api/v1/batches") .request(MediaType.APPLICATION_JSON_TYPE) + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader("anonymous")) .header(conf.get(FRONTEND_PROXY_HTTP_CLIENT_IP_HEADER), realClientIp) .post(Entity.entity(requestObj, MediaType.APPLICATION_JSON_TYPE)) assert(response.getStatus === 200) @@ -748,6 +758,7 @@ abstract class BatchesResourceSuiteBase extends KyuubiFunSuite eventually(timeout(10.seconds)) { val response = webTarget.path("api/v1/batches") .request(MediaType.APPLICATION_JSON_TYPE) + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader("anonymous")) .post(Entity.entity(requestObj, MediaType.APPLICATION_JSON_TYPE)) assert(response.getStatus === 200) val batch = response.readEntity(classOf[Batch]) @@ -763,6 +774,7 @@ abstract class BatchesResourceSuiteBase extends KyuubiFunSuite val deleteResp = webTarget.path(s"api/v1/batches/$batchId") .request(MediaType.APPLICATION_JSON_TYPE) + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader("anonymous")) .delete() assert(deleteResp.getStatus === 200) @@ -824,10 +836,51 @@ abstract class BatchesResourceSuiteBase extends KyuubiFunSuite val response = webTarget.path("api/v1/batches") .queryParam("batchName", uniqueName) .request(MediaType.APPLICATION_JSON_TYPE) + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader("anonymous")) .get() assert(response.getStatus == 200) val getBatchListResponse = response.readEntity(classOf[GetBatchesResponse]) assert(getBatchListResponse.getTotal == 1) } + + test("open batch session with proxyUser") { + val normalUser = "kyuubi" + + def runOpenBatchExecutor( + kyuubiProxyUser: Option[String], + hs2ProxyUser: Option[String]): Response = { + val conf = mutable.Map("spark.master" -> "local") + + kyuubiProxyUser.map { username => + conf += (PROXY_USER.key -> username) + } + hs2ProxyUser.map { username => + conf += (KyuubiAuthenticationFactory.HS2_PROXY_USER -> username) + } + val proxyUserRequest = newSparkBatchRequest(conf.toMap) + + webTarget.path("api/v1/batches") + .request(MediaType.APPLICATION_JSON_TYPE) + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader("anonymous")) + .post(Entity.entity(proxyUserRequest, MediaType.APPLICATION_JSON_TYPE)) + } + + // use kyuubi.session.proxy.user + val proxyUserResponse1 = runOpenBatchExecutor(Option(normalUser), None) + assert(proxyUserResponse1.getStatus === 405) + val errorMessage = s"Failed to validate proxy privilege of anonymous for $normalUser" + assert(proxyUserResponse1.readEntity(classOf[String]).contains(errorMessage)) + + // it should be the same behavior as hive.server2.proxy.user + val proxyUserResponse2 = runOpenBatchExecutor(None, Option(normalUser)) + assert(proxyUserResponse2.getStatus === 405) + assert(proxyUserResponse2.readEntity(classOf[String]).contains(errorMessage)) + + // when both set, kyuubi.session.proxy.user takes precedence + val proxyUserResponse3 = + runOpenBatchExecutor(Option(normalUser), Option(s"${normalUser}HiveServer2")) + assert(proxyUserResponse3.getStatus === 405) + assert(proxyUserResponse3.readEntity(classOf[String]).contains(errorMessage)) + } } diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/api/v1/OperationsResourceSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/api/v1/OperationsResourceSuite.scala index 72cd4d87db1..c4d67ad6211 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/api/v1/OperationsResourceSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/api/v1/OperationsResourceSuite.scala @@ -23,16 +23,16 @@ import javax.ws.rs.core.MediaType import scala.collection.JavaConverters._ -import org.apache.hive.service.rpc.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V2 import org.scalatest.concurrent.PatienceConfiguration.Timeout import org.scalatest.time.SpanSugar.convertIntToGrainOfTime import org.apache.kyuubi.{KyuubiFunSuite, RestFrontendTestHelper} +import org.apache.kyuubi.client.api.v1.dto import org.apache.kyuubi.client.api.v1.dto._ import org.apache.kyuubi.config.KyuubiConf -import org.apache.kyuubi.events.KyuubiOperationEvent import org.apache.kyuubi.operation.{ExecuteStatement, OperationState} import org.apache.kyuubi.operation.OperationState.{FINISHED, OperationState} +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V2 class OperationsResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { @@ -205,6 +205,23 @@ class OperationsResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper assert(logRowSet.getRowCount == 1) } + test("support to return operation progress for REST api") { + val sessionHandle = fe.be.openSession( + HIVE_CLI_SERVICE_PROTOCOL_V2, + "admin", + "123456", + "localhost", + Map(KyuubiConf.SESSION_PROGRESS_ENABLE.key -> "true")) + val op = fe.be.executeStatement(sessionHandle, "show tables", Map.empty, runAsync = true, 3000) + eventually(Timeout(5.seconds)) { + val response = webTarget.path(s"api/v1/operations/${op.identifier}/event") + .request(MediaType.APPLICATION_JSON_TYPE).get() + assert(response.getStatus === 200) + val operationEvent = response.readEntity(classOf[dto.KyuubiOperationEvent]) + assert(operationEvent.getProgress != null) + } + } + def getOpHandleStr(statement: String = "show tables"): String = { val sessionHandle = fe.be.openSession( HIVE_CLI_SERVICE_PROTOCOL_V2, @@ -228,8 +245,8 @@ class OperationsResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper val response = webTarget.path(s"api/v1/operations/$opHandleStr/event") .request(MediaType.APPLICATION_JSON_TYPE).get() assert(response.getStatus === 200) - val operationEvent = response.readEntity(classOf[KyuubiOperationEvent]) - assert(operationEvent.state === state.name()) + val operationEvent = response.readEntity(classOf[dto.KyuubiOperationEvent]) + assert(operationEvent.getState === state.name()) } } } diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/api/v1/SessionsResourceSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/api/v1/SessionsResourceSuite.scala index b58e87bc8c2..af49598fe82 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/api/v1/SessionsResourceSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/api/v1/SessionsResourceSuite.scala @@ -35,7 +35,7 @@ import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_SESSION_CONNECTION_URL import org.apache.kyuubi.engine.ShareLevel import org.apache.kyuubi.metrics.{MetricsConstants, MetricsSystem} import org.apache.kyuubi.operation.OperationHandle -import org.apache.kyuubi.server.http.authentication.AuthenticationHandler.AUTHORIZATION_HEADER +import org.apache.kyuubi.server.http.util.HttpAuthUtils.{basicAuthorizationHeader, AUTHORIZATION_HEADER} import org.apache.kyuubi.session.SessionType class SessionsResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { @@ -62,8 +62,11 @@ class SessionsResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { val statistic = webTarget.path("api/v1/sessions/execPool/statistic").request().get() val execPoolStatistic1 = statistic.readEntity(classOf[ExecPoolStatistic]) + // because this operation is asynchronous, + // there is no guarantee that it will complete quickly or fail in the process + // so we can not guarantee the poolActiveThread count must equal to 1 assert(execPoolStatistic1.getExecPoolSize == 1 && - execPoolStatistic1.getExecPoolActiveCount == 1) + execPoolStatistic1.getExecPoolActiveCount <= 1) response = webTarget.path("api/v1/sessions/count").request().get() val openedSessionCount = response.readEntity(classOf[SessionOpenCount]) @@ -97,23 +100,23 @@ class SessionsResourceSuite extends KyuubiFunSuite with RestFrontendTestHelper { response = webTarget.path(s"api/v1/sessions/$sessionHandle").request().delete() assert(200 == response.getStatus) - // get session list again - response2 = webTarget.path("api/v1/sessions").request().get() - assert(200 == response2.getStatus) - val sessions2 = response2.readEntity(classOf[Seq[SessionData]]) - assert(sessions2.isEmpty) + // because delete is a asynchronous operation, we need eventually to + // make sure the delete operation process complete + eventually(timeout(3.seconds)) { + // get session list again + response2 = webTarget.path("api/v1/sessions").request().get() + assert(200 == response2.getStatus) + + val sessions = response2.readEntity(classOf[Seq[SessionData]]) + assert(sessions.isEmpty) + } } test("get session event") { val sessionOpenRequest = new SessionOpenRequest(Map("testConfig" -> "testValue").asJava) - - val user = "kyuubi".getBytes() - val sessionOpenResp = webTarget.path("api/v1/sessions") .request(MediaType.APPLICATION_JSON_TYPE) - .header( - AUTHORIZATION_HEADER, - s"Basic ${new String(Base64.getEncoder().encode(user), StandardCharsets.UTF_8)}") + .header(AUTHORIZATION_HEADER, basicAuthorizationHeader("kyuubi")) .post(Entity.entity(sessionOpenRequest, MediaType.APPLICATION_JSON_TYPE)) val sessionHandle = sessionOpenResp.readEntity(classOf[SessionHandle]).getIdentifier diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/AdminRestApiSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/AdminRestApiSuite.scala index d63e4660772..c8f1d68e67e 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/AdminRestApiSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/AdminRestApiSuite.scala @@ -21,7 +21,6 @@ import java.util.UUID import scala.collection.JavaConverters.asScalaBufferConverter -import org.apache.hive.service.rpc.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V2 import org.mockito.Mockito.lenient import org.scalatestplus.mockito.MockitoSugar.mock @@ -33,6 +32,7 @@ import org.apache.kyuubi.ha.HighAvailabilityConf import org.apache.kyuubi.ha.client.{DiscoveryPaths, ServiceDiscovery} import org.apache.kyuubi.ha.client.DiscoveryClientProvider.withDiscoveryClient import org.apache.kyuubi.plugin.PluginLoader +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V2 class AdminRestApiSuite extends RestClientTestHelper { test("refresh kyuubi server hadoop conf") { @@ -74,7 +74,7 @@ class AdminRestApiSuite extends RestClientTestHelper { .build() val adminRestApi = new AdminRestApi(basicKyuubiRestClient) - var engines = adminRestApi.listEngines("spark_sql", "user", "default", "", "false").asScala + var engines = adminRestApi.listEngines("spark_sql", "user", "default", "").asScala assert(engines.size == 1) assert(engines(0).getUser == user) assert(engines(0).getVersion == KYUUBI_VERSION) @@ -87,7 +87,7 @@ class AdminRestApiSuite extends RestClientTestHelper { val result = adminRestApi.deleteEngine("spark_sql", "user", "default", "") assert(result == s"Engine ${engineSpace} is deleted successfully.") - engines = adminRestApi.listEngines("spark_sql", "user", "default", "", "false").asScala + engines = adminRestApi.listEngines("spark_sql", "user", "default", "").asScala assert(engines.isEmpty) } diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/BatchCliSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/BatchCliSuite.scala index bcf8c450eb8..4d5e352f182 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/BatchCliSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/BatchCliSuite.scala @@ -25,7 +25,6 @@ import java.util.UUID import org.apache.hadoop.security.UserGroupInformation import org.apache.hadoop.shaded.com.nimbusds.jose.util.StandardCharset -import org.apache.hive.service.rpc.thrift.TProtocolVersion import org.scalatest.time.SpanSugar.convertIntToGrainOfTime import org.apache.kyuubi.{BatchTestHelper, RestClientTestHelper, Utils} @@ -36,6 +35,7 @@ import org.apache.kyuubi.engine.ApplicationManagerInfo import org.apache.kyuubi.metrics.{MetricsConstants, MetricsSystem} import org.apache.kyuubi.server.metadata.api.MetadataFilter import org.apache.kyuubi.session.KyuubiSessionManager +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TProtocolVersion class BatchCliSuite extends RestClientTestHelper with TestPrematureExit with BatchTestHelper { @@ -83,7 +83,7 @@ class BatchCliSuite extends RestClientTestHelper with TestPrematureExit with Bat | resource: ${sparkBatchTestResource.get} | className: org.apache.spark.examples.DriverSubmissionTest | args: - | - 10 + | - 120 | configs: | spark.master: local | wait.completion: true @@ -147,14 +147,20 @@ class BatchCliSuite extends RestClientTestHelper with TestPrematureExit with Bat "batch", batchId, "--size", - "2", + "100", "--username", ldapUser, "--password", ldapUserPasswd) - result = testPrematureExitForControlCli(logArgs, "") - val rows = result.split("\n") - assert(rows.length == 2) + eventually(timeout(60.seconds), interval(100.milliseconds)) { + invalidCount += 1 + result = testPrematureExitForControlCli(logArgs, "") + val rows = result.split("\n") + assert(rows.length >= 2) + // org.apache.spark.examples.DriverSubmissionTest output + assert(result.contains("Alive for")) + invalidCount -= 1 + } val deleteArgs = Array( "delete", @@ -168,7 +174,7 @@ class BatchCliSuite extends RestClientTestHelper with TestPrematureExit with Bat eventually(timeout(3.seconds), interval(200.milliseconds)) { assert(MetricsSystem.counterValue( - MetricsConstants.REST_CONN_TOTAL).getOrElse(0L) - totalConnections - invalidCount === 5) + MetricsConstants.REST_CONN_TOTAL).getOrElse(0L) - totalConnections - invalidCount >= 5) assert(MetricsSystem.counterValue(MetricsConstants.REST_CONN_OPEN).getOrElse(0L) === 0) } } @@ -206,12 +212,16 @@ class BatchCliSuite extends RestClientTestHelper with TestPrematureExit with Bat "batch", batchId, "--size", - "2", + "100", "--authSchema", "spnego") - result = testPrematureExitForControlCli(logArgs, "") - val rows = result.split("\n") - assert(rows.length == 2) + eventually(timeout(60.seconds), interval(100.milliseconds)) { + result = testPrematureExitForControlCli(logArgs, "") + val rows = result.split("\n") + assert(rows.length >= 2) + // org.apache.spark.examples.DriverSubmissionTest output + assert(result.contains("Alive for")) + } val deleteArgs = Array( "delete", diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/BatchRestApiSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/BatchRestApiSuite.scala index d04826a9d20..20ec2fc0a5f 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/BatchRestApiSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/BatchRestApiSuite.scala @@ -74,16 +74,9 @@ class BatchRestApiSuite extends RestClientTestHelper with BatchTestHelper { } // delete batch - val closeResp = batchRestApi.deleteBatch(batch.getId(), null) + val closeResp = batchRestApi.deleteBatch(batch.getId()) assert(closeResp.getMsg.nonEmpty) - // delete batch - error - val e = intercept[KyuubiRestException] { - batchRestApi.deleteBatch(batch.getId(), "fake") - } - assert(e.getCause.toString.contains( - s"Failed to validate proxy privilege of ${ldapUser} for fake")) - basicKyuubiRestClient.close() } @@ -170,7 +163,7 @@ class BatchRestApiSuite extends RestClientTestHelper with BatchTestHelper { } // delete batch - val closeResp = batchRestApi.deleteBatch(batch.getId(), proxyUser) + val closeResp = batchRestApi.deleteBatch(batch.getId()) assert(closeResp.getMsg.nonEmpty) // list batches diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/OperationRestApiSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/OperationRestApiSuite.scala index fed685c4478..e02cfd260c3 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/OperationRestApiSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/OperationRestApiSuite.scala @@ -19,7 +19,6 @@ package org.apache.kyuubi.server.rest.client import scala.collection.JavaConverters._ -import org.apache.hive.service.rpc.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V2 import org.scalatest.concurrent.PatienceConfiguration.Timeout import org.scalatest.time.SpanSugar.convertIntToGrainOfTime @@ -28,6 +27,7 @@ import org.apache.kyuubi.client.{KyuubiRestClient, OperationRestApi} import org.apache.kyuubi.client.api.v1.dto.OpActionRequest import org.apache.kyuubi.client.exception.KyuubiRestException import org.apache.kyuubi.operation.OperationState +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V2 class OperationRestApiSuite extends RestClientTestHelper { diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/SessionCtlSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/SessionCtlSuite.scala index 5d219de33cc..fb43fcf8169 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/SessionCtlSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/SessionCtlSuite.scala @@ -17,10 +17,9 @@ package org.apache.kyuubi.server.rest.client -import org.apache.hive.service.rpc.thrift.TProtocolVersion - import org.apache.kyuubi.RestClientTestHelper import org.apache.kyuubi.ctl.{CtlConf, TestPrematureExit} +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TProtocolVersion class SessionCtlSuite extends RestClientTestHelper with TestPrematureExit { override def beforeAll(): Unit = { diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/SessionRestApiSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/SessionRestApiSuite.scala index a1dfd243229..8afb3ccad97 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/SessionRestApiSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/rest/client/SessionRestApiSuite.scala @@ -23,7 +23,6 @@ import java.util.Collections import scala.collection.JavaConverters._ import scala.concurrent.duration.DurationInt -import org.apache.hive.service.rpc.thrift.TGetInfoType import org.scalatest.concurrent.PatienceConfiguration.Timeout import org.apache.kyuubi.RestClientTestHelper @@ -32,6 +31,7 @@ import org.apache.kyuubi.client.api.v1.dto._ import org.apache.kyuubi.client.exception.KyuubiRestException import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.session.SessionType +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TGetInfoType class SessionRestApiSuite extends RestClientTestHelper { test("get/close/list/count session") { diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/trino/api/TrinoContextSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/trino/api/TrinoContextSuite.scala index 6c5a01e4659..967a882d866 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/trino/api/TrinoContextSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/trino/api/TrinoContextSuite.scala @@ -24,7 +24,6 @@ import javax.ws.rs.core.MediaType import scala.collection.JavaConverters._ import io.trino.client.ProtocolHeaders.TRINO_HEADERS -import org.apache.hive.service.rpc.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V9 import org.scalatest.concurrent.PatienceConfiguration.Timeout import org.scalatest.time.SpanSugar.convertIntToGrainOfTime @@ -32,6 +31,7 @@ import org.apache.kyuubi.{KyuubiFunSuite, RestFrontendTestHelper} import org.apache.kyuubi.events.KyuubiOperationEvent import org.apache.kyuubi.operation.{FetchOrientation, OperationHandle} import org.apache.kyuubi.operation.OperationState.{FINISHED, OperationState} +import org.apache.kyuubi.shaded.hive.service.rpc.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V9 class TrinoContextSuite extends KyuubiFunSuite with RestFrontendTestHelper { diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/service/CheckServerSPISuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/service/CheckServerSPISuite.scala new file mode 100644 index 00000000000..295c7df3de9 --- /dev/null +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/service/CheckServerSPISuite.scala @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.service + +import java.nio.file.Paths + +// scalastyle:off +import org.scalatest.funsuite.AnyFunSuite + +import org.apache.kyuubi.util.AssertionUtils._ +import org.apache.kyuubi.util.GoldenFileUtils._ + +class CheckServerSPISuite extends AnyFunSuite { + // scalastyle:on + + test("check server SPI service file sorted") { + Seq( + "org.apache.hadoop.security.token.TokenIdentifier", + "org.apache.kyuubi.credentials.HadoopDelegationTokenProvider", + "org.apache.kyuubi.engine.ApplicationOperation") + .foreach { fileName => + val filePath = Paths.get( + s"${getCurrentModuleHome(this)}/src/main/resources/META-INF/services/$fileName") + assertFileContentSorted(filePath) + } + } +} diff --git a/kyuubi-server/web-ui/.env b/kyuubi-server/web-ui/.env new file mode 100644 index 00000000000..fb092780fc0 --- /dev/null +++ b/kyuubi-server/web-ui/.env @@ -0,0 +1,16 @@ + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + +VITE_APP_VERSION=$npm_package_version diff --git a/kyuubi-server/web-ui/.env.production b/kyuubi-server/web-ui/.env.production index 1781b580123..9d442ad4523 100644 --- a/kyuubi-server/web-ui/.env.production +++ b/kyuubi-server/web-ui/.env.production @@ -13,6 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -NODE_ENV=production - VITE_APP_DEV_WEB_URL='/' diff --git a/kyuubi-server/web-ui/package-lock.json b/kyuubi-server/web-ui/package-lock.json index fa01c240573..3dab868017f 100644 --- a/kyuubi-server/web-ui/package-lock.json +++ b/kyuubi-server/web-ui/package-lock.json @@ -9,11 +9,13 @@ "version": "1.9.0-SNAPSHOT", "dependencies": { "@element-plus/icons-vue": "^2.0.9", - "axios": "^0.27.2", + "axios": "^1.6.0", "date-fns": "^2.29.3", "element-plus": "^2.2.12", + "monaco-editor": "^0.44.0", "pinia": "^2.0.18", "pinia-plugin-persistedstate": "^2.1.1", + "sql-formatter": "^13.0.1", "swagger-ui-dist": "^4.9.1", "vue": "^3.2.37", "vue-i18n": "^9.2.2", @@ -1491,8 +1493,7 @@ "node_modules/argparse": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", - "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", - "dev": true + "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==" }, "node_modules/array-union": { "version": "2.1.0", @@ -1523,12 +1524,13 @@ "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==" }, "node_modules/axios": { - "version": "0.27.2", - "resolved": "https://registry.npmjs.org/axios/-/axios-0.27.2.tgz", - "integrity": "sha512-t+yRIyySRTp/wua5xEr+z1q60QmLq8ABsS5O9Me1AsE5dfKqgnCFzwiCZZ/cGNd1lq4/7akDWMxdhVlucjmnOQ==", + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/axios/-/axios-1.6.0.tgz", + "integrity": "sha512-EZ1DYihju9pwVB+jg67ogm+Tmqc6JmhamRN6I4Zt8DfZu5lbcQGw3ozH9lFejSJgs/ibaef3A9PMXPLeefFGJg==", "dependencies": { - "follow-redirects": "^1.14.9", - "form-data": "^4.0.0" + "follow-redirects": "^1.15.0", + "form-data": "^4.0.0", + "proxy-from-env": "^1.1.0" } }, "node_modules/balanced-match": { @@ -1715,6 +1717,11 @@ "node": ">= 0.8" } }, + "node_modules/commander": { + "version": "2.20.3", + "resolved": "https://registry.npmjs.org/commander/-/commander-2.20.3.tgz", + "integrity": "sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ==" + }, "node_modules/concat-map": { "version": "0.0.1", "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", @@ -1911,6 +1918,11 @@ "node": ">=8" } }, + "node_modules/discontinuous-range": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/discontinuous-range/-/discontinuous-range-1.0.0.tgz", + "integrity": "sha512-c68LpLbO+7kP/b1Hr1qs8/BJ09F5khZGTxqxZuhzxpmwJKOgRFHJWIb9/KmqnqHhLdO55aOxFH/EGBvUQbL/RQ==" + }, "node_modules/doctrine": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/doctrine/-/doctrine-3.0.0.tgz", @@ -2571,6 +2583,17 @@ "node": "*" } }, + "node_modules/get-stdin": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/get-stdin/-/get-stdin-8.0.0.tgz", + "integrity": "sha512-sY22aA6xchAzprjyqmSEQv4UbAAzRN0L2dQB0NlN5acTTK9Don6nhoc3eAbUnpZiCANAMfd/+40kVdKfFygohg==", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/glob": { "version": "7.2.3", "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz", @@ -3160,6 +3183,16 @@ "ufo": "^1.1.2" } }, + "node_modules/monaco-editor": { + "version": "0.44.0", + "resolved": "https://registry.npmjs.org/monaco-editor/-/monaco-editor-0.44.0.tgz", + "integrity": "sha512-5SmjNStN6bSuSE5WPT2ZV+iYn1/yI9sd4Igtk23ChvqB7kDk9lZbB9F5frsuvpB+2njdIeGGFf2G4gbE6rCC9Q==" + }, + "node_modules/moo": { + "version": "0.5.2", + "resolved": "https://registry.npmjs.org/moo/-/moo-0.5.2.tgz", + "integrity": "sha512-iSAJLHYKnX41mKcJKjqvnAN9sf0LMDTXDEvFv+ffuRR9a1MIuXLjMNL6EsnDHSkKLTWNqQQ5uo61P4EbU4NU+Q==" + }, "node_modules/ms": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", @@ -3189,6 +3222,27 @@ "integrity": "sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==", "dev": true }, + "node_modules/nearley": { + "version": "2.20.1", + "resolved": "https://registry.npmjs.org/nearley/-/nearley-2.20.1.tgz", + "integrity": "sha512-+Mc8UaAebFzgV+KpI5n7DasuuQCHA89dmwm7JXw3TV43ukfNQ9DnBH3Mdb2g/I4Fdxc26pwimBWvjIw0UAILSQ==", + "dependencies": { + "commander": "^2.19.0", + "moo": "^0.5.0", + "railroad-diagrams": "^1.0.0", + "randexp": "0.4.6" + }, + "bin": { + "nearley-railroad": "bin/nearley-railroad.js", + "nearley-test": "bin/nearley-test.js", + "nearley-unparse": "bin/nearley-unparse.js", + "nearleyc": "bin/nearleyc.js" + }, + "funding": { + "type": "individual", + "url": "https://nearley.js.org/#give-to-nearley" + } + }, "node_modules/normalize-path": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz", @@ -3545,6 +3599,11 @@ "url": "https://github.com/chalk/ansi-styles?sponsor=1" } }, + "node_modules/proxy-from-env": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", + "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==" + }, "node_modules/psl": { "version": "1.9.0", "resolved": "https://registry.npmjs.org/psl/-/psl-1.9.0.tgz", @@ -3586,6 +3645,23 @@ } ] }, + "node_modules/railroad-diagrams": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/railroad-diagrams/-/railroad-diagrams-1.0.0.tgz", + "integrity": "sha512-cz93DjNeLY0idrCNOH6PviZGRN9GJhsdm9hpn1YCS879fj4W+x5IFJhhkRZcwVgMmFF7R82UA/7Oh+R8lLZg6A==" + }, + "node_modules/randexp": { + "version": "0.4.6", + "resolved": "https://registry.npmjs.org/randexp/-/randexp-0.4.6.tgz", + "integrity": "sha512-80WNmd9DA0tmZrw9qQa62GPPWfuXJknrmVmLcxvq4uZBdYqb1wYoKTmnlGUchvVWe0XiLupYkBoXVOxz3C8DYQ==", + "dependencies": { + "discontinuous-range": "1.0.0", + "ret": "~0.1.10" + }, + "engines": { + "node": ">=0.12" + } + }, "node_modules/react-is": { "version": "17.0.2", "resolved": "https://registry.npmjs.org/react-is/-/react-is-17.0.2.tgz", @@ -3636,6 +3712,14 @@ "node": ">=4" } }, + "node_modules/ret": { + "version": "0.1.15", + "resolved": "https://registry.npmjs.org/ret/-/ret-0.1.15.tgz", + "integrity": "sha512-TTlYpa+OL+vMMNG24xSlQGEJ3B/RzEfUlLct7b5G/ytav+wPrplCpVMFuwzXbkecJrb6IYo1iFb0S9v37754mg==", + "engines": { + "node": ">=0.12" + } + }, "node_modules/reusify": { "version": "1.0.4", "resolved": "https://registry.npmjs.org/reusify/-/reusify-1.0.4.tgz", @@ -3813,6 +3897,19 @@ "resolved": "https://registry.npmjs.org/sourcemap-codec/-/sourcemap-codec-1.4.8.tgz", "integrity": "sha512-9NykojV5Uih4lgo5So5dtw+f0JgJX30KCNI8gwhz2J9A15wD0Ml6tjHKwf6fTSa6fAdVBdZeNOs9eJ71qCk8vA==" }, + "node_modules/sql-formatter": { + "version": "13.1.0", + "resolved": "https://registry.npmjs.org/sql-formatter/-/sql-formatter-13.1.0.tgz", + "integrity": "sha512-/nZQXuN7KzipFNM20ko+dHY4kOr9rymSfZLUDED8rhx3m8OK5y74jcyN+y1L51ZqHqiB0kp40VdpZP99uWvQdA==", + "dependencies": { + "argparse": "^2.0.1", + "get-stdin": "=8.0.0", + "nearley": "^2.20.1" + }, + "bin": { + "sql-formatter": "bin/sql-formatter-cli.cjs" + } + }, "node_modules/stackback": { "version": "0.0.2", "resolved": "https://registry.npmjs.org/stackback/-/stackback-0.0.2.tgz", @@ -5492,8 +5589,7 @@ "argparse": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", - "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", - "dev": true + "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==" }, "array-union": { "version": "2.1.0", @@ -5518,12 +5614,13 @@ "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==" }, "axios": { - "version": "0.27.2", - "resolved": "https://registry.npmjs.org/axios/-/axios-0.27.2.tgz", - "integrity": "sha512-t+yRIyySRTp/wua5xEr+z1q60QmLq8ABsS5O9Me1AsE5dfKqgnCFzwiCZZ/cGNd1lq4/7akDWMxdhVlucjmnOQ==", + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/axios/-/axios-1.6.0.tgz", + "integrity": "sha512-EZ1DYihju9pwVB+jg67ogm+Tmqc6JmhamRN6I4Zt8DfZu5lbcQGw3ozH9lFejSJgs/ibaef3A9PMXPLeefFGJg==", "requires": { - "follow-redirects": "^1.14.9", - "form-data": "^4.0.0" + "follow-redirects": "^1.15.0", + "form-data": "^4.0.0", + "proxy-from-env": "^1.1.0" } }, "balanced-match": { @@ -5668,6 +5765,11 @@ "delayed-stream": "~1.0.0" } }, + "commander": { + "version": "2.20.3", + "resolved": "https://registry.npmjs.org/commander/-/commander-2.20.3.tgz", + "integrity": "sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ==" + }, "concat-map": { "version": "0.0.1", "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", @@ -5821,6 +5923,11 @@ "path-type": "^4.0.0" } }, + "discontinuous-range": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/discontinuous-range/-/discontinuous-range-1.0.0.tgz", + "integrity": "sha512-c68LpLbO+7kP/b1Hr1qs8/BJ09F5khZGTxqxZuhzxpmwJKOgRFHJWIb9/KmqnqHhLdO55aOxFH/EGBvUQbL/RQ==" + }, "doctrine": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/doctrine/-/doctrine-3.0.0.tgz", @@ -6306,6 +6413,11 @@ "integrity": "sha512-Hm0ixYtaSZ/V7C8FJrtZIuBBI+iSgL+1Aq82zSu8VQNB4S3Gk8e7Qs3VwBDJAhmRZcFqkl3tQu36g/Foh5I5ig==", "dev": true }, + "get-stdin": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/get-stdin/-/get-stdin-8.0.0.tgz", + "integrity": "sha512-sY22aA6xchAzprjyqmSEQv4UbAAzRN0L2dQB0NlN5acTTK9Don6nhoc3eAbUnpZiCANAMfd/+40kVdKfFygohg==" + }, "glob": { "version": "7.2.3", "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz", @@ -6756,6 +6868,16 @@ "ufo": "^1.1.2" } }, + "monaco-editor": { + "version": "0.44.0", + "resolved": "https://registry.npmjs.org/monaco-editor/-/monaco-editor-0.44.0.tgz", + "integrity": "sha512-5SmjNStN6bSuSE5WPT2ZV+iYn1/yI9sd4Igtk23ChvqB7kDk9lZbB9F5frsuvpB+2njdIeGGFf2G4gbE6rCC9Q==" + }, + "moo": { + "version": "0.5.2", + "resolved": "https://registry.npmjs.org/moo/-/moo-0.5.2.tgz", + "integrity": "sha512-iSAJLHYKnX41mKcJKjqvnAN9sf0LMDTXDEvFv+ffuRR9a1MIuXLjMNL6EsnDHSkKLTWNqQQ5uo61P4EbU4NU+Q==" + }, "ms": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", @@ -6773,6 +6895,17 @@ "integrity": "sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==", "dev": true }, + "nearley": { + "version": "2.20.1", + "resolved": "https://registry.npmjs.org/nearley/-/nearley-2.20.1.tgz", + "integrity": "sha512-+Mc8UaAebFzgV+KpI5n7DasuuQCHA89dmwm7JXw3TV43ukfNQ9DnBH3Mdb2g/I4Fdxc26pwimBWvjIw0UAILSQ==", + "requires": { + "commander": "^2.19.0", + "moo": "^0.5.0", + "railroad-diagrams": "^1.0.0", + "randexp": "0.4.6" + } + }, "normalize-path": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz", @@ -6999,6 +7132,11 @@ } } }, + "proxy-from-env": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", + "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==" + }, "psl": { "version": "1.9.0", "resolved": "https://registry.npmjs.org/psl/-/psl-1.9.0.tgz", @@ -7023,6 +7161,20 @@ "integrity": "sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==", "dev": true }, + "railroad-diagrams": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/railroad-diagrams/-/railroad-diagrams-1.0.0.tgz", + "integrity": "sha512-cz93DjNeLY0idrCNOH6PviZGRN9GJhsdm9hpn1YCS879fj4W+x5IFJhhkRZcwVgMmFF7R82UA/7Oh+R8lLZg6A==" + }, + "randexp": { + "version": "0.4.6", + "resolved": "https://registry.npmjs.org/randexp/-/randexp-0.4.6.tgz", + "integrity": "sha512-80WNmd9DA0tmZrw9qQa62GPPWfuXJknrmVmLcxvq4uZBdYqb1wYoKTmnlGUchvVWe0XiLupYkBoXVOxz3C8DYQ==", + "requires": { + "discontinuous-range": "1.0.0", + "ret": "~0.1.10" + } + }, "react-is": { "version": "17.0.2", "resolved": "https://registry.npmjs.org/react-is/-/react-is-17.0.2.tgz", @@ -7061,6 +7213,11 @@ "integrity": "sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g==", "dev": true }, + "ret": { + "version": "0.1.15", + "resolved": "https://registry.npmjs.org/ret/-/ret-0.1.15.tgz", + "integrity": "sha512-TTlYpa+OL+vMMNG24xSlQGEJ3B/RzEfUlLct7b5G/ytav+wPrplCpVMFuwzXbkecJrb6IYo1iFb0S9v37754mg==" + }, "reusify": { "version": "1.0.4", "resolved": "https://registry.npmjs.org/reusify/-/reusify-1.0.4.tgz", @@ -7177,6 +7334,16 @@ "resolved": "https://registry.npmjs.org/sourcemap-codec/-/sourcemap-codec-1.4.8.tgz", "integrity": "sha512-9NykojV5Uih4lgo5So5dtw+f0JgJX30KCNI8gwhz2J9A15wD0Ml6tjHKwf6fTSa6fAdVBdZeNOs9eJ71qCk8vA==" }, + "sql-formatter": { + "version": "13.1.0", + "resolved": "https://registry.npmjs.org/sql-formatter/-/sql-formatter-13.1.0.tgz", + "integrity": "sha512-/nZQXuN7KzipFNM20ko+dHY4kOr9rymSfZLUDED8rhx3m8OK5y74jcyN+y1L51ZqHqiB0kp40VdpZP99uWvQdA==", + "requires": { + "argparse": "^2.0.1", + "get-stdin": "=8.0.0", + "nearley": "^2.20.1" + } + }, "stackback": { "version": "0.0.2", "resolved": "https://registry.npmjs.org/stackback/-/stackback-0.0.2.tgz", diff --git a/kyuubi-server/web-ui/package.json b/kyuubi-server/web-ui/package.json index 239c6270623..607fa4f3cd5 100644 --- a/kyuubi-server/web-ui/package.json +++ b/kyuubi-server/web-ui/package.json @@ -16,7 +16,7 @@ }, "dependencies": { "@element-plus/icons-vue": "^2.0.9", - "axios": "^0.27.2", + "axios": "^1.6.0", "date-fns": "^2.29.3", "element-plus": "^2.2.12", "monaco-editor": "^0.44.0", diff --git a/kyuubi-server/web-ui/pnpm-lock.yaml b/kyuubi-server/web-ui/pnpm-lock.yaml index a83d162ab6c..14f50016028 100644 --- a/kyuubi-server/web-ui/pnpm-lock.yaml +++ b/kyuubi-server/web-ui/pnpm-lock.yaml @@ -1,12 +1,12 @@ -lockfileVersion: '6.0' +lockfileVersion: '6.1' dependencies: '@element-plus/icons-vue': specifier: ^2.0.9 version: 2.0.9(vue@3.2.37) axios: - specifier: ^0.27.2 - version: 0.27.2 + specifier: ^1.6.0 + version: 1.6.0 date-fns: specifier: ^2.29.3 version: 2.29.3 @@ -1038,11 +1038,12 @@ packages: /asynckit@0.4.0: resolution: {integrity: sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==} - /axios@0.27.2: - resolution: {integrity: sha512-t+yRIyySRTp/wua5xEr+z1q60QmLq8ABsS5O9Me1AsE5dfKqgnCFzwiCZZ/cGNd1lq4/7akDWMxdhVlucjmnOQ==} + /axios@1.6.0: + resolution: {integrity: sha512-EZ1DYihju9pwVB+jg67ogm+Tmqc6JmhamRN6I4Zt8DfZu5lbcQGw3ozH9lFejSJgs/ibaef3A9PMXPLeefFGJg==} dependencies: follow-redirects: 1.15.1 form-data: 4.0.0 + proxy-from-env: 1.1.0 transitivePeerDependencies: - debug dev: false @@ -2308,6 +2309,10 @@ packages: react-is: 17.0.2 dev: true + /proxy-from-env@1.1.0: + resolution: {integrity: sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==} + dev: false + /psl@1.9.0: resolution: {integrity: sha512-E/ZsdU4HLs/68gYzgGTkMicWTLPdAftJLfJFlLUAAKZGkStNU72sZjT66SnMDVOfOWY/YAoiD7Jxa9iHvngcag==} dev: true diff --git a/kyuubi-server/web-ui/src/api/editor/index.ts b/kyuubi-server/web-ui/src/api/editor/index.ts new file mode 100644 index 00000000000..daaf0471c12 --- /dev/null +++ b/kyuubi-server/web-ui/src/api/editor/index.ts @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import request from '@/utils/request' +import type { + IOpenSessionRequest, + IRunSqlRequest, + IGetSqlRowsetRequest, + IGetSqlMetadataRequest +} from './types' + +export function openSession(data: IOpenSessionRequest): any { + return request({ + url: 'api/v1/sessions', + method: 'post', + data + }) +} + +export function closeSession(identifier: string): any { + return request({ + url: `api/v1/sessions/${identifier}`, + method: 'delete' + }) +} + +export function runSql(data: IRunSqlRequest, identifier: string): any { + return request({ + url: `api/v1/sessions/${identifier}/operations/statement`, + method: 'post', + data + }) +} + +export function getSqlRowset(params: IGetSqlRowsetRequest): any { + return request({ + url: `api/v1/operations/${params.operationHandleStr}/rowset`, + method: 'get', + params + }) +} + +export function getSqlMetadata(params: IGetSqlMetadataRequest): any { + return request({ + url: `api/v1/operations/${params.operationHandleStr}/resultsetmetadata`, + method: 'get', + params + }) +} + +export function getLog(identifier: string): any { + return request({ + url: `api/v1/operations/${identifier}/log`, + method: 'get' + }) +} + +export function closeOperation(identifier: string) { + return request({ + url: `api/v1/admin/operations/${identifier}`, + method: 'delete' + }) +} diff --git a/kyuubi-server/web-ui/src/api/editor/types.ts b/kyuubi-server/web-ui/src/api/editor/types.ts new file mode 100644 index 00000000000..0bc4c2086c6 --- /dev/null +++ b/kyuubi-server/web-ui/src/api/editor/types.ts @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +interface IOpenSessionRequest { + 'kyuubi.engine.type': string +} + +interface IRunSqlRequest { + statement: string + runAsync: boolean +} + +interface IGetSqlRowsetRequest { + operationHandleStr: string + fetchorientation: 'FETCH_NEXT' + maxrows: number +} + +interface IGetSqlMetadataRequest { + operationHandleStr: string +} + +export { + IOpenSessionRequest, + IRunSqlRequest, + IGetSqlRowsetRequest, + IGetSqlMetadataRequest +} diff --git a/kyuubi-server/web-ui/src/api/server/index.ts b/kyuubi-server/web-ui/src/api/server/index.ts index e2d74d7dbaf..4dd402b67f7 100644 --- a/kyuubi-server/web-ui/src/api/server/index.ts +++ b/kyuubi-server/web-ui/src/api/server/index.ts @@ -17,7 +17,7 @@ import request from '@/utils/request' -export function getAllServer() { +export function getAllServer(): any { return request({ url: 'api/v1/admin/server', method: 'get' diff --git a/kyuubi-server/web-ui/src/api/server/types.ts b/kyuubi-server/web-ui/src/api/server/types.ts new file mode 100644 index 00000000000..c747f436007 --- /dev/null +++ b/kyuubi-server/web-ui/src/api/server/types.ts @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +interface IServer { + attributes: any | null + host: string + instance: string + namespace: string + nodeName: string + port: number + status: string +} + +export { IServer } diff --git a/kyuubi-server/web-ui/src/assets/images/document.svg b/kyuubi-server/web-ui/src/assets/images/document.svg new file mode 100644 index 00000000000..e3d1bfe1beb --- /dev/null +++ b/kyuubi-server/web-ui/src/assets/images/document.svg @@ -0,0 +1,22 @@ + + + + + diff --git a/kyuubi-server/web-ui/src/assets/images/kyuubi-logo.svg b/kyuubi-server/web-ui/src/assets/images/kyuubi-logo.svg new file mode 100644 index 00000000000..682bc80e768 --- /dev/null +++ b/kyuubi-server/web-ui/src/assets/images/kyuubi-logo.svg @@ -0,0 +1,126 @@ + + +image/svg+xml + + + + + + + + + + + + + + + + + diff --git a/kyuubi-server/web-ui/src/assets/kyuubi.png b/kyuubi-server/web-ui/src/assets/images/kyuubi.png similarity index 100% rename from kyuubi-server/web-ui/src/assets/kyuubi.png rename to kyuubi-server/web-ui/src/assets/images/kyuubi.png diff --git a/kyuubi-server/web-ui/src/components/monaco-editor/index.vue b/kyuubi-server/web-ui/src/components/monaco-editor/index.vue index 4c387172a1a..65a2dba3421 100644 --- a/kyuubi-server/web-ui/src/components/monaco-editor/index.vue +++ b/kyuubi-server/web-ui/src/components/monaco-editor/index.vue @@ -24,7 +24,7 @@ import * as monaco from 'monaco-editor' import { format } from 'sql-formatter' import EditorWorker from 'monaco-editor/esm/vs/editor/editor.worker?worker' - import { editorProps } from './type' + import { editorProps } from './types' import { useEditorStore } from '@/pinia/editor' import { ref, toRaw, watch, onBeforeUnmount, onMounted } from 'vue' @@ -88,7 +88,7 @@ editor = monaco.editor.create(codeEditBox.value, { value: props.modelValue, language: props.language, - theme: monacoEditorThemeRef.value, + theme: props.theme || monacoEditorThemeRef.value, ...props.options }) diff --git a/kyuubi-server/web-ui/src/components/monaco-editor/type.ts b/kyuubi-server/web-ui/src/components/monaco-editor/types.ts similarity index 93% rename from kyuubi-server/web-ui/src/components/monaco-editor/type.ts rename to kyuubi-server/web-ui/src/components/monaco-editor/types.ts index 80400565eb0..aa962d43c1a 100644 --- a/kyuubi-server/web-ui/src/components/monaco-editor/type.ts +++ b/kyuubi-server/web-ui/src/components/monaco-editor/types.ts @@ -53,10 +53,7 @@ export const editorProps = { default: 'sql' }, theme: { - type: String as PropType, - validator(value: string): boolean { - return ['vs', 'vs-dark'].includes(value) - }, + type: String as PropType, default: 'vs' }, options: { @@ -72,7 +69,7 @@ export const editorProps = { }, readOnly: false, contextmenu: true, - fontSize: 16, + fontSize: 14, scrollBeyondLastLine: true, overviewRulerBorder: false } diff --git a/kyuubi-server/web-ui/src/layout/components/aside/index.vue b/kyuubi-server/web-ui/src/layout/components/aside/index.vue index 52304abff1d..c5d1e41aeb5 100644 --- a/kyuubi-server/web-ui/src/layout/components/aside/index.vue +++ b/kyuubi-server/web-ui/src/layout/components/aside/index.vue @@ -18,8 +18,9 @@ @@ -37,34 +38,43 @@ const { isCollapse } = storeToRefs(store) const router = useRoute() const activePath = ref(router.path) + const version = import.meta.env.VITE_APP_VERSION diff --git a/kyuubi-server/web-ui/src/layout/components/aside/types.ts b/kyuubi-server/web-ui/src/layout/components/aside/types.ts index a7e495e187c..76bb1f387c6 100644 --- a/kyuubi-server/web-ui/src/layout/components/aside/types.ts +++ b/kyuubi-server/web-ui/src/layout/components/aside/types.ts @@ -43,61 +43,14 @@ export const MENUS = [ } ] }, - { - label: 'Workload', - icon: 'List', - children: [ - { - label: 'Analysis', - icon: 'VideoPlay', - router: '/workload/analysis' - }, - { - label: 'Queue', - icon: 'Select', - router: '/workload/queue' - }, - { - label: 'Session', - icon: 'Select', - router: '/workload/session' - }, - { - label: 'Query', - icon: 'Select', - router: '/workload/query' - } - ] - }, - { - label: 'Operation', - icon: 'List', - children: [ - { - label: 'Running Jobs', - icon: 'VideoPlay', - router: '/operation/runningJobs' - }, - { - label: 'Completed Jobs', - icon: 'Select', - router: '/operation/completedJobs' - } - ] - }, { label: 'Swagger', icon: 'List', router: '/swagger' }, { - label: 'Contact Us', - icon: 'PhoneFilled', - router: '/contact' - }, - { - label: 'SQL Lab', + label: 'SQL Editor', icon: 'Cpu', - router: '/lab' + router: '/editor' } ] diff --git a/kyuubi-server/web-ui/src/locales/en_US/index.ts b/kyuubi-server/web-ui/src/locales/en_US/index.ts index 8606c74da6f..9bb0144ff40 100644 --- a/kyuubi-server/web-ui/src/locales/en_US/index.ts +++ b/kyuubi-server/web-ui/src/locales/en_US/index.ts @@ -37,6 +37,11 @@ export default { engine_ui: 'Engine UI', failure_reason: 'Failure Reason', session_properties: 'Session Properties', + no_data: 'No data', + no_log: 'No log', + run_sql_tips: 'Run a SQL to get result', + result: 'Result', + log: 'Log', operation: { text: 'Operation', delete_confirm: 'Delete Confirm', @@ -44,7 +49,8 @@ export default { cancel_confirm: 'Cancel Confirm', close: 'Close', cancel: 'Cancel', - delete: 'Delete' + delete: 'Delete', + run: 'Run' }, message: { delete_succeeded: 'Delete {name} Succeeded', @@ -52,6 +58,10 @@ export default { close_succeeded: 'Close {name} Succeeded', close_failed: 'Close {name} Failed', cancel_succeeded: 'Cancel {name} Succeeded', - cancel_failed: 'Cancel {name} Failed' + cancel_failed: 'Cancel {name} Failed', + run_sql_failed: 'Run SQL Failed', + get_sql_log_failed: 'Get SQL Log Failed', + get_sql_result_failed: 'Get SQL Result Failed', + get_sql_metadata_failed: 'Get SQL Metadata Failed' } } diff --git a/kyuubi-server/web-ui/src/locales/zh_CN/index.ts b/kyuubi-server/web-ui/src/locales/zh_CN/index.ts index 0c4cb66db34..198f379eccb 100644 --- a/kyuubi-server/web-ui/src/locales/zh_CN/index.ts +++ b/kyuubi-server/web-ui/src/locales/zh_CN/index.ts @@ -37,6 +37,11 @@ export default { engine_ui: 'Engine UI', failure_reason: 'ๅคฑ่ดฅๅŽŸๅ› ', session_properties: 'Session ๅ‚ๆ•ฐ', + no_data: 'ๆ— ๆ•ฐๆฎ', + no_log: 'ๆ— ๆ—ฅๅฟ—', + run_sql_tips: '่ฏท่ฟ่กŒSQL่Žทๅ–็ป“ๆžœ', + result: '็ป“ๆžœ', + log: 'ๆ—ฅๅฟ—', operation: { text: 'ๆ“ไฝœ', delete_confirm: '็กฎ่ฎคๅˆ ้™ค', @@ -44,7 +49,8 @@ export default { cancel_confirm: '็กฎ่ฎคๅ–ๆถˆ', close: 'ๅ…ณ้—ญ', cancel: 'ๅ–ๆถˆ', - delete: 'ๅˆ ้™ค' + delete: 'ๅˆ ้™ค', + run: '่ฟ่กŒ' }, message: { delete_succeeded: 'ๅˆ ้™ค {name} ๆˆๅŠŸ', @@ -52,6 +58,10 @@ export default { close_succeeded: 'ๅ…ณ้—ญ {name} ๆˆๅŠŸ', close_failed: 'ๅ…ณ้—ญ {name} ๅคฑ่ดฅ', cancel_succeeded: 'ๅ–ๆถˆ {name} ๆˆๅŠŸ', - cancel_failed: 'ๅ–ๆถˆ {name} ๅคฑ่ดฅ' + cancel_failed: 'ๅ–ๆถˆ {name} ๅคฑ่ดฅ', + run_sql_failed: '่ฟ่กŒSQLๅคฑ่ดฅ', + get_sql_log_failed: '่Žทๅ–SQLๆ—ฅๅฟ—ๅคฑ่ดฅ', + get_sql_result_failed: '่Žทๅ–SQL็ป“ๆžœๅคฑ่ดฅ', + get_sql_metadata_failed: '่Žทๅ–SQLๅ…ƒๆ•ฐๆฎๅคฑ่ดฅ' } } diff --git a/kyuubi-server/web-ui/src/router/lab/index.ts b/kyuubi-server/web-ui/src/router/editor/index.ts similarity index 89% rename from kyuubi-server/web-ui/src/router/lab/index.ts rename to kyuubi-server/web-ui/src/router/editor/index.ts index d78838079bf..9d4df889cca 100644 --- a/kyuubi-server/web-ui/src/router/lab/index.ts +++ b/kyuubi-server/web-ui/src/router/editor/index.ts @@ -17,9 +17,9 @@ const routes = [ { - path: '/lab', - name: 'lab', - component: () => import('@/views/lab/index.vue') + path: '/editor', + name: 'editor', + component: () => import('@/views/editor/index.vue') } ] diff --git a/kyuubi-server/web-ui/src/router/index.ts b/kyuubi-server/web-ui/src/router/index.ts index c59c5f28c7b..7bbe344460e 100644 --- a/kyuubi-server/web-ui/src/router/index.ts +++ b/kyuubi-server/web-ui/src/router/index.ts @@ -17,13 +17,10 @@ import { createRouter, createWebHistory } from 'vue-router' import overviewRoutes from './overview' -import workloadRoutes from './workload' -import operationRoutes from './operation' -import contactRoutes from './contact' import managementRoutes from './management' import detailRoutes from './detail' import swaggerRoutes from './swagger' -import labRoutes from './lab' +import editorRoutes from './editor' const routes = [ { @@ -40,13 +37,10 @@ const routes = [ redirect: 'overview', children: [ ...overviewRoutes, - ...workloadRoutes, - ...operationRoutes, ...managementRoutes, ...detailRoutes, ...swaggerRoutes, - ...contactRoutes, - ...labRoutes + ...editorRoutes ] } ] diff --git a/kyuubi-server/web-ui/src/router/workload/index.ts b/kyuubi-server/web-ui/src/router/workload/index.ts deleted file mode 100644 index 7d7b91a47e5..00000000000 --- a/kyuubi-server/web-ui/src/router/workload/index.ts +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -const routes = [ - { - path: '/workload/analysis', - name: 'workload-analysis', - component: () => import('@/views/workload/analysis/index.vue') - }, - { - path: '/workload/queue', - name: 'workload-queue', - component: () => import('@/views/workload/queue/index.vue') - }, - { - path: '/workload/session', - name: 'workload-session', - component: () => import('@/views/workload/session/index.vue') - }, - { - path: '/workload/query', - name: 'workload-query', - component: () => import('@/views/workload/query/index.vue') - } -] - -export default routes diff --git a/kyuubi-server/web-ui/src/test/unit/views/layout/aside.spec.ts b/kyuubi-server/web-ui/src/test/unit/views/layout/aside.spec.ts index 3e31535aef8..b7999ad173d 100644 --- a/kyuubi-server/web-ui/src/test/unit/views/layout/aside.spec.ts +++ b/kyuubi-server/web-ui/src/test/unit/views/layout/aside.spec.ts @@ -34,5 +34,5 @@ test('mount component', () => { plugins: [mockRouter, getStore()] } }) - expect(wrapper.text()).toContain('Apache Kyuubi Dashboard') + expect(wrapper.text()).toContain(import.meta.env.VITE_APP_VERSION) }) diff --git a/kyuubi-server/web-ui/src/views/editor/components/Editor.vue b/kyuubi-server/web-ui/src/views/editor/components/Editor.vue new file mode 100644 index 00000000000..21faee5a33a --- /dev/null +++ b/kyuubi-server/web-ui/src/views/editor/components/Editor.vue @@ -0,0 +1,290 @@ + + + + + + + diff --git a/kyuubi-server/web-ui/src/views/workload/analysis/index.vue b/kyuubi-server/web-ui/src/views/editor/components/Log.vue similarity index 55% rename from kyuubi-server/web-ui/src/views/workload/analysis/index.vue rename to kyuubi-server/web-ui/src/views/editor/components/Log.vue index 31b42d46ede..d2a403d9e54 100644 --- a/kyuubi-server/web-ui/src/views/workload/analysis/index.vue +++ b/kyuubi-server/web-ui/src/views/editor/components/Log.vue @@ -17,13 +17,40 @@ --> - - + diff --git a/kyuubi-server/web-ui/src/views/editor/components/Result.vue b/kyuubi-server/web-ui/src/views/editor/components/Result.vue new file mode 100644 index 00000000000..de103ff13af --- /dev/null +++ b/kyuubi-server/web-ui/src/views/editor/components/Result.vue @@ -0,0 +1,144 @@ + + + + + + + diff --git a/kyuubi-server/web-ui/src/views/editor/components/types.ts b/kyuubi-server/web-ui/src/views/editor/components/types.ts new file mode 100644 index 00000000000..42475bf4ae8 --- /dev/null +++ b/kyuubi-server/web-ui/src/views/editor/components/types.ts @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +interface IResponse { + identifier: string +} + +interface ISqlResult { + dataName?: string + dataType: string + value: any +} + +interface IFields { + fields: ISqlResult[] +} + +interface ILog { + logRowSet: string[] + rowCount: number +} + +interface IErrorMessage { + title: string + description: string +} + +interface IError extends Error { + response?: { + data?: { + message?: string + } + } +} + +export { IResponse, ISqlResult, IFields, ILog, IErrorMessage, IError } diff --git a/kyuubi-server/web-ui/src/views/editor/index.vue b/kyuubi-server/web-ui/src/views/editor/index.vue new file mode 100644 index 00000000000..424d3e929c8 --- /dev/null +++ b/kyuubi-server/web-ui/src/views/editor/index.vue @@ -0,0 +1,141 @@ + + + + + + + diff --git a/kyuubi-server/web-ui/src/views/editor/styles/shared-styles.scss b/kyuubi-server/web-ui/src/views/editor/styles/shared-styles.scss new file mode 100644 index 00000000000..9027ef69a3a --- /dev/null +++ b/kyuubi-server/web-ui/src/views/editor/styles/shared-styles.scss @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +@mixin sharedNoData { + position: absolute; + left: 50%; + top: 50%; + transform: translate(-50%, -50%); + font-size: 14px; + color: #999; + font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; + text-align: center; +} \ No newline at end of file diff --git a/kyuubi-server/web-ui/src/views/lab/index.vue b/kyuubi-server/web-ui/src/views/lab/index.vue deleted file mode 100644 index 26ecfac0d87..00000000000 --- a/kyuubi-server/web-ui/src/views/lab/index.vue +++ /dev/null @@ -1,64 +0,0 @@ - - - - - - - diff --git a/kyuubi-server/web-ui/src/views/operation/completedJobs/index.vue b/kyuubi-server/web-ui/src/views/operation/completedJobs/index.vue deleted file mode 100644 index 7b587c4fa6e..00000000000 --- a/kyuubi-server/web-ui/src/views/operation/completedJobs/index.vue +++ /dev/null @@ -1,29 +0,0 @@ - - - - - - - diff --git a/kyuubi-server/web-ui/src/views/operation/runningJobs/index.vue b/kyuubi-server/web-ui/src/views/operation/runningJobs/index.vue deleted file mode 100644 index 030b48ae9d8..00000000000 --- a/kyuubi-server/web-ui/src/views/operation/runningJobs/index.vue +++ /dev/null @@ -1,29 +0,0 @@ - - - - - - - diff --git a/kyuubi-server/web-ui/src/views/overview/index.vue b/kyuubi-server/web-ui/src/views/overview/index.vue index 3a2334c8e6b..a363d0e9560 100644 --- a/kyuubi-server/web-ui/src/views/overview/index.vue +++ b/kyuubi-server/web-ui/src/views/overview/index.vue @@ -20,7 +20,7 @@
    - +
    @@ -30,20 +30,7 @@ import { reactive } from 'vue' import cCard from '@/components/card/index.vue' - const cards = reactive([ - { - title: 'Opened Session', - value: 1 - }, - { - title: 'ExecPool Size', - value: 2 - }, - { - title: 'ExecPool ActiveCount', - value: 3 - } - ]) + const cards = reactive([]) diff --git a/kyuubi-server/web-ui/src/views/swagger/index.vue b/kyuubi-server/web-ui/src/views/swagger/index.vue index 7f8fb7f99a8..1ff671d1f14 100644 --- a/kyuubi-server/web-ui/src/views/swagger/index.vue +++ b/kyuubi-server/web-ui/src/views/swagger/index.vue @@ -18,7 +18,6 @@ diff --git a/kyuubi-server/web-ui/src/views/workload/query/index.vue b/kyuubi-server/web-ui/src/views/workload/query/index.vue deleted file mode 100644 index 45d0cd91b42..00000000000 --- a/kyuubi-server/web-ui/src/views/workload/query/index.vue +++ /dev/null @@ -1,29 +0,0 @@ - - - - - - - diff --git a/kyuubi-server/web-ui/src/views/workload/queue/index.vue b/kyuubi-server/web-ui/src/views/workload/queue/index.vue deleted file mode 100644 index bbeb8e985e9..00000000000 --- a/kyuubi-server/web-ui/src/views/workload/queue/index.vue +++ /dev/null @@ -1,29 +0,0 @@ - - - - - - - diff --git a/kyuubi-server/web-ui/src/views/workload/session/index.vue b/kyuubi-server/web-ui/src/views/workload/session/index.vue deleted file mode 100644 index bd4ec51d58e..00000000000 --- a/kyuubi-server/web-ui/src/views/workload/session/index.vue +++ /dev/null @@ -1,29 +0,0 @@ - - - - - - - diff --git a/kyuubi-util-scala/src/main/scala/org/apache/kyuubi/util/command/CommandLineUtils.scala b/kyuubi-util-scala/src/main/scala/org/apache/kyuubi/util/command/CommandLineUtils.scala new file mode 100644 index 00000000000..91327223a60 --- /dev/null +++ b/kyuubi-util-scala/src/main/scala/org/apache/kyuubi/util/command/CommandLineUtils.scala @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.util.command + +import java.io.File + +import scala.util.matching.Regex + +object CommandLineUtils { + val CONF = "--conf" + + val PATTERN_FOR_KEY_VALUE_ARG: Regex = "(.+?)=(.+)".r + + val REDACTION_REPLACEMENT_TEXT = "*********(redacted)" + + /** + * The Java command's option name for classpath + */ + val CP = "-cp" + + /** + * Assemble key value pair with "=" seperator + */ + def genKeyValuePair(key: String, value: String): String = s"$key=$value".trim + + /** + * Assemble key value pair with config option prefix + */ + def confKeyValue(key: String, value: String, confOption: String = CONF): Iterable[String] = + Seq(confOption, genKeyValuePair(key, value)) + + def confKeyValueStr(key: String, value: String, confOption: String = CONF): String = + confKeyValue(key, value, confOption).mkString(" ") + + def confKeyValues(configs: Iterable[(String, String)]): Iterable[String] = + configs.flatMap { case (k, v) => confKeyValue(k, v) }.toSeq + + /** + * Generate classpath option by assembling the classpath entries with "-cp" prefix + */ + def genClasspathOption(classpathEntries: Iterable[String]): Iterable[String] = + Seq(CP, classpathEntries.mkString(File.pathSeparator)) + + /** + * Match the conf string in the form of "key=value" + * and redact the value with the replacement text if keys are contained in given config keys + */ + def redactConfValues( + commands: Iterable[String], + redactKeys: Iterable[String]): Iterable[String] = { + redactKeys.toSet match { + case redactKeySet if redactKeySet.isEmpty => commands + case redactKeySet => commands.map { + case PATTERN_FOR_KEY_VALUE_ARG(key, _) if redactKeySet.contains(key) => + genKeyValuePair(key, REDACTION_REPLACEMENT_TEXT) + case part => part + } + } + } +} diff --git a/kyuubi-util-scala/src/test/java/org/apache/kyuubi/tags/GlutenTest.java b/kyuubi-util-scala/src/test/java/org/apache/kyuubi/tags/GlutenTest.java new file mode 100644 index 00000000000..8620df4b95a --- /dev/null +++ b/kyuubi-util-scala/src/test/java/org/apache/kyuubi/tags/GlutenTest.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kyuubi.tags; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; +import org.scalatest.TagAnnotation; + +@TagAnnotation +@Retention(RetentionPolicy.RUNTIME) +@Target({ElementType.METHOD, ElementType.TYPE}) +public @interface GlutenTest {} diff --git a/kyuubi-util-scala/src/test/scala/org/apache/kyuubi/util/AssertionUtils.scala b/kyuubi-util-scala/src/test/scala/org/apache/kyuubi/util/AssertionUtils.scala index 9d33993b9d2..fc7d0db7ab9 100644 --- a/kyuubi-util-scala/src/test/scala/org/apache/kyuubi/util/AssertionUtils.scala +++ b/kyuubi-util-scala/src/test/scala/org/apache/kyuubi/util/AssertionUtils.scala @@ -17,9 +17,10 @@ package org.apache.kyuubi.util import java.nio.charset.StandardCharsets -import java.nio.file.Path +import java.nio.file.{Files, Path} import java.util.Locale +import scala.collection.JavaConverters._ import scala.collection.Traversable import scala.io.Source import scala.reflect.ClassTag @@ -29,6 +30,8 @@ import org.scalactic.Prettifier import org.scalactic.source.Position import org.scalatest.Assertions._ +import org.apache.kyuubi.util.GoldenFileUtils.getLicenceContent + object AssertionUtils { def assertEqualsIgnoreCase(expected: AnyRef)(actual: AnyRef)( @@ -106,6 +109,24 @@ object AssertionUtils { } } + def assertFileContentSorted( + filePath: Path, + headerSkipPrefix: String = "#", + licenceHeader: Iterable[String] = getLicenceContent(), + distinct: Boolean = true): Unit = { + val sortedLines = Files.readAllLines(filePath).asScala + .dropWhile(line => line.trim == "" || line.trim.startsWith(headerSkipPrefix)) + .map(_.trim).filter(_.nonEmpty) + .sorted + val expectedSortedLines = if (distinct) { + sortedLines.distinct + } else { + sortedLines + } + val expectedLines = licenceHeader ++ Seq("") ++ expectedSortedLines + assertFileContent(filePath, expectedLines, s"Check SPI provider file sorted $filePath") + } + /** * Assert the iterable contains all the expected elements */ @@ -151,7 +172,7 @@ object AssertionUtils { /** * Asserts that the given function throws an exception of the given type - * and with the exception message equals to expected string + * and with the exception message contains expected string */ def interceptContains[T <: Exception](f: => Any)(contained: String)(implicit classTag: ClassTag[T], @@ -160,4 +181,16 @@ object AssertionUtils { val exception = intercept[T](f)(classTag, pos) assert(exception.getMessage.contains(contained)) } + + /** + * Asserts that the given function throws an exception of the given type + * and with the exception message ends with expected string + */ + def interceptEndsWith[T <: Exception](f: => Any)(end: String)(implicit + classTag: ClassTag[T], + pos: Position): Unit = { + assert(end != null) + val exception = intercept[T](f)(classTag, pos) + assert(exception.getMessage.endsWith(end)) + } } diff --git a/kyuubi-util-scala/src/test/scala/org/apache/kyuubi/util/GoldenFileUtils.scala b/kyuubi-util-scala/src/test/scala/org/apache/kyuubi/util/GoldenFileUtils.scala index e9927f7e23e..0ab292c9ced 100644 --- a/kyuubi-util-scala/src/test/scala/org/apache/kyuubi/util/GoldenFileUtils.scala +++ b/kyuubi-util-scala/src/test/scala/org/apache/kyuubi/util/GoldenFileUtils.scala @@ -48,4 +48,34 @@ object GoldenFileUtils { assertFileContent(path, lines, regenScript) } } + + def getCurrentModuleHome(obj: Any): String = { + obj.getClass.getProtectionDomain.getCodeSource.getLocation.getPath + .split("target").head + } + + val apacheLicenceContent: String = + """ Licensed to the Apache Software Foundation (ASF) under one or more + | contributor license agreements. See the NOTICE file distributed with + | this work for additional information regarding copyright ownership. + | The ASF licenses this file to You under the Apache License, Version 2.0 + | (the "License"); you may not use this file except in compliance with + | the License. You may obtain a copy of the License at + | + | http://www.apache.org/licenses/LICENSE-2.0 + | + | Unless required by applicable law or agreed to in writing, software + | distributed under the License is distributed on an "AS IS" BASIS, + | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + | See the License for the specific language governing permissions and + | limitations under the License. + |""".stripMargin + + def getLicenceContent( + header: String = "#", + linePrefix: String = "#", + footer: String = "#"): Iterable[String] = { + val content = apacheLicenceContent.split("\n").map(line => linePrefix + line) + Seq(header) ++ content ++ Seq(footer) + } } diff --git a/kyuubi-util-scala/src/test/scala/org/apache/kyuubi/util/command/CommandUtilsSuite.scala b/kyuubi-util-scala/src/test/scala/org/apache/kyuubi/util/command/CommandUtilsSuite.scala new file mode 100644 index 00000000000..e000e7478b6 --- /dev/null +++ b/kyuubi-util-scala/src/test/scala/org/apache/kyuubi/util/command/CommandUtilsSuite.scala @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kyuubi.util.command +// scalastyle:off +import org.scalatest.funsuite.AnyFunSuite + +import org.apache.kyuubi.util.AssertionUtils._ +import org.apache.kyuubi.util.command.CommandLineUtils._ + +// scalastyle:off +class CommandUtilsSuite extends AnyFunSuite { +// scalastyle:on + + test("assemble key value pair") { + assertResult("abc=123")(genKeyValuePair("abc", "123")) + assertResult("abc=123")(genKeyValuePair(" abc", "123 ")) + assertResult("abc.def=xyz.123")(genKeyValuePair("abc.def", "xyz.123")) + + assertMatches(genKeyValuePair("abc", "123"), PATTERN_FOR_KEY_VALUE_ARG) + assertMatches(genKeyValuePair(" abc", "123 "), PATTERN_FOR_KEY_VALUE_ARG) + assertMatches(genKeyValuePair("abc.def", "xyz.123"), PATTERN_FOR_KEY_VALUE_ARG) + } + + test("assemble key value pair with config option") { + assertResult("--conf abc=123")(confKeyValueStr("abc", "123")) + assertResult("--conf abc.def=xyz.123")(confKeyValueStr("abc.def", "xyz.123")) + + assertResult(Seq("--conf", "abc=123"))(confKeyValue("abc", "123")) + assertResult(Seq("--conf", "abc.def=xyz.123"))(confKeyValue("abc.def", "xyz.123")) + } + + test("assemble classpath options") { + assertResult(Seq("-cp", "/path/a.jar:/path2/b*.jar"))( + genClasspathOption(Seq("/path/a.jar", "/path2/b*.jar"))) + } +} diff --git a/kyuubi-zookeeper/pom.xml b/kyuubi-zookeeper/pom.xml index c4309fbab9b..eec201c2c61 100644 --- a/kyuubi-zookeeper/pom.xml +++ b/kyuubi-zookeeper/pom.xml @@ -38,7 +38,7 @@ org.apache.kyuubi - ${kyuubi-shaded-zookeeper.artifacts} + ${kyuubi-relocated-zookeeper.artifacts} diff --git a/kyuubi-zookeeper/src/main/scala/org/apache/kyuubi/zookeeper/EmbeddedZookeeper.scala b/kyuubi-zookeeper/src/main/scala/org/apache/kyuubi/zookeeper/EmbeddedZookeeper.scala index 17caffedff6..1592d906313 100644 --- a/kyuubi-zookeeper/src/main/scala/org/apache/kyuubi/zookeeper/EmbeddedZookeeper.scala +++ b/kyuubi-zookeeper/src/main/scala/org/apache/kyuubi/zookeeper/EmbeddedZookeeper.scala @@ -19,9 +19,10 @@ package org.apache.kyuubi.zookeeper import java.io.File import java.net.InetSocketAddress +import java.nio.file.Paths import org.apache.kyuubi.Utils._ -import org.apache.kyuubi.config.KyuubiConf +import org.apache.kyuubi.config.{ConfigEntry, KyuubiConf} import org.apache.kyuubi.service.{AbstractService, ServiceState} import org.apache.kyuubi.shaded.zookeeper.server.{NIOServerCnxnFactory, ZooKeeperServer} import org.apache.kyuubi.zookeeper.ZookeeperConf._ @@ -37,8 +38,9 @@ class EmbeddedZookeeper extends AbstractService("EmbeddedZookeeper") { private var host: String = _ override def initialize(conf: KyuubiConf): Unit = synchronized { - dataDirectory = new File(conf.get(ZK_DATA_DIR)) - dataLogDirectory = new File(conf.get(ZK_DATA_LOG_DIR)) + dataDirectory = resolvePathIfRelative(conf, ZK_DATA_DIR) + dataLogDirectory = resolvePathIfRelative(conf, ZK_DATA_LOG_DIR) + val clientPort = conf.get(ZK_CLIENT_PORT) val tickTime = conf.get(ZK_TICK_TIME) val maxClientCnxns = conf.get(ZK_MAX_CLIENT_CONNECTIONS) @@ -93,4 +95,10 @@ class EmbeddedZookeeper extends AbstractService("EmbeddedZookeeper") { assert(zks != null, s"$getName is in $getServiceState") s"$host:${serverFactory.getLocalPort}" } + + def resolvePathIfRelative(conf: KyuubiConf, configEntry: ConfigEntry[String]): File = { + val dirFromConfig = conf.get(configEntry) + Paths.get(sys.env.getOrElse(KyuubiConf.KYUUBI_HOME, ".")).resolve(dirFromConfig).toFile + } + } diff --git a/kyuubi-zookeeper/src/main/scala/org/apache/kyuubi/zookeeper/ZookeeperConf.scala b/kyuubi-zookeeper/src/main/scala/org/apache/kyuubi/zookeeper/ZookeeperConf.scala index 6ef494896a3..9b0844e6921 100644 --- a/kyuubi-zookeeper/src/main/scala/org/apache/kyuubi/zookeeper/ZookeeperConf.scala +++ b/kyuubi-zookeeper/src/main/scala/org/apache/kyuubi/zookeeper/ZookeeperConf.scala @@ -31,7 +31,8 @@ object ZookeeperConf { @deprecated("using kyuubi.zookeeper.embedded.data.dir instead", since = "1.2.0") val EMBEDDED_ZK_TEMP_DIR: ConfigEntry[String] = buildConf("kyuubi.zookeeper.embedded.directory") - .doc("The temporary directory for the embedded ZooKeeper server") + .doc("The temporary directory for the embedded ZooKeeper server. " + + "If it is a relative path, it is resolved relative to KYUUBI_HOME. ") .version("1.0.0") .stringConf .createWithDefault("embedded_zookeeper") @@ -58,12 +59,14 @@ object ZookeeperConf { val ZK_DATA_DIR: ConfigEntry[String] = buildConf("kyuubi.zookeeper.embedded.data.dir") .doc("dataDir for the embedded zookeeper server where stores the in-memory database" + - " snapshots and, unless specified otherwise, the transaction log of updates to the database.") + " snapshots and, unless specified otherwise, the transaction log of updates to the" + + " database. If it is a relative path, it is resolved relative to KYUUBI_HOME.") .version("1.2.0") .fallbackConf(EMBEDDED_ZK_TEMP_DIR) val ZK_DATA_LOG_DIR: ConfigEntry[String] = buildConf("kyuubi.zookeeper.embedded.data.log.dir") - .doc("dataLogDir for the embedded ZooKeeper server where writes the transaction log .") + .doc("dataLogDir for the embedded ZooKeeper server where writes the transaction log. " + + "If it is a relative path, it is resolved relative to KYUUBI_HOME.") .version("1.2.0") .fallbackConf(ZK_DATA_DIR) diff --git a/kyuubi-zookeeper/src/test/scala/org/apache/kyuubi/zookeeper/EmbeddedZookeeperSuite.scala b/kyuubi-zookeeper/src/test/scala/org/apache/kyuubi/zookeeper/EmbeddedZookeeperSuite.scala index 69e798ac538..8e1abda4feb 100644 --- a/kyuubi-zookeeper/src/test/scala/org/apache/kyuubi/zookeeper/EmbeddedZookeeperSuite.scala +++ b/kyuubi-zookeeper/src/test/scala/org/apache/kyuubi/zookeeper/EmbeddedZookeeperSuite.scala @@ -22,7 +22,7 @@ import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.shaded.curator.framework.CuratorFrameworkFactory import org.apache.kyuubi.shaded.curator.framework.imps.CuratorFrameworkState import org.apache.kyuubi.shaded.curator.retry.ExponentialBackoffRetry -import org.apache.kyuubi.zookeeper.ZookeeperConf.{ZK_CLIENT_PORT, ZK_CLIENT_PORT_ADDRESS} +import org.apache.kyuubi.zookeeper.ZookeeperConf.{ZK_CLIENT_PORT, ZK_CLIENT_PORT_ADDRESS, ZK_DATA_DIR, ZK_DATA_LOG_DIR} class EmbeddedZookeeperSuite extends KyuubiFunSuite { private var zkServer: EmbeddedZookeeper = _ @@ -64,4 +64,17 @@ class EmbeddedZookeeperSuite extends KyuubiFunSuite { zkServer.initialize(conf) assert(zkServer.getConnectString.contains("127.0.0.1")) } + + test("relative path from zookeeper config should be in kyuubi_home") { + zkServer = new EmbeddedZookeeper() + val conf = KyuubiConf() + .set(ZK_CLIENT_PORT, 0) + .set(ZK_DATA_LOG_DIR, "embedded_zookeeper_log") + .set(ZK_DATA_DIR, "/tmp/embedded_zookeeper_data") + + val dataDir = zkServer.resolvePathIfRelative(conf, ZK_DATA_DIR) + val dataLogDir = zkServer.resolvePathIfRelative(conf, ZK_DATA_LOG_DIR) + assert(dataDir.getAbsolutePath.equals("/tmp/embedded_zookeeper_data")) + assert(dataLogDir.getAbsolutePath.contains("/embedded_zookeeper_log")) + } } diff --git a/pom.xml b/pom.xml index b278d7be11c..f3b3d57644c 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ org.apache apache - 30 + 31 org.apache.kyuubi @@ -126,7 +126,7 @@ https://archive.apache.org/dist 2.3.0 1.67 - 4.2.8 + 4.2.23 1.5.0 1.15 3.2.2 @@ -173,22 +173,20 @@ 4.13.2 3.5.1 6.8.1 - kyuubi-shaded-zookeeper-34 - 0.1.0 + 0.2.0 + kyuubi-relocated-zookeeper-34 6.0.5 2.20.0 8.0.32 4.11.0 - - 4.1.93.Final + 4.1.100.Final 0.12.0 + 2.9.0 0.5.0-incubating ${spark.binary.version} 1.10.1 6.0.0 + 42.6.0 0.16.0 3.21.7 0.10.7 @@ -202,7 +200,7 @@ DO NOT forget to change the following properties when change the minor version of Spark: `delta.version`, `maven.plugin.scalatest.exclude.tags` --> - 3.4.1 + 3.4.2 3.4 spark-${spark.version}-bin-hadoop3${spark.archive.scala.suffix}.tgz @@ -233,7 +231,7 @@ ${project.build.directory}/scala-${scala.binary.version}/jars 3.3.0 - 1.6.8 + 1.7.1 1.6.1 1.12.1 @@ -243,7 +241,9 @@ false 2.30.0 - 0.8.7 + 3.2.1 + + 0.8.11 1.0.0 3.4.1 1.7.13 @@ -273,24 +273,6 @@ Apache Development Snapshot Repository https://repository.apache.org/content/repositories/snapshots - -XX:+IgnoreUnrecognizedVMOptions - --add-opens=java.base/java.lang=ALL-UNNAMED - --add-opens=java.base/java.lang.invoke=ALL-UNNAMED - --add-opens=java.base/java.lang.reflect=ALL-UNNAMED - --add-opens=java.base/java.io=ALL-UNNAMED - --add-opens=java.base/java.net=ALL-UNNAMED - --add-opens=java.base/java.nio=ALL-UNNAMED - --add-opens=java.base/java.util=ALL-UNNAMED - --add-opens=java.base/java.util.concurrent=ALL-UNNAMED - --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED - --add-opens=java.base/sun.nio.ch=ALL-UNNAMED - --add-opens=java.base/sun.nio.cs=ALL-UNNAMED - --add-opens=java.base/sun.security.action=ALL-UNNAMED - --add-opens=java.base/sun.security.tools.keytool=ALL-UNNAMED - --add-opens=java.base/sun.security.x509=ALL-UNNAMED - --add-opens=java.base/sun.util.calendar=ALL-UNNAMED - -Djdk.reflect.useDirectMethodHandle=false - -Dio.netty.tryReflectionSetAccessible=true -agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=5005 @@ -298,8 +280,13 @@ org.apache.kyuubi - ${kyuubi-shaded-zookeeper.artifacts} - ${kyuubi-shaded-zookeeper.version} + kyuubi-relocated-hive-service-rpc + ${kyuubi-relocated.version} + + + org.apache.kyuubi + ${kyuubi-relocated-zookeeper.artifacts} + ${kyuubi-relocated.version} org.antlr @@ -581,6 +568,18 @@ ${testcontainers-scala.version} + + com.dimafeng + testcontainers-scala-mysql_${scala.binary.version} + ${testcontainers-scala.version} + + + + com.dimafeng + testcontainers-scala-postgresql_${scala.binary.version} + ${testcontainers-scala.version} + + com.dimafeng testcontainers-scala-trino_${scala.binary.version} @@ -1351,6 +1350,12 @@ ${phoenix.version} + + org.postgresql + postgresql + ${postgresql.version} + + org.apache.flink @@ -1401,6 +1406,12 @@ provided + + org.apache.flink + flink-table-planner-loader + ${flink.version} + + org.apache.flink flink-sql-client @@ -1697,7 +1708,7 @@ true false - ${extraJavaTestArgs} + ${maven.plugin.surefire.argLine} @@ -1709,7 +1720,7 @@ ${project.build.directory}/surefire-reports . TestSuite.txt - ${extraJavaTestArgs} + ${maven.plugin.surefire.argLine} ${project.build.directory}/work @@ -2054,11 +2065,6 @@ spotless-maven-plugin - - org.jacoco - jacoco-maven-plugin - - org.apache.maven.plugins maven-antrun-plugin @@ -2135,6 +2141,37 @@ + + java-17 + + 17 + + + 17 + + + ${java.version} + -XX:+IgnoreUnrecognizedVMOptions + --add-opens=java.base/java.lang=ALL-UNNAMED + --add-opens=java.base/java.lang.invoke=ALL-UNNAMED + --add-opens=java.base/java.lang.reflect=ALL-UNNAMED + --add-opens=java.base/java.io=ALL-UNNAMED + --add-opens=java.base/java.net=ALL-UNNAMED + --add-opens=java.base/java.nio=ALL-UNNAMED + --add-opens=java.base/java.util=ALL-UNNAMED + --add-opens=java.base/java.util.concurrent=ALL-UNNAMED + --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED + --add-opens=java.base/sun.nio.ch=ALL-UNNAMED + --add-opens=java.base/sun.nio.cs=ALL-UNNAMED + --add-opens=java.base/sun.security.action=ALL-UNNAMED + --add-opens=java.base/sun.security.tools.keytool=ALL-UNNAMED + --add-opens=java.base/sun.security.x509=ALL-UNNAMED + --add-opens=java.base/sun.util.calendar=ALL-UNNAMED + -Djdk.reflect.useDirectMethodHandle=false + -Dio.netty.tryReflectionSetAccessible=true + + + scala-2.13 @@ -2246,7 +2283,7 @@ delta-core 2.4.0 - 3.4.1 + 3.4.2 3.4 org.scalatest.tags.Slow @@ -2314,7 +2351,7 @@ zookeeper-3.6 - kyuubi-shaded-zookeeper-36 + kyuubi-relocated-zookeeper-36 @@ -2391,6 +2428,18 @@ + + codecov + + + + org.jacoco + jacoco-maven-plugin + + + + + apache-release