diff --git a/.github/workflows/build-clients.yml b/.github/workflows/build-clients.yml deleted file mode 100644 index 75eb02b9b32..00000000000 --- a/.github/workflows/build-clients.yml +++ /dev/null @@ -1,27 +0,0 @@ -name: Build clients for opencga - -on: - workflow_dispatch: - inputs: - branch: - description: "The branch, tag or SHA of the source code to build docker." - type: string - required: true - -jobs: - build-clients: - runs-on: ${{ vars.UBUNTU_VERSION }} - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: '10' - ref: ${{ github.event.inputs.branch }} - - name: Build clients - run: | - chmod +x client-builder.sh - ./client-builder.sh - - name: Upload clients folder as artifact - uses: actions/upload-artifact@v4 - with: - name: clients - path: build/dist \ No newline at end of file diff --git a/.github/workflows/check-junit-test.yml b/.github/workflows/check-junit-test.yml index 36710bbcdf4..5dcdca376d1 100644 --- a/.github/workflows/check-junit-test.yml +++ b/.github/workflows/check-junit-test.yml @@ -8,13 +8,14 @@ on: type: choice description: 'Hadoop flavour.' required: false - default: "hdp3.1" + default: "hdi5.1" options: - "all" - - "hdp3.1" + - "hbase2.0" - "hdi5.1" - "emr6.1" - "emr6.13" + - "emr7.5" module: type: choice description: 'OpenCGA module to test.' 
@@ -74,7 +75,7 @@ jobs: hadoop='["${{ inputs.hadoop }}"]' if [ "${{ inputs.hadoop }}" == "all" ]; then - hadoop='["hdp3.1", "hdi5.1", "emr6.1", "emr6.13"]' + hadoop='["hdi5.1", "emr6.1", "emr6.13", "emr7.5", "hbase2.0"]' elif [ "${{ inputs.module }}" == "all" ]; then # Only execute modules with matrix strategy if we are testing one single hadoop profile modules='["opencga-analysis", "opencga-app", "opencga-catalog", "opencga-client", "opencga-core", "opencga-master", "opencga-server", "opencga-storage", "opencga-test"]' @@ -102,4 +103,5 @@ jobs: module: ${{ matrix.module }} mvn_opts: ${{ inputs.mvn_opts }} sonar: false + upload-artifact: false secrets: inherit \ No newline at end of file diff --git a/.github/workflows/compare-vulnerabilities.yml b/.github/workflows/compare-vulnerabilities.yml index acc9f1120ae..661aea62f5b 100644 --- a/.github/workflows/compare-vulnerabilities.yml +++ b/.github/workflows/compare-vulnerabilities.yml @@ -18,4 +18,3 @@ jobs: branch_a: ${{ github.event.inputs.branch_a }} branch_b: ${{ github.event.inputs.branch_b }} secrets: inherit - diff --git a/.github/workflows/deploy-ext-tools.yml b/.github/workflows/deploy-ext-tools.yml index c0b5f573dfc..67ab89fe581 100644 --- a/.github/workflows/deploy-ext-tools.yml +++ b/.github/workflows/deploy-ext-tools.yml @@ -42,8 +42,3 @@ jobs: - name: Deploy in Docker Hub if: steps.check.outputs.exists == 'false' run: python3 ./opencga-app/app/cloud/docker/docker-build.py push --images ext-tools --tag ${{ steps.check.outputs.version }} --org opencb - - - - - diff --git a/.github/workflows/develop.yml b/.github/workflows/develop.yml index 5492d997512..aca3c5c614a 100644 --- a/.github/workflows/develop.yml +++ b/.github/workflows/develop.yml @@ -11,26 +11,28 @@ jobs: build: uses: opencb/java-common-libs/.github/workflows/build-java-app-workflow.yml@develop with: - maven_opts: -P storage-hadoop,${{ vars.HADOOP_FLAVOUR }} -Dopencga.war.name=opencga - + needs_hadoop_preparation: true + hadoop_flavour: ${{ 
vars.HADOOP_FLAVOUR }} + maven_opts: -P${{ vars.HADOOP_FLAVOUR }} -Dopencga.war.name=opencga -Dcheckstyle.skip + secrets: inherit deploy-maven: uses: opencb/java-common-libs/.github/workflows/deploy-maven-repository-workflow.yml@develop needs: build with: - maven_opts: -P storage-hadoop,${{ vars.HADOOP_FLAVOUR }} -Dopencga.war.name=opencga -U -DskipTests + maven_opts: -P${{ vars.HADOOP_FLAVOUR }} -Dopencga.war.name=opencga secrets: inherit - deploy-zetta-docker: - uses: opencb/java-common-libs/.github/workflows/deploy-zetta-docker-hub-workflow.yml@develop + deploy-docker-workflow: + uses: opencb/java-common-libs/.github/workflows/deploy-docker-hub-workflow.yml@develop needs: build with: - cli: python3 ./build/cloud/docker/docker-build.py push --images base --tag ${{ github.ref_name }} + cli: python3 ./build/cloud/docker/docker-build.py push --images workflow --tag "${{ needs.build.outputs.version }}" secrets: inherit - deploy-docker-workflow: - uses: opencb/java-common-libs/.github/workflows/deploy-docker-hub-workflow.yml@develop + deploy-zetta-docker: + uses: opencb/java-common-libs/.github/workflows/deploy-zetta-docker-hub-workflow.yml@develop needs: build with: - cli: python3 ./build/cloud/docker/docker-build.py push --images workflow --tag ${{ github.ref_name }} + cli: python3 ./build/cloud/docker/docker-build.py push --images base,init --tag "${{ needs.build.outputs.version }}-${{ vars.HADOOP_FLAVOUR }}" secrets: inherit \ No newline at end of file diff --git a/.github/workflows/dockerfile-ext-tools.yml b/.github/workflows/dockerfile-ext-tools.yml index 2af929d2aec..d7fb4e9a770 100644 --- a/.github/workflows/dockerfile-ext-tools.yml +++ b/.github/workflows/dockerfile-ext-tools.yml @@ -8,6 +8,11 @@ on: jobs: build: uses: opencb/java-common-libs/.github/workflows/build-java-app-workflow.yml@develop + with: + needs_hadoop_preparation: true + hadoop_flavour: ${{ vars.HADOOP_FLAVOUR }} + maven_opts: -P${{ vars.HADOOP_FLAVOUR }} -Dopencga.war.name=opencga 
-Dcheckstyle.skip + secrets: inherit deploy-docker-ext-tools: uses: opencb/java-common-libs/.github/workflows/deploy-docker-hub-workflow.yml@develop diff --git a/.github/workflows/long-test-analysis.yml b/.github/workflows/long-test-analysis.yml index 10fdee5a0c3..e0a2071f928 100644 --- a/.github/workflows/long-test-analysis.yml +++ b/.github/workflows/long-test-analysis.yml @@ -12,10 +12,10 @@ jobs: strategy: fail-fast: false matrix: - hadoop: [ "hdp3.1", "hdi5.1", "emr6.1", "emr6.13" ] + hadoop: ["hdi5.1", "emr6.1", "emr6.13", "emr7.5", "hbase2.0"] uses: ./.github/workflows/test-analysis.yml with: test_profile: runShortTests,runMediumTests,runLongTests hadoop: ${{ matrix.hadoop }} + upload-artifact: false secrets: inherit - diff --git a/.github/workflows/manual-deploy-docker.yml b/.github/workflows/manual-deploy-docker.yml index ca488c4c144..f4416ba1294 100644 --- a/.github/workflows/manual-deploy-docker.yml +++ b/.github/workflows/manual-deploy-docker.yml @@ -10,46 +10,15 @@ on: description: "The tag for the new docker." type: string required: true - hadoop: - type: choice - description: 'Hadoop flavour. 
Any of: [hdp3.1, hdi5.1, emr6.1, emr6.13]' - required: false - default: hdp3.1 - options: - - hdp3.1 - - hdi5.1 - - emr6.1 - - emr6.13 jobs: build: - name: Build Java app - runs-on: ${{ vars.UBUNTU_VERSION }} - outputs: - version: ${{ steps.get_project_version.outputs.version }} - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: '10' - ref: "${{ inputs.branch }}" - - name: Set up JDK 8 - uses: actions/setup-java@v4 - with: - distribution: 'temurin' - java-version: '8' - cache: 'maven' - - name: Install dependencies branches - run: | - if [ -f "./.github/workflows/scripts/get_same_branch.sh" ]; then - chmod +x ./.github/workflows/scripts/get_same_branch.sh - ./.github/workflows/scripts/get_same_branch.sh ${{ github.ref_name }} ${{ inputs.hadoop }} - fi - - name: Maven Build (skip tests) - run: mvn -T 2 clean install -DskipTests -P${{ inputs.hadoop }} -Dopencga.war.name=opencga -Dcheckstyle.skip --no-transfer-progress -pl ':opencga-app' --also-make - - uses: actions/upload-artifact@v4 - with: - name: build-folder - path: build + uses: opencb/java-common-libs/.github/workflows/build-java-app-workflow.yml@TASK-7809 + with: + needs_hadoop_preparation: true + hadoop_flavour: ${{ vars.HADOOP_FLAVOUR }} + maven_opts: -P${{ vars.HADOOP_FLAVOUR }} -Dopencga.war.name=opencga -Dcheckstyle.skip + secrets: inherit deploy-zetta-docker: uses: opencb/java-common-libs/.github/workflows/deploy-zetta-docker-hub-workflow.yml@develop diff --git a/.github/workflows/manual-deploy-ext-tools.yml b/.github/workflows/manual-deploy-ext-tools.yml index 637efe41f80..3c5607f1e85 100644 --- a/.github/workflows/manual-deploy-ext-tools.yml +++ b/.github/workflows/manual-deploy-ext-tools.yml @@ -10,41 +10,15 @@ on: description: "The tag for the new docker." type: string required: true - hadoop: - type: string - description: 'Hadoop flavour. 
Any of: [hdp3.1, hdi5.1, emr6.1, emr6.13]' - required: false - default: "hdp3.1" jobs: build: - name: Build Java app - runs-on: ${{ vars.UBUNTU_VERSION }} - outputs: - version: ${{ steps.get_project_version.outputs.version }} - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: '10' - ref: "${{ inputs.branch }}" - - name: Set up JDK 8 - uses: actions/setup-java@v4 - with: - distribution: 'temurin' - java-version: '8' - cache: 'maven' - - name: Install dependencies branches - run: | - if [ -f "./.github/workflows/scripts/get_same_branch.sh" ]; then - chmod +x ./.github/workflows/scripts/get_same_branch.sh - ./.github/workflows/scripts/get_same_branch.sh ${{ github.ref_name }} ${{ inputs.hadoop }} - fi - - name: Maven Build (skip tests) - run: mvn -T 2 clean install -DskipTests --no-transfer-progress - - uses: actions/upload-artifact@v4 - with: - name: build-folder - path: build + uses: opencb/java-common-libs/.github/workflows/build-java-app-workflow.yml@TASK-7809 + with: + needs_hadoop_preparation: true + hadoop_flavour: ${{ vars.HADOOP_FLAVOUR }} + maven_opts: -P${{ vars.HADOOP_FLAVOUR }} -Dopencga.war.name=opencga -Dcheckstyle.skip + secrets: inherit deploy-docker-ext-tools: uses: opencb/java-common-libs/.github/workflows/deploy-docker-hub-workflow.yml@develop diff --git a/.github/workflows/manual-deploy-python-notebook.yml b/.github/workflows/manual-deploy-python-notebook.yml index 04a5a719f77..fc7d1cce0b0 100644 --- a/.github/workflows/manual-deploy-python-notebook.yml +++ b/.github/workflows/manual-deploy-python-notebook.yml @@ -11,11 +11,6 @@ on: description: "The tag for the new docker." type: string required: true - hadoop: - type: string - description: 'Hadoop flavour. Any of: [hdp3.1, hdi5.1, emr6.1, emr6.13]' - required: false - default: "hdp3.1" pyopencga_version: type: string description: 'PyOpenCGA version.' 
@@ -23,33 +18,12 @@ on: jobs: build: - name: Build Java app - runs-on: ${{ vars.UBUNTU_VERSION }} - outputs: - version: ${{ steps.get_project_version.outputs.version }} - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: '10' - ref: "${{ inputs.branch }}" - - name: Set up JDK 8 - uses: actions/setup-java@v4 - with: - distribution: 'temurin' - java-version: '8' - cache: 'maven' - - name: Install dependencies branches - run: | - if [ -f "./.github/workflows/scripts/get_same_branch.sh" ]; then - chmod +x ./.github/workflows/scripts/get_same_branch.sh - ./.github/workflows/scripts/get_same_branch.sh ${{ github.ref_name }} ${{ inputs.hadoop }} - fi - - name: Maven Build (skip tests) - run: mvn -T 2 clean install -DskipTests - - uses: actions/upload-artifact@v4 - with: - name: build-folder - path: build + uses: opencb/java-common-libs/.github/workflows/build-java-app-workflow.yml@TASK-7809 + with: + needs_hadoop_preparation: true + hadoop_flavour: ${{ vars.HADOOP_FLAVOUR }} + maven_opts: -P${{ vars.HADOOP_FLAVOUR }} -Dopencga.war.name=opencga -Dcheckstyle.skip + secrets: inherit deploy-docker-python-notebook: uses: opencb/java-common-libs/.github/workflows/deploy-docker-hub-workflow.yml@develop diff --git a/.github/workflows/manual-deploy-r-builder.yml b/.github/workflows/manual-deploy-r-builder.yml index 99db0a9a6a6..721e05ab045 100644 --- a/.github/workflows/manual-deploy-r-builder.yml +++ b/.github/workflows/manual-deploy-r-builder.yml @@ -6,8 +6,10 @@ jobs: build: uses: opencb/java-common-libs/.github/workflows/build-java-app-workflow.yml@develop with: - maven_opts: -Phdp3.1,RClient -Dopencga.war.name=opencga -Dcheckstyle.skip - build_folder: build- + needs_hadoop_preparation: true + hadoop_flavour: ${{ vars.HADOOP_FLAVOUR }} + maven_opts: -P${{ vars.HADOOP_FLAVOUR }} -Dopencga.war.name=opencga -Dcheckstyle.skip + secrets: inherit build-and-push-docker: runs-on: ubuntu-latest @@ -15,7 +17,6 @@ jobs: steps: - name: Checkout repository uses: 
actions/checkout@v4 - - uses: actions/download-artifact@v4 with: name: build-folder diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 081876a62a3..c3281d040f7 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -7,50 +7,45 @@ on: workflow_dispatch: jobs: - # Add the build-hdp job - build-hdp: + # Add the build-hdi and deploy-docker-hdi jobs + build-hdi: uses: opencb/java-common-libs/.github/workflows/build-java-app-workflow.yml@develop with: - maven_opts: -P storage-hadoop,${{ vars.HADOOP_FLAVOUR }},RClient,opencga-storage-hadoop-deps -Dopencga.war.name=opencga -Dcheckstyle.skip -pl ':opencga-app' --also-make - build_folder: build-folder + needs_hadoop_preparation: true + hadoop_flavour: hdi5.1 + maven_opts: -Phdi5.1 -Dopencga.war.name=opencga -Dcheckstyle.skip + build_folder: build-folder-hdi + secrets: inherit - # Add the deploy-zetta-docker job that depends on the build-hdp job - deploy-zetta-docker: + deploy-docker-hdi: uses: opencb/java-common-libs/.github/workflows/deploy-zetta-docker-hub-workflow.yml@develop - needs: build-hdp + needs: build-hdi with: - cli: python3 ./build/cloud/docker/docker-build.py push --images base --tag "${{ needs.build-hdp.outputs.version }}-hdp3.1" - build_folder: build-folder + cli: python3 ./build/cloud/docker/docker-build.py push --images base,init --tag "${{ needs.build-hdi.outputs.version }}-hdi5.1" --org zettagenomics + build_folder: build-folder-hdi secrets: inherit - # Add the deploy-maven and deploy-python jobs they depend on the build-hdp job - deploy-maven: - uses: opencb/java-common-libs/.github/workflows/deploy-maven-repository-workflow.yml@develop - needs: build-hdp - with: - maven_opts: -P hdp3.1 -Dopencga.war.name=opencga - secrets: inherit check-pypi-version: runs-on: ubuntu-latest - needs: build-hdp + needs: build-hdi outputs: exists: ${{ steps.check.outputs.exists }} steps: - name: Check if python package exists on PyPI id: check run: | - if curl 
--silent --fail "https://pypi.org/pypi/pyopencga/${{ needs.build-hdp.outputs.version }}/json" > /dev/null; then + if curl --silent --fail "https://pypi.org/pypi/pyopencga/${{ needs.build-hdi.outputs.version }}/json" > /dev/null; then echo "exists=true" >> $GITHUB_OUTPUT - echo "The python package with version ${{ needs.build-hdp.outputs.version }} already exists on pypi" >> $GITHUB_STEP_SUMMARY + echo "The python package with version ${{ needs.build-hdi.outputs.version }} already exists on pypi" >> $GITHUB_STEP_SUMMARY else echo "exists=false" >> $GITHUB_OUTPUT - echo "The python package with version ${{ needs.build-hdp.outputs.version }} does not exist in pypi and must be pushed" >> $GITHUB_STEP_SUMMARY + echo "The python package with version ${{ needs.build-hdi.outputs.version }} does not exist in pypi and must be pushed" >> $GITHUB_STEP_SUMMARY fi deploy-python: needs: - - build-hdp + - build-hdi - check-pypi-version if: needs.check-pypi-version.outputs.exists == 'false' uses: opencb/java-common-libs/.github/workflows/deploy-python-workflow.yml@develop @@ -59,45 +54,42 @@ jobs: artifact: build-folder secrets: inherit + # Add the deploy-maven and deploy-python jobs they depend on the build-hdi job + deploy-maven: + uses: opencb/java-common-libs/.github/workflows/deploy-maven-repository-workflow.yml@develop + needs: build-hdi + with: + maven_opts: -P hdi5.1 -Dopencga.war.name=opencga + secrets: inherit + # Add the release job that depends on all the previous jobs release: uses: opencb/java-common-libs/.github/workflows/release-github-workflow.yml@develop - needs: [ build-hdp, deploy-zetta-docker, deploy-maven, deploy-python, deploy-docker-python-notebook] + needs: [ build-hdi, deploy-docker-hdi, deploy-maven, deploy-python, deploy-docker-python-notebook] with: artifact: build-folder file: | - opencga-client-${{ needs.build-hdp.outputs.version }}.tar.gz - dist/*.* + opencga-client-${{ needs.build-hdi.outputs.version }}.tar.gz + clients/R/opencgaR_${{ 
needs.build-hdi.outputs.version }}.tar.gz deploy-docker-python-notebook: uses: opencb/java-common-libs/.github/workflows/deploy-docker-hub-workflow.yml@develop - needs: [ build-hdp, deploy-python ] + needs: [ build-hdi, deploy-python ] with: - cli: python3 ./build/cloud/docker/docker-build.py push --images python-notebook --tag ${{ needs.build-hdp.outputs.version }} --docker-build-args "--build-arg VERSION=${{ needs.build-hdp.outputs.version }}" --org opencb + cli: python3 ./build/cloud/docker/docker-build.py push --images python-notebook --tag ${{ needs.build-hdi.outputs.version }} --docker-build-args "--build-arg VERSION=${{ needs.build-hdi.outputs.version }}" secrets: inherit - # Compile and deploy other hadoop flavours - # Add the build-hdi and deploy-docker-hdi jobs - build-hdi: - uses: opencb/java-common-libs/.github/workflows/build-java-app-workflow.yml@develop - with: - maven_opts: -P hdi5.1-Dopencga.war.name=opencga -Dcheckstyle.skip - build_folder: build-folder-hdi - - deploy-docker-hdi: - uses: opencb/java-common-libs/.github/workflows/deploy-zetta-docker-hub-workflow.yml@develop - needs: build-hdi - with: - cli: python3 ./build/cloud/docker/docker-build.py push --images base,init --tag "${{ needs.build-hdi.outputs.version }}-hdi5.1" --org zettagenomics - build_folder: build-folder-hdi - secrets: inherit + # Compile and deploy other hadoop flavours # Add the build-emr and deploy-docker-emr jobs build-emr: uses: opencb/java-common-libs/.github/workflows/build-java-app-workflow.yml@develop with: - maven_opts: -P emr6.1 -Dopencga.war.name=opencga -Dcheckstyle.skip + needs_hadoop_preparation: true + hadoop_flavour: emr6.1 + maven_opts: -Pemr6.1 -Dopencga.war.name=opencga -Dcheckstyle.skip build_folder: build-folder-emr + secrets: inherit deploy-docker-emr: uses: opencb/java-common-libs/.github/workflows/deploy-zetta-docker-hub-workflow.yml@develop @@ -111,8 +103,11 @@ jobs: build-emr613: uses: 
opencb/java-common-libs/.github/workflows/build-java-app-workflow.yml@develop with: - maven_opts: -P emr6.13 -Dopencga.war.name=opencga -Dcheckstyle.skip + needs_hadoop_preparation: true + hadoop_flavour: emr6.13 + maven_opts: -Pemr6.13 -Dopencga.war.name=opencga -Dcheckstyle.skip build_folder: build-folder-emr613 + secrets: inherit deploy-docker-emr613: uses: opencb/java-common-libs/.github/workflows/deploy-zetta-docker-hub-workflow.yml@develop @@ -122,3 +117,21 @@ jobs: build_folder: build-folder-emr613 secrets: inherit +# Add more flavours if needed following the same pattern emr7.5 + build-emr75: + uses: opencb/java-common-libs/.github/workflows/build-java-app-workflow.yml@develop + with: + needs_hadoop_preparation: true + hadoop_flavour: emr7.5 + maven_opts: -Pemr7.5 -Dopencga.war.name=opencga -Dcheckstyle.skip + build_folder: build-folder-emr75 + secrets: inherit + + deploy-docker-emr75: + uses: opencb/java-common-libs/.github/workflows/deploy-docker-hub-workflow.yml@develop + needs: build-emr75 + with: + cli: python3 ./build/cloud/docker/docker-build.py push --images base,init --tag "${{ needs.build-emr75.outputs.version }}-emr7.5" + build_folder: build-folder-emr75 + secrets: inherit + diff --git a/.github/workflows/scripts/compile_same_branch.sh b/.github/workflows/scripts/compile_same_branch.sh new file mode 100755 index 00000000000..68032336bf6 --- /dev/null +++ b/.github/workflows/scripts/compile_same_branch.sh @@ -0,0 +1,19 @@ +#!/bin/bash +WORKSPACE=${WORKSPACE:-/home/runner/work/} + + +function compile() { + local REPO=$1 + if [ ! -d "${WORKSPACE}/$REPO" ]; then + echo "Directory ${WORKSPACE}/$REPO does not exist. 
Skip compile" + return 0; + fi + echo "::group::Compiling '$REPO' project from branch $BRANCH_NAME" + cd "${WORKSPACE}/$REPO" || exit 2 + mvn clean install -DskipTests --no-transfer-progress + echo "::endgroup::" +} + + +compile "java-common-libs" +compile "biodata" \ No newline at end of file diff --git a/.github/workflows/scripts/get_same_branch.sh b/.github/workflows/scripts/get_same_branch.sh old mode 100644 new mode 100755 index d03ed38e0f9..7d47eb99194 --- a/.github/workflows/scripts/get_same_branch.sh +++ b/.github/workflows/scripts/get_same_branch.sh @@ -1,30 +1,36 @@ #!/bin/bash BRANCH_NAME=$1 -HADOOP=${2:-hdp3.1} +DEPENDENCIES_SHA=${DEPENDENCIES_SHA:-""} +WORKSPACE=${WORKSPACE:-/home/runner/work/} if [[ -z "$BRANCH_NAME" ]]; then echo "The first parameter is mandatory and must be a valid branch name." exit 1 fi -function install(){ +function checkout(){ local REPO=$1 - cd /home/runner/work/ || exit 2 + echo "::group::Installing '$REPO' project from branch $BRANCH_NAME" + cd "${WORKSPACE}" || exit 2 git clone https://github.com/opencb/"$REPO".git -b "$BRANCH_NAME" if [ -d "./$REPO" ]; then cd "$REPO" || exit 2 + DEPENDENCIES_SHA=${DEPENDENCIES_SHA}:$(git rev-parse HEAD) echo "Branch name $BRANCH_NAME already exists." - if [[ "$REPO" == "opencga-hadoop-thirdparty" ]]; then - ./dev/build.sh "$HADOOP" - else - mvn clean install -DskipTests - fi else - echo "$CURRENT Branch is NOT EQUALS $BRANCH_NAME " + echo "Branch name $BRANCH_NAME does not exist in $REPO repository. Skipping installation." 
fi + echo "::endgroup::" } -install "java-common-libs" -install "biodata" -install "opencga-hadoop-thirdparty" \ No newline at end of file +checkout "java-common-libs" +checkout "biodata" + +## Apply sha1 to DEPENDENCIES_SHA if contains `:` +if [[ "$DEPENDENCIES_SHA" == *":"* ]]; then + DEPENDENCIES_SHA=$(echo -n "$DEPENDENCIES_SHA" | sha1sum | awk '{print $1}') +fi + +## Export DEPENDENCIES_SHA as github output +echo "dependencies_sha=$DEPENDENCIES_SHA" >> "$GITHUB_OUTPUT" \ No newline at end of file diff --git a/.github/workflows/scripts/prepare_hadoop.sh b/.github/workflows/scripts/prepare_hadoop.sh new file mode 100644 index 00000000000..92dbc7981a6 --- /dev/null +++ b/.github/workflows/scripts/prepare_hadoop.sh @@ -0,0 +1,108 @@ +#!/bin/bash + +set -e +set -o pipefail + + +function main() { + HADOOP_FLAVOUR="hbase2.0" + + while [[ $# -gt 0 ]]; do + key="$1" + value="$2" + case $key in + --hadoop-flavour) + HADOOP_FLAVOUR="$value" + shift # past argument + shift # past value + ;; + --hadoop-thirdparty-version) + HADOOP_THIRDPARTY_VERSION="$value" + shift # past argument + shift # past value + ;; + --verbose) + set -x + shift # past key + ;; + *) # unknown option + echo "Unknown option $key" + return 1 + ;; + esac + done + + if [ -z "$HADOOP_THIRDPARTY_VERSION" ]; then + HADOOP_THIRDPARTY_VERSION=$(mvn help:evaluate -Dexpression=opencga.hadoop.thirdparty.version -q -DforceStdout) + fi + + # Check if HADOOP_THIRDPARTY can be download + ARTIFACT="org.opencb.opencga.hadoop.thirdparty:opencga-hadoop-shaded-${HADOOP_FLAVOUR}:${HADOOP_THIRDPARTY_VERSION}" + echo "Looking for artifact:" + echo " - $ARTIFACT" + if mvn dependency:get "-Dartifact=${ARTIFACT}" &> /dev/null; then + echo "Hadoop thirdparty jar found." + return 0; + fi + + echo "Hadoop thirdparty jar not found in local maven repository. Building opencga-hadoop-thirdparty..." 
+ local GIT_REF= + if [[ "$HADOOP_THIRDPARTY_VERSION" == *"-SNAPSHOT" ]]; then + local VERSION=$(echo "$HADOOP_THIRDPARTY_VERSION" | cut -d "-" -f 1) + local MAJOR=$(echo "$VERSION" | cut -d "." -f 1) + local MINOR=$(echo "$VERSION" | cut -d "." -f 2) + local PATCH=$(echo "$VERSION" | cut -d "." -f 3) + if [ $PATCH -gt 0 ]; then ## It's a hotfix + GIT_REF="release-$MAJOR.$MINOR.x" + elif [ $MINOR -eq 0 ]; then ## It's a develop branch + GIT_REF="develop" + else ## It's a release branch + GIT_REF="release-$MAJOR.x.x" + fi + else + GIT_REF="v$HADOOP_THIRDPARTY_VERSION" + fi + install "$GIT_REF" "$HADOOP_FLAVOUR" + if [ $? -ne 0 ]; then + echo "Failed to build opencga-hadoop-thirdparty." + return 1 + fi + + return 0; +} + + +function install(){ + local GIT_REF=${1:?"Git reference (branch, tag) is required"} + local HADOOP=${2:?"Hadoop flavour is required"} + echo "Installing $HADOOP hadoop flavour from $GIT_REF" + local REPO="opencga-hadoop-thirdparty" + local TMP_DIR_HOME="dependency-checkouts/" + mkdir -p "$TMP_DIR_HOME" + rm -rf "${TMP_DIR_HOME:?}"/* + TEMP_DIR="$(mktemp -d --tmpdir="$TMP_DIR_HOME" --suffix="$(date +%Y%m%d%H%M%S)-$REPO")" + cd "$TEMP_DIR" || return 2 + echo "Cloning repository $REPO with ref $GIT_REF" + + # Build HTTPS clone URL using optional token for private access + local CLONE_URL + if [[ -n "${THIRDPARTY_READ_TOKEN:-}" ]]; then + CLONE_URL="https://x-access-token:${THIRDPARTY_READ_TOKEN}@github.com/opencb/${REPO}.git" + else + CLONE_URL="git@github.com:opencb/${REPO}.git" + fi + + # Shallow clone at the requested ref + git clone --depth 1 -b "$GIT_REF" "$CLONE_URL" + cd "$REPO" || return 2 + PROJECT_VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout) + echo "Cloned repository $REPO with ref $GIT_REF and version $PROJECT_VERSION" + ./dev/build.sh "$HADOOP" + cd - || return 2 +} + + + +main "$@" +exit $? 
+ diff --git a/.github/workflows/task.yml b/.github/workflows/task.yml index ea57cbf7bc5..c30f682c36c 100644 --- a/.github/workflows/task.yml +++ b/.github/workflows/task.yml @@ -9,28 +9,31 @@ on: # WARNING Develop branch needed for prod jobs: - build: - uses: opencb/java-common-libs/.github/workflows/build-java-app-workflow.yml@develop - with: - maven_opts: -Phdp3.1 -Dopencga.war.name=opencga -Dcheckstyle.skip test: uses: ./.github/workflows/test-analysis.yml - needs: build with: test_profile: runShortTests + upload-artifact: true + secrets: inherit + + deploy-docker: + uses: opencb/java-common-libs/.github/workflows/deploy-docker-hub-workflow.yml@develop + needs: test + with: + cli: python3 ./build/cloud/docker/docker-build.py push --images workflow --tag "${{ github.ref_name }}" + secrets: inherit deploy-zetta-docker: uses: opencb/java-common-libs/.github/workflows/deploy-zetta-docker-hub-workflow.yml@develop - needs: build + needs: test with: - cli: python3 ./build/cloud/docker/docker-build.py push --images base --tag ${{ github.ref_name }} + cli: python3 ./build/cloud/docker/docker-build.py push --images base,init --tag "${{ github.ref_name }}" # Use branch name as tag secrets: inherit deploy-docker-workflow: uses: opencb/java-common-libs/.github/workflows/deploy-docker-hub-workflow.yml@develop - needs: build + needs: test with: cli: python3 ./build/cloud/docker/docker-build.py push --images workflow --tag ${{ github.ref_name }} secrets: inherit \ No newline at end of file diff --git a/.github/workflows/test-analysis.yml b/.github/workflows/test-analysis.yml index fe1f200588c..3258c723e60 100644 --- a/.github/workflows/test-analysis.yml +++ b/.github/workflows/test-analysis.yml @@ -10,9 +10,8 @@ on: required: true hadoop: type: string - description: 'Hadoop flavour. 
Any of: [hdp3.1, hdi5.1, emr6.1, emr6.13]' required: false - default: "hdp3.1" + default: "hdi5.1" mvn_opts: type: string required: false @@ -26,15 +25,31 @@ on: description: "Maven modules to test. Empty means all. Only top-level modules. Example: 'opencga-storage'" required: false default: "" + upload-artifact: + type: boolean + description: "Whether to upload the build artifact or not" + required: false + default: true secrets: SONAR_TOKEN: required: true jobs: + + build: + uses: opencb/java-common-libs/.github/workflows/build-java-app-workflow.yml@TASK-7809 + with: + needs_hadoop_preparation: true + hadoop_flavour: ${{ inputs.hadoop }} + maven_opts: -P${{ inputs.hadoop }} -Dopencga.war.name=opencga -Dcheckstyle.skip + upload-artifact: ${{ inputs.upload-artifact }} + secrets: inherit + analysis: name: Execute Sonar Analysis runs-on: ${{ vars.UBUNTU_VERSION }} if: ${{ inputs.sonar }} + needs: build steps: - uses: actions/checkout@v4 with: @@ -45,14 +60,13 @@ jobs: distribution: 'temurin' java-version: '17' cache: 'maven' - - name: Install dependencies branches - run: | - if [ -f "./.github/workflows/scripts/get_same_branch.sh" ]; then - chmod +x ./.github/workflows/scripts/get_same_branch.sh - ./.github/workflows/scripts/get_same_branch.sh ${{ github.ref_name }} ${{ inputs.hadoop }} - else - echo "./.github/workflows/scripts/get_same_branch.sh does not exist." 
- fi + - name: Cache local Maven repository + uses: actions/cache/restore@v4 + with: + path: ~/.m2/repository + key: ${{ needs.build.outputs.cache-key }} + ## Force cache hit to avoid analyzing with incomplete dependencies + fail-on-cache-miss: true - name: Test and Analyze env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Needed to get PR information, if any @@ -63,6 +77,7 @@ jobs: test: name: Execute JUnit and Jacoco tests runs-on: ${{ vars.UBUNTU_VERSION }} + needs: build steps: - uses: actions/checkout@v4 with: @@ -73,14 +88,13 @@ jobs: distribution: 'temurin' java-version: '8' cache: 'maven' - - name: Install dependencies branches - run: | - if [ -f "./.github/workflows/scripts/get_same_branch.sh" ]; then - chmod +x ./.github/workflows/scripts/get_same_branch.sh - ./.github/workflows/scripts/get_same_branch.sh ${{ github.ref_name }} ${{ inputs.hadoop }} - else - echo "./.github/workflows/scripts/get_same_branch.sh does not exist." - fi + - name: Cache local Maven repository + uses: actions/cache/restore@v4 + with: + path: ~/.m2/repository + key: ${{ needs.build.outputs.cache-key }} + ## Force cache hit to avoid testing with incomplete dependencies + fail-on-cache-miss: true - name: Install Samtools run: sudo apt-get install samtools python3-deeptools - name: Start MongoDB v6.0 @@ -88,8 +102,6 @@ jobs: with: mongodb-version: 6.0 mongodb-replica-set: rs-test - - name: Maven build (skip tests) - run: mvn -B clean install -DskipTests -P ${{ inputs.hadoop }} -Dcheckstyle.skip ${{ inputs.mvn_opts }} - name: Build Junit log file name id: BuildJunitLogFileName run: | @@ -120,4 +132,3 @@ jobs: commit: '${{ github.sha }}' fail_on_test_failures: true fail_if_no_tests: false - diff --git a/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/manager/operations/VariantFileIndexerOperationManagerTest.java b/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/manager/operations/VariantFileIndexerOperationManagerTest.java index 
f11bbcf6976..c43a82983b8 100644 --- a/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/manager/operations/VariantFileIndexerOperationManagerTest.java +++ b/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/manager/operations/VariantFileIndexerOperationManagerTest.java @@ -94,7 +94,7 @@ public void testIndexWithStats() throws Exception { assertEquals(500, getDefaultCohort(studyId).getSamples().size()); assertEquals(CohortStatus.NONE, getDefaultCohort(studyId).getInternal().getStatus().getId()); VariantSetStats stats = getVariantSetMetrics(getFile(0).getId()); - assertNotEquals(0, stats.getVariantCount().intValue()); + assertNotEquals(0, stats.getVariantCount()); variantManager.index(studyId, getFile(1).getId(), newTmpOutdir(), queryOptions, sessionId); assertEquals(1000, getDefaultCohort(studyId).getSamples().size()); diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/templates/TemplateEntryIterator.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/templates/TemplateEntryIterator.java index 7906658cca1..81744adc1b2 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/templates/TemplateEntryIterator.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/templates/TemplateEntryIterator.java @@ -387,6 +387,21 @@ private void getDeclaredFields(Class clazz, String field, Map m if (declaredField.getType().getName().equals("org.apache.avro.Schema")) { continue; } + if (declaredField.getType().getName().equals("org.apache.avro.specific.SpecificData")) { + continue; + } + if (declaredField.getType().getName().equals("org.apache.avro.message.BinaryMessageEncoder")) { + continue; + } + if (declaredField.getType().getName().equals("org.apache.avro.message.BinaryMessageDecoder")) { + continue; + } + if (declaredField.getType().getName().equals("org.apache.avro.io.DatumReader")) { + continue; + } + if (declaredField.getType().getName().equals("org.apache.avro.io.DatumWriter")) { + 
continue; + } // Ignore jacoco custom fields if (declaredField.getName().equals("$jacocoData")) { continue; diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/utils/AvroToAnnotationConverter.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/utils/AvroToAnnotationConverter.java index 87f36ab22c9..b1811f015ce 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/utils/AvroToAnnotationConverter.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/utils/AvroToAnnotationConverter.java @@ -95,25 +95,25 @@ private static Variable getVariable(Schema.Field field, Schema schema) { case BYTES: case FIXED: return new Variable(field.name(), field.name(), "", Variable.VariableType.STRING, - field.defaultValue() == null ? null : field.defaultValue().getTextValue(), + field.hasDefaultValue() ? field.defaultVal() : null, defaultRequired, false, null, null, field.pos(), null, getDoc(field), null, null); case INT: case LONG: return new Variable(field.name(), field.name(), "", Variable.VariableType.INTEGER, - field.defaultValue() == null ? null : field.defaultValue().getLongValue(), + field.hasDefaultValue() ? field.defaultVal() : null, defaultRequired, false, null, null, field.pos(), null, getDoc(field), null, null); case FLOAT: case DOUBLE: return new Variable(field.name(), field.name(), "", Variable.VariableType.DOUBLE, - field.defaultValue() == null ? null : field.defaultValue().getDoubleValue(), + field.hasDefaultValue() ? field.defaultVal() : null, defaultRequired, false, null, null, field.pos(), null, getDoc(field), null, null); case BOOLEAN: return new Variable(field.name(), field.name(), "", Variable.VariableType.BOOLEAN, - field.defaultValue() == null ? null : field.defaultValue().getBooleanValue(), + field.hasDefaultValue() ? 
field.defaultVal() : null, defaultRequired, false, null, null, field.pos(), null, getDoc(field), null, null); case ENUM: return new Variable(field.name(), field.name(), "", Variable.VariableType.CATEGORICAL, - field.defaultValue() == null ? null : field.defaultValue().getTextValue(), + field.hasDefaultValue() ? field.defaultVal() : null, defaultRequired, false, schema.getEnumSymbols(), null, field.pos(), null, getDoc(field), null, null); case ARRAY: return getVariable(field, schema.getElementType()).setMultiValue(true); @@ -136,7 +136,7 @@ private static Variable getVariable(Schema.Field field, Schema schema) { // .setName("value"); // variableSet.add(valueVariable); // return new Variable(field.name(), field.name(), "", Variable.VariableType.OBJECT, -// field.defaultValue(), defaultRequired, true, null, field.pos(), null, field.doc(), variableSet, null); +// field.defaultVal(), defaultRequired, true, null, field.pos(), null, field.doc(), variableSet, null); Variable.VariableType mapType; switch (field.schema().getValueType().getType()) { case STRING: diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/common/mixins/GenericRecordAvroJsonMixin.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/common/mixins/GenericRecordAvroJsonMixin.java index 8448b7a95a6..9fa561de4af 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/common/mixins/GenericRecordAvroJsonMixin.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/common/mixins/GenericRecordAvroJsonMixin.java @@ -23,6 +23,6 @@ * * @author Jacobo Coll <jacobo167@gmail.com> */ -@JsonIgnoreProperties({"schema"}) +@JsonIgnoreProperties({"schema", "specificData"}) public abstract class GenericRecordAvroJsonMixin { } diff --git a/opencga-core/src/main/resources/configuration.yml b/opencga-core/src/main/resources/configuration.yml index cfbaf73b572..7557a766469 100644 --- a/opencga-core/src/main/resources/configuration.yml +++ 
b/opencga-core/src/main/resources/configuration.yml @@ -267,7 +267,7 @@ analysis: k8s.clientTimeout: 30000 # ms k8s.terminationGracePeriodSeconds: 300 # s k8s.logToStdout: true - k8s.imageName: "opencb/opencga-base:${project.parent.version}-hdp3.1" + k8s.imageName: "opencb/opencga-base:${project.parent.version}" k8s.imagePullPolicy: "IfNotPresent" # k8s.imagePullSecrets: # name : "dockerhub-secrets-name" diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/iterators/VariantDBIterator.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/iterators/VariantDBIterator.java index f070c4c7bb1..37f3194e004 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/iterators/VariantDBIterator.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/iterators/VariantDBIterator.java @@ -52,11 +52,11 @@ public abstract class VariantDBIterator extends CloseableIterator { if (c != 0) { return c; } - c = v1.getStart().compareTo(v2.getStart()); + c = Integer.compare(v1.getStart(), v2.getStart()); if (c != 0) { return c; } - c = v1.getEnd().compareTo(v2.getEnd()); + c = Integer.compare(v1.getEnd(), v2.getEnd()); if (c != 0) { return c; } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManager.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManager.java index 6b967ab9cba..a7479ba06a8 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManager.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManager.java @@ -170,6 +170,11 @@ protected final 
VariantAnnotationMetadata checkCurrentAnnotation(ProjectMetadata // Check sources for old cellbase versions List currentSourceVersion = current.getSourceVersion(); List newSourceVersion = newVariantAnnotationMetadata.getSourceVersion(); + + if (newSourceVersion.isEmpty()) { + throw new IllegalArgumentException("Missing annotator source version!"); + } + if (CollectionUtils.isNotEmpty(currentSourceVersion) && !sameSourceVersion(newSourceVersion, currentSourceVersion)) { String msg = "Source version of the annotator has changed. " + "Existing annotation calculated with " @@ -273,9 +278,7 @@ protected final void updateCurrentAnnotation(VariantAnnotator annotator, Project if (newAnnotator == null) { throw new IllegalArgumentException("Missing annotator information for VariantAnnotator: " + annotator.getClass()); } - if (newSourceVersion.isEmpty()) { - throw new IllegalArgumentException("Missing annotator source version for VariantAnnotator: " + annotator.getClass()); - } + checkCurrentAnnotation(projectMetadata, overwrite, newAnnotationMetadata); VariantAnnotationMetadata current = projectMetadata.getAnnotation().getCurrent(); diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/CellBaseRestVariantAnnotator.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/CellBaseRestVariantAnnotator.java index 75028ffab36..dc33b9c9a4b 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/CellBaseRestVariantAnnotator.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/CellBaseRestVariantAnnotator.java @@ -177,9 +177,6 @@ private List getVariantAnnotatorSourceVersion() throws VariantAnnotat } } } - if (objectMaps.isEmpty()) { - throw new VariantAnnotatorException("Error fetching CellBase source 
information from " + getDebugInfo("/meta/versions")); - } return objectMaps; } diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageEngineTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageEngineTest.java index f925784ba07..3a02c97e975 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageEngineTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageEngineTest.java @@ -649,7 +649,7 @@ public void indexWithOtherFields(String extraFields) throws Exception { ); VariantFileMetadata fileMetadata = variantStorageEngine.getVariantReaderUtils().readVariantFileMetadata(etlResult.getTransformResult()); - checkTransformedVariants(etlResult.getTransformResult(), studyMetadata, fileMetadata.getStats().getVariantCount().intValue()); + checkTransformedVariants(etlResult.getTransformResult(), studyMetadata, ((int) fileMetadata.getStats().getVariantCount())); VariantDBAdaptor dbAdaptor = variantStorageEngine.getDBAdaptor(); checkLoadedVariants(dbAdaptor, studyMetadata, true, false, false, getExpectedNumLoadedVariants(fileMetadata)); @@ -740,9 +740,9 @@ private VariantFileMetadata checkTransformedVariants(URI variantsJson, StudyMeta variantReader.close(); if (expectedNumVariants < 0) { - expectedNumVariants = source.getStats().getVariantCount().intValue(); + expectedNumVariants = (int) source.getStats().getVariantCount(); } else { - assertEquals(expectedNumVariants, source.getStats().getVariantCount().intValue()); //9792 + assertEquals(expectedNumVariants, (int) source.getStats().getVariantCount()); //9792 } assertEquals(expectedNumVariants, numVariants); //9792 logger.info("checkTransformedVariants time : " + (System.currentTimeMillis() - start) / 1000.0 + "s"); diff --git 
a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageTest.java index 21ae5fb79a4..fbded98ab23 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageTest.java @@ -33,7 +33,7 @@ public interface VariantStorageTest extends AutoCloseable { default void close() throws Exception {} default int getExpectedNumLoadedVariants(VariantFileMetadata fileMetadata) { - int numRecords = fileMetadata.getStats().getVariantCount().intValue(); + int numRecords = (int) fileMetadata.getStats().getVariantCount(); return (int)(numRecords - fileMetadata.getStats().getTypeCount().getOrDefault(VariantType.SYMBOLIC.name(), 0L) - fileMetadata.getStats().getTypeCount().getOrDefault(VariantType.NO_VARIATION.name(), 0L)); diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/search/VariantSearchTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/search/VariantSearchTest.java index 923d2e24602..4141e55219c 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/search/VariantSearchTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/search/VariantSearchTest.java @@ -346,8 +346,8 @@ private void checkScore(List inScores, List outScores, String sour Score outScore = getScore(outScores, source); if (inScore != null && outScore != null) { - double inValue = inScore.getScore() == null ? 0 : inScore.getScore(); - double outValue = outScore.getScore() == null ? 
0 : outScore.getScore(); + double inValue = inScore.getScore(); + double outValue = outScore.getScore(); String inDescription = inScore.getDescription() == null ? "" : inScore.getDescription(); String outDescription = outScore.getDescription() == null ? "" : outScore.getDescription(); System.out.println(source + ": " + inValue + " vs " + outValue diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/src/main/java/org/opencb/opencga/storage/hadoop/HBaseCompatApi.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/src/main/java/org/opencb/opencga/storage/hadoop/HBaseCompatApi.java index b9d6a9dd31a..7bc33a179ed 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/src/main/java/org/opencb/opencga/storage/hadoop/HBaseCompatApi.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-api/src/main/java/org/opencb/opencga/storage/hadoop/HBaseCompatApi.java @@ -38,4 +38,6 @@ public static HBaseCompatApi getInstance() { public abstract byte[][] getTableStartKeys(Admin admin, Table table) throws IOException; public abstract boolean isSnappyAvailable(); + + public abstract Class[] getClassesForDependencyJars(); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml index 44bb534e690..825b86df6b9 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/pom.xml @@ -35,6 +35,8 @@ 2.7.7 5.0.0-HBase-2.0 19.0 + 3.3.6 + 0.14.0-incubating @@ -79,6 +81,18 @@ ${guava.version} 
provided + + com.lmax + disruptor + ${disruptor.version} + provided + + + org.apache.tephra + tephra-core + ${tephra.version} + provided + diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/src/main/java/org/opencb/opencga/storage/hadoop/HBaseCompat.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/src/main/java/org/opencb/opencga/storage/hadoop/HBaseCompat.java index 923c061c837..061dc74480c 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/src/main/java/org/opencb/opencga/storage/hadoop/HBaseCompat.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.0/src/main/java/org/opencb/opencga/storage/hadoop/HBaseCompat.java @@ -1,5 +1,6 @@ package org.opencb.opencga.storage.hadoop; +import com.lmax.disruptor.EventFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.client.Admin; @@ -8,6 +9,7 @@ import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.io.compress.SnappyCodec; +import org.apache.tephra.TransactionSystemClient; import org.opencb.opencga.storage.hadoop.variant.annotation.phoenix.PhoenixCompat; import org.opencb.opencga.storage.hadoop.variant.annotation.phoenix.PhoenixCompatApi; @@ -51,4 +53,9 @@ public byte[][] getTableStartKeys(Admin admin, Table table) throws IOException { public boolean isSnappyAvailable() { return SnappyCodec.isNativeCodeLoaded(); } + + @Override + public Class[] getClassesForDependencyJars() { + return new Class[]{TransactionSystemClient.class, EventFactory.class}; + } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml index a73c5e26765..1a84bb808ad 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/pom.xml @@ -35,6 +35,8 @@ 2.8.5 5.0.0-HBase-2.0 19.0 + 3.3.6 + 0.14.0-incubating @@ -79,6 +81,18 @@ ${guava.version} provided + + com.lmax + disruptor + ${disruptor.version} + provided + + + org.apache.tephra + tephra-core + ${tephra.version} + provided + diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/src/main/java/org/opencb/opencga/storage/hadoop/HBaseCompat.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/src/main/java/org/opencb/opencga/storage/hadoop/HBaseCompat.java index 455d57aa264..270d90dd3e7 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/src/main/java/org/opencb/opencga/storage/hadoop/HBaseCompat.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.2/src/main/java/org/opencb/opencga/storage/hadoop/HBaseCompat.java @@ -1,5 +1,6 @@ package org.opencb.opencga.storage.hadoop; +import com.lmax.disruptor.EventFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.client.Admin; @@ -8,6 +9,7 @@ import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.io.compress.SnappyCodec; +import org.apache.tephra.TransactionSystemClient; import org.opencb.opencga.storage.hadoop.variant.annotation.phoenix.PhoenixCompat; import 
org.opencb.opencga.storage.hadoop.variant.annotation.phoenix.PhoenixCompatApi; @@ -51,4 +53,9 @@ public byte[][] getTableStartKeys(Admin admin, Table table) throws IOException { public boolean isSnappyAvailable() { return SnappyCodec.isNativeCodeLoaded(); } + + @Override + public Class[] getClassesForDependencyJars() { + return new Class[]{TransactionSystemClient.class, EventFactory.class}; + } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml index 3412007cd42..357fead8e4f 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/pom.xml @@ -35,6 +35,8 @@ 3.3.4 5.1.3 1.1.0 + 3.4.4 + 0.16.1 @@ -79,6 +81,24 @@ ${phoenix-thirdparty.version} provided + + com.lmax + disruptor + ${disruptor.version} + provided + + + org.apache.tephra + tephra-core + ${tephra.version} + provided + + + org.apache.tephra + tephra-core-shaded + ${tephra.version} + provided + diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/src/main/java/org/opencb/opencga/storage/hadoop/HBaseCompat.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/src/main/java/org/opencb/opencga/storage/hadoop/HBaseCompat.java index d680840cb3d..c3fafeb46b0 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/src/main/java/org/opencb/opencga/storage/hadoop/HBaseCompat.java +++ 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.4/src/main/java/org/opencb/opencga/storage/hadoop/HBaseCompat.java @@ -1,10 +1,12 @@ package org.opencb.opencga.storage.hadoop; +import com.lmax.disruptor.EventFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.client.Table; +import org.apache.tephra.TransactionSystemClient; import org.opencb.opencga.storage.hadoop.variant.annotation.phoenix.PhoenixCompat; import org.opencb.opencga.storage.hadoop.variant.annotation.phoenix.PhoenixCompatApi; @@ -44,4 +46,9 @@ public boolean isSnappyAvailable() { // [HADOOP-17125] - Using snappy-java in SnappyCodec - 3.3.1, 3.4.0 return true; } + + @Override + public Class[] getClassesForDependencyJars() { + return new Class[]{TransactionSystemClient.class, EventFactory.class}; + } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.5/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.5/pom.xml new file mode 100644 index 00000000000..ac839b79935 --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.5/pom.xml @@ -0,0 +1,112 @@ + + + + 4.0.0 + + + org.opencb.opencga + opencga-storage-hadoop-compat + 5.0.0-SNAPSHOT + ../pom.xml + + + + opencga-storage-hadoop-compat-hbase2.5 + jar + OpenCGA Storage Hadoop Compatibility HBase 2.5 + + + 2.5.10 + 3.4.0 + 5.2.0 + 2.1.0 + 3.4.4 + 0.16.1 + + + + + org.opencb.opencga + opencga-storage-hadoop-compat-api + ${project.version} + + + org.apache.hbase + hbase-client + ${hbase.version} + provided + + + org.apache.hbase + hbase-common + ${hbase.version} + provided + + + org.apache.hadoop + hadoop-common + ${hadoop.version} + 
provided + + + org.apache.hadoop + hadoop-mapreduce-client-core + ${hadoop.version} + provided + + + org.apache.phoenix + phoenix-core + ${phoenix.version} + provided + + + org.apache.phoenix + phoenix-core-client + ${phoenix.version} + provided + + + org.apache.phoenix + phoenix-core-server + ${phoenix.version} + provided + + + org.apache.phoenix.thirdparty + phoenix-shaded-guava + ${phoenix-thirdparty.version} + provided + + + com.lmax + disruptor + ${disruptor.version} + provided + + + org.apache.tephra + tephra-core + ${tephra.version} + provided + + + + + \ No newline at end of file diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.5/src/main/java/org/opencb/opencga/storage/hadoop/HBaseCompat.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.5/src/main/java/org/opencb/opencga/storage/hadoop/HBaseCompat.java new file mode 100644 index 00000000000..aaa972b92ad --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.5/src/main/java/org/opencb/opencga/storage/hadoop/HBaseCompat.java @@ -0,0 +1,53 @@ +package org.opencb.opencga.storage.hadoop; + +import com.lmax.disruptor.EventFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.client.Admin; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.client.Table; +import org.opencb.opencga.storage.hadoop.variant.annotation.phoenix.PhoenixCompat; +import org.opencb.opencga.storage.hadoop.variant.annotation.phoenix.PhoenixCompatApi; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public class HBaseCompat extends HBaseCompatApi { + + @Override + public void available(Configuration configuration) throws IOException { + HBaseAdmin.available(configuration); + } + + @Override + public 
boolean isSolrTestingAvailable() { + return true; + } + + @Override + public PhoenixCompatApi getPhoenixCompat() { + return new PhoenixCompat(); + } + + @Override + public List getServerList(Admin admin) throws IOException { + return new ArrayList<>(admin.getClusterMetrics().getServersName()); + } + + @Override + public byte[][] getTableStartKeys(Admin admin, Table table) throws IOException { + return table.getRegionLocator().getStartKeys(); + } + + @Override + public boolean isSnappyAvailable() { + // [HADOOP-17125] - Using snappy-java in SnappyCodec - 3.3.1, 3.4.0 + return true; + } + + @Override + public Class[] getClassesForDependencyJars() { + return new Class[]{EventFactory.class}; + } +} diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.5/src/main/java/org/opencb/opencga/storage/hadoop/variant/annotation/phoenix/OpenCGAUpsertExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.5/src/main/java/org/opencb/opencga/storage/hadoop/variant/annotation/phoenix/OpenCGAUpsertExecutor.java new file mode 100644 index 00000000000..c60e0f7a5b5 --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.5/src/main/java/org/opencb/opencga/storage/hadoop/variant/annotation/phoenix/OpenCGAUpsertExecutor.java @@ -0,0 +1,30 @@ +package org.opencb.opencga.storage.hadoop.variant.annotation.phoenix; + +import org.apache.phoenix.schema.types.PDataType; +import org.apache.phoenix.util.ColumnInfo; +import org.apache.phoenix.util.UpsertExecutor; + +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.util.List; + +public abstract class OpenCGAUpsertExecutor extends UpsertExecutor { + + public OpenCGAUpsertExecutor(Connection conn, String tableName, List columnInfoList, + UpsertListener upsertListener) { + super(conn, tableName, columnInfoList, upsertListener); 
+ } + + protected OpenCGAUpsertExecutor(Connection conn, List columnInfoList, PreparedStatement preparedStatement, + UpsertListener upsertListener) { + super(conn, columnInfoList, preparedStatement, upsertListener); + } + + @Override + protected org.apache.phoenix.thirdparty.com.google.common.base.Function createConversionFunction(PDataType dataType) { + java.util.function.Function f = createJavaConversionFunction(dataType); + return f::apply; + } + + protected abstract java.util.function.Function createJavaConversionFunction(PDataType dataType); +} diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.5/src/main/java/org/opencb/opencga/storage/hadoop/variant/annotation/phoenix/PhoenixCompat.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.5/src/main/java/org/opencb/opencga/storage/hadoop/variant/annotation/phoenix/PhoenixCompat.java new file mode 100644 index 00000000000..d66789aaf9c --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/opencga-storage-hadoop-compat-hbase2.5/src/main/java/org/opencb/opencga/storage/hadoop/variant/annotation/phoenix/PhoenixCompat.java @@ -0,0 +1,51 @@ +package org.opencb.opencga.storage.hadoop.variant.annotation.phoenix; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapreduce.lib.db.DBWritable; +import org.apache.phoenix.compile.QueryPlan; +import org.apache.phoenix.iterate.MapReduceParallelScanGrouper; +import org.apache.phoenix.iterate.ParallelScanGrouper; +import org.apache.phoenix.mapreduce.PhoenixRecordReader; +import org.apache.phoenix.schema.PColumn; +import org.apache.phoenix.schema.PName; +import org.apache.phoenix.schema.PTable; +import org.apache.phoenix.schema.PTableImpl; + +import java.lang.reflect.Constructor; +import java.sql.SQLException; +import java.util.Collections; +import java.util.List; + +public class PhoenixCompat 
implements PhoenixCompatApi { + + @Override + public PTable makePTable(List columns) throws SQLException { + return new PTableImpl.Builder() + .setName(PName.EMPTY_NAME) + .setColumns(columns) + .setIndexes(Collections.emptyList()) + .setPhysicalNames(Collections.emptyList()) + .build(); + } + + @Override + public PhoenixRecordReader newPhoenixRecordReader(Class inputClass, Configuration configuration, + QueryPlan queryPlan) { + try { + Constructor constructor = PhoenixRecordReader.class + .getDeclaredConstructor(Class.class, Configuration.class, QueryPlan.class, ParallelScanGrouper.class); + constructor.setAccessible(true); + return constructor.newInstance(inputClass, configuration, queryPlan, MapReduceParallelScanGrouper.getInstance()); + } catch (ReflectiveOperationException e) { + throw new IllegalStateException(e); + } + } + + @Override + public boolean isDropColumnFromViewSupported() { + // Phoenix 5.2.x does not support drop column from view. + // Might be related with https://issues.apache.org/jira/browse/PHOENIX-6025 + return false; + } + +} diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml index 679fd3129bb..eaa4dcb63d6 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-compat/pom.xml @@ -35,6 +35,7 @@ opencga-storage-hadoop-compat-hbase2.0 opencga-storage-hadoop-compat-hbase2.2 opencga-storage-hadoop-compat-hbase2.4 + opencga-storage-hadoop-compat-hbase2.5 @@ -58,6 +59,7 @@ provided + \ No newline at end of file diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml index 2efa3230cf1..58307caec22 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml +++ 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml @@ -282,7 +282,6 @@ org.apache.avro avro-mapred - hadoop2 commons-logging diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/converters/annotation/VariantAnnotationToPhoenixConverter.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/converters/annotation/VariantAnnotationToPhoenixConverter.java index d991c8a3c30..7d2bf77b14e 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/converters/annotation/VariantAnnotationToPhoenixConverter.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/converters/annotation/VariantAnnotationToPhoenixConverter.java @@ -176,14 +176,10 @@ public VariantAnnotationToPhoenixConverter(byte[] columnFamily, int annotationId if (proteinVariantAnnotation.getSubstitutionScores() != null) { for (Score score : proteinVariantAnnotation.getSubstitutionScores()) { if (score.getSource().equalsIgnoreCase("sift")) { - if (score.getScore() != null) { - sift.add(score.getScore().floatValue()); - } + sift.add((float) score.getScore()); addNotNull(siftDesc, score.getDescription()); } else if (score.getSource().equalsIgnoreCase("polyphen")) { - if (score.getScore() != null) { - polyphen.add(score.getScore().floatValue()); - } + polyphen.add((float) score.getScore()); addNotNull(polyphenDesc, score.getDescription()); } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/converters/study/HBaseToStudyEntryConverter.java 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/converters/study/HBaseToStudyEntryConverter.java index 7339cda99b9..8771734cdad 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/converters/study/HBaseToStudyEntryConverter.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/converters/study/HBaseToStudyEntryConverter.java @@ -721,7 +721,7 @@ protected List getSampleWithVariant(StudyMetadata studyMetadata, Collec return samplesWithVariant; } else { for (Integer file : filesInThisVariant) { - for (String sample : getSamplesInFile(studyMetadata.getId(), file)) { + for (String sample : getIncludeSamplesInFile(studyMetadata.getId(), file)) { Integer sampleIdx = returnedSamplesPosition.get(sample); if (sampleIdx != null) { samplesWithVariant.set(sampleIdx, true); @@ -805,7 +805,7 @@ protected void addSecondaryAlternates(Variant variant, StudyEntry studyEntry, St se.getFiles().add(fileEntry); } Integer fileId = metadataManager.getFileId(studyMetadata.getId(), fileName); - List samples = getSamplesInFile(studyMetadata.getId(), fileId); + List samples = getIncludeSamplesInFile(studyMetadata.getId(), fileId); for (String sample : samples) { SampleEntry sampleEntry = studyEntry.getSample(sample); // At this point, fileIndex is actually the fileId @@ -1045,7 +1045,7 @@ private Set getFilesFromReturnedSamples(int studyId) { id -> metadataManager.getFileIdsFromSampleIds(id, getReturnedSampleIds(id))); } - private List getSamplesInFile(int studyId, int fileId) { + private List getIncludeSamplesInFile(int studyId, int fileId) { List list = samplesFromFileMap.get(studyId + "_" + fileId); if (list != null) { return list; @@ -1054,18 +1054,21 @@ private List getSamplesInFile(int studyId, int fileId) { return 
samplesFromFileMap.computeIfAbsent(studyId + "_" + fileId, s -> { LinkedHashSet sampleIds = metadataManager.getSampleIdsFromFileId(studyId, fileId); List samples = new ArrayList<>(sampleIds.size()); + Set samplesSet; + if (configuration.getProjection() == null) { + samplesSet = null; + } else { + samplesSet = new HashSet<>(configuration.getProjection().getStudy(studyId).getSamples()); + } for (Integer sample : sampleIds) { - samples.add(metadataManager.getSampleName(studyId, sample)); + if (samplesSet == null || samplesSet.contains(sample)) { + samples.add(metadataManager.getSampleName(studyId, sample)); + } } return samples; }); } - private List getSamplesInFile(int studyId, String fileName) { - Integer fileId = metadataManager.getFileId(studyId, fileName); - return getSamplesInFile(studyId, fileId); - } - private String getSampleName(int studyId, int sampleId) { return metadataManager.getSampleName(studyId, sampleId); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/SampleIndexVariantAnnotationConverter.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/SampleIndexVariantAnnotationConverter.java index 488ad7cc6cc..77354306cf4 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/SampleIndexVariantAnnotationConverter.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/annotation/SampleIndexVariantAnnotationConverter.java @@ -201,7 +201,7 @@ protected void addPopFreqIndex(BitBuffer bitBuffer, PopulationFrequency populati IndexField field = schema.getPopFreqIndex() .getField(populationFrequency.getStudy(), populationFrequency.getPopulation()); if (field != null) { - 
field.write(populationFrequency.getAltAlleleFreq().doubleValue(), bitBuffer); + field.write((double) populationFrequency.getAltAlleleFreq(), bitBuffer); } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexSchema.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexSchema.java index 633d938dfbf..a09de0653e8 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexSchema.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexSchema.java @@ -47,11 +47,11 @@ public final class SampleIndexSchema { public static final Comparator INTRA_CHROMOSOME_VARIANT_COMPARATOR = (o1, o2) -> { VariantAvro v1 = o1.getImpl(); VariantAvro v2 = o2.getImpl(); - int c = v1.getStart().compareTo(v2.getStart()); + int c = Integer.compare(v1.getStart(), v2.getStart()); if (c != 0) { return c; } - c = v1.getEnd().compareTo(v2.getEnd()); + c = Integer.compare(v1.getEnd(), v2.getEnd()); if (c != 0) { return c; } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantMapReduceUtil.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantMapReduceUtil.java index e44efb594e8..b4f497ea016 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantMapReduceUtil.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/mr/VariantMapReduceUtil.java @@ -3,7 +3,6 
@@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; -import com.lmax.disruptor.EventFactory; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -25,7 +24,6 @@ import org.apache.hadoop.mapreduce.TaskType; import org.apache.hadoop.mapreduce.lib.db.DBWritable; import org.apache.phoenix.mapreduce.util.PhoenixMapReduceUtil; -import org.apache.tephra.TransactionSystemClient; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; @@ -40,6 +38,7 @@ import org.opencb.opencga.storage.core.variant.query.VariantQueryParser; import org.opencb.opencga.storage.core.variant.query.VariantQueryUtils; import org.opencb.opencga.storage.core.variant.query.projection.VariantQueryProjection; +import org.opencb.opencga.storage.hadoop.HBaseCompatApi; import org.opencb.opencga.storage.hadoop.utils.AbstractHBaseDriver; import org.opencb.opencga.storage.hadoop.variant.AbstractVariantsTableDriver; import org.opencb.opencga.storage.hadoop.variant.GenomeHelper; @@ -401,8 +400,7 @@ private static void initVariantMapperJobFromPhoenix(Job job, String variantTable if (addDependencyJar) { TableMapReduceUtil.addDependencyJars(job); TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), - TransactionSystemClient.class, - EventFactory.class); + HBaseCompatApi.getInstance().getClassesForDependencyJars()); } LOGGER.info(sqlQuery); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngineArchiveTableTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngineArchiveTableTest.java index ecfa0c7e54a..c858ca1b7aa 100644 --- 
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngineArchiveTableTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngineArchiveTableTest.java @@ -89,7 +89,7 @@ public void queryArchiveTable() throws StorageEngineException { VariantSetStats stats = mm.getVariantFileMetadata(studyId, fileMetadata.getId()).getStats(); stats.getChromosomeCount().forEach((s, l) -> assertEquals("chromosome : " + s, l, variantChromosomeCounts.getOrDefault(s, 0L))); stats.getTypeCount().forEach((s, l) -> assertEquals("variant type : " + s, l, variantCounts.getOrDefault(VariantType.valueOf(s), 0L))); - assertEquals(stats.getVariantCount().intValue(), numVariants[0]); + assertEquals((int) stats.getVariantCount(), numVariants[0]); assertTrue(fileMetadata.getAttributes().getBoolean(VariantStorageOptions.LOAD_ARCHIVE.key())); } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/VariantHadoopStoragePipelineTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/VariantHadoopStoragePipelineTest.java index 9e61f546d6c..cada03beba2 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/VariantHadoopStoragePipelineTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/VariantHadoopStoragePipelineTest.java @@ -197,7 +197,7 @@ public void queryArchiveTable() { }); System.out.println("End query from Archive table"); fileMetadata.getStats().getTypeCount().forEach((s, l) -> assertEquals(l, variantCounts.getOrDefault(s, 0L))); - 
assertEquals(fileMetadata.getStats().getVariantCount().intValue(), numVariants[0]); + assertEquals((int) fileMetadata.getStats().getVariantCount(), numVariants[0]); } @Test diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HadoopVariantDBAdaptorTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HadoopVariantDBAdaptorTest.java index c881565346c..cdba161a5bc 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HadoopVariantDBAdaptorTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HadoopVariantDBAdaptorTest.java @@ -223,7 +223,7 @@ public void testArchiveIterator() { // System.out.println(variant.toJson()); count++; } - assertEquals(fileMetadata.getStats().getVariantCount().intValue(), count); + assertEquals((int) fileMetadata.getStats().getVariantCount(), count); } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/io/HadoopVariantExporterTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/io/HadoopVariantExporterTest.java index e989f37c440..bdb00af9818 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/io/HadoopVariantExporterTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/io/HadoopVariantExporterTest.java @@ -92,28 +92,30 @@ public static void beforeClass() throws Exception { URI inputUri = 
VariantStorageBaseTest.getResourceUri("platinum/1K.end.platinum-genomes-vcf-NA12877_S1.genome.vcf.gz"); VariantStorageBaseTest.runDefaultETL(inputUri, variantStorageEngine, new StudyMetadata(0, study1), - new ObjectMap(VariantStorageOptions.ANNOTATE.key(), true) + new ObjectMap(VariantStorageOptions.ANNOTATE.key(), false) .append(VariantStorageOptions.STATS_CALCULATE.key(), false) ); inputUri = VariantStorageBaseTest.getResourceUri("platinum/1K.end.platinum-genomes-vcf-NA12878_S1.genome.vcf.gz"); VariantStorageBaseTest.runDefaultETL(inputUri, variantStorageEngine, new StudyMetadata(0, study1), - new ObjectMap(VariantStorageOptions.ANNOTATE.key(), true) + new ObjectMap(VariantStorageOptions.ANNOTATE.key(), false) .append(VariantStorageOptions.STATS_CALCULATE.key(), false) ); inputUri = VariantStorageBaseTest.getResourceUri("platinum/1K.end.platinum-genomes-vcf-NA12878_S1.genome.vcf.gz"); VariantStorageBaseTest.runDefaultETL(inputUri, variantStorageEngine, new StudyMetadata(0, study2), - new ObjectMap(VariantStorageOptions.ANNOTATE.key(), true) + new ObjectMap(VariantStorageOptions.ANNOTATE.key(), false) .append(VariantStorageOptions.STATS_CALCULATE.key(), false) ); inputUri = VariantStorageBaseTest.getResourceUri("variant-test-unusual-contigs.vcf"); VariantStorageBaseTest.runDefaultETL(inputUri, variantStorageEngine, new StudyMetadata(0, study3), - new ObjectMap(VariantStorageOptions.ANNOTATE.key(), true) + new ObjectMap(VariantStorageOptions.ANNOTATE.key(), false) .append(VariantStorageOptions.STATS_CALCULATE.key(), false) ); + variantStorageEngine.annotate(newOutputUri(), new ObjectMap()); + if (HBaseCompat.getInstance().isSolrTestingAvailable()) { variantStorageEngine.secondaryIndex(); } @@ -301,6 +303,17 @@ public void exportIndex() throws Exception { copyToLocal(fileName, uri); } + @Test + public void exportIndexMultiSampleFile() throws Exception { + String fileName = "some_variants.sample_index.vcf"; + URI uri = getOutputUri(fileName); + 
variantStorageEngine.exportData(uri, VariantWriterFactory.VariantOutputFormat.VCF, + null, new Query(STUDY.key(), study3).append(SAMPLE.key(), "SAMPLE_1").append(REGION.key(), "13C.DOT"), + new QueryOptions(HadoopVariantExporter.SKIP_SMALL_QUERY, true)); + + copyToLocal(fileName, uri); + } + @Test public void exportUncompleteIndex() throws Exception { String fileName = "some_variants.phoenix.avro"; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/stats/HadoopVariantStatisticsManagerTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/stats/HadoopVariantStatisticsManagerTest.java index 7805d047183..d9dd745118c 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/stats/HadoopVariantStatisticsManagerTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/stats/HadoopVariantStatisticsManagerTest.java @@ -174,9 +174,9 @@ public void testJulieTool() throws Exception { VariantStats stats = variant.getStudy(populationFrequency.getStudy()).getStats(populationFrequency.getPopulation()); Assert.assertNotNull(stats); Assert.assertThat(expected, CoreMatchers.hasItem(populationFrequency.getStudy() + ":" + populationFrequency.getPopulation())); - Assert.assertEquals(stats.getAltAlleleFreq(), populationFrequency.getAltAlleleFreq()); + Assert.assertEquals(stats.getAltAlleleFreq().floatValue(), populationFrequency.getAltAlleleFreq()); Assert.assertEquals(stats.getAltAlleleCount(), populationFrequency.getAltAlleleCount()); - Assert.assertEquals(stats.getRefAlleleFreq(), populationFrequency.getRefAlleleFreq()); + Assert.assertEquals(stats.getRefAlleleFreq().floatValue(), populationFrequency.getRefAlleleFreq()); 
Assert.assertEquals(stats.getRefAlleleCount(), populationFrequency.getRefAlleleCount()); Assert.assertEquals(stats.getGenotypeCount().entrySet().stream().filter(e -> GenotypeClass.HOM_ALT.test(e.getKey())).mapToInt(Map.Entry::getValue).sum(), populationFrequency.getAltHomGenotypeCount().intValue()); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr7.5/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr7.5/pom.xml new file mode 100644 index 00000000000..bc2b6557512 --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-emr7.5/pom.xml @@ -0,0 +1,28 @@ + + + 4.0.0 + + + org.opencb.opencga + opencga-storage-hadoop-lib + 5.0.0-SNAPSHOT + ../pom.xml + + + opencga-storage-hadoop-lib-emr7.5 + + + emr7.5 + hbase2.5 + + + + + + maven-assembly-plugin + + + + \ No newline at end of file diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hbase2.0/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hbase2.0/pom.xml new file mode 100644 index 00000000000..9ea1cbf0168 --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/opencga-storage-hadoop-lib-hbase2.0/pom.xml @@ -0,0 +1,29 @@ + + + 4.0.0 + + + org.opencb.opencga + opencga-storage-hadoop-lib + 5.0.0-SNAPSHOT + ../pom.xml + + + opencga-storage-hadoop-lib-hbase2.0 + + + hbase2.0 + hbase2.0 + 9.3.25.v20180904 + + + + + + maven-assembly-plugin + + + + \ No newline at end of file diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml index ce2536a1be4..d6a1ee9eed8 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml +++ 
b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-lib/pom.xml @@ -91,10 +91,12 @@ + opencga-storage-hadoop-lib-hbase2.0 opencga-storage-hadoop-lib-hdp3.1 opencga-storage-hadoop-lib-hdi5.1 opencga-storage-hadoop-lib-emr6.1 opencga-storage-hadoop-lib-emr6.13 + opencga-storage-hadoop-lib-emr7.5 diff --git a/pom.xml b/pom.xml index 47f6de1d6d8..03699e27b0d 100644 --- a/pom.xml +++ b/pom.xml @@ -54,7 +54,7 @@ 2.14.3 2.30.1 4.4 - 1.7.7 + 1.11.4 3.11.4 1.28.1 1.7.36 @@ -119,9 +119,9 @@ opencga opencga-env.sh - 1.0.0-SNAPSHOT + 1.1.0 - hdp3.1 + hbase2.0 ${opencga-hadoop-shaded.id.default} opencga-storage-hadoop-lib-${opencga-hadoop-shaded.id} opencga-hadoop-shaded-${opencga-hadoop-shaded.id} @@ -817,7 +817,6 @@ org.apache.avro avro-mapred ${avro.version} - hadoop2 org.mongodb @@ -1596,6 +1595,20 @@ + + hbase2.0 + + + hadoop + hbase2.0 + + + + hbase2.0 + hbase2.0 + 9.3.25.v20180904 + + hdp3.1 @@ -1651,6 +1664,19 @@ hbase2.4 + + emr7.5 + + + hadoop + emr7.5 + + + + emr7.5 + hbase2.5 + + runShortTests