diff --git a/.github/workflows/build-push-dev-image.yml b/.github/workflows/build-push-dev-image.yml index 24abc61..6951c57 100644 --- a/.github/workflows/build-push-dev-image.yml +++ b/.github/workflows/build-push-dev-image.yml @@ -48,20 +48,20 @@ jobs: # https://github.com/marketplace/actions/build-and-push-docker-images - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 + uses: docker/setup-buildx-action@v3 with: driver-opts: | network=host - name: Login to DockerHub - uses: docker/login-action@v2 + uses: docker/login-action@v3 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} logout: true - name: Login to Container Registry - uses: docker/login-action@v2 + uses: docker/login-action@v3 with: registry: containers.renci.org username: ${{ secrets.CONTAINERHUB_USERNAME }} @@ -72,7 +72,7 @@ jobs: # Notes on Cache: # https://docs.docker.com/build/ci/github-actions/examples/#inline-cache - name: Build Push Container - uses: docker/build-push-action@v4 + uses: docker/build-push-action@v5 with: context: . push: true diff --git a/.github/workflows/build-push-release.yml b/.github/workflows/build-push-release.yml index 06656b6..f23dc15 100644 --- a/.github/workflows/build-push-release.yml +++ b/.github/workflows/build-push-release.yml @@ -63,20 +63,20 @@ jobs: # step # https://github.com/marketplace/actions/build-and-push-docker-images - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 + uses: docker/setup-buildx-action@v3 with: driver-opts: | network=host - name: Login to DockerHub - uses: docker/login-action@v2 + uses: docker/login-action@v3 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} logout: true - name: Login to Container Registry - uses: docker/login-action@v2 + uses: docker/login-action@v3 with: registry: containers.renci.org username: ${{ secrets.CONTAINERHUB_USERNAME }} @@ -86,7 +86,7 @@ jobs: # Notes on Cache: # https://docs.docker.com/build/ci/github-actions/examples/#inline-cache - name: Build Push Container - uses: docker/build-push-action@v4 + uses: docker/build-push-action@v5 with: push: true # Push to renci-registry and dockerhub here. diff --git a/.github/workflows/code-checks.yml b/.github/workflows/code-checks.yml index 78cf048..0dc8428 100644 --- a/.github/workflows/code-checks.yml +++ b/.github/workflows/code-checks.yml @@ -1,11 +1,9 @@ # Workflow responsible for core acceptance testing. # Tests Currently Run: # - flake8-linter -# - image-build-test -# -# This workflow only validates images can build -# but does not push images to any repository. -# +# - PYTest +# - Bandit +# For PR Vulnerability Scanning a separate workflow will run. # The build-push-dev-image and build-push-release workflows # handle the develop and release image storage respectively. 
# @@ -13,11 +11,17 @@ name: Code-Checks on: - push: - branches-ignore: - - master - - main + # push: + # branches-ignore: + # - master + # - main + # - develop + pull_request: + branches: - develop + - master + - main + types: [ opened, synchronize ] paths-ignore: - README.md - .old_cicd/* @@ -27,13 +31,6 @@ on: - .gitignore - .dockerignore - .githooks - pull_request: - branches: - - develop - - master - - main - types: [ opened, synchronize ] - jobs: ############################## flake8-linter ############################## @@ -45,7 +42,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.12' # Currently actions/setup-python supports caching # but the cache is not as robust as cache action. @@ -69,35 +66,45 @@ jobs: # flake8 --ignore=E,W --exit-zero . continue-on-error: true -############################## test-image-build ############################## - test-image-build: - # needs: flake8-linter - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 +# ############################## build-vuln-test ############################## + # build-vuln-test: + # # needs: flake8-linter + # runs-on: ubuntu-latest + # steps: + # - uses: actions/checkout@v3 - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 - with: - driver-opts: | - network=host + # - name: Set up Docker Buildx + # uses: docker/setup-buildx-action@v3 + # with: + # driver-opts: | + # network=host - - name: Login to DockerHub - uses: docker/login-action@v2 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - logout: true + # - name: Login to DockerHub + # uses: docker/login-action@v3 + # with: + # username: ${{ secrets.DOCKERHUB_USERNAME }} + # password: ${{ secrets.DOCKERHUB_TOKEN }} + # logout: true + + # # Notes on Cache: + # # https://docs.docker.com/build/ci/github-actions/examples/#inline-cache + # - name: Build Container + # uses: docker/build-push-action@v5 + # with: + # context: . + # push: false + # load: true + # tag: ${{ github.repository }}:vuln-test + # cache-from: type=registry,ref=${{ github.repository }}:buildcache + # cache-to: type=registry,ref=${{ github.repository }}:buildcache,mode=max + # ####### Run for Fidelity ###### + # - name: Run Trivy vulnerability scanner + # uses: aquasecurity/trivy-action@master + # with: + # image-ref: '${{ github.repository }}:vuln-test' + # severity: 'CRITICAL,HIGH' + # exit-code: '1' - # Notes on Cache: - # https://docs.docker.com/build/ci/github-actions/examples/#inline-cache - - name: Build Container - uses: docker/build-push-action@v4 - with: - context: . 
- push: false - cache-from: type=registry,ref=${{ github.repository }}:buildcache - cache-to: type=registry,ref=${{ github.repository }}:buildcache,mode=max ################################### PYTEST ################################### pytest: runs-on: ubuntu-latest @@ -106,7 +113,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.12' - name: Install Requirements run: | @@ -126,7 +133,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.12' - name: Install Requirements run: | diff --git a/.github/workflows/trivy-pr-scan.yml b/.github/workflows/trivy-pr-scan.yml new file mode 100644 index 0000000..142572d --- /dev/null +++ b/.github/workflows/trivy-pr-scan.yml @@ -0,0 +1,67 @@ + +name: trivy-pr-scan +on: + pull_request: + branches: + - develop + - master + - main + types: [ opened, synchronize ] + paths-ignore: + - README.md + - .old_cicd/* + - .github/* + - .github/workflows/* + - LICENSE + - .gitignore + - .dockerignore + - .githooks + +jobs: + trivy-pr-scan: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + with: + driver-opts: | + network=host + + - name: Login to DockerHub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + logout: true + + # Notes on Cache: + # https://docs.docker.com/build/ci/github-actions/examples/#inline-cache + - name: Build Container + uses: docker/build-push-action@v5 + with: + context: . + push: false + load: true + tags: ${{ github.repository }}:vuln-test + cache-from: type=registry,ref=${{ github.repository }}:buildcache + cache-to: type=registry,ref=${{ github.repository }}:buildcache,mode=max + + # We will not be concerned with Medium and Low vulnerabilities + - name: Run Trivy vulnerability scanner + uses: aquasecurity/trivy-action@master + with: + image-ref: '${{ github.repository }}:vuln-test' + format: 'sarif' + severity: 'CRITICAL,HIGH' + output: 'trivy-results.sarif' + exit-code: '1' + # Scan results should be viewable in GitHub Security Dashboard + # We still fail the job if results are found, so below will always run + # unless manually canceled. + - name: Upload Trivy scan results to GitHub Security tab + uses: github/codeql-action/upload-sarif@v2 + if: '!cancelled()' + with: + sarif_file: 'trivy-results.sarif' \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 8135d0d..d57f06b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,16 +6,15 @@ FROM python:3.12.0-alpine3.18 # Install required packages -RUN apt-get update && \ - apt-get install -y curl make vim && \ - rm -rf /var/cache/apt/* +RUN apk update && \ + apk add g++ make # Create a non-root user. ENV USER dug ENV HOME /home/$USER ENV UID 1000 -RUN adduser --disabled-login --home $HOME --shell /bin/bash --uid $UID $USER +RUN adduser -D --home $HOME --uid $UID $USER USER $USER WORKDIR $HOME diff --git a/README.md b/README.md index c6c368c..d669280 100644 --- a/README.md +++ b/README.md @@ -290,3 +290,8 @@ TOPMed phenotypic concept data is [here](https://github.com/helxplatform/dug/tre ## Release To release, commit the change and select feature. + +#### Fail on Vulnerability Detection + +During PR's several vulnerability scanners are run. If there are vulnerabilities detected, the pr checks will fail and a report will be sent to Github Security Dashboard for viewing. 
Please ensure the vulnerability is mitigated prior to continuing the merge to protected branches. + diff --git a/bin/vlmd_to_dbgap_xml.py b/bin/vlmd_to_dbgap_xml.py index 6263460..5d2b9d3 100644 --- a/bin/vlmd_to_dbgap_xml.py +++ b/bin/vlmd_to_dbgap_xml.py @@ -161,10 +161,12 @@ def vlmd_to_dbgap_xml(input_file, output, file_format, study_id, appl_id, study_ # description later if that is useful. if row.get('constraints.pattern'): counters['constraints.pattern'] += 1 - logging.warning(f"`constraints.pattern` of {row['constraints.pattern']} found in row {row_index}, skipped.") + logging.warning(f"`constraints.pattern` of {row['constraints.pattern']} found in row {row_index}, " + f"but pattern constraints are not currently being written.") if row.get('format'): counters['format'] += 1 - logging.warning(f"Found `format` of {row['format']} found in row {row_index}, skipped.") + logging.warning(f"Found `format` of {row['format']} found in row {row_index}, but format is not " + f"currently being written.") # Process enumerated and encoded values. encs = {} diff --git a/requirements.txt b/requirements.txt index f602432..bac13a6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,29 +1,29 @@ aiohttp asyncio -fastapi==0.95.0 -uvicorn==0.23.2 +fastapi +uvicorn elasticsearch[async]==8.5.2 gunicorn itsdangerous Jinja2 jsonschema MarkupSafe -ormar==0.12.1 -mistune==2.0.3 -pluggy==1.0.0 -pyrsistent==0.17.3 +ormar +mistune +pluggy +pyrsistent pytest -pytz==2021.1 -PyYAML==6.0 -requests==2.31.0 -# old redis==4.4.2 -redis==4.5.4 -requests-cache==0.9.8 -six==1.16.0 +pytz +PyYAML +requests +redis +requests-cache +six # Click for command line arguments # We use Click 7.0 because that's what one of the pinned packages above use. click -httpx>=0.24.1 +httpx +linkml-runtime==1.6.0 bmt==1.1.0 -urllib3>=1.26.17 \ No newline at end of file +urllib3 \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index b470aef..b551ef3 100644 --- a/setup.cfg +++ b/setup.cfg @@ -17,14 +17,14 @@ classifiers = package_dir = = src packages = find: -python_requires = >=3.10 +python_requires = >=3.12 include_package_data = true install_requires = elasticsearch==8.5.2 pluggy requests - requests_cache==0.9.8 - redis==4.5.4 + requests_cache + redis [options.entry_points] console_scripts = @@ -32,8 +32,8 @@ console_scripts = [options.extras_require] rest = - fastapi==0.95.0 - uvicorn==0.23.2 + fastapi + uvicorn gunicorn jsonschema diff --git a/src/dug/config.py b/src/dug/config.py index ba050bb..5f4d59d 100644 --- a/src/dug/config.py +++ b/src/dug/config.py @@ -83,7 +83,7 @@ class Config: "desc": "summary", "collection_name": "cde_category", "collection_id": "cde_category", - "collection_action": "files" + "action": "files" } } }) diff --git a/src/dug/core/annotators/__init__.py b/src/dug/core/annotators/__init__.py index 2838f1c..1a58c40 100644 --- a/src/dug/core/annotators/__init__.py +++ b/src/dug/core/annotators/__init__.py @@ -4,9 +4,9 @@ import pluggy from dug.config import Config -from ._base import DugIdentifier, Indexable, Annotator, DefaultNormalizer, DefaultSynonymFinder -from .monarch_annotator import AnnotateMonarch -from .sapbert_annotator import AnnotateSapbert +from dug.core.annotators._base import DugIdentifier, Indexable, Annotator, DefaultNormalizer, DefaultSynonymFinder +from dug.core.annotators.monarch_annotator import AnnotateMonarch +from dug.core.annotators.sapbert_annotator import AnnotateSapbert logger = logging.getLogger('dug') diff --git a/src/dug/core/annotators/monarch_annotator.py 
b/src/dug/core/annotators/monarch_annotator.py index 841e9cf..1c67f40 100644 --- a/src/dug/core/annotators/monarch_annotator.py +++ b/src/dug/core/annotators/monarch_annotator.py @@ -3,8 +3,8 @@ from typing import List from requests import Session -from ._base import DugIdentifier, Input -from .utils.biolink_purl_util import BioLinkPURLerizer +from dug.core.annotators._base import DugIdentifier, Input +from dug.core.annotators.utils.biolink_purl_util import BioLinkPURLerizer logger = logging.getLogger('dug') diff --git a/src/dug/core/annotators/sapbert_annotator.py b/src/dug/core/annotators/sapbert_annotator.py index 73eefe9..7c2fa81 100644 --- a/src/dug/core/annotators/sapbert_annotator.py +++ b/src/dug/core/annotators/sapbert_annotator.py @@ -3,8 +3,8 @@ from requests import Session import json -from ._base import DugIdentifier, Input -from .utils.biolink_purl_util import BioLinkPURLerizer +from dug.core.annotators._base import DugIdentifier, Input +from dug.core.annotators.utils.biolink_purl_util import BioLinkPURLerizer logger = logging.getLogger("dug") diff --git a/src/dug/core/annotators/utils/__init__.py b/src/dug/core/annotators/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/dug/core/async_search.py b/src/dug/core/async_search.py index 59f60ba..44d7c98 100644 --- a/src/dug/core/async_search.py +++ b/src/dug/core/async_search.py @@ -651,6 +651,7 @@ async def search_vars_unscored(self, concept="", query="", new_results = new_results[data_type] else: new_results = {} + new_results.update({'total_items': total_items['count']}) return new_results async def search_kg(self, unique_id, query, offset=0, size=None, diff --git a/src/dug/core/crawler.py b/src/dug/core/crawler.py index 7331756..4dee2a3 100644 --- a/src/dug/core/crawler.py +++ b/src/dug/core/crawler.py @@ -264,7 +264,7 @@ def expand_to_dug_element(self, for key in attribute_mapping: mapped_value = node.get(attribute_mapping[key], "") # treat all attributes as strings - if key in array_to_string and isinstance(mapped_value, list) and len(mapped_value) > 0: + if attribute_mapping[key] in array_to_string and isinstance(mapped_value, list) and len(mapped_value) > 0: mapped_value = mapped_value[0] element_attribute_args.update({key: mapped_value}) element = DugElement( diff --git a/src/dug/core/tranql.py b/src/dug/core/tranql.py index c4c495b..4c458a2 100644 --- a/src/dug/core/tranql.py +++ b/src/dug/core/tranql.py @@ -113,11 +113,14 @@ def get_node_names(self, include_curie=True): return node_names def get_node_synonyms(self, include_curie=True): + # @TODO call name-resolver node_synonyms = [] curie_ids = self.get_curie_ids() for node in self.get_nodes(): if include_curie or node['id'] not in curie_ids: - node_synonyms += node.get('synonyms') or [] + syn = node.get('synonyms') + if isinstance(syn,list): + node_synonyms += syn return node_synonyms def get_curie_ids(self): diff --git a/src/dug/server.py b/src/dug/server.py index fde7e5a..f7a8466 100644 --- a/src/dug/server.py +++ b/src/dug/server.py @@ -3,6 +3,7 @@ import uvicorn from fastapi import FastAPI +from fastapi.middleware.cors import CORSMiddleware from dug.config import Config from dug.core.async_search import Search from pydantic import BaseModel @@ -15,6 +16,13 @@ root_path=os.environ.get("ROOT_PATH", "/"), ) +APP.add_middleware( + CORSMiddleware, + allow_origins=['*'], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) class GetFromIndex(BaseModel): index: str = "concepts_index" diff --git a/tests/unit/test_cli.py 
b/tests/unit/test_cli.py index 9237bd7..3a2d97e 100644 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py @@ -25,31 +25,31 @@ def test_dug_cli_parser(): @patch('dug.cli.crawl') def test_dug_cli_main_crawl(mock_crawl): main(["crawl", "somefile.csv", "--parser", "topmedtag"]) - assert mock_crawl.called_once() + mock_crawl.assert_called_once() @mark.cli @patch('dug.cli.crawl') def test_dug_cli_main_extract_dug_elements(mock_crawl): main(["crawl", "somefile.csv", "--parser", "topmedtag", "-x"]) - assert mock_crawl.called_once() + mock_crawl.assert_called_once() assert mock_crawl.call_args_list[0].args[0].extract_dug_elements @mark.cli @patch('dug.cli.crawl') def test_dug_cli_main_extract_dug_elements_none(mock_crawl): main(["crawl", "somefile.csv", "--parser", "topmedtag"]) - assert mock_crawl.called_once() + mock_crawl.assert_called_once() assert not mock_crawl.call_args_list[0].args[0].extract_dug_elements @mark.cli @patch('dug.cli.crawl') def test_dug_cli_main_annotator(mock_crawl): main(["crawl", "somefile.csv","--parser", "topmedtag", "--annotator", "annotator-monarch"]) - assert mock_crawl.called_once() + mock_crawl.assert_called_once() @mark.cli @patch('dug.cli.search') def test_dug_cli_main_search(mock_search): # mock_search.search.return_value = "Searching!" main(["search", "-q", "heart attack", "-t", "variables", "-k", "namespace=default"]) - assert mock_search.called_once() + mock_search.assert_called_once()
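
Two of the Python changes above are easy to misread at a glance, so brief standalone sketches follow (illustrative only, not part of the patch; any sample data in them is hypothetical).

First, the tests/unit/test_cli.py hunk replaces `assert mock_crawl.called_once()` with `mock_crawl.assert_called_once()`. `called_once` is not part of the Mock API: attribute access on a MagicMock auto-creates a child mock, and calling it returns another truthy mock, so the old assertions could never fail. A minimal sketch of the difference:

    # Sketch of the unittest.mock pitfall addressed in tests/unit/test_cli.py.
    from unittest.mock import MagicMock

    mock_crawl = MagicMock()

    # `called_once` is auto-created on attribute access and calling it returns a
    # truthy mock, so this "assertion" passes even though nothing was called.
    assert mock_crawl.called_once()

    # assert_called_once() is the real assertion helper: with zero calls it
    # raises AssertionError, which is what the updated tests rely on.
    try:
        mock_crawl.assert_called_once()
    except AssertionError as exc:
        print(f"expected failure: {exc}")

    mock_crawl("somefile.csv")
    mock_crawl.assert_called_once()  # now passes: exactly one call was made

Second, the `get_node_synonyms()` change in src/dug/core/tranql.py only extends the result when the `synonyms` value is actually a list. Besides skipping `None`, this guards against a bare string being spread character by character by `+=`. A sketch with hypothetical node payloads (the real shape comes from TranQL responses):

    # Sketch of the isinstance guard added to get_node_synonyms().
    nodes = [
        {"id": "A", "synonyms": ["myocardial infarction", "heart attack"]},
        {"id": "B", "synonyms": None},            # previously handled via `or []`
        {"id": "C", "synonyms": "heart attack"},  # a bare string would be split into characters by +=
    ]

    node_synonyms = []
    for node in nodes:
        syn = node.get("synonyms")
        if isinstance(syn, list):  # only extend with real lists
            node_synonyms += syn

    print(node_synonyms)  # ['myocardial infarction', 'heart attack']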