From 80f89363bd88c9e1d274fd7a194c9a99bf49076d Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Fri, 22 Mar 2024 14:08:28 -0400 Subject: [PATCH 01/13] build: use for build configs --- .gitignore | 2 ++ pyproject.toml | 80 +++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 81 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 1246a916..03dc9aa1 100644 --- a/.gitignore +++ b/.gitignore @@ -136,3 +136,5 @@ dynamodb_local_latest/ # Zip *.zip + +notebooks diff --git a/pyproject.toml b/pyproject.toml index 7d4fe90b..77273d72 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,7 +45,7 @@ dynamic = ["version"] [project.optional-dependencies] tests = ["pytest", "pytest-cov", "mock", "pytest-asyncio"] -dev = ["pre-commit", "flake8", "flake8-docstrings"] +dev = ["pre-commit", "ruff==0.2.0"] notebooks = ["ipykernel", "jupyterlab"] [project.urls] @@ -73,3 +73,81 @@ testpaths = ["tests"] [tool.coverage.run] branch = true +[tool.ruff] +src = ["src"] + +[tool.ruff.lint] +select = [ + "F", # https://docs.astral.sh/ruff/rules/#pyflakes-f + "E", "W", # https://docs.astral.sh/ruff/rules/#pycodestyle-e-w + "I", # https://docs.astral.sh/ruff/rules/#isort-i + "N", # https://docs.astral.sh/ruff/rules/#pep8-naming-n + "D", # https://docs.astral.sh/ruff/rules/#pydocstyle-d + "UP", # https://docs.astral.sh/ruff/rules/#pyupgrade-up + "ANN", # https://docs.astral.sh/ruff/rules/#flake8-annotations-ann + "ASYNC", # https://docs.astral.sh/ruff/rules/#flake8-async-async + "S", # https://docs.astral.sh/ruff/rules/#flake8-bandit-s + "B", # https://docs.astral.sh/ruff/rules/#flake8-bugbear-b + "A", # https://docs.astral.sh/ruff/rules/#flake8-builtins-a + "C4", # https://docs.astral.sh/ruff/rules/#flake8-comprehensions-c4 + "DTZ", # https://docs.astral.sh/ruff/rules/#flake8-datetimez-dtz + "T10", # https://docs.astral.sh/ruff/rules/#flake8-datetimez-dtz + "EM", # https://docs.astral.sh/ruff/rules/#flake8-errmsg-em + "G", # https://docs.astral.sh/ruff/rules/#flake8-logging-format-g + "PIE", # https://docs.astral.sh/ruff/rules/#flake8-pie-pie + "T20", # https://docs.astral.sh/ruff/rules/#flake8-print-t20 + "PT", # https://docs.astral.sh/ruff/rules/#flake8-pytest-style-pt + "Q", # https://docs.astral.sh/ruff/rules/#flake8-quotes-q + "RSE", # https://docs.astral.sh/ruff/rules/#flake8-raise-rse + "RET", # https://docs.astral.sh/ruff/rules/#flake8-return-ret + "SIM", # https://docs.astral.sh/ruff/rules/#flake8-simplify-sim + "PTH", # https://docs.astral.sh/ruff/rules/#flake8-use-pathlib-pth + "PGH", # https://docs.astral.sh/ruff/rules/#pygrep-hooks-pgh + "RUF", # https://docs.astral.sh/ruff/rules/#ruff-specific-rules-ruf +] +fixable = [ + "I", + "F401", + "D", + "UP", + "ANN", + "B", + "C4", + "G", + "PIE", + "PT", + "RSE", + "SIM", + "RUF" +] +# ANN101 - missing-type-self +# ANN003 - missing-type-kwargs +# D203 - one-blank-line-before-class +# D205 - blank-line-after-summary +# D206 - indent-with-spaces* +# D213 - multi-line-summary-second-line +# D300 - triple-single-quotes* +# D400 - ends-in-period +# D415 - ends-in-punctuation +# E111 - indentation-with-invalid-multiple* +# E114 - indentation-with-invalid-multiple-comment* +# E117 - over-indented* +# E501 - line-too-long* +# W191 - tab-indentation* +# S321 - suspicious-ftp-lib-usage +# *ignored for compatibility with formatter +ignore = [ + "ANN101", "ANN003", + "D203", "D205", "D206", "D213", "D300", "D400", "D415", + "E111", "E114", "E117", "E501", + "W191", + "S321", +] + +[tool.ruff.lint.per-file-ignores] +# ANN001 - 
missing-type-function-argument +# ANN2 - missing-return-type +# ANN102 - missing-type-cls +# S101 - assert +# B011 - assert-false +"tests/*" = ["ANN001", "ANN2", "ANN102", "S101", "B011"] From c8c9f21a5eb15eaafbb6cb5907aa3ae1539ac22a Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Fri, 22 Mar 2024 14:09:48 -0400 Subject: [PATCH 02/13] remove ruff configs --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 77273d72..3763492b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,6 +73,7 @@ testpaths = ["tests"] [tool.coverage.run] branch = true + [tool.ruff] src = ["src"] From 978141d6719d66444fdfd96fa98860c5eed3c17b Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Fri, 22 Mar 2024 14:40:12 -0400 Subject: [PATCH 03/13] style: use ruff --- .pre-commit-config.yaml | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7a376c8a..dd19de58 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,12 +1,14 @@ -# See https://pre-commit.com for more information -# See https://pre-commit.com/hooks.html for more hooks repos: -- repo: https://github.com/pre-commit/pre-commit-hooks + - repo: https://github.com/pre-commit/pre-commit-hooks rev: v1.4.0 hooks: - - id: flake8 - additional_dependencies: [flake8-docstrings] - - id: check-added-large-files - args: ['--maxkb=1024'] - exclude: ^tests/data - - id: detect-private-key + - id: check-added-large-files + - id: detect-private-key + - id: trailing-whitespace + - id: end-of-file-fixer + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.2.0 + hooks: + - id: ruff-format + - id: ruff + args: [ --fix, --exit-non-zero-on-fix ] From 16978f4a5d16b082d489d557eb948e7bbdf16de9 Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Fri, 22 Mar 2024 14:41:20 -0400 Subject: [PATCH 04/13] remove flake8 things --- .flake8 | 12 ------------ Pipfile | 3 +-- README.md | 4 ++-- 3 files changed, 3 insertions(+), 16 deletions(-) delete mode 100644 .flake8 diff --git a/.flake8 b/.flake8 deleted file mode 100644 index 5db9c0f2..00000000 --- a/.flake8 +++ /dev/null @@ -1,12 +0,0 @@ -[flake8] -ignore = D205, D400, W503 -max-line-length = 88 -exclude = - .git - venv - __pycache__ - source - outputs - docs/* -per-file-ignores = - *__init__.py:F401 diff --git a/Pipfile b/Pipfile index a808c9e4..09ce25b8 100644 --- a/Pipfile +++ b/Pipfile @@ -27,7 +27,6 @@ pytest-cov = "*" pytest-asyncio = "*" mock = "*" pre-commit = "*" -flake8 = "*" -flake8-docstrings = "*" +ruff = "==0.2.0" ipykernel = "*" jupyterlab = "*" diff --git a/README.md b/README.md index f5e5fd10..71fd9564 100644 --- a/README.md +++ b/README.md @@ -152,10 +152,10 @@ python3 -m pytest ### And coding style tests -Code style is managed by [flake8](https://github.com/PyCQA/flake8) and checked prior to commit. +Code style is managed by [ruff](https://astral.sh/ruff) and checked prior to commit. ``` -see .flake8 +python3 -m ruff check --fix . && python3 -m ruff format . 
``` From 5c7b6c3cb2cefc09ef502eae0353684304088def Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Fri, 22 Mar 2024 15:09:31 -0400 Subject: [PATCH 05/13] Add ruff linting --- .github/workflows/checks.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/.github/workflows/checks.yaml b/.github/workflows/checks.yaml index f319070d..861242ed 100644 --- a/.github/workflows/checks.yaml +++ b/.github/workflows/checks.yaml @@ -19,3 +19,19 @@ jobs: run: | python -m pip install pipenv pipenv install --skip-lock # this is what Elastic beanstalk uses + lint: + name: lint + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: "3.11" + + - name: Install dependencies + run: python3 -m pip install ".[dev]" + + - name: Check style + run: python3 -m ruff check . && python3 -m ruff format --check . From 38e9fd19817e887c9d26ffe7b5425903469f64bf Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Thu, 28 Mar 2024 20:40:35 -0400 Subject: [PATCH 06/13] fix typo From 410aa4dcd2a77dfa7576383ef89e567a13c0fda9 Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Thu, 28 Mar 2024 20:34:00 -0400 Subject: [PATCH 07/13] move stuff --- pyproject.toml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 3763492b..6ef05850 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,14 @@ dependencies = [ "thera-py[etl]~=0.5.0.dev3", "civicpy~=3.0.0", "requests", +<<<<<<< HEAD "pydantic==2.*", +||||||| parent of 3dfee5f (move stuff) + "jsondiff", + "pydantic~=2.1", +======= + "pydantic~=2.1", +>>>>>>> 3dfee5f (move stuff) "requests-cache", "neo4j==5.*", "uvicorn", From 24e6c7b4e0bc6c38885b9f24ee10eea4c6f854f9 Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Thu, 28 Mar 2024 20:35:13 -0400 Subject: [PATCH 08/13] not sure how that got there --- .gitignore | 2 -- 1 file changed, 2 deletions(-) diff --git a/.gitignore b/.gitignore index 03dc9aa1..1246a916 100644 --- a/.gitignore +++ b/.gitignore @@ -136,5 +136,3 @@ dynamodb_local_latest/ # Zip *.zip - -notebooks From 244a8fb5954adc6789263957ca124363b3c3bf7e Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Thu, 28 Mar 2024 20:42:35 -0400 Subject: [PATCH 09/13] update configs --- .pre-commit-config.yaml | 2 +- pyproject.toml | 7 ------- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index dd19de58..7ace1071 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,7 +7,7 @@ repos: - id: trailing-whitespace - id: end-of-file-fixer - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.2.0 + rev: v0.2.0 # ruff version hooks: - id: ruff-format - id: ruff diff --git a/pyproject.toml b/pyproject.toml index 6ef05850..3763492b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,14 +32,7 @@ dependencies = [ "thera-py[etl]~=0.5.0.dev3", "civicpy~=3.0.0", "requests", -<<<<<<< HEAD "pydantic==2.*", -||||||| parent of 3dfee5f (move stuff) - "jsondiff", - "pydantic~=2.1", -======= - "pydantic~=2.1", ->>>>>>> 3dfee5f (move stuff) "requests-cache", "neo4j==5.*", "uvicorn", From 65d5bf4f2b47bd8002ed91ce2aa8c4568b8fc35b Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Thu, 28 Mar 2024 21:35:15 -0400 Subject: [PATCH 10/13] style with ruff --- pyproject.toml | 2 + src/metakb/__init__.py | 19 +- src/metakb/cli.py | 280 ++--- src/metakb/database.py | 175 +-- src/metakb/exceptions.py | 4 +- src/metakb/harvesters/__init__.py | 2 - 
src/metakb/harvesters/base.py | 24 +- src/metakb/harvesters/civic.py | 20 +- src/metakb/harvesters/moa.py | 105 +- src/metakb/main.py | 36 +- src/metakb/normalizers.py | 103 +- src/metakb/query.py | 201 ++-- src/metakb/schemas/annotation.py | 32 +- src/metakb/schemas/api.py | 2 +- src/metakb/schemas/categorical_variation.py | 18 +- src/metakb/schemas/variation_statement.py | 14 +- src/metakb/transform/base.py | 187 +-- src/metakb/transform/civic.py | 419 ++++--- src/metakb/transform/moa.py | 249 ++-- tests/conftest.py | 1004 +++++++---------- tests/unit/database/test_database.py | 166 ++- .../harvesters/moa/test_moa_assertions.py | 27 +- tests/unit/harvesters/moa/test_moa_harvest.py | 10 +- tests/unit/harvesters/moa/test_moa_source.py | 11 +- tests/unit/harvesters/test_base_class.py | 10 +- tests/unit/harvesters/test_civic_harvester.py | 24 +- tests/unit/setup/test_minimal_setup.py | 2 +- tests/unit/test_search_studies.py | 2 +- .../test_civic_transform_diagnostic.py | 74 +- .../test_civic_transform_prognostic.py | 62 +- .../test_civic_transform_therapeutic.py | 23 +- tests/unit/transform/test_moa_transform.py | 92 +- 32 files changed, 1707 insertions(+), 1692 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3763492b..653ad361 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -151,4 +151,6 @@ ignore = [ # ANN102 - missing-type-cls # S101 - assert # B011 - assert-false +# N815 - "tests/*" = ["ANN001", "ANN2", "ANN102", "S101", "B011"] +"src/metakb/schemas/*" = ["ANN102", "N815"] diff --git a/src/metakb/__init__.py b/src/metakb/__init__.py index 46186a18..b0a29fd5 100644 --- a/src/metakb/__init__.py +++ b/src/metakb/__init__.py @@ -1,28 +1,25 @@ """The MetaKB package.""" -from pathlib import Path import logging from os import environ +from pathlib import Path APP_ROOT = Path(__file__).resolve().parents[0] PROJECT_ROOT = Path(__file__).resolve().parents[1] -if 'METAKB_NORM_EB_PROD' in environ: - LOG_FN = "/tmp/metakb.log" -else: - LOG_FN = "metakb.log" +LOG_FN = "/tmp/metakb.log" if "METAKB_NORM_EB_PROD" in environ else "metakb.log" # noqa: S108 logging.basicConfig( - filename=LOG_FN, - format='[%(asctime)s] - %(name)s - %(levelname)s : %(message)s') -logger = logging.getLogger('metakb') + filename=LOG_FN, format="[%(asctime)s] - %(name)s - %(levelname)s : %(message)s" +) +logger = logging.getLogger("metakb") logger.setLevel(logging.DEBUG) logging.getLogger("boto3").setLevel(logging.INFO) logging.getLogger("botocore").setLevel(logging.INFO) logging.getLogger("urllib3").setLevel(logging.INFO) logging.getLogger("python_jsonschema_objects").setLevel(logging.INFO) logging.getLogger("hgvs.parser").setLevel(logging.INFO) -logging.getLogger("biocommons.seqrepo.seqaliasdb.seqaliasdb").setLevel(logging.INFO) # noqa: E501 -logging.getLogger("biocommons.seqrepo.fastadir.fastadir").setLevel(logging.INFO) # noqa: E501 +logging.getLogger("biocommons.seqrepo.seqaliasdb.seqaliasdb").setLevel(logging.INFO) +logging.getLogger("biocommons.seqrepo.fastadir.fastadir").setLevel(logging.INFO) logging.getLogger("requests_cache.patcher").setLevel(logging.INFO) logging.getLogger("bioregistry.resource_manager").setLevel(logging.INFO) logging.getLogger("blib2to3.pgen2.driver").setLevel(logging.INFO) @@ -30,7 +27,7 @@ logging.getLogger("asyncio").setLevel(logging.INFO) logger.handlers = [] -if 'METAKB_NORM_EB_PROD' in environ: +if "METAKB_NORM_EB_PROD" in environ: ch = logging.StreamHandler() ch.setLevel(logging.DEBUG) logger.addHandler(ch) diff --git a/src/metakb/cli.py b/src/metakb/cli.py index 
e7a5cefe..30a34084 100644 --- a/src/metakb/cli.py +++ b/src/metakb/cli.py @@ -1,42 +1,40 @@ -""" -Provide CLI utility for performing data collection, transformation, and upload +"""Provide CLI utility for performing data collection, transformation, and upload to graph datastore. """ -from timeit import default_timer as timer -from os import environ import logging -from typing import Optional -from pathlib import Path import re import tempfile +from os import environ +from pathlib import Path +from timeit import default_timer as timer +from typing import Optional, Set, Union from zipfile import ZipFile import asyncclick as click +import boto3 +from boto3.exceptions import ResourceLoadException +from botocore.config import Config +from disease.cli import CLI as DiseaseCLI # noqa: N811 from disease.database import Database as DiseaseDatabase from disease.schemas import SourceName as DiseaseSources -from disease.cli import CLI as DiseaseCLI -from therapy.database import Database as TherapyDatabase -from therapy.schemas import SourceName as TherapySources -from therapy.cli import CLI as TherapyCLI +from gene.cli import CLI as GeneCLI # noqa: N811 from gene.database import Database as GeneDatabase from gene.schemas import SourceName as GeneSources -from gene.cli import CLI as GeneCLI -import boto3 -from boto3.exceptions import ResourceLoadException -from botocore.config import Config +from therapy.cli import CLI as TherapyCLI # noqa: N811 +from therapy.database import Database as TherapyDatabase +from therapy.schemas import SourceName as TherapySources from metakb import APP_ROOT from metakb.database import Graph +from metakb.harvesters import CivicHarvester, Harvester, MoaHarvester from metakb.schemas.app import SourceName -from metakb.harvesters import Harvester, CivicHarvester, MoaHarvester -from metakb.transform import Transform, CivicTransform, MoaTransform - +from metakb.transform import CivicTransform, MoaTransform, Transform -logger = logging.getLogger('metakb.cli') +logger = logging.getLogger("metakb.cli") logger.setLevel(logging.DEBUG) -def echo_info(msg: str): +def echo_info(msg: str) -> None: """Log (as INFO) and echo given message. :param str msg: message to emit """ @@ -50,59 +48,72 @@ class CLI: @staticmethod @click.command() @click.option( - '--db_url', - help=('URL endpoint for the application Neo4j database. Can also be ' - 'provided via environment variable METAKB_DB_URL.') + "--db_url", + help=( + "URL endpoint for the application Neo4j database. Can also be " + "provided via environment variable METAKB_DB_URL." + ), ) @click.option( - '--db_username', - help=('Username to provide to application database. Can also be ' - 'provided via environment variable METAKB_DB_USERNAME.') + "--db_username", + help=( + "Username to provide to application database. Can also be " + "provided via environment variable METAKB_DB_USERNAME." + ), ) @click.option( - '--db_password', - help=('Password to provide to application database. Can also be ' - 'provided via environment variable METAKB_DB_PASSWORD.') + "--db_password", + help=( + "Password to provide to application database. Can also be " + "provided via environment variable METAKB_DB_PASSWORD." + ), ) @click.option( - '--load_normalizers_db', - '-i', + "--load_normalizers_db", + "-i", is_flag=True, default=False, - help='Check normalizers database and load data if necessary.' 
+ help="Check normalizers database and load data if necessary.", ) @click.option( - '--force_load_normalizers_db', - '-f', + "--force_load_normalizers_db", + "-f", is_flag=True, default=False, - help=('Load all normalizers data into database. Overrides ' - '--load_normalizers_db if both are selected.') + help=( + "Load all normalizers data into database. Overrides " + "--load_normalizers_db if both are selected." + ), ) @click.option( - '--normalizers_db_url', - default='http://localhost:8000', - help=('URL endpoint of normalizers DynamoDB database. Set to ' - '`http://localhost:8000` by default.') + "--normalizers_db_url", + default="http://localhost:8000", + help=( + "URL endpoint of normalizers DynamoDB database. Set to " + "`http://localhost:8000` by default." + ), ) @click.option( "--load_latest_cdms", "-l", is_flag=True, default=False, - help=("Clear MetaKB database and load most recent available source " - "CDM files. Does not run harvest and transform methods to " - "generate new CDM files. Exclusive with --load_target_cdm and " - "--load_latest_s3_cdms.") + help=( + "Clear MetaKB database and load most recent available source " + "CDM files. Does not run harvest and transform methods to " + "generate new CDM files. Exclusive with --load_target_cdm and " + "--load_latest_s3_cdms." + ), ) @click.option( "--load_target_cdm", "-t", - type=click.Path(exists=True, dir_okay=False, readable=True, - path_type=Path), + type=click.Path(exists=True, dir_okay=False, readable=True, path_type=Path), required=False, - help=("Load transformed CDM file at specified path. Exclusive with " - "--load_latest_cdms and --load_latest_s3_cdms.") + help=( + "Load transformed CDM file at specified path. Exclusive with " + "--load_latest_cdms and --load_latest_s3_cdms." + ), ) @click.option( "--load_latest_s3_cdms", @@ -110,9 +121,11 @@ class CLI: is_flag=True, default=False, required=False, - help=("Clear MetaKB database, retrieve most recent data available " - "from VICC S3 bucket, and load the database with retrieved " - "data. Exclusive with --load_latest_cdms and load_target_cdm.") + help=( + "Clear MetaKB database, retrieve most recent data available " + "from VICC S3 bucket, and load the database with retrieved " + "data. Exclusive with --load_latest_cdms and load_target_cdm." + ), ) @click.option( "--update_cached", @@ -120,31 +133,42 @@ class CLI: is_flag=True, default=False, required=False, - help=("`True` if civicpy cache should be updated. Note this will take serveral" - "minutes. `False` if local cache should be used") + help=( + "`True` if civicpy cache should be updated. Note this will take serveral" + "minutes. `False` if local cache should be used" + ), ) async def update_metakb_db( - db_url: str, db_username: str, db_password: str, - load_normalizers_db: bool, force_load_normalizers_db: bool, - normalizers_db_url: str, load_latest_cdms: bool, - load_target_cdm: Optional[Path], load_latest_s3_cdms: bool, - update_cached: bool - ): + db_url: str, + db_username: str, + db_password: str, + load_normalizers_db: bool, + force_load_normalizers_db: bool, + normalizers_db_url: str, + load_latest_cdms: bool, + load_target_cdm: Optional[Path], + load_latest_s3_cdms: bool, + update_cached: bool, + ) -> None: """Execute data harvest and transformation from resources and upload to graph datastore. 
""" - if sum([load_latest_cdms, bool(load_target_cdm), - load_latest_s3_cdms]) > 1: - CLI()._help_msg("Error: Can only use one of `--load_latest_cdms`, " - "`--load_target_cdm`, `--load_latest_s3_cdms`.") + if sum([load_latest_cdms, bool(load_target_cdm), load_latest_s3_cdms]) > 1: + CLI()._help_msg( + "Error: Can only use one of `--load_latest_cdms`, " + "`--load_target_cdm`, `--load_latest_s3_cdms`." + ) - db_url = CLI()._check_db_param(db_url, 'URL') - db_username = CLI()._check_db_param(db_username, 'username') - db_password = CLI()._check_db_param(db_password, 'password') + db_url = CLI()._check_db_param(db_url, "URL") + db_username = CLI()._check_db_param(db_username, "username") + db_password = CLI()._check_db_param(db_password, "password") if normalizers_db_url: - for env_var_name in ['GENE_NORM_DB_URL', 'THERAPY_NORM_DB_URL', - 'DISEASE_NORM_DB_URL']: + for env_var_name in [ + "GENE_NORM_DB_URL", + "THERAPY_NORM_DB_URL", + "DISEASE_NORM_DB_URL", + ]: environ[env_var_name] = normalizers_db_url if not any([load_latest_cdms, load_target_cdm, load_latest_s3_cdms]): @@ -166,8 +190,7 @@ async def update_metakb_db( if load_latest_s3_cdms: version = CLI()._retrieve_s3_cdms() g.clear() - for src in sorted({v.value for v - in SourceName.__members__.values()}): + for src in sorted({v.value for v in SourceName.__members__.values()}): if version is not None: pattern = f"{src}_cdm_{version}.json" else: @@ -175,18 +198,15 @@ async def update_metakb_db( globbed = (APP_ROOT / "data" / src / "transform").glob(pattern) try: path = sorted(globbed)[-1] - except IndexError: - raise FileNotFoundError(f"No valid transform file found " - f"matching pattern: {pattern}") + except IndexError as e: + msg = f"No valid transform file found matching pattern: {pattern}" + raise FileNotFoundError(msg) from e g.load_from_json(path) g.close() end = timer() - echo_info( - f"Successfully loaded neo4j database in {(end - start):.5f} s\n" - ) + echo_info(f"Successfully loaded neo4j database in {(end - start):.5f} s\n") - s3_cdm_pattern = re.compile( - r"cdm/20[23]\d[01]\d[0123]\d/(.*)_cdm_(.*).json.zip") + s3_cdm_pattern = re.compile(r"cdm/20[23]\d[01]\d[0123]\d/(.*)_cdm_(.*).json.zip") def _retrieve_s3_cdms(self) -> str: """Retrieve most recent CDM files from VICC S3 bucket. 
Expects to find @@ -200,13 +220,12 @@ def _retrieve_s3_cdms(self) -> str: echo_info("Attempting to fetch CDM files from S3 bucket") s3 = boto3.resource("s3", config=Config(region_name="us-east-2")) if not s3: - raise ResourceLoadException("Unable to initiate AWS S3 Resource") - bucket = sorted( - list( - s3.Bucket("vicc-metakb").objects.filter(Prefix="cdm").all() - ), + msg = "Unable to initiate AWS S3 Resource" + raise ResourceLoadException(msg) + bucket = sorted( # noqa: C414 + list(s3.Bucket("vicc-metakb").objects.filter(Prefix="cdm").all()), key=lambda f: f.key, - reverse=True + reverse=True, ) newest_version: Optional[str] = None for file in bucket: @@ -221,7 +240,7 @@ def _retrieve_s3_cdms(self) -> str: continue tmp_path = Path(tempfile.gettempdir()) / "metakb_dl_tmp" - with open(tmp_path, "wb") as f: + with tmp_path.open("wb") as f: file.Object().download_fileobj(f) cdm_dir = APP_ROOT / "data" / source / "transform" @@ -229,20 +248,17 @@ def _retrieve_s3_cdms(self) -> str: cdm_zip.extract(f"{source}_cdm_{newest_version}.json", cdm_dir) if newest_version is None: - raise FileNotFoundError("Unable to locate files matching expected " - "resource pattern in VICC s3 bucket") + msg = "Unable to locate files matching expected resource pattern in VICC s3 bucket" + raise FileNotFoundError(msg) echo_info(f"Retrieved CDM files dated {newest_version}") return newest_version @staticmethod - def _harvest_sources(update_cached) -> None: + def _harvest_sources(update_cached: bool) -> None: """Run harvesting procedure for all sources.""" echo_info("Harvesting sources...") # TODO: Switch to using constant - harvester_sources = { - 'civic': CivicHarvester, - 'moa': MoaHarvester - } + harvester_sources = {"civic": CivicHarvester, "moa": MoaHarvester} total_start = timer() for source_str, source_class in harvester_sources.items(): echo_info(f"Harvesting {source_str}...") @@ -256,10 +272,9 @@ def _harvest_sources(update_cached) -> None: source_successful = source.harvest() end = timer() if not source_successful: - echo_info(f'{source_str} harvest failed.') + echo_info(f"{source_str} harvest failed.") click.get_current_context().exit() - echo_info( - f"{source_str} harvest finished in {(end - start):.5f} s") + echo_info(f"{source_str} harvest finished in {(end - start):.5f} s") total_end = timer() echo_info( f"Successfully harvested all sources in " @@ -271,10 +286,7 @@ async def _transform_sources() -> None: """Run transformation procedure for all sources.""" echo_info("Transforming harvested data to CDM...") # TODO: Switch to using constant - transform_sources = { - 'civic': CivicTransform, - 'moa': MoaTransform - } + transform_sources = {"civic": CivicTransform, "moa": MoaTransform} total_start = timer() for src_str, src_name in transform_sources.items(): echo_info(f"Transforming {src_str}...") @@ -282,8 +294,7 @@ async def _transform_sources() -> None: source: Transform = src_name() await source.transform() end = timer() - echo_info( - f"{src_str} transform finished in {(end - start):.5f} s.") + echo_info(f"{src_str} transform finished in {(end - start):.5f} s.") source.create_json() total_end = timer() echo_info( @@ -291,7 +302,7 @@ async def _transform_sources() -> None: f"{(total_end - total_start):.5f} s\n" ) - def _load_normalizers_db(self, load_normalizer_db): + def _load_normalizers_db(self, load_normalizer_db: bool) -> None: """Load normalizer database source data. 
:param bool load_normalizer_db: Load normalizer database for each @@ -301,23 +312,28 @@ def _load_normalizers_db(self, load_normalizer_db): load_disease = load_therapy = load_gene = True else: load_disease = self._check_normalizer( - DiseaseDatabase(), {src.value for src in DiseaseSources}) + DiseaseDatabase(), {src.value for src in DiseaseSources} + ) load_therapy = self._check_normalizer( - TherapyDatabase(), {src for src in TherapySources}) + TherapyDatabase(), set(TherapySources) + ) load_gene = self._check_normalizer( - GeneDatabase(), {src.value for src in GeneSources}) + GeneDatabase(), {src.value for src in GeneSources} + ) for load_source, normalizer_cli in [ - (load_disease, DiseaseCLI), (load_therapy, TherapyCLI), - (load_gene, GeneCLI) + (load_disease, DiseaseCLI), + (load_therapy, TherapyCLI), + (load_gene, GeneCLI), ]: - name = \ - str(normalizer_cli).split()[1].split('.')[0][1:].capitalize() + name = str(normalizer_cli).split()[1].split(".")[0][1:].capitalize() self._update_normalizer_db(name, load_source, normalizer_cli) echo_info("Normalizers database loaded.\n") @staticmethod - def _check_normalizer(db, sources) -> bool: + def _check_normalizer( + db: Union[GeneDatabase, TherapyDatabase, DiseaseDatabase], sources: Set + ) -> bool: """Check whether or not normalizer data needs to be loaded. :param Database db: Normalizer database @@ -325,15 +341,17 @@ def _check_normalizer(db, sources) -> bool: :return: `True` If normalizer needs to be loaded. `False` otherwise. """ for src in sources: - response = db.metadata.get_item( - Key={'src_name': src} - ) - if not response.get('Item'): + response = db.metadata.get_item(Key={"src_name": src}) + if not response.get("Item"): return True return False @staticmethod - def _update_normalizer_db(name, load_normalizer, source_cli) -> None: + def _update_normalizer_db( + name: str, + load_normalizer: bool, + source_cli: Union[DiseaseCLI, TherapyCLI, GeneCLI], + ) -> None: """Update Normalizer database. :param str name: Name of the normalizer @@ -343,15 +361,14 @@ def _update_normalizer_db(name, load_normalizer, source_cli) -> None: """ if load_normalizer: try: - echo_info(f'\nLoading {name} Normalizer data...') - source_cli.update_normalizer_db( - ['--update_all', '--update_merged']) - echo_info(f'Successfully Loaded {name} Normalizer data.\n') + echo_info(f"\nLoading {name} Normalizer data...") + source_cli.update_normalizer_db(["--update_all", "--update_merged"]) + echo_info(f"Successfully Loaded {name} Normalizer data.\n") except SystemExit as e: if e.code != 0: raise e else: - echo_info(f'{name} Normalizer is already loaded.\n') + echo_info(f"{name} Normalizer is already loaded.\n") @staticmethod def _check_db_param(param: str, name: str) -> str: @@ -361,22 +378,19 @@ def _check_db_param(param: str, name: str) -> str: :return: parameter value, or exit with error message if unavailable """ if not param: - env_var_name = f'METAKB_DB_{name.upper()}' - if env_var_name in environ.keys(): + env_var_name = f"METAKB_DB_{name.upper()}" + if env_var_name in environ: return environ[env_var_name] - else: - # Default is local - if name == 'URL': - return "bolt://localhost:7687" - elif name == 'username': - return 'neo4j' - else: - return 'admin' - else: - return param + # Default is local + if name == "URL": + return "bolt://localhost:7687" + if name == "username": + return "neo4j" + return "admin" + return param @staticmethod - def _help_msg(msg: str = ""): + def _help_msg(msg: str = "") -> None: """Handle invalid user input. 
:param str msg: Error message to display to user. """ @@ -389,5 +403,5 @@ def _help_msg(msg: str = ""): ctx.exit() -if __name__ == '__main__': +if __name__ == "__main__": CLI().update_metakb_db(_anyio_backend="asyncio") diff --git a/src/metakb/database.py b/src/metakb/database.py index e18c2ab6..1fff7847 100644 --- a/src/metakb/database.py +++ b/src/metakb/database.py @@ -16,9 +16,7 @@ def _create_parameterized_query( - entity: Dict, - params: Tuple[str], - entity_param_prefix: str = "" + entity: Dict, params: Tuple[str], entity_param_prefix: str = "" ) -> str: """Create parameterized query string for requested params if non-null in entity. @@ -36,20 +34,23 @@ def _create_parameterized_query( class Graph: """Manage requests to graph datastore.""" - def __init__(self, uri: str = '', credentials: Tuple[str, str] = ('', '')) -> None: + def __init__(self, uri: str = "", credentials: Tuple[str, str] = ("", "")) -> None: """Initialize Graph driver instance. :param uri: address of Neo4j DB :param credentials: tuple containing username and password """ - if 'METAKB_NORM_EB_PROD' in environ: + if "METAKB_NORM_EB_PROD" in environ: secret = ast.literal_eval(self.get_secret()) uri = f"bolt://{secret['host']}:{secret['port']}" - credentials = (secret['username'], secret['password']) - elif 'METAKB_DB_URL' in environ and 'METAKB_DB_USERNAME' in environ and 'METAKB_DB_PASSWORD' in environ: # noqa: E501 - uri = environ['METAKB_DB_URL'] - credentials = (environ['METAKB_DB_USERNAME'], - environ['METAKB_DB_PASSWORD']) + credentials = (secret["username"], secret["password"]) + elif ( + "METAKB_DB_URL" in environ + and "METAKB_DB_USERNAME" in environ + and "METAKB_DB_PASSWORD" in environ + ): + uri = environ["METAKB_DB_URL"] + credentials = (environ["METAKB_DB_USERNAME"], environ["METAKB_DB_PASSWORD"]) elif not (uri and credentials[0] and credentials[1]): # Local uri = "bolt://localhost:7687" @@ -64,12 +65,14 @@ def close(self) -> None: def clear(self) -> None: """Debugging helper - wipe out DB.""" + def delete_all(tx: ManagedTransaction) -> None: """Delete all nodes and relationships :param tx: Transaction object provided to transaction functions """ tx.run("MATCH (n) DETACH DELETE n;") + with self.driver.session() as session: session.execute_write(delete_all) @@ -80,8 +83,8 @@ def load_from_json(self, src_transformed_cdm: Path) -> None: common data model containing studies, variation, therapeutic procedures, conditions, genes, methods, documents, etc. 
""" - logger.info(f"Loading data from {src_transformed_cdm}") - with open(src_transformed_cdm, 'r') as f: + logger.info("Loading data from %s", src_transformed_cdm) + with src_transformed_cdm.open() as f: items = json.load(f) src_name = SourceName( str(src_transformed_cdm).split("/")[-1].split("_cdm")[0] @@ -95,16 +98,23 @@ def _create_constraints(tx: ManagedTransaction) -> None: :param tx: Transaction object provided to transaction functions """ queries = [ - "CREATE CONSTRAINT coding_constraint IF NOT EXISTS FOR (c:Coding) REQUIRE (c.code, c.label, c.system) IS UNIQUE;", # noqa: E501 + "CREATE CONSTRAINT coding_constraint IF NOT EXISTS FOR (c:Coding) REQUIRE (c.code, c.label, c.system) IS UNIQUE;", ] for label in [ - "Gene", "Disease", "TherapeuticProcedure", "Variation", - "CategoricalVariation", "VariantGroup", "Location", "Document", "Study", - "Method" + "Gene", + "Disease", + "TherapeuticProcedure", + "Variation", + "CategoricalVariation", + "VariantGroup", + "Location", + "Document", + "Study", + "Method", ]: queries.append( - f"CREATE CONSTRAINT {label.lower()}_id_constraint IF NOT EXISTS FOR (n:{label}) REQUIRE n.id IS UNIQUE;" # noqa: E501 + f"CREATE CONSTRAINT {label.lower()}_id_constraint IF NOT EXISTS FOR (n:{label}) REQUIRE n.id IS UNIQUE;" ) for query in queries: @@ -132,9 +142,7 @@ def add_transformed_data(self, data: Dict, src_name: SourceName) -> None: cat_var_key = "variations" for cv in data.get(cat_var_key, []): session.execute_write( - self._add_categorical_variation, - cv, - ids_in_studies + self._add_categorical_variation, cv, ids_in_studies ) for doc in data.get("documents", []): @@ -159,12 +167,10 @@ def add_transformed_data(self, data: Dict, src_name: SourceName) -> None: session.execute_write(self._add_study, study) loaded_study_count += 1 - logger.info(f"Successfully loaded {loaded_study_count} studies.") + logger.info("Successfully loaded %s studies.", loaded_study_count) @staticmethod - def _add_mappings_and_exts_to_obj( - obj: Dict, obj_keys: List[str] - ) -> None: + def _add_mappings_and_exts_to_obj(obj: Dict, obj_keys: List[str]) -> None: """Get mappings and extensions from object and add to `obj` and `obj_keys` :param obj: Object to update with mappings and extensions (if found) @@ -192,10 +198,7 @@ def _add_mappings_and_exts_to_obj( obj_keys.append(f"{name}:${name}") def _add_method( - self, - tx: ManagedTransaction, - method: Dict, - ids_in_studies: Set[str] + self, tx: ManagedTransaction, method: Dict, ids_in_studies: Set[str] ) -> None: """Add Method node and its relationships to DB @@ -223,10 +226,7 @@ def _add_method( tx.run(query, **method) def _add_gene_or_disease( - self, - tx: ManagedTransaction, - obj_in: Dict, - ids_in_studies: Set[str] + self, tx: ManagedTransaction, obj_in: Dict, ids_in_studies: Set[str] ) -> None: """Add gene or disease node and its relationships to DB @@ -242,18 +242,12 @@ def _add_gene_or_disease( obj_type = obj["type"] if obj_type not in {"Gene", "Disease"}: - raise TypeError(f"Invalid object type: {obj_type}") + msg = f"Invalid object type: {obj_type}" + raise TypeError(msg) obj_keys = [ _create_parameterized_query( - obj, - ( - "id", - "label", - "description", - "aliases", - "type" - ) + obj, ("id", "label", "description", "aliases", "type") ) ] @@ -274,7 +268,7 @@ def _add_therapeutic_procedure( self, tx: ManagedTransaction, therapeutic_procedure: Dict, - ids_in_studies: Set[str] + ids_in_studies: Set[str], ) -> None: """Add therapeutic procedure node and its relationships @@ -292,15 +286,7 @@ def 
_add_therapeutic_procedure( if tp_type == "TherapeuticAgent": self._add_therapeutic_agent(tx, tp) elif tp_type in {"CombinationTherapy", "TherapeuticSubstituteGroup"}: - keys = [ - _create_parameterized_query( - tp, - ( - "id", - "type" - ) - ) - ] + keys = [_create_parameterized_query(tp, ("id", "type"))] self._add_mappings_and_exts_to_obj(tp, keys) keys = ", ".join(keys) @@ -308,7 +294,11 @@ def _add_therapeutic_procedure( query = f"MERGE (tp:{tp_type}:TherapeuticProcedure {{ {keys} }})" tx.run(query, **tp) - tas = tp["components"] if tp_type == "CombinationTherapy" else tp["substitutes"] # noqa: E501 + tas = ( + tp["components"] + if tp_type == "CombinationTherapy" + else tp["substitutes"] + ) for ta in tas: self._add_therapeutic_agent(tx, ta) query = f""" @@ -319,11 +309,12 @@ def _add_therapeutic_procedure( if tp_type == "CombinationTherapy": query += "MERGE (tp) -[:HAS_COMPONENTS] -> (ta)" else: - query += 'MERGE (tp) -[:HAS_SUBSTITUTES] -> (ta)' + query += "MERGE (tp) -[:HAS_SUBSTITUTES] -> (ta)" tx.run(query) else: - raise TypeError(f"Invalid therapeutic procedure type: {tp_type}") + msg = f"Invalid therapeutic procedure type: {tp_type}" + raise TypeError(msg) def _add_therapeutic_agent( self, tx: ManagedTransaction, therapeutic_agent: Dict @@ -335,15 +326,7 @@ def _add_therapeutic_agent( """ ta = therapeutic_agent.copy() nonnull_keys = [ - _create_parameterized_query( - ta, - ( - "id", - "label", - "aliases", - "type" - ) - ) + _create_parameterized_query(ta, ("id", "label", "aliases", "type")) ] self._add_mappings_and_exts_to_obj(ta, nonnull_keys) @@ -355,10 +338,7 @@ def _add_therapeutic_agent( tx.run(query, **ta) @staticmethod - def _add_location( - tx: ManagedTransaction, - location_in: Dict - ) -> None: + def _add_location(tx: ManagedTransaction, location_in: Dict) -> None: """Add location node and its relationships :param tx: Transaction object provided to transaction functions @@ -380,11 +360,7 @@ def _add_location( """ tx.run(query, **loc) - def _add_variation( - self, - tx: ManagedTransaction, - variation_in: Dict - ) -> None: + def _add_variation(self, tx: ManagedTransaction, variation_in: Dict) -> None: """Add variation node and its relationships :param tx: Transaction object provided to transaction functions @@ -392,9 +368,7 @@ def _add_variation( """ v = variation_in.copy() v_keys = [ - f"v.{key}=${key}" - for key in ("id", "label", "digest", "type") - if v.get(key) + f"v.{key}=${key}" for key in ("id", "label", "digest", "type") if v.get(key) ] expressions = v.get("expressions", []) @@ -433,7 +407,7 @@ def _add_categorical_variation( self, tx: ManagedTransaction, categorical_variation_in: Dict, - ids_in_studies: Set[str] + ids_in_studies: Set[str], ) -> None: """Add categorical variation objects to DB. @@ -448,14 +422,7 @@ def _add_categorical_variation( mp_nonnull_keys = [ _create_parameterized_query( - cv, - ( - "id", - "label", - "description", - "aliases", - "type" - ) + cv, ("id", "label", "description", "aliases", "type") ) ] @@ -486,10 +453,7 @@ def _add_categorical_variation( tx.run(query, **cv) def _add_document( - self, - tx: ManagedTransaction, - document_in: Dict, - ids_in_studies: Set[str] + self, tx: ManagedTransaction, document_in: Dict, ids_in_studies: Set[str] ) -> None: """Add Document object to DB. 
@@ -510,17 +474,13 @@ def _add_document( else: query = None - if query: - result = tx.run(query, **document_in) - else: - result = None + result = tx.run(query, **document_in) if query else None if (not result) or (result and not result.single()): document = document_in.copy() formatted_keys = [ _create_parameterized_query( - document, - ('id', 'label', 'title', 'pmid', 'url', 'doi') + document, ("id", "label", "title", "pmid", "url", "doi") ) ] @@ -538,6 +498,7 @@ def _get_ids_from_studies(self, studies: List[Dict]) -> Set[str]: :param studies: List of studies :return: Set of IDs found in studies """ + def _add_obj_id_to_set(obj: Dict, ids_set: Set[str]) -> None: """Add object id to set of IDs @@ -557,7 +518,7 @@ def _add_obj_id_to_set(obj: Dict, ids_set: Set[str]) -> None: study.get("variant"), study.get("therapeutic"), study.get("tumorType"), - study.get("qualifiers", {}).get("geneContext") + study.get("qualifiers", {}).get("geneContext"), ]: if obj: if isinstance(obj, list): @@ -578,14 +539,7 @@ def _add_study(tx: ManagedTransaction, study_in: Dict) -> None: study = study_in.copy() study_type = study["type"] study_keys = _create_parameterized_query( - study, - ( - "id", - "description", - "direction", - "predicate", - "type" - ) + study, ("id", "description", "direction", "predicate", "type") ) match_line = "" @@ -618,9 +572,7 @@ def _add_study(tx: ManagedTransaction, study_in: Dict) -> None: coding_key_fields = ("code", "label", "system") coding_keys = _create_parameterized_query( - coding, - coding_key_fields, - entity_param_prefix="coding_" + coding, coding_key_fields, entity_param_prefix="coding_" ) for k in coding_key_fields: v = coding.get(k) @@ -657,20 +609,15 @@ def _add_study(tx: ManagedTransaction, study_in: Dict) -> None: @staticmethod def get_secret() -> str: """Get secrets for MetaKB instances.""" - secret_name = environ['METAKB_DB_SECRET'] + secret_name = environ["METAKB_DB_SECRET"] region_name = "us-east-2" # Create a Secrets Manager client session = boto3.session.Session() - client = session.client( - service_name='secretsmanager', - region_name=region_name - ) + client = session.client(service_name="secretsmanager", region_name=region_name) try: - get_secret_value_response = client.get_secret_value( - SecretId=secret_name - ) + get_secret_value_response = client.get_secret_value(SecretId=secret_name) except ClientError as e: # For a list of exceptions thrown, see # https://docs.aws.amazon.com/secretsmanager/latest/apireference/API_GetSecretValue.html diff --git a/src/metakb/exceptions.py b/src/metakb/exceptions.py index d04a5d1a..8c1d80e6 100644 --- a/src/metakb/exceptions.py +++ b/src/metakb/exceptions.py @@ -1,7 +1,5 @@ """Define exceptions.""" -class NormalizationException(Exception): +class NormalizationException(Exception): # noqa: N818 """Indicate failure to normalize term.""" - - pass diff --git a/src/metakb/harvesters/__init__.py b/src/metakb/harvesters/__init__.py index 3c64321a..a7f790b3 100644 --- a/src/metakb/harvesters/__init__.py +++ b/src/metakb/harvesters/__init__.py @@ -1,3 +1 @@ """A package for metakb harvester routines.""" -from .civic import CivicHarvester -from .moa import MoaHarvester diff --git a/src/metakb/harvesters/base.py b/src/metakb/harvesters/base.py index 9779b1b7..9f9747df 100644 --- a/src/metakb/harvesters/base.py +++ b/src/metakb/harvesters/base.py @@ -1,8 +1,8 @@ """A module for the Harvester base class""" -from typing import List, Dict, Optional +import datetime import json import logging -from datetime import datetime as dt +from 
typing import Dict, List, Optional from metakb import APP_ROOT, DATE_FMT @@ -13,8 +13,7 @@ class Harvester: """A base class for content harvesters.""" def harvest(self) -> bool: - """ - Retrieve and store records from a resource. Records may be stored in + """Retrieve and store records from a resource. Records may be stored in any manner, but must be retrievable by :method:`iterate_records`. :return: `True` if operation was successful, `False` otherwise. @@ -22,30 +21,33 @@ def harvest(self) -> bool: """ raise NotImplementedError - def create_json(self, items: Dict[str, List], - filename: Optional[str] = None) -> bool: + def create_json( + self, items: Dict[str, List], filename: Optional[str] = None + ) -> bool: """Create composite and individual JSON for harvested data. :param Dict items: item types keyed to Lists of values :param Optional[str] filename: custom filename for composite document :return: `True` if JSON creation was successful. `False` otherwise. """ - composite_dict = dict() + composite_dict = {} src = self.__class__.__name__.lower().split("harvest")[0] src_dir = APP_ROOT / "data" / src / "harvester" src_dir.mkdir(exist_ok=True, parents=True) - today = dt.strftime(dt.today(), DATE_FMT) + today = datetime.datetime.strftime( + datetime.datetime.now(tz=datetime.timezone.utc), DATE_FMT + ) try: for item_type, item_list in items.items(): composite_dict[item_type] = item_list - with open(src_dir / f"{item_type}_{today}.json", "w+") as f: + with (src_dir / f"{item_type}_{today}.json").open("w+") as f: f.write(json.dumps(item_list, indent=4)) if not filename: filename = f"{src}_harvester_{today}.json" - with open(src_dir / filename, "w+") as f: + with (src_dir / filename).open("w+") as f: f.write(json.dumps(composite_dict, indent=4)) except Exception as e: - logger.error(f"Unable to create json: {e}") + logger.error("Unable to create json: %s", e) return False return True diff --git a/src/metakb/harvesters/civic.py b/src/metakb/harvesters/civic.py index 80d42965..d5c0da4f 100644 --- a/src/metakb/harvesters/civic.py +++ b/src/metakb/harvesters/civic.py @@ -1,11 +1,12 @@ """A module for the CIViC harvester.""" import logging -from typing import Dict, List, Optional from pathlib import Path +from typing import Dict, List, Optional -from civicpy import civic as civicpy, LOCAL_CACHE_PATH +from civicpy import LOCAL_CACHE_PATH +from civicpy import civic as civicpy -from metakb.harvesters.base import Harvester # noqa: I202 +from metakb.harvesters.base import Harvester logger = logging.getLogger(__name__) @@ -17,7 +18,7 @@ def __init__( self, update_cache: bool = False, update_from_remote: bool = True, - local_cache_path: Optional[Path] = LOCAL_CACHE_PATH + local_cache_path: Optional[Path] = LOCAL_CACHE_PATH, ) -> None: """Initialize CivicHarvester class. @@ -62,17 +63,17 @@ def harvest(self, filename: Optional[str] = None) -> bool: "genes": self.genes, "variants": self.variants, "molecular_profiles": self.molecular_profiles, - "assertions": self.assertions + "assertions": self.assertions, }, - filename + filename, ) if not json_created: logger.error( "CIViC Harvester was not successful: JSON files not created." 
) return False - except Exception as e: # noqa: E722 - logger.error(f"CIViC Harvester was not successful: {e}") + except Exception as e: + logger.error("CIViC Harvester was not successful: %s", e) return False else: logger.info("CIViC Harvester was successful.") @@ -130,7 +131,8 @@ def _dictify(self, obj: any) -> Dict: if isinstance(obj, civicpy.CivicRecord): return { k: self._dictify(v) - for k, v in obj.__dict__.items() if not k.startswith(("_", "partial")) + for k, v in obj.__dict__.items() + if not k.startswith(("_", "partial")) } if isinstance(obj, list): diff --git a/src/metakb/harvesters/moa.py b/src/metakb/harvesters/moa.py index db38ad32..1390b0ab 100644 --- a/src/metakb/harvesters/moa.py +++ b/src/metakb/harvesters/moa.py @@ -1,12 +1,11 @@ """A module for the Molecular Oncology Almanac harvester""" import logging -from typing import Optional, List, Dict +from typing import Dict, List, Optional import requests import requests_cache -from metakb.harvesters.base import Harvester # noqa: I202 - +from metakb.harvesters.base import Harvester logger = logging.getLogger(__name__) @@ -15,8 +14,7 @@ class MoaHarvester(Harvester): """A class for the Molecular Oncology Almanac harvester.""" def harvest(self, filename: Optional[str] = None) -> bool: - """ - Retrieve and store sources, variants, and assertions + """Retrieve and store sources, variants, and assertions records from MOAlmanac in composite and individual JSON files. :param Optional[str] filename: File name for composite json @@ -34,15 +32,15 @@ def harvest(self, filename: Optional[str] = None) -> bool: "assertions": assertions, "sources": sources, "variants": variants, - "genes": genes + "genes": genes, }, - filename + filename, ) if not json_created: logger.error("MOAlmanac Harvester was not successful.") return False - except Exception as e: # noqa: E722 - logger.error(f"MOAlmanac Harvester was not successful: {e}") + except Exception as e: + logger.error("MOAlmanac Harvester was not successful: %s", e) return False else: logger.info("MOAlmanac Harvester was successful.") @@ -54,9 +52,9 @@ def _harvest_genes() -> List[Dict]: :return: List of MOA gene records """ - genes = list() + genes = [] with requests_cache.disabled(): - r = requests.get("https://moalmanac.org/api/genes") + r = requests.get("https://moalmanac.org/api/genes", timeout=60) if r.status_code == 200: genes = r.json() return genes @@ -93,8 +91,9 @@ def harvest_variants(self) -> List[Dict]: return variants, variants_list - def harvest_assertions(self, assertion_resp: List[Dict], - variants_list: List[Dict]) -> List[Dict]: + def harvest_assertions( + self, assertion_resp: List[Dict], variants_list: List[Dict] + ) -> List[Dict]: """Harvest all MOA assertions :param List[Dict] assertion_resp: A list of MOA assertion records @@ -115,10 +114,8 @@ def _get_all_assertions(self) -> List[Dict]: :return: All moa assertion records """ with requests_cache.disabled(): - r = requests.get("https://moalmanac.org/api/assertions") - assertions = r.json() - - return assertions + r = requests.get("https://moalmanac.org/api/assertions", timeout=60) + return r.json() def _get_all_variants(self) -> List[Dict]: """Return all variant records @@ -126,10 +123,8 @@ def _get_all_variants(self) -> List[Dict]: :return: All moa variant records """ with requests_cache.disabled(): - r = requests.get("https://moalmanac.org/api/features") - variants = r.json() - - return variants + r = requests.get("https://moalmanac.org/api/features", timeout=60) + return r.json() def _source_item(self, source: Dict) 
-> Dict: """Harvest an individual MOA source of evidence @@ -138,16 +133,15 @@ def _source_item(self, source: Dict) -> Dict: :return: a dictionary containing MOA source of evidence data :rtype: dict """ - source_record = { + return { "id": source["source_id"], "type": source["source_type"], "doi": source["doi"], "nct": source["nct"], "pmid": source["pmid"], "url": source["url"], - "citation": source["citation"] + "citation": source["citation"], } - return source_record def _harvest_variant(self, variant: Dict) -> Dict: """Harvest an individual MOA variant record. @@ -156,11 +150,9 @@ def _harvest_variant(self, variant: Dict) -> Dict: :return: A dictionary containing MOA variant data :rtype: dict """ - variant_record = { - "id": variant["feature_id"] - } + variant_record = {"id": variant["feature_id"]} - variant_record.update({k: v for k, v in variant["attributes"][0].items()}) # noqa: E501 + variant_record.update(dict(variant["attributes"][0].items())) variant_record.update(self._get_feature(variant_record)) return variant_record @@ -180,20 +172,20 @@ def _harvest_assertion(self, assertion: Dict, variants_list: List[Dict]) -> Dict "disease": { "name": assertion["disease"], "oncotree_code": assertion["oncotree_code"], - "oncotree_term": assertion["oncotree_term"] + "oncotree_term": assertion["oncotree_term"], }, "therapy_name": assertion["therapy_name"], "therapy_type": assertion["therapy_type"], "clinical_significance": self._get_therapy( - assertion["therapy_resistance"], - assertion["therapy_sensitivity"]), + assertion["therapy_resistance"], assertion["therapy_sensitivity"] + ), "predictive_implication": assertion["predictive_implication"], "favorable_prognosis": assertion["favorable_prognosis"], "created_on": assertion["created_on"], "last_updated": assertion["last_updated"], "submitted_by": assertion["submitted_by"], "validated": assertion["validated"], - "source_ids": assertion["sources"][0]["source_id"] + "source_ids": assertion["sources"][0]["source_id"], } for v in variants_list: @@ -214,14 +206,12 @@ def _get_therapy(self, resistance: bool, sensitivity: bool) -> Optional[str]: """ if resistance: return "resistance" - elif sensitivity: + if sensitivity: return "sensitivity" - else: - return None + return None def _get_feature(self, v: Dict) -> Dict: - """ - Get feature name from the harvested variants + """Get feature name from the harvested variants :param Dict v: harvested MOA variant :return: feature name same format as displayed in moalmanac.org @@ -229,39 +219,44 @@ def _get_feature(self, v: Dict) -> Dict: """ feature_type = v["feature_type"] if feature_type == "rearrangement": - feature = "{}{}{}".format(v["gene1"], - f"--{v['gene2']}" if v["gene2"] else "", - f" {v['rearrangement_type']}" - if v["rearrangement_type"] else "") + feature = "{}{}{}".format( + v["gene1"], + f"--{v['gene2']}" if v["gene2"] else "", + f" {v['rearrangement_type']}" if v["rearrangement_type"] else "", + ) elif feature_type == "somatic_variant": - feature = "{}{}{}".format(v["gene"], - f" {v['protein_change']}" - if v["protein_change"] else "", - f" ({v['variant_annotation']})" - if v["variant_annotation"] else "") + feature = "{}{}{}".format( + v["gene"], + f" {v['protein_change']}" if v["protein_change"] else "", + f" ({v['variant_annotation']})" if v["variant_annotation"] else "", + ) elif feature_type == "germline_variant": - feature = "{}{}".format(v["gene"], " (Pathogenic)" - if v["pathogenic"] == "1.0" else "") + feature = "{}{}".format( + v["gene"], " (Pathogenic)" if v["pathogenic"] == "1.0" 
else "" + ) elif feature_type == "copy_number": feature = "{} {}".format(v["gene"], v["direction"]) elif feature_type == "microsatellite_stability": feature = "{}".format(v.get("status")) elif feature_type == "mutational_signature": csn = v.get("cosmic_signature_number", "") - feature = "COSMIC Signature {}".format(csn) + feature = f"COSMIC Signature {csn}" elif feature_type == "mutational_burden": clss = v["classification"] min_mut = v["minimum_mutations"] mut_per_mb = v["mutations_per_mb"] - feature = "{}{}".format(clss, - f" (>= {min_mut} mutations)" if min_mut - else (f" (>= {mut_per_mb} mutations/Mb)" - if mut_per_mb else "")) + feature = "{}{}".format( + clss, + f" (>= {min_mut} mutations)" + if min_mut + else (f" (>= {mut_per_mb} mutations/Mb)" if mut_per_mb else ""), + ) elif feature_type == "neoantigen_burden": feature = "{}".format(v["classification"]) elif feature_type == "knockdown" or feature_type == "silencing": - feature = "{}{}".format(v["gene"], f" ({v['technique']})" - if v["technique"] else "") + feature = "{}{}".format( + v["gene"], f" ({v['technique']})" if v["technique"] else "" + ) else: feature = "{}".format(v["event"]) diff --git a/src/metakb/main.py b/src/metakb/main.py index db5765c0..34e08618 100644 --- a/src/metakb/main.py +++ b/src/metakb/main.py @@ -1,14 +1,16 @@ """Main application for FastAPI.""" +from typing import Dict, Optional + from fastapi import FastAPI, Query from fastapi.openapi.utils import get_openapi + from metakb.query import QueryHandler from metakb.version import __version__ -from typing import Dict, Optional app = FastAPI( - docs_url='/api/v2', - openapi_url='/api/v2/openapi.json', - swagger_ui_parameters={"tryItOutEnabled": True} + docs_url="/api/v2", + openapi_url="/api/v2/openapi.json", + swagger_ui_parameters={"tryItOutEnabled": True}, ) query = QueryHandler() @@ -21,15 +23,15 @@ def custom_openapi() -> Dict: title="The VICC Meta-Knowledgebase", version=__version__, description="A search interface for cancer variant interpretations" - " assembled by aggregating and harmonizing across multiple" - " cancer variant interpretation knowledgebases.", - routes=app.routes + " assembled by aggregating and harmonizing across multiple" + " cancer variant interpretation knowledgebases.", + routes=app.routes, ) - openapi_schema['info']['contact'] = { + openapi_schema["info"]["contact"] = { "name": "VICC", "email": "help@cancervariants.org", - "url": "https://cancervariants.org" + "url": "https://cancervariants.org", } app.openapi_schema = openapi_schema return app.openapi_schema @@ -44,24 +46,26 @@ def custom_openapi() -> Dict: "`variation` and `therapy` are provided, will return all studies that have both " "the provided `variation` and `therapy`." ) -v_description = ("Variation (subject) to search. Can be free text or VRS Variation ID.") +v_description = "Variation (subject) to search. Can be free text or VRS Variation ID." d_description = "Disease (object qualifier) to search" t_description = "Therapy (object) to search" g_description = "Gene to search" -s_description = ("Study ID to search.") +s_description = "Study ID to search." search_study_response_descr = "A response to a validly-formed query." 
-@app.get('/api/v2/search/studies', - summary=search_studies_summary, - response_description=search_study_response_descr, - description=search_studies_descr,) +@app.get( + "/api/v2/search/studies", + summary=search_studies_summary, + response_description=search_study_response_descr, + description=search_studies_descr, +) async def get_studies( variation: Optional[str] = Query(None, description=v_description), disease: Optional[str] = Query(None, description=d_description), therapy: Optional[str] = Query(None, description=t_description), gene: Optional[str] = Query(None, description=g_description), - study_id: Optional[str] = Query(None, description=s_description) + study_id: Optional[str] = Query(None, description=s_description), ) -> dict: """Get nested studies from queried concepts that match all conditions provided. For example, if `variation` and `therapy` are provided, will return all studies diff --git a/src/metakb/normalizers.py b/src/metakb/normalizers.py index ad242ec6..4b9c4ce2 100644 --- a/src/metakb/normalizers.py +++ b/src/metakb/normalizers.py @@ -1,19 +1,20 @@ """Module for VICC normalizers.""" -from typing import Optional, Tuple, List +import logging +from typing import List, Optional, Tuple -from ga4gh.core import core_models -from ga4gh.vrs import models -from variation.query import QueryHandler as VariationQueryHandler -from therapy.query import QueryHandler as TherapyQueryHandler -from therapy.database import create_db as create_therapy_db -from therapy.schemas import NormalizationService as NormalizedTherapy, ApprovalRating from disease.database import create_db as create_disease_db from disease.query import QueryHandler as DiseaseQueryHandler from disease.schemas import NormalizationService as NormalizedDisease +from ga4gh.core import core_models +from ga4gh.vrs import models from gene.database import create_db as create_gene_db from gene.query import QueryHandler as GeneQueryHandler from gene.schemas import NormalizeService as NormalizedGene -import logging +from therapy.database import create_db as create_therapy_db +from therapy.query import QueryHandler as TherapyQueryHandler +from therapy.schemas import ApprovalRating +from therapy.schemas import NormalizationService as NormalizedTherapy +from variation.query import QueryHandler as VariationQueryHandler logger = logging.getLogger(__name__) @@ -30,8 +31,9 @@ def __init__(self) -> None: self.disease_query_handler = DiseaseQueryHandler(create_disease_db()) self.therapy_query_handler = TherapyQueryHandler(create_therapy_db()) - async def normalize_variation(self, - queries: List[str]) -> Optional[models.Variation]: + async def normalize_variation( + self, queries: List[str] + ) -> Optional[models.Variation]: """Normalize variation queries. 
:param List[str] queries: Possible query strings to try to normalize @@ -42,16 +44,22 @@ async def normalize_variation(self, if not query: continue try: - variation_norm_resp = await self.variation_normalizer.normalize_handler.normalize(query) # noqa: E501 + variation_norm_resp = ( + await self.variation_normalizer.normalize_handler.normalize(query) + ) if variation_norm_resp and variation_norm_resp.variation: return variation_norm_resp.variation - except Exception as e: # noqa: E722 - logger.warning(f"Variation Normalizer raised an exception using query" - f" {query}: {e}") + except Exception as e: + logger.warning( + "Variation Normalizer raised an exception using query %s: %s", + query, + e, + ) return None - def normalize_gene(self, queries)\ - -> Tuple[Optional[NormalizedGene], Optional[str]]: + def normalize_gene( + self, queries: List[str] + ) -> Tuple[Optional[NormalizedGene], Optional[str]]: """Normalize gene queries :param list queries: Gene queries to normalize @@ -67,8 +75,11 @@ def normalize_gene(self, queries)\ try: gene_norm_resp = self.gene_query_handler.normalize(query_str) except Exception as e: - logger.warning(f"Gene Normalizer raised an exception using query " - f"{query_str}: {e}") + logger.warning( + "Gene Normalizer raised an exception using query %s: %s", + query_str, + e, + ) else: if gene_norm_resp.match_type > highest_match: highest_match = gene_norm_resp.match_type @@ -77,8 +88,9 @@ def normalize_gene(self, queries)\ break return gene_norm_resp, normalized_gene_id - def normalize_disease(self, queries)\ - -> Tuple[Optional[NormalizedDisease], Optional[str]]: + def normalize_disease( + self, queries: List[str] + ) -> Tuple[Optional[NormalizedDisease], Optional[str]]: """Normalize disease queries :param list queries: Disease queries to normalize @@ -95,8 +107,11 @@ def normalize_disease(self, queries)\ try: disease_norm_resp = self.disease_query_handler.normalize(query) except Exception as e: - logger.warning(f"Disease Normalizer raised an exception using query " - f"{query}: {e}") + logger.warning( + "Disease Normalizer raised an exception using query %s: %s", + query, + e, + ) else: if disease_norm_resp.match_type > highest_match: highest_match = disease_norm_resp.match_type @@ -105,8 +120,9 @@ def normalize_disease(self, queries)\ break return disease_norm_resp, normalized_disease_id - def normalize_therapy(self, queries)\ - -> Tuple[Optional[NormalizedTherapy], Optional[str]]: + def normalize_therapy( + self, queries: List[str] + ) -> Tuple[Optional[NormalizedTherapy], Optional[str]]: """Normalize therapy queries :param list queries: Therapy queries to normalize @@ -123,8 +139,11 @@ def normalize_therapy(self, queries)\ try: therapy_norm_resp = self.therapy_query_handler.normalize(query) except Exception as e: - logger.warning(f"Therapy Normalizer raised an exception using " - f"query {query}: {e}") + logger.warning( + "Therapy Normalizer raised an exception using query %s: %s", + query, + e, + ) else: if therapy_norm_resp.match_type > highest_match: highest_match = therapy_norm_resp.match_type @@ -135,7 +154,7 @@ def normalize_therapy(self, queries)\ @staticmethod def get_regulatory_approval_extension( - therapy_norm_resp: NormalizedTherapy + therapy_norm_resp: NormalizedTherapy, ) -> Optional[core_models.Extension]: """Given therapy normalization service response, extract out the regulatory approval extension @@ -145,7 +164,12 @@ def get_regulatory_approval_extension( data if it `regulatory_approval` extensions exists in therapy normalizer """ 
regulatory_approval_extension = None - tn_resp_exts = therapy_norm_resp.model_dump().get("therapeutic_agent", {}).get("extensions") or [] # noqa: E501 + tn_resp_exts = ( + therapy_norm_resp.model_dump() + .get("therapeutic_agent", {}) + .get("extensions") + or [] + ) tn_ext = [v for v in tn_resp_exts if v["name"] == "regulatory_approval"] if tn_ext: @@ -153,17 +177,21 @@ def get_regulatory_approval_extension( approval_ratings = ext_value.get("approval_ratings", []) matched_ext_value = None - if any(ar in {ApprovalRating.FDA_PRESCRIPTION, ApprovalRating.FDA_OTC} - for ar in approval_ratings): - if ApprovalRating.FDA_DISCONTINUED not in approval_ratings or \ - ApprovalRating.CHEMBL_4 in approval_ratings: # noqa: E125 + if any( + ar in {ApprovalRating.FDA_PRESCRIPTION, ApprovalRating.FDA_OTC} + for ar in approval_ratings + ): + if ( + ApprovalRating.FDA_DISCONTINUED not in approval_ratings + or ApprovalRating.CHEMBL_4 in approval_ratings + ): matched_ext_value = "FDA" elif ApprovalRating.CHEMBL_4 in approval_ratings: matched_ext_value = "chembl_phase_4" if matched_ext_value: has_indications = ext_value.get("has_indication", []) - matched_indications = list() + matched_indications = [] for indication in has_indications: indication_exts = indication.get("extensions", []) @@ -183,8 +211,11 @@ def get_regulatory_approval_extension( regulatory_approval_extension = core_models.Extension( name="regulatory_approval", value={ - "approval_rating": "FDA" if matched_ext_value == "FDA" else "ChEMBL", # noqa: E501 - "has_indications": matched_indications - }) + "approval_rating": "FDA" + if matched_ext_value == "FDA" + else "ChEMBL", + "has_indications": matched_indications, + }, + ) return regulatory_approval_extension diff --git a/src/metakb/query.py b/src/metakb/query.py index 945803ab..2f24d1dd 100644 --- a/src/metakb/query.py +++ b/src/metakb/query.py @@ -1,13 +1,12 @@ """Module for queries.""" -from copy import copy -from enum import Enum import json import logging +from copy import copy +from enum import Enum from typing import Dict, List, Optional, Tuple from ga4gh.core import core_models from ga4gh.vrs import models -from metakb.schemas.app import SourceName from neo4j import Transaction from neo4j.graph import Node from pydantic import ValidationError @@ -16,6 +15,7 @@ from metakb.normalizers import ViccNormalizers from metakb.schemas.annotation import Document, Method from metakb.schemas.api import SearchStudiesService, ServiceMeta +from metakb.schemas.app import SourceName from metakb.schemas.categorical_variation import CategoricalVariation from metakb.schemas.variation_statement import ( VariantTherapeuticResponseStudy, @@ -60,22 +60,30 @@ def _update_mappings(params: Dict) -> None: class QueryHandler: """Class for handling queries.""" - def __init__(self, uri: str = "", - creds: Tuple[str, str] = ("", ""), - normalizers: ViccNormalizers = ViccNormalizers()) -> None: + def __init__( + self, + uri: str = "", + creds: Tuple[str, str] = ("", ""), + normalizers: Optional[ViccNormalizers] = None, + ) -> None: """Initialize neo4j driver and the VICC normalizers. 
:param str uri: address of Neo4j DB :param Tuple[str, str] credentials: tuple containing username and password :param ViccNormalizers normalizers: normalizer collection instance """ + if normalizers is None: + normalizers = ViccNormalizers() self.driver = Graph(uri, creds).driver self.vicc_normalizers = normalizers async def search_studies( - self, variation: Optional[str] = None, disease: Optional[str] = None, - therapy: Optional[str] = None, gene: Optional[str] = None, - study_id: Optional[str] = None + self, + variation: Optional[str] = None, + disease: Optional[str] = None, + therapy: Optional[str] = None, + gene: Optional[str] = None, + study_id: Optional[str] = None, ) -> SearchStudiesService: """Get nested studies from queried concepts that match all conditions provided. For example, if `variation` and `therapy` are provided, will return all studies @@ -95,23 +103,29 @@ async def search_studies( "disease": None, "therapy": None, "gene": None, - "study_id": None + "study_id": None, }, "warnings": [], "study_ids": [], "studies": [], - "service_meta_": ServiceMeta() + "service_meta_": ServiceMeta(), } normalized_terms = await self._get_normalized_terms( - variation, disease, therapy, gene, study_id, response) + variation, disease, therapy, gene, study_id, response + ) if normalized_terms is None: return SearchStudiesService(**response) - (normalized_variation, normalized_disease, - normalized_therapy, normalized_gene, study, - valid_study_id) = normalized_terms + ( + normalized_variation, + normalized_disease, + normalized_therapy, + normalized_gene, + study, + valid_study_id, + ) = normalized_terms with self.driver.session() as session: if valid_study_id: @@ -123,7 +137,7 @@ async def search_studies( normalized_variation=normalized_variation, normalized_therapy=normalized_therapy, normalized_disease=normalized_disease, - normalized_gene=normalized_gene + normalized_gene=normalized_gene, ) response["study_ids"] = [s["id"] for s in study_nodes] @@ -137,9 +151,13 @@ async def search_studies( return SearchStudiesService(**response) async def _get_normalized_terms( - self, variation: Optional[str], disease: Optional[str], - therapy: Optional[str], gene: Optional[str], - study_id: Optional[str], response: Dict + self, + variation: Optional[str], + disease: Optional[str], + therapy: Optional[str], + gene: Optional[str], + study_id: Optional[str], + response: Dict, ) -> Optional[Tuple]: """Find normalized terms for queried concepts. 
@@ -158,20 +176,23 @@ async def _get_normalized_terms( # Find normalized terms using VICC normalizers if therapy: response["query"]["therapy"] = therapy - normalized_therapy = \ - self._get_normalized_therapy(therapy.strip(), response["warnings"]) + normalized_therapy = self._get_normalized_therapy( + therapy.strip(), response["warnings"] + ) else: normalized_therapy = None if disease: response["query"]["disease"] = disease - normalized_disease = \ - self._get_normalized_disease(disease.strip(), response["warnings"]) + normalized_disease = self._get_normalized_disease( + disease.strip(), response["warnings"] + ) else: normalized_disease = None if variation: response["query"]["variation"] = variation - normalized_variation = \ - await self._get_normalized_variation(variation, response["warnings"]) + normalized_variation = await self._get_normalized_variation( + variation, response["warnings"] + ) else: normalized_variation = None if gene: @@ -190,62 +211,67 @@ async def _get_normalized_terms( if study: valid_study_id = study.get("id") else: - response["warnings"].append( - f"Study: {study_id} does not exist.") + response["warnings"].append(f"Study: {study_id} does not exist.") # If queried concept is given check that it is normalized / valid - if (variation and not normalized_variation) or \ - (therapy and not normalized_therapy) or \ - (disease and not normalized_disease) or \ - (gene and not normalized_gene) or \ - (study_id and not valid_study_id): + if ( + (variation and not normalized_variation) + or (therapy and not normalized_therapy) + or (disease and not normalized_disease) + or (gene and not normalized_gene) + or (study_id and not valid_study_id) + ): return None - return (normalized_variation, normalized_disease, normalized_therapy, - normalized_gene, study, valid_study_id) + return ( + normalized_variation, + normalized_disease, + normalized_therapy, + normalized_gene, + study, + valid_study_id, + ) - def _get_normalized_therapy(self, therapy: str, - warnings: List[str]) -> Optional[str]: + def _get_normalized_therapy( + self, therapy: str, warnings: List[str] + ) -> Optional[str]: """Get normalized therapy concept. :param therapy: Therapy query :param warnings: A list of warnings for the search query :return: A normalized therapy concept if it exists """ - _, normalized_therapy_id = \ - self.vicc_normalizers.normalize_therapy([therapy]) + _, normalized_therapy_id = self.vicc_normalizers.normalize_therapy([therapy]) if not normalized_therapy_id: - warnings.append(f"Therapy Normalizer unable to normalize: " - f"{therapy}") + warnings.append(f"Therapy Normalizer unable to normalize: " f"{therapy}") return normalized_therapy_id - def _get_normalized_disease(self, disease: str, - warnings: List[str]) -> Optional[str]: + def _get_normalized_disease( + self, disease: str, warnings: List[str] + ) -> Optional[str]: """Get normalized disease concept. 
:param disease: Disease query :param warnings: A list of warnings for the search query :return: A normalized disease concept if it exists """ - _, normalized_disease_id = \ - self.vicc_normalizers.normalize_disease([disease]) + _, normalized_disease_id = self.vicc_normalizers.normalize_disease([disease]) if not normalized_disease_id: - warnings.append(f"Disease Normalizer unable to normalize: " - f"{disease}") + warnings.append(f"Disease Normalizer unable to normalize: " f"{disease}") return normalized_disease_id - async def _get_normalized_variation(self, variation: str, - warnings: List[str]) -> Optional[str]: + async def _get_normalized_variation( + self, variation: str, warnings: List[str] + ) -> Optional[str]: """Get normalized variation concept. :param variation: Variation query :param warnings: A list of warnings for the search query :return: A normalized variant concept if it exists """ - variant_norm_resp = \ - await self.vicc_normalizers.normalize_variation([variation]) + variant_norm_resp = await self.vicc_normalizers.normalize_variation([variation]) normalized_variation = variant_norm_resp.id if variant_norm_resp else None if not normalized_variation: @@ -253,8 +279,9 @@ async def _get_normalized_variation(self, variation: str, if variation.startswith(("ga4gh:VA.", "ga4gh:CX.", "ga4gh:CN.")): normalized_variation = variation else: - warnings.append(f"Variation Normalizer unable to normalize: " - f"{variation}") + warnings.append( + f"Variation Normalizer unable to normalize: " f"{variation}" + ) return normalized_variation def _get_normalized_gene(self, gene: str, warnings: List[str]) -> Optional[str]: @@ -290,7 +317,7 @@ def _get_related_studies( normalized_variation: Optional[str] = None, normalized_therapy: Optional[str] = None, normalized_disease: Optional[str] = None, - normalized_gene: Optional[str] = None + normalized_gene: Optional[str] = None, ) -> List[Node]: """Get studies that contain queried normalized concepts. @@ -340,9 +367,7 @@ def _get_related_studies( return [s[0] for s in tx.run(query, **params)] def _get_nested_studies( - self, - tx: Transaction, - study_nodes: List[Node] + self, tx: Transaction, study_nodes: List[Node] ) -> List[Dict]: """Get a list of nested studies. 
@@ -382,7 +407,7 @@ def _get_nested_study(self, tx: Transaction, s: Node) -> Dict: "variant": None, "strength": None, "isReportedIn": [], - "specifiedBy": None + "specifiedBy": None, } params.update(s) study_id = s["id"] @@ -431,8 +456,7 @@ def _get_disease(node: Dict) -> core_models.Disease: _update_mappings(node) node["extensions"] = [ core_models.Extension( - name="disease_normalizer_id", - value=node["disease_normalizer_id"] + name="disease_normalizer_id", value=node["disease_normalizer_id"] ) ] return core_models.Disease(**node) @@ -451,7 +475,7 @@ def _get_cat_var(self, tx: Transaction, node: Dict) -> CategoricalVariation: ("moa_representative_coordinate", "MOA representative coordinate"), ("civic_representative_coordinate", "CIViC representative coordinate"), ("civic_molecular_profile_score", "CIViC Molecular Profile Score"), - ("variant_types", "Variant types") + ("variant_types", "Variant types"), ): node_val = node.get(node_key) if node_val: @@ -459,12 +483,7 @@ def _get_cat_var(self, tx: Transaction, node: Dict) -> CategoricalVariation: ext_val = json.loads(node_val) except TypeError: ext_val = node_val - extensions.append( - core_models.Extension( - name=ext_name, - value=ext_val - ) - ) + extensions.append(core_models.Extension(name=ext_name, value=ext_val)) if node_key.startswith(SourceName.MOA.value): # Cant be civic break @@ -480,9 +499,7 @@ def _get_cat_var(self, tx: Transaction, node: Dict) -> CategoricalVariation: @staticmethod def _get_variations( - tx: Transaction, - cv_id: str, - relation: VariationRelation + tx: Transaction, cv_id: str, relation: VariationRelation ) -> List[Dict]: """Get list of variations associated to categorical variation @@ -512,24 +529,21 @@ def _get_variations( syntax = variation_k.split("expression_")[-1].replace("_", ".") for hgvs_expr in variation_v: expressions.append( - models.Expression( - syntax=syntax, - value=hgvs_expr - ) + models.Expression(syntax=syntax, value=hgvs_expr) ) v_params["expressions"] = expressions or None loc_params = r_params["loc"] v_params["location"] = loc_params - v_params["location"]["sequenceReference"] = json.loads(loc_params["sequence_reference"]) # noqa: E501 + v_params["location"]["sequenceReference"] = json.loads( + loc_params["sequence_reference"] + ) variations.append(models.Variation(**v_params).model_dump()) return variations @staticmethod def _get_variant_onco_study_qualifier( - tx: Transaction, - study_id: str, - allele_origin: Optional[str] + tx: Transaction, study_id: str, allele_origin: Optional[str] ) -> _VariantOncogenicityStudyQualifier: """Get variant oncogenicity study qualifier data for a study @@ -551,14 +565,12 @@ def _get_variant_onco_study_qualifier( gene_params["extensions"] = [ core_models.Extension( - name="gene_normalizer_id", - value=gene_params["gene_normalizer_id"] + name="gene_normalizer_id", value=gene_params["gene_normalizer_id"] ) ] return _VariantOncogenicityStudyQualifier( - alleleOrigin=allele_origin, - geneContext=core_models.Gene(**gene_params) + alleleOrigin=allele_origin, geneContext=core_models.Gene(**gene_params) ) @staticmethod @@ -592,10 +604,7 @@ def _get_document(node: Dict) -> Document: source_type = node.get("source_type") if source_type: node["extensions"] = [ - core_models.Extension( - name="source_type", - value=source_type - ) + core_models.Extension(name="source_type", value=source_type) ] return Document(**node) @@ -618,19 +627,23 @@ def _get_therapeutic_procedure( node["extensions"] = [ core_models.Extension( name="civic_therapy_interaction_type", - 
value=civic_therapy_interaction_type + value=civic_therapy_interaction_type, ) ] if node_type == "CombinationTherapy": node["components"] = self._get_therapeutic_agents( - tx, node["id"], TherapeuticProcedureType.COMBINATION, - TherapeuticRelation.HAS_COMPONENTS + tx, + node["id"], + TherapeuticProcedureType.COMBINATION, + TherapeuticRelation.HAS_COMPONENTS, ) else: node["substitutes"] = self._get_therapeutic_agents( - tx, node["id"], TherapeuticProcedureType.SUBSTITUTES, - TherapeuticRelation.HAS_SUBSTITUTES + tx, + node["id"], + TherapeuticProcedureType.SUBSTITUTES, + TherapeuticRelation.HAS_SUBSTITUTES, ) therapeutic = core_models.TherapeuticProcedure(**node) @@ -647,7 +660,7 @@ def _get_therapeutic_agents( tx: Transaction, tp_id: str, tp_type: TherapeuticProcedureType, - tp_relation: TherapeuticRelation + tp_relation: TherapeuticRelation, ) -> List[core_models.TherapeuticAgent]: """Get list of therapeutic agents for therapeutic combination or substitutes group @@ -684,8 +697,7 @@ def _get_therapeutic_agent(in_ta_params: Dict) -> core_models.TherapeuticAgent: _update_mappings(ta_params) extensions = [ core_models.Extension( - name="therapy_normalizer_id", - value=ta_params["therapy_normalizer_id"] + name="therapy_normalizer_id", value=ta_params["therapy_normalizer_id"] ) ] regulatory_approval = ta_params.get("regulatory_approval") @@ -693,8 +705,7 @@ def _get_therapeutic_agent(in_ta_params: Dict) -> core_models.TherapeuticAgent: regulatory_approval = json.loads(regulatory_approval) extensions.append( core_models.Extension( - name="regulatory_approval", - value=regulatory_approval + name="regulatory_approval", value=regulatory_approval ) ) diff --git a/src/metakb/schemas/annotation.py b/src/metakb/schemas/annotation.py index 798b8b26..4de940d8 100644 --- a/src/metakb/schemas/annotation.py +++ b/src/metakb/schemas/annotation.py @@ -1,5 +1,5 @@ """Module containing GK pilot annotation definitions""" -from datetime import datetime +import datetime from enum import StrEnum from typing import Dict, List, Literal, Optional, Union @@ -28,18 +28,16 @@ class Document(core_models._MappableEntity): type: Literal["Document"] = "Document" title: Optional[StrictStr] = Field(None, description="The title of the Document") - url: Optional[constr(pattern=r"^(https?|s?ftp)://")] = Field( # noqa: F722 + url: Optional[constr(pattern=r"^(https?|s?ftp)://")] = Field( None, description="A URL at which the document may be retrieved." ) - doi: Optional[ - constr(pattern=r"^10.(\d+)(\.\d+)*\/[\w\-\.]+") # noqa: F722 - ] = Field( + doi: Optional[constr(pattern=r"^10.(\d+)(\.\d+)*\/[\w\-\.]+")] = Field( None, - description="A `Digital Object Identifier _` for the document.", # noqa: E501 + description="A `Digital Object Identifier _` for the document.", ) pmid: Optional[StrictInt] = Field( None, - description="A `PubMed unique identifier `_.", # noqa: E501 + description="A `PubMed unique identifier `_.", ) @@ -54,7 +52,7 @@ class Method(core_models._Entity): ) subtype: Optional[core_models.Coding] = Field( None, - description="A more specific type of entity the method represents (e.g. Variant Interpretation Guideline, Experimental Protocol)", # noqa: E501 + description="A more specific type of entity the method represents (e.g. 
Variant Interpretation Guideline, Experimental Protocol)", ) @@ -80,7 +78,7 @@ class Contribution(core_models._Entity): date: Optional[StrictStr] = None activity: Optional[core_models.Coding] = Field( None, - description="SHOULD describe a concept descending from the Contributor Role Ontology.", # noqa: E501 + description="SHOULD describe a concept descending from the Contributor Role Ontology.", ) @field_validator("date") @@ -91,10 +89,12 @@ def date_format(cls, v: Optional[str]) -> Optional[str]: valid_format = "%Y-%m-%d" try: - datetime.strptime(v, valid_format).strftime(valid_format) - except ValueError: - raise ValueError("`date` must use YYYY-MM-DD format") - + datetime.datetime.strptime(v, valid_format).replace( + tzinfo=datetime.timezone.utc + ).strftime(valid_format) + except ValueError as e: + msg = "`date` must use YYYY-MM-DD format" + raise ValueError(msg) from e return v @@ -107,7 +107,7 @@ class _InformationEntity(core_models._Entity): type: StrictStr specifiedBy: Optional[Union[Method, core_models.IRI]] = Field( None, - description="A `Method` that describes all or part of the process through which the information was generated.", # noqa: E501 + description="A `Method` that describes all or part of the process through which the information was generated.", ) contributions: Optional[List[Contribution]] = None isReportedIn: Optional[List[Union[Document, core_models.IRI]]] = Field( @@ -124,7 +124,7 @@ class DataItem(_InformationEntity): type: Literal["DataItem"] = Field("DataItem", description="MUST be 'DataItem'.") subtype: Optional[core_models.Coding] = Field( None, - description="A specific type of data the DataItem object represents (e.g. a specimen count, a patient weight, an allele frequency, a p-value, a confidence score)", # noqa: E501 + description="A specific type of data the DataItem object represents (e.g. 
a specimen count, a patient weight, an allele frequency, a p-value, a confidence score)", ) value: StrictStr unit: Optional[core_models.Coding] = None @@ -143,7 +143,7 @@ class _StatementBase(_InformationEntity): ) strength: Optional[Union[core_models.Coding, core_models.IRI]] = Field( None, - description="The overall strength of support for the Statement based on all evidence assessed.", # noqa: E501 + description="The overall strength of support for the Statement based on all evidence assessed.", ) diff --git a/src/metakb/schemas/api.py b/src/metakb/schemas/api.py index f80ea0fd..762b6b0c 100644 --- a/src/metakb/schemas/api.py +++ b/src/metakb/schemas/api.py @@ -21,7 +21,7 @@ class ServiceMeta(BaseModel): "example": { "name": "metakb", "version": __version__, - "url": "https://github.com/cancervariants/metakb" + "url": "https://github.com/cancervariants/metakb", } } ) diff --git a/src/metakb/schemas/categorical_variation.py b/src/metakb/schemas/categorical_variation.py index dabd6a35..b69ce045 100644 --- a/src/metakb/schemas/categorical_variation.py +++ b/src/metakb/schemas/categorical_variation.py @@ -28,7 +28,7 @@ class _CategoricalVariationBase(core_models._DomainEntity): members: Optional[List[Union[models.Variation, core_models.IRI]]] = Field( None, - description="A non-exhaustive list of VRS variation contexts that satisfy the constraints of this categorical variant.", # noqa: E501 + description="A non-exhaustive list of VRS variation contexts that satisfy the constraints of this categorical variant.", ) @@ -48,7 +48,7 @@ class ProteinSequenceConsequence(_CategoricalVariationBase): ) definingContext: Union[models.Allele, core_models.IRI] = Field( ..., - description="The `VRS Allele `_ object that is congruent with (projects to the same codons) as alleles on other protein reference sequences.", # noqa: E501 + description="The `VRS Allele `_ object that is congruent with (projects to the same codons) as alleles on other protein reference sequences.", ) @@ -66,7 +66,7 @@ class CanonicalAllele(_CategoricalVariationBase): ) definingContext: Union[models.Allele, core_models.IRI] = Field( ..., - description="The `VRS Allele `_ object that is congruent with variants on alternate reference sequences.", # noqa: E501 + description="The `VRS Allele `_ object that is congruent with variants on alternate reference sequences.", ) @@ -85,15 +85,15 @@ class CategoricalCnv(_CategoricalVariationBase): ) location: models.Location = Field( ..., - description="A `VRS Location `_ object that represents a sequence derived from that location, and is congruent with locations on alternate reference sequences.", # noqa: E501 + description="A `VRS Location `_ object that represents a sequence derived from that location, and is congruent with locations on alternate reference sequences.", ) locationMatchCharacteristic: Optional[LocationMatchCharacteristic] = Field( None, - description="The characteristics of a valid match between a contextual CNV location (the query) and the Categorical CNV location (the domain), when both query and domain are represented on the same reference sequence. An `exact` match requires the location of the query and domain to be identical. A `subinterval` match requires the query to be a subinterval of the domain. A `superinterval` match requires the query to be a superinterval of the domain. 
A `partial` match requires at least 1 residue of overlap between the query and domain.", # noqa: E501 + description="The characteristics of a valid match between a contextual CNV location (the query) and the Categorical CNV location (the domain), when both query and domain are represented on the same reference sequence. An `exact` match requires the location of the query and domain to be identical. A `subinterval` match requires the query to be a subinterval of the domain. A `superinterval` match requires the query to be a superinterval of the domain. A `partial` match requires at least 1 residue of overlap between the query and domain.", ) copyChange: Optional[models.CopyChange] = Field( None, - description="A representation of the change in copies of a sequence in a system. MUST be one of 'efo:0030069' (complete genomic loss), 'efo:0020073' (high-level loss), 'efo:0030068' (low-level loss), 'efo:0030067' (loss), 'efo:0030064' (regional base ploidy), 'efo:0030070' (gain), 'efo:0030071' (low-level gain), 'efo:0030072' (high-level gain).", # noqa: E501 + description="A representation of the change in copies of a sequence in a system. MUST be one of 'efo:0030069' (complete genomic loss), 'efo:0020073' (high-level loss), 'efo:0030068' (low-level loss), 'efo:0030067' (loss), 'efo:0030064' (regional base ploidy), 'efo:0030070' (gain), 'efo:0030071' (low-level gain), 'efo:0030072' (high-level gain).", ) copies: Optional[Union[int, models.Range]] = Field( None, description="The integral number of copies of the subject in a system." @@ -113,11 +113,11 @@ class DescribedVariation(_CategoricalVariationBase): ) label: StrictStr = Field( ..., - description="A primary label for the categorical variation. This required property should provide a short and descriptive textual representation of the concept.", # noqa: E501 + description="A primary label for the categorical variation. 
This required property should provide a short and descriptive textual representation of the concept.", ) description: Optional[StrictStr] = Field( None, - description="A textual description of the domain of variation that should match the categorical variation entity.", # noqa: E501 + description="A textual description of the domain of variation that should match the categorical variation entity.", ) @@ -131,7 +131,7 @@ class CategoricalVariation(RootModel): ] = Field( ..., json_schema_extra={ - "description": "A representation of a categorically-defined domain for variation, in which individual contextual variation instances may be members of the domain.", # noqa: E501 + "description": "A representation of a categorically-defined domain for variation, in which individual contextual variation instances may be members of the domain.", }, discriminator="type", ) diff --git a/src/metakb/schemas/variation_statement.py b/src/metakb/schemas/variation_statement.py index 9ee37f68..262e3f9c 100644 --- a/src/metakb/schemas/variation_statement.py +++ b/src/metakb/schemas/variation_statement.py @@ -1,6 +1,6 @@ """Module containing variant statement definitions""" from enum import StrEnum -from typing import Literal, Optional, Union, List +from typing import List, Literal, Optional, Union from ga4gh.core import core_models from ga4gh.vrs import models @@ -78,7 +78,7 @@ class _VariantClassification(_VariantStatement): classification: Union[core_models.Coding, core_models.IRI] = Field( ..., - description="A methodological, summary classification about the impact of a variant.", # noqa: E501 + description="A methodological, summary classification about the impact of a variant.", ) @@ -87,11 +87,11 @@ class VariantPathogenicityQualifier(BaseModel): penetrance: Optional[Penetrance] = Field( None, - description="The extent to which the variant impact is expressed by individuals carrying it as a measure of the proportion of carriers exhibiting the condition.", # noqa: E501 + description="The extent to which the variant impact is expressed by individuals carrying it as a measure of the proportion of carriers exhibiting the condition.", ) modeOfInheritance: Optional[ModeOfInheritance] = Field( None, - description="The pattern of inheritance expected for the pathogenic effect of this variant.", # noqa: E501 + description="The pattern of inheritance expected for the pathogenic effect of this variant.", ) geneContext: Optional[core_models.Gene] = Field( None, description="A gene context that qualifies the Statement." @@ -134,11 +134,11 @@ class _VariantOncogenicityStudyQualifier(BaseModel): alleleOrigin: Optional[AlleleOrigin] = Field( None, - description="Whether the statement should be interpreted in the context of an inherited (germline) variant, an acquired (somatic) mutation, or both (combined).", # noqa: E501 + description="Whether the statement should be interpreted in the context of an inherited (germline) variant, an acquired (somatic) mutation, or both (combined).", ) allelePrevalence: Optional[AllelePrevalence] = Field( None, - description="Whether the statement should be interpreted in the context of the variant being rare or common.", # noqa: E501 + description="Whether the statement should be interpreted in the context of the variant being rare or common.", ) geneContext: Optional[core_models.Gene] = Field( None, description="A gene context that qualifies the Statement." 
@@ -175,7 +175,7 @@ class VariantTherapeuticResponseStudy(_VariantStudySummary): # extends object therapeutic: Union[core_models.TherapeuticProcedure, core_models.IRI] = Field( ..., - description="A drug administration or other therapeutic procedure that the neoplasm is intended to respond to.", # noqa: E501 + description="A drug administration or other therapeutic procedure that the neoplasm is intended to respond to.", ) tumorType: Union[core_models.Condition, core_models.IRI] = Field( ..., diff --git a/src/metakb/transform/base.py b/src/metakb/transform/base.py index a4503b06..b0bf5400 100644 --- a/src/metakb/transform/base.py +++ b/src/metakb/transform/base.py @@ -1,23 +1,25 @@ """A module for the Transform base class.""" -from abc import abstractmethod -from typing import Dict, Optional, List, Set, Union +import datetime import json import logging -from pathlib import Path -from datetime import datetime as dt +from abc import abstractmethod from enum import StrEnum +from pathlib import Path +from typing import ClassVar, Dict, List, Optional, Set, Union +from disease.schemas import ( + NamespacePrefix as DiseaseNamespacePrefix, +) from disease.schemas import ( NormalizationService as NormalizedDisease, - NamespacePrefix as DiseaseNamespacePrefix ) from ga4gh.core import core_models, sha512t24u from pydantic import BaseModel, StrictStr, ValidationError from therapy.schemas import NormalizationService as NormalizedTherapy from metakb import APP_ROOT, DATE_FMT -from metakb.schemas.annotation import Method, Document from metakb.normalizers import ViccNormalizers +from metakb.schemas.annotation import Document, Method logger = logging.getLogger(__name__) @@ -79,114 +81,127 @@ class ViccConceptVocab(BaseModel): class Transform: """A base class for transforming harvester data.""" - _methods: List[Method] = [ + _methods: ClassVar[List[Method]] = [ Method( id=MethodId.CIVIC_EID_SOP, label="CIViC Curation SOP (2019)", isReportedIn=Document( label="Danos et al., 2019, Genome Med.", - title="Standard operating procedure for curation and clinical interpretation of variants in cancer", # noqa: E501 + title="Standard operating procedure for curation and clinical interpretation of variants in cancer", doi="10.1186/s13073-019-0687-x", - pmid=31779674 - ) + pmid=31779674, + ), ).model_dump(exclude_none=True), Method( id=MethodId.MOA_ASSERTION_BIORXIV, label="MOAlmanac (2021)", isReportedIn=Document( label="Reardon, B., Moore, N.D., Moore, N.S. 
et al.", - title="Integrating molecular profiles into clinical frameworks through the Molecular Oncology Almanac to prospectively guide precision oncology", # noqa: E501 + title="Integrating molecular profiles into clinical frameworks through the Molecular Oncology Almanac to prospectively guide precision oncology", doi="10.1038/s43018-021-00243-3", - pmid=35121878 - ) + pmid=35121878, + ), ).model_dump(exclude_none=True), ] - methods_mapping = {m["id"]: m for m in _methods} + methods_mapping: ClassVar[Dict] = {m["id"]: m for m in _methods} - _vicc_concept_vocabs: List[ViccConceptVocab] = [ + _vicc_concept_vocabs: ClassVar[List[ViccConceptVocab]] = [ ViccConceptVocab( id="vicc:e000000", domain="EvidenceStrength", term="evidence", parents=[], exact_mappings={EcoLevel.EVIDENCE}, - definition="A type of information that is used to support statements."), + definition="A type of information that is used to support statements.", + ), ViccConceptVocab( id="vicc:e000001", domain="EvidenceStrength", term="authoritative evidence", parents=["vicc:e000000"], exact_mappings={CivicEvidenceLevel.A}, - definition="Evidence derived from an authoritative source describing a proven or consensus statement."), # noqa: E501 + definition="Evidence derived from an authoritative source describing a proven or consensus statement.", + ), ViccConceptVocab( id="vicc:e000002", domain="EvidenceStrength", term="FDA recognized evidence", parents=["vicc:e000001"], exact_mappings={MoaEvidenceLevel.FDA_APPROVED}, - definition="Evidence derived from statements recognized by the US Food and Drug Administration."), # noqa: E501 + definition="Evidence derived from statements recognized by the US Food and Drug Administration.", + ), ViccConceptVocab( id="vicc:e000003", domain="EvidenceStrength", term="professional guideline evidence", parents=["vicc:e000001"], exact_mappings={MoaEvidenceLevel.GUIDELINE}, - definition="Evidence derived from statements by professional society guidelines"), # noqa: E501 + definition="Evidence derived from statements by professional society guidelines", + ), ViccConceptVocab( id="vicc:e000004", domain="EvidenceStrength", term="clinical evidence", parents=["vicc:e000000"], exact_mappings={EcoLevel.CLINICAL_STUDY_EVIDENCE}, - definition="Evidence derived from clinical research studies"), + definition="Evidence derived from clinical research studies", + ), ViccConceptVocab( id="vicc:e000005", domain="EvidenceStrength", term="clinical cohort evidence", parents=["vicc:e000004"], exact_mappings={CivicEvidenceLevel.B}, - definition="Evidence derived from the clinical study of a participant cohort"), # noqa: E501 + definition="Evidence derived from the clinical study of a participant cohort", + ), ViccConceptVocab( id="vicc:e000006", domain="EvidenceStrength", term="interventional study evidence", parents=["vicc:e000005"], exact_mappings={MoaEvidenceLevel.CLINICAL_TRIAL}, - definition="Evidence derived from interventional studies of clinical cohorts (clinical trials)"), # noqa: E501 + definition="Evidence derived from interventional studies of clinical cohorts (clinical trials)", + ), ViccConceptVocab( id="vicc:e000007", domain="EvidenceStrength", term="observational study evidence", parents=["vicc:e000005"], exact_mappings={MoaEvidenceLevel.CLINICAL_EVIDENCE}, - definition="Evidence derived from observational studies of clinical cohorts"), # noqa: E501 + definition="Evidence derived from observational studies of clinical cohorts", + ), ViccConceptVocab( id="vicc:e000008", domain="EvidenceStrength", 
term="case study evidence", parents=["vicc:e000004"], exact_mappings={CivicEvidenceLevel.C}, - definition="Evidence derived from clinical study of a single participant"), + definition="Evidence derived from clinical study of a single participant", + ), ViccConceptVocab( id="vicc:e000009", domain="EvidenceStrength", term="preclinical evidence", parents=["vicc:e000000"], exact_mappings={CivicEvidenceLevel.D, MoaEvidenceLevel.PRECLINICAL}, - definition="Evidence derived from the study of model organisms"), + definition="Evidence derived from the study of model organisms", + ), ViccConceptVocab( id="vicc:e000010", domain="EvidenceStrength", term="inferential evidence", parents=["vicc:e000000"], exact_mappings={CivicEvidenceLevel.E, MoaEvidenceLevel.INFERENTIAL}, - definition="Evidence derived by inference") + definition="Evidence derived by inference", + ), ] - def __init__(self, - data_dir: Path = APP_ROOT / "data", - harvester_path: Optional[Path] = None, - normalizers: Optional[ViccNormalizers] = None) -> None: + def __init__( + self, + data_dir: Path = APP_ROOT / "data", + harvester_path: Optional[Path] = None, + normalizers: Optional[ViccNormalizers] = None, + ) -> None: """Initialize Transform base class. :param Path data_dir: Path to source data directory @@ -212,15 +227,14 @@ def __init__(self, self.documents = [] # Cache for concepts that were unable to normalize. Set of source concept IDs - self.unable_to_normalize = { - "diseases": set(), - "therapeutics": set() - } + self.unable_to_normalize = {"diseases": set(), "therapeutics": set()} self.next_node_id = {} - self.evidence_level_to_vicc_concept_mapping = self._evidence_level_to_vicc_concept_mapping() # noqa: E501 + self.evidence_level_to_vicc_concept_mapping = ( + self._evidence_level_to_vicc_concept_mapping() + ) - async def transform(self, *args, **kwargs): + async def transform(self) -> None: """Transform harvested data to the Common Data Model.""" raise NotImplementedError @@ -230,21 +244,20 @@ def extract_harvester(self) -> Dict[str, List]: :return: Dict containing Lists of entries for each object type """ if self.harvester_path is None: - today = dt.strftime(dt.today(), DATE_FMT) + today = datetime.datetime.strftime( + datetime.datetime.now(tz=datetime.timezone.utc), DATE_FMT + ) default_fname = f"{self.name}_harvester_{today}.json" default_path = self.data_dir / "harvester" / default_fname if not default_path.exists(): - raise FileNotFoundError( - f"Unable to open harvest file under default filename: " - f"{default_path.absolute().as_uri()}" - ) + msg = f"Unable to open harvest file under default filename: {default_path.absolute().as_uri()}" + raise FileNotFoundError(msg) self.harvester_path = default_path else: if not self.harvester_path.exists(): - raise FileNotFoundError( - f"Unable to open harvester file: {self.harvester_path}" - ) - with open(self.harvester_path, "r") as f: + msg = f"Unable to open harvester file: {self.harvester_path}" + raise FileNotFoundError(msg) + with self.harvester_path.open() as f: return json.load(f) def _evidence_level_to_vicc_concept_mapping(self) -> Dict: @@ -259,7 +272,7 @@ def _evidence_level_to_vicc_concept_mapping(self) -> Dict: mappings[exact_mapping] = core_models.Coding( code=item.id.split(":")[-1], label=item.term, - system="https://go.osu.edu/evidence-codes" + system="https://go.osu.edu/evidence-codes", ) return mappings @@ -276,8 +289,7 @@ def _get_digest_for_str_lists(str_list: List[str]) -> str: @abstractmethod def _get_therapeutic_agent( - self, - therapy: Dict + self, therapy: 
Dict ) -> Optional[core_models.TherapeuticAgent]: """Get Therapeutic Agent representation for source therapy object @@ -291,7 +303,7 @@ def _get_therapeutic_substitute_group( self, therapeutic_sub_group_id: str, therapies: List[Dict], - therapy_interaction_type: str + therapy_interaction_type: str, ) -> Optional[core_models.TherapeuticSubstituteGroup]: """Get Therapeutic Substitute Group for therapies @@ -327,7 +339,7 @@ def _get_combination_therapy( ta = self._add_therapeutic_procedure( therapeutic_procedure_id, [therapy], - TherapeuticProcedureType.THERAPEUTIC_AGENT + TherapeuticProcedureType.THERAPEUTIC_AGENT, ) if not ta: return None @@ -336,22 +348,22 @@ def _get_combination_therapy( extensions = [ core_models.Extension( - name="moa_therapy_type" if source_name == "moa" else "civic_therapy_interaction_type", # noqa: E501 - value=therapy_interaction_type + name="moa_therapy_type" + if source_name == "moa" + else "civic_therapy_interaction_type", + value=therapy_interaction_type, ).model_dump(exclude_none=True) ] try: ct = core_models.CombinationTherapy( - id=combination_therapy_id, - components=components, - extensions=extensions + id=combination_therapy_id, components=components, extensions=extensions ).model_dump(exclude_none=True) except ValidationError as e: # if combination validation checks fail logger.debug( - "ValidationError raised when attempting to create CombinationTherapy: " - f"{e}" + "ValidationError raised when attempting to create CombinationTherapy: %s", + e, ) ct = None @@ -367,7 +379,7 @@ def _add_therapeutic_procedure( Union[ core_models.TherapeuticAgent, core_models.TherapeuticSubstituteGroup, - core_models.CombinationTherapy + core_models.CombinationTherapy, ] ]: """Create or get Therapeutic Procedure given therapies @@ -392,17 +404,19 @@ def _add_therapeutic_procedure( if therapeutic_procedure_id not in self.unable_to_normalize["therapeutics"]: if therapeutic_procedure_type == TherapeuticProcedureType.THERAPEUTIC_AGENT: tp = self._get_therapeutic_agent(therapies[0]) - elif therapeutic_procedure_type == TherapeuticProcedureType.THERAPEUTIC_SUBSTITUTE_GROUP: # noqa: E501 + elif ( + therapeutic_procedure_type + == TherapeuticProcedureType.THERAPEUTIC_SUBSTITUTE_GROUP + ): tp = self._get_therapeutic_substitute_group( - therapeutic_procedure_id, - therapies, - therapy_interaction_type + therapeutic_procedure_id, therapies, therapy_interaction_type ) - elif therapeutic_procedure_type == TherapeuticProcedureType.COMBINATION_THERAPY: # noqa: E501 + elif ( + therapeutic_procedure_type + == TherapeuticProcedureType.COMBINATION_THERAPY + ): tp = self._get_combination_therapy( - therapeutic_procedure_id, - therapies, - therapy_interaction_type + therapeutic_procedure_id, therapies, therapy_interaction_type ) else: # not supported @@ -417,8 +431,7 @@ def _add_therapeutic_procedure( @staticmethod def _get_therapy_normalizer_ext_data( - normalized_therapeutic_id: str, - therapy_norm_resp: NormalizedTherapy + normalized_therapeutic_id: str, therapy_norm_resp: NormalizedTherapy ) -> core_models.Extension: """Create extension containing relevant therapy-normalizer data @@ -431,14 +444,13 @@ def _get_therapy_normalizer_ext_data( name="therapy_normalizer_data", value={ "normalized_id": normalized_therapeutic_id, - "label": therapy_norm_resp.therapeutic_agent.label - } + "label": therapy_norm_resp.therapeutic_agent.label, + }, ) @staticmethod def _get_disease_normalizer_ext_data( - normalized_disease_id: str, - disease_norm_resp: NormalizedDisease + normalized_disease_id: str, 
disease_norm_resp: NormalizedDisease ) -> core_models.Extension: """Create extension containing relevant disease-normalizer data @@ -459,12 +471,13 @@ def _get_disease_normalizer_ext_data( value={ "normalized_id": normalized_disease_id, "label": disease_norm_resp.disease.label, - "mondo_id": mondo_id - } + "mondo_id": mondo_id, + }, ) - def create_json(self, transform_dir: Optional[Path] = None, - filename: Optional[str] = None) -> None: + def create_json( + self, transform_dir: Optional[Path] = None, filename: Optional[str] = None + ) -> None: """Create a composite JSON for transformed data. :param Optional[Path] transform_dir: Path to data directory for @@ -476,19 +489,21 @@ def create_json(self, transform_dir: Optional[Path] = None, transform_dir.mkdir(exist_ok=True, parents=True) composite_dict = { - 'studies': self.studies, - 'variations': self.variations, - 'molecular_profiles': self.molecular_profiles, - 'genes': self.genes, - 'therapeutics': self.therapeutics, - 'diseases': self.diseases, - 'methods': self.methods, - 'documents': self.documents + "studies": self.studies, + "variations": self.variations, + "molecular_profiles": self.molecular_profiles, + "genes": self.genes, + "therapeutics": self.therapeutics, + "diseases": self.diseases, + "methods": self.methods, + "documents": self.documents, } - today = dt.strftime(dt.today(), DATE_FMT) + today = datetime.datetime.strftime( + datetime.datetime.now(tz=datetime.timezone.utc), DATE_FMT + ) if filename is None: filename = f"{self.name}_cdm_{today}.json" out = transform_dir / filename - with open(out, 'w+') as f: + with out.open("w+") as f: json.dump(composite_dict, f, indent=4) diff --git a/src/metakb/transform/civic.py b/src/metakb/transform/civic.py index a29aa70f..41514c9d 100644 --- a/src/metakb/transform/civic.py +++ b/src/metakb/transform/civic.py @@ -1,9 +1,9 @@ """A module for to transform CIViC.""" -from enum import StrEnum -from typing import Optional, Dict, List, Tuple -from pathlib import Path import logging import re +from enum import StrEnum +from pathlib import Path +from typing import Dict, List, Optional, Tuple from ga4gh.core import core_models from ga4gh.vrs import models @@ -11,20 +11,20 @@ from metakb import APP_ROOT from metakb.normalizers import ViccNormalizers -from metakb.transform.base import ( - Transform, - MethodId, - CivicEvidenceLevel, - TherapeuticProcedureType -) from metakb.schemas.annotation import Direction, Document +from metakb.schemas.categorical_variation import ProteinSequenceConsequence from metakb.schemas.variation_statement import ( AlleleOrigin, VariantTherapeuticResponseStudy, VariantTherapeuticResponseStudyPredicate, - _VariantOncogenicityStudyQualifier + _VariantOncogenicityStudyQualifier, +) +from metakb.transform.base import ( + CivicEvidenceLevel, + MethodId, + TherapeuticProcedureType, + Transform, ) -from metakb.schemas.categorical_variation import ProteinSequenceConsequence logger = logging.getLogger(__name__) @@ -33,14 +33,40 @@ # Variant names that are known to not be supported in the variation-normalizer UNABLE_TO_NORMALIZE_VAR_NAMES = { - "mutation", "exon", "overexpression", - "frameshift", "promoter", "deletion", "type", "insertion", - "expression", "duplication", "copy", "underexpression", - "number", "variation", "repeat", "rearrangement", "activation", - "expression", "mislocalization", "translocation", "wild", - "polymorphism", "frame", "shift", "loss", "function", "levels", - "inactivation", "snp", "fusion", "dup", "truncation", - "homozygosity", "gain", 
"phosphorylation" + "mutation", + "exon", + "overexpression", + "frameshift", + "promoter", + "deletion", + "type", + "insertion", + "expression", + "duplication", + "copy", + "underexpression", + "number", + "variation", + "repeat", + "rearrangement", + "activation", + "mislocalization", + "translocation", + "wild", + "polymorphism", + "frame", + "shift", + "loss", + "function", + "levels", + "inactivation", + "snp", + "fusion", + "dup", + "truncation", + "homozygosity", + "gain", + "phosphorylation", } @@ -68,19 +94,21 @@ class SourcePrefix(StrEnum): class CivicTransform(Transform): """A class for transforming CIViC to the common data model.""" - def __init__(self, - data_dir: Path = APP_ROOT / "data", - harvester_path: Optional[Path] = None, - normalizers: Optional[ViccNormalizers] = None) -> None: + def __init__( + self, + data_dir: Path = APP_ROOT / "data", + harvester_path: Optional[Path] = None, + normalizers: Optional[ViccNormalizers] = None, + ) -> None: """Initialize CIViC Transform class. :param data_dir: Path to source data directory :param harvester_path: Path to previously harvested CIViC data :param normalizers: normalizer collection instance """ - super().__init__(data_dir=data_dir, - harvester_path=harvester_path, - normalizers=normalizers) + super().__init__( + data_dir=data_dir, harvester_path=harvester_path, normalizers=normalizers + ) # Method will always be the same self.methods = [self.methods_mapping[MethodId.CIVIC_EID_SOP.value]] @@ -92,7 +120,7 @@ def __init__(self, "molecular_profiles": {}, "diseases": {}, "therapeutics": {}, - "genes": {} + "genes": {}, } @staticmethod @@ -118,8 +146,11 @@ def _mp_to_variant_mapping(molecular_profiles: List[Dict]) -> Tuple[List, Dict]: supported_mps.append(mp) mp_id_to_v_id[mp_id] = mp_variant_ids[0] - logger.debug(f"{len(not_supported_mp_ids)} Molecular Profiles not supported: " - f"{not_supported_mp_ids}") + logger.debug( + "%s Molecular Profiles not supported: %s", + len(not_supported_mp_ids), + not_supported_mp_ids, + ) return supported_mps, mp_id_to_v_id async def transform(self) -> None: @@ -127,7 +158,7 @@ async def transform(self) -> None: results in instance variables. """ data = self.extract_harvester() - evidence_items = data['evidence'] + evidence_items = data["evidence"] # Get list of supported molecular profiles and mapping to variant id molecular_profiles, mp_id_to_v_id_mapping = self._mp_to_variant_mapping( @@ -136,13 +167,17 @@ async def transform(self) -> None: # Only want evidence with approved status and predictive evidence type evidence_items = [ - e for e in evidence_items + e + for e in evidence_items if e["status"] == "accepted" and e["evidence_type"].upper() == "PREDICTIVE" ] # Get all variant IDs from supported molecular profiles - vids = {mp_id_to_v_id_mapping[e["molecular_profile_id"]] - for e in evidence_items if e["molecular_profile_id"]} + vids = { + mp_id_to_v_id_mapping[e["molecular_profile_id"]] + for e in evidence_items + if e["molecular_profile_id"] + } # Add variant (only supported) and gene (all) data # (mutates `variations` and `genes`) @@ -163,8 +198,7 @@ async def transform(self) -> None: # Add variant therapeutic response study data. 
Will update `studies` self._add_variant_therapeutic_response_studies( - evidence_items, - mp_id_to_v_id_mapping + evidence_items, mp_id_to_v_id_mapping ) def _add_variant_therapeutic_response_studies( @@ -184,13 +218,13 @@ def _add_variant_therapeutic_response_studies( mp_id = f"civic.mpid:{r['molecular_profile_id']}" mp = self.able_to_normalize["molecular_profiles"].get(mp_id) if not mp: - logger.debug(f"mp_id not supported: {mp_id}") + logger.debug("mp_id not supported: %s", mp_id) continue variant_id = f"civic.vid:{mp_id_to_v_id_mapping[r['molecular_profile_id']]}" variation_gene_map = self.able_to_normalize["variations"].get(variant_id) if not variation_gene_map: - logger.debug("variant_id not supported: {variant_id}") + logger.debug("variant_id not supported: %s", variant_id) continue # Get predicate @@ -201,7 +235,7 @@ def _add_variant_therapeutic_response_studies( continue # Add disease - if not r['disease']: + if not r["disease"]: continue civic_disease = self._add_disease(r["disease"]) @@ -222,14 +256,18 @@ def _add_variant_therapeutic_response_studies( if therapy_interaction_type == "SUBSTITUTES": therapeutic_procedure_id = f"civic.tsgid:{therapeutic_digest}" - therapeutic_procedure_type = TherapeuticProcedureType.THERAPEUTIC_SUBSTITUTE_GROUP # noqa: E501 + therapeutic_procedure_type = ( + TherapeuticProcedureType.THERAPEUTIC_SUBSTITUTE_GROUP + ) elif therapy_interaction_type == "COMBINATION": therapeutic_procedure_id = f"civic.ctid:{therapeutic_digest}" - therapeutic_procedure_type = TherapeuticProcedureType.COMBINATION_THERAPY # noqa: E501 + therapeutic_procedure_type = ( + TherapeuticProcedureType.COMBINATION_THERAPY + ) else: logger.debug( - "civic therapy_interaction_type not supported: " - f"{therapy_interaction_type}" + "civic therapy_interaction_type not supported: %s", + therapy_interaction_type, ) continue @@ -237,13 +275,13 @@ def _add_variant_therapeutic_response_studies( therapeutic_procedure_id, therapies, therapeutic_procedure_type, - therapy_interaction_type + therapy_interaction_type, ) if not civic_therapeutic: continue # Add document - document = self._add_eid_document(r['source']) + document = self._add_eid_document(r["source"]) # Get strength direction = self._get_evidence_direction(r["evidence_direction"]) @@ -255,12 +293,11 @@ def _add_variant_therapeutic_response_studies( variation_gene_map["civic_gene_id"] ) qualifiers = self._get_variant_onco_study_qualifier( - r["variant_origin"], - civic_gene + r["variant_origin"], civic_gene ) statement = VariantTherapeuticResponseStudy( - id=r['name'].lower().replace('eid', 'civic.eid:'), + id=r["name"].lower().replace("eid", "civic.eid:"), description=r["description"] if r["description"] else None, direction=direction, strength=strength, @@ -270,14 +307,12 @@ def _add_variant_therapeutic_response_studies( tumorType=civic_disease, qualifiers=qualifiers, specifiedBy=self.methods[0], - isReportedIn=[document] + isReportedIn=[document], ).model_dump(exclude_none=True) self.studies.append(statement) def _get_variant_onco_study_qualifier( - self, - variant_origin: str, - gene: Optional[core_models.Gene] = None + self, variant_origin: str, gene: Optional[core_models.Gene] = None ) -> Optional[_VariantOncogenicityStudyQualifier]: """Get Variant Oncogenicity Study Qualifier @@ -296,8 +331,7 @@ def _get_variant_onco_study_qualifier( if allele_origin or gene: qualifier = _VariantOncogenicityStudyQualifier( - alleleOrigin=allele_origin, - geneContext=gene + alleleOrigin=allele_origin, geneContext=gene ) else: qualifier = None 
@@ -312,15 +346,12 @@ def _get_evidence_direction(self, direction: str) -> Optional[Direction]: direction_upper = direction.upper() if direction_upper == "SUPPORTS": return Direction.SUPPORTS - elif direction_upper == "DOES_NOT_SUPPORT": + if direction_upper == "DOES_NOT_SUPPORT": return Direction.REFUTES - else: - return Direction.NONE + return Direction.NONE def _get_predicate( - self, - record_type: str, - clin_sig: str + self, record_type: str, clin_sig: str ) -> Optional[VariantTherapeuticResponseStudyPredicate]: """Return predicate for an evidence item. @@ -331,16 +362,18 @@ def _get_predicate( predicate = None if record_type == "PREDICTIVE": - if clin_sig == 'SENSITIVITYRESPONSE': - predicate = VariantTherapeuticResponseStudyPredicate.PREDICTS_SENSITIVITY_TO # noqa: E501 - elif clin_sig == 'RESISTANCE': - predicate = VariantTherapeuticResponseStudyPredicate.PREDICTS_RESISTANCE_TO # noqa: E501 + if clin_sig == "SENSITIVITYRESPONSE": + predicate = ( + VariantTherapeuticResponseStudyPredicate.PREDICTS_SENSITIVITY_TO + ) + elif clin_sig == "RESISTANCE": + predicate = ( + VariantTherapeuticResponseStudyPredicate.PREDICTS_RESISTANCE_TO + ) return predicate def _add_protein_consequences( - self, - molecular_profiles: List[Dict], - mp_id_to_v_id_mapping: Dict + self, molecular_profiles: List[Dict], mp_id_to_v_id_mapping: Dict ) -> None: """Create Protein Sequence Consequence objects for all supported MP records. Mutates instance variables `able_to_normalize['molecular_profiles']` and @@ -363,7 +396,7 @@ def _add_protein_consequences( # Get aliases from MP and Variant record aliases = civic_variation_data["aliases"] or [] - for a in (mp["aliases"] or []): + for a in mp["aliases"] or []: if not SNP_RE.match(a): aliases.append(a) @@ -372,8 +405,7 @@ def _add_protein_consequences( if mp_score: extensions = [ core_models.Extension( - name="CIViC Molecular Profile Score", - value=mp_score + name="CIViC Molecular Profile Score", value=mp_score ) ] else: @@ -382,13 +414,14 @@ def _add_protein_consequences( # Get CIViC representative coordinate and Variant types data for ext_key, var_key in [ ("CIViC representative coordinate", "coordinates"), - ("Variant types", "variant_types") + ("Variant types", "variant_types"), ]: if civic_variation_data[var_key]: - extensions.append(core_models.Extension( - name=ext_key, - value=civic_variation_data[var_key] - )) + extensions.append( + core_models.Extension( + name=ext_key, value=civic_variation_data[var_key] + ) + ) psc = ProteinSequenceConsequence( id=mp_id, @@ -398,7 +431,7 @@ def _add_protein_consequences( aliases=list(set(aliases)) or None, mappings=civic_variation_data["mappings"], extensions=extensions or None, - members=civic_variation_data["members"] + members=civic_variation_data["members"], ).model_dump(exclude_none=True) self.molecular_profiles.append(psc) self.able_to_normalize["molecular_profiles"][mp_id] = psc @@ -414,8 +447,7 @@ def _get_variant_name(variant: Dict) -> str: if "c." 
in variant["name"]: variant_name = variant["name"] if "(" in variant_name: - variant_name = \ - variant_name.replace("(", "").replace(")", "") + variant_name = variant_name.replace("(", "").replace(")", "") variant_name = variant_name.split()[-1] else: variant_name = variant["name"] @@ -437,20 +469,21 @@ def _is_supported_variant_query(variant_name: str, variant_id: int) -> bool: vname_lower = variant_name.lower() if vname_lower.endswith("fs") or "-" in vname_lower or "/" in vname_lower: - logger.debug("Variation Normalizer does not support " - f"{variant_id}: {variant_name}") + logger.debug( + "Variation Normalizer does not support %s: %s", variant_id, variant_name + ) return False if set(vname_lower.split()) & UNABLE_TO_NORMALIZE_VAR_NAMES: - logger.debug("Variation Normalizer does not support " - f"{variant_id}: {variant_name}") + logger.debug( + "Variation Normalizer does not support %s: %s", variant_id, variant_name + ) return False return True async def _get_variation_members( - self, - variant: Dict + self, variant: Dict ) -> Optional[List[models.Variation]]: """Get members field for variation object. This is the related variant concepts. For now, we will only do genomic HGVS expressions @@ -495,8 +528,11 @@ async def _add_variations(self, variants: List[Dict]) -> None: # Couldn't find normalized concept if not vrs_variation: - logger.debug("Variation Normalizer unable to normalize " - f"{variant_id} using query {variant_query}") + logger.debug( + "Variation Normalizer unable to normalize %s using query %s", + variant_id, + variant_query, + ) continue # Create VRS Variation object @@ -519,7 +555,7 @@ async def _add_variations(self, variants: List[Dict]) -> None: core_models.Coding( code=vt["so_id"], system=f"{vt['url'].rsplit('/', 1)[0]}/", - label="_".join(vt["name"].lower().split()) + label="_".join(vt["name"].lower().split()), ) ) @@ -530,39 +566,45 @@ async def _add_variations(self, variants: List[Dict]) -> None: code=str(variant["id"]), system="https://civicdb.org/variants/", ), - relation=core_models.Relation.EXACT_MATCH + relation=core_models.Relation.EXACT_MATCH, ) ] if variant["allele_registry_id"]: - mappings.append(core_models.Mapping( - coding=core_models.Coding( - code=variant["allele_registry_id"], - system="https://reg.clinicalgenome.org/", - ), - relation=core_models.Relation.RELATED_MATCH - )) + mappings.append( + core_models.Mapping( + coding=core_models.Coding( + code=variant["allele_registry_id"], + system="https://reg.clinicalgenome.org/", + ), + relation=core_models.Relation.RELATED_MATCH, + ) + ) for ce in variant["clinvar_entries"]: - mappings.append(core_models.Mapping( - coding=core_models.Coding( - code=ce, - system="https://www.ncbi.nlm.nih.gov/clinvar/variation/", - ), - relation=core_models.Relation.RELATED_MATCH - )) + mappings.append( + core_models.Mapping( + coding=core_models.Coding( + code=ce, + system="https://www.ncbi.nlm.nih.gov/clinvar/variation/", + ), + relation=core_models.Relation.RELATED_MATCH, + ) + ) aliases = [] for a in variant["variant_aliases"]: if SNP_RE.match(a): a = a.lower() - mappings.append(core_models.Mapping( - coding=core_models.Coding( - code=a, - system="https://www.ncbi.nlm.nih.gov/snp/", - ), - relation=core_models.Relation.RELATED_MATCH - )) + mappings.append( + core_models.Mapping( + coding=core_models.Coding( + code=a, + system="https://www.ncbi.nlm.nih.gov/snp/", + ), + relation=core_models.Relation.RELATED_MATCH, + ) + ) else: aliases.append(a) @@ -582,7 +624,7 @@ async def _add_variations(self, variants: 
List[Dict]) -> None: mappings=mappings or None, aliases=aliases or None, coordinates=coordinates or None, - members=members + members=members, ).model_dump() def _get_expressions(self, variant: Dict) -> List[models.Expression]: @@ -592,19 +634,16 @@ def _get_expressions(self, variant: Dict) -> List[models.Expression]: :return: A list of expressions """ expressions = [] - for hgvs_expr in variant['hgvs_expressions']: - if ':g.' in hgvs_expr: + for hgvs_expr in variant["hgvs_expressions"]: + if ":g." in hgvs_expr: syntax = models.Syntax.HGVS_G - elif ':c.' in hgvs_expr: + elif ":c." in hgvs_expr: syntax = models.Syntax.HGVS_C else: syntax = models.Syntax.HGVS_P - if hgvs_expr != 'N/A': - expressions.append(models.Expression( - syntax=syntax, - value=hgvs_expr - )) + if hgvs_expr != "N/A": + expressions.append(models.Expression(syntax=syntax, value=hgvs_expr)) return expressions def _add_genes(self, genes: List[Dict]) -> None: @@ -617,36 +656,39 @@ def _add_genes(self, genes: List[Dict]) -> None: for gene in genes: gene_id = f"civic.gid:{gene['id']}" ncbigene = f"ncbigene:{gene['entrez_id']}" - queries = [ncbigene, gene['name']] + gene['aliases'] + queries = [ncbigene, gene["name"]] + gene["aliases"] - _, normalized_gene_id = \ - self.vicc_normalizers.normalize_gene(queries) + _, normalized_gene_id = self.vicc_normalizers.normalize_gene(queries) if normalized_gene_id: civic_gene = core_models.Gene( id=gene_id, label=gene["name"], - description=gene['description'] if gene['description'] else None, + description=gene["description"] if gene["description"] else None, mappings=[ core_models.Mapping( coding=core_models.Coding( code=f"ncbigene:{gene['entrez_id']}", - system="https://www.ncbi.nlm.nih.gov/gene/" + system="https://www.ncbi.nlm.nih.gov/gene/", ), - relation=core_models.Relation.EXACT_MATCH + relation=core_models.Relation.EXACT_MATCH, ) ], aliases=gene["aliases"] if gene["aliases"] else None, - extensions=[core_models.Extension( - name="gene_normalizer_id", - value=normalized_gene_id - )] + extensions=[ + core_models.Extension( + name="gene_normalizer_id", value=normalized_gene_id + ) + ], ).model_dump(exclude_none=True) self.able_to_normalize["genes"][gene_id] = civic_gene self.genes.append(civic_gene) else: - logger.debug(f"Gene Normalizer unable to normalize {gene_id}" - f"using queries: {queries}") + logger.debug( + "Gene Normalizer unable to normalize %s using queries: %s", + gene_id, + queries, + ) def _add_disease(self, disease: Dict) -> Optional[core_models.Disease]: """Create or get disease given CIViC disease. 
@@ -659,19 +701,18 @@ def _add_disease(self, disease: Dict) -> Optional[core_models.Disease]: :return: Disease object if disease-normalizer was able to normalize """ disease_id = f"civic.did:{disease['id']}" - vrs_disease = self.able_to_normalize['diseases'].get(disease_id) + vrs_disease = self.able_to_normalize["diseases"].get(disease_id) if vrs_disease: return vrs_disease - else: - vrs_disease = None - if disease_id not in self.unable_to_normalize['diseases']: - vrs_disease = self._get_disease(disease) - if vrs_disease: - self.able_to_normalize['diseases'][disease_id] = vrs_disease - self.diseases.append(vrs_disease) - else: - self.unable_to_normalize['diseases'].add(disease_id) - return vrs_disease + vrs_disease = None + if disease_id not in self.unable_to_normalize["diseases"]: + vrs_disease = self._get_disease(disease) + if vrs_disease: + self.able_to_normalize["diseases"][disease_id] = vrs_disease + self.diseases.append(vrs_disease) + else: + self.unable_to_normalize["diseases"].add(disease_id) + return vrs_disease def _get_disease(self, disease: Dict) -> Optional[Dict]: """Get core_models.Disease object for a CIViC disease @@ -681,30 +722,37 @@ def _get_disease(self, disease: Dict) -> Optional[Dict]: Otherwise, `None` """ disease_id = f"civic.did:{disease['id']}" - display_name = disease['display_name'] - doid = disease['doid'] + display_name = disease["display_name"] + doid = disease["doid"] mappings = [] if not doid: - logger.debug(f"{disease_id} ({display_name}) has null DOID") + logger.debug("%s (%s) has null DOID", disease_id, display_name) queries = [display_name] else: doid = f"DOID:{doid}" queries = [doid, display_name] - mappings.append(core_models.Mapping( - coding=core_models.Coding( - code=doid, - system="https://www.disease-ontology.org/", - ), - relation=core_models.Relation.EXACT_MATCH - )) + mappings.append( + core_models.Mapping( + coding=core_models.Coding( + code=doid, + system="https://www.disease-ontology.org/", + ), + relation=core_models.Relation.EXACT_MATCH, + ) + ) - disease_norm_resp, normalized_disease_id = \ - self.vicc_normalizers.normalize_disease(queries) + ( + disease_norm_resp, + normalized_disease_id, + ) = self.vicc_normalizers.normalize_disease(queries) if not normalized_disease_id: - logger.debug(f"Disease Normalizer unable to normalize: " - f"{disease_id} using queries {queries}") + logger.debug( + "Disease Normalizer unable to normalize: %s using queries %s", + disease_id, + queries, + ) return None return core_models.Disease( @@ -713,17 +761,16 @@ def _get_disease(self, disease: Dict) -> Optional[Dict]: mappings=mappings if mappings else None, extensions=[ self._get_disease_normalizer_ext_data( - normalized_disease_id, - disease_norm_resp + normalized_disease_id, disease_norm_resp ), - ] + ], ).model_dump(exclude_none=True) def _get_therapeutic_substitute_group( self, therapeutic_sub_group_id: str, therapies: List[Dict], - therapy_interaction_type: str + therapy_interaction_type: str, ) -> Optional[core_models.TherapeuticSubstituteGroup]: """Get Therapeutic Substitute Group for CIViC therapies @@ -740,7 +787,7 @@ def _get_therapeutic_substitute_group( ta = self._add_therapeutic_procedure( therapeutic_procedure_id, [therapy], - TherapeuticProcedureType.THERAPEUTIC_AGENT + TherapeuticProcedureType.THERAPEUTIC_AGENT, ) if not ta: return None @@ -749,8 +796,7 @@ def _get_therapeutic_substitute_group( extensions = [ core_models.Extension( - name="civic_therapy_interaction_type", - value=therapy_interaction_type + 
name="civic_therapy_interaction_type", value=therapy_interaction_type ).model_dump(exclude_none=True) ] @@ -758,19 +804,21 @@ def _get_therapeutic_substitute_group( tsg = core_models.TherapeuticSubstituteGroup( id=therapeutic_sub_group_id, substitutes=substitutes, - extensions=extensions + extensions=extensions, ).model_dump(exclude_none=True) except ValidationError as e: # If substitutes validation checks fail logger.debug( - "ValidationError raised when attempting to create " - f"TherapeuticSubstituteGroup: {e}" + "ValidationError raised when attempting to create TherapeuticSubstituteGroup: %s", + {e}, ) tsg = None return tsg - def _get_therapeutic_agent(self, therapy: Dict) -> Optional[core_models.TherapeuticAgent]: # noqa: E501 + def _get_therapeutic_agent( + self, therapy: Dict + ) -> Optional[core_models.TherapeuticAgent]: """Get Therapeutic Agent for CIViC therapy :param therapy: CIViC therapy object @@ -783,31 +831,38 @@ def _get_therapeutic_agent(self, therapy: Dict) -> Optional[core_models.Therapeu mappings = [] if ncit_id: queries = [f"ncit:{ncit_id}", label] - mappings.append(core_models.Mapping( - coding=core_models.Coding( - code=ncit_id, - system="https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&code=" # noqa: E501 - ), - relation=core_models.Relation.EXACT_MATCH - )) + mappings.append( + core_models.Mapping( + coding=core_models.Coding( + code=ncit_id, + system="https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&code=", + ), + relation=core_models.Relation.EXACT_MATCH, + ) + ) else: queries = [label] - therapy_norm_resp, normalized_therapeutic_id = \ - self.vicc_normalizers.normalize_therapy(queries) + ( + therapy_norm_resp, + normalized_therapeutic_id, + ) = self.vicc_normalizers.normalize_therapy(queries) if not normalized_therapeutic_id: - logger.debug(f"Therapy Normalizer unable to normalize: " - f"using queries ncit:{ncit_id} and {label}") + logger.debug( + "Therapy Normalizer unable to normalize: using queries ncit:%s and %s", + ncit_id, + label, + ) return None - regulatory_approval_extension = \ + regulatory_approval_extension = ( self.vicc_normalizers.get_regulatory_approval_extension(therapy_norm_resp) + ) extensions = [ self._get_therapy_normalizer_ext_data( - normalized_therapeutic_id, - therapy_norm_resp + normalized_therapeutic_id, therapy_norm_resp ), ] @@ -819,7 +874,7 @@ def _get_therapeutic_agent(self, therapy: Dict) -> Optional[core_models.Therapeu label=label, mappings=mappings if mappings else None, aliases=therapy["aliases"] if therapy["aliases"] else None, - extensions=extensions + extensions=extensions, ).model_dump(exclude_none=True) def _add_eid_document(self, source: Dict) -> Optional[Document]: @@ -829,7 +884,7 @@ def _add_eid_document(self, source: Dict) -> Optional[Document]: :param source: An evidence item's source :return: Document for Evidence Item if source type is supported """ - source_type = source['source_type'].upper() + source_type = source["source_type"].upper() if source_type in SourcePrefix.__members__: document = Document( id=f"civic.source:{source['id']}", @@ -843,7 +898,7 @@ def _add_eid_document(self, source: Dict) -> Optional[Document]: if document not in self.documents: self.documents.append(document) else: - logger.debug(f"{source_type} not in schemas.SourcePrefix") + logger.debug("%s not in schemas.SourcePrefix", source_type) document = None return document diff --git a/src/metakb/transform/moa.py b/src/metakb/transform/moa.py index e141a8a3..22f30b8f 100644 --- 
a/src/metakb/transform/moa.py +++ b/src/metakb/transform/moa.py @@ -1,29 +1,30 @@ """A module to convert MOA resources to common data model""" -from pathlib import Path -from typing import Optional, List, Dict + +import json import logging +from pathlib import Path +from typing import Dict, List, Optional from urllib.parse import quote -import json from ga4gh.core import core_models, sha512t24u from ga4gh.vrs import models -from metakb import APP_ROOT # noqa: I202 +from metakb import APP_ROOT from metakb.normalizers import ViccNormalizers -from metakb.transform.base import ( - Transform, - MethodId, - MoaEvidenceLevel, - TherapeuticProcedureType -) -from metakb.schemas.annotation import Document, Direction +from metakb.schemas.annotation import Direction, Document +from metakb.schemas.categorical_variation import ProteinSequenceConsequence from metakb.schemas.variation_statement import ( AlleleOrigin, VariantTherapeuticResponseStudy, VariantTherapeuticResponseStudyPredicate, - _VariantOncogenicityStudyQualifier + _VariantOncogenicityStudyQualifier, +) +from metakb.transform.base import ( + MethodId, + MoaEvidenceLevel, + TherapeuticProcedureType, + Transform, ) -from metakb.schemas.categorical_variation import ProteinSequenceConsequence logger = logging.getLogger(__name__) @@ -31,10 +32,12 @@ class MoaTransform(Transform): """A class for transforming MOA resources to common data model.""" - def __init__(self, - data_dir: Path = APP_ROOT / "data", - harvester_path: Optional[Path] = None, - normalizers: Optional[ViccNormalizers] = None) -> None: + def __init__( + self, + data_dir: Path = APP_ROOT / "data", + harvester_path: Optional[Path] = None, + normalizers: Optional[ViccNormalizers] = None, + ) -> None: """Initialize MOAlmanac Transform class. :param data_dir: Path to source data directory @@ -42,9 +45,7 @@ def __init__(self, :param normalizers: normalizer collection instance """ super().__init__( - data_dir=data_dir, - harvester_path=harvester_path, - normalizers=normalizers + data_dir=data_dir, harvester_path=harvester_path, normalizers=normalizers ) # Method will always be the same @@ -57,7 +58,7 @@ def __init__(self, "diseases": {}, "therapeutics": {}, "genes": {}, - "documents": {} + "documents": {}, } async def transform(self) -> None: @@ -76,8 +77,7 @@ async def transform(self) -> None: await self._add_variant_therapeutic_response_studies(data["assertions"]) async def _add_variant_therapeutic_response_studies( - self, - assertions: List[Dict] + self, assertions: List[Dict] ) -> None: """Create Variant Therapeutic Response Studies from MOA assertions. Will add associated values to instances variables (`therapeutics`, `diseases`, @@ -94,31 +94,41 @@ async def _add_variant_therapeutic_response_studies( variation_gene_map = self.able_to_normalize["variations"].get(variant_id) if not variation_gene_map: logger.debug( - f"{assertion_id} has no variation for variant_id {variant_id}" + "%s has no variation for variant_id %s", assertion_id, variant_id ) continue # Get predicate. 
We only support therapeutic resistance/sensitivity if record["clinical_significance"] == "resistance": - predicate = VariantTherapeuticResponseStudyPredicate.PREDICTS_RESISTANCE_TO # noqa: E501 + predicate = ( + VariantTherapeuticResponseStudyPredicate.PREDICTS_RESISTANCE_TO + ) elif record["clinical_significance"] == "sensitivity": - predicate = VariantTherapeuticResponseStudyPredicate.PREDICTS_SENSITIVITY_TO # noqa: E501 + predicate = ( + VariantTherapeuticResponseStudyPredicate.PREDICTS_SENSITIVITY_TO + ) else: logger.debug( - "clinical_significance not supported: " - f"{record['clinical_significance']}" + "clinical_significance not supported: %s", + record["clinical_significance"], ) continue # Get strength - predictive_implication = record["predictive_implication"].strip().replace(" ", "_").replace("-", "_").upper() # noqa: E501 + predictive_implication = ( + record["predictive_implication"] + .strip() + .replace(" ", "_") + .replace("-", "_") + .upper() + ) moa_evidence_level = MoaEvidenceLevel[predictive_implication] strength = self.evidence_level_to_vicc_concept_mapping[moa_evidence_level] # Add therapeutic agent. We only support one therapy, so we will skip others therapy_name = record["therapy_name"] if not therapy_name: - logger.debug(f"{assertion_id} has no therapy_name") + logger.debug("%s has no therapy_name", assertion_id) continue therapy_interaction_type = record["therapy_type"] @@ -129,9 +139,11 @@ async def _add_variant_therapeutic_response_studies( "COMBINATION THERAPY", "IMMUNOTHERAPY", "RADIATION THERAPY", - "TARGETED THERAPY" + "TARGETED THERAPY", }: - therapeutic_procedure_type = TherapeuticProcedureType.COMBINATION_THERAPY # noqa: E501 + therapeutic_procedure_type = ( + TherapeuticProcedureType.COMBINATION_THERAPY + ) else: # skipping HORMONE and CHEMOTHERAPY for now continue @@ -150,21 +162,23 @@ async def _add_variant_therapeutic_response_studies( therapeutic_procedure_id, therapies, therapeutic_procedure_type, - therapy_interaction_type + therapy_interaction_type, ) if not moa_therapeutic: logger.debug( - f"{assertion_id} has no therapeutic agent for therapy_name " - f"{therapy_name}" + "%s has no therapeutic agent for therapy_name %s", + assertion_id, + therapy_name, ) continue # Add disease moa_disease = self._add_disease(record["disease"]) if not moa_disease: - logger.debug(f"{assertion_id} has no disease for disease " - f"{record['disease']}") + logger.debug( + "%s has no disease for disease %s", assertion_id, record["disease"] + ) continue # Add document @@ -187,7 +201,7 @@ async def _add_variant_therapeutic_response_studies( tumorType=moa_disease, qualifiers=qualifiers, specifiedBy=self.methods[0], - isReportedIn=[document] + isReportedIn=[document], ).model_dump(exclude_none=True) self.studies.append(statement) @@ -210,8 +224,7 @@ def _get_variant_onco_study_qualifier( if allele_origin or gene: qualifier = _VariantOncogenicityStudyQualifier( - alleleOrigin=allele_origin, - geneContext=gene + alleleOrigin=allele_origin, geneContext=gene ) else: qualifier = None @@ -231,21 +244,28 @@ async def _add_protein_consequences(self, variants: List[Dict]) -> None: # Skipping Fusion + Translocation + rearrangements that variation normalizer # does not support if "rearrangement_type" in variant: - logger.debug(f"Variation Normalizer does not support {moa_variant_id}:" - f" {feature}") + logger.debug( + "Variation Normalizer does not support %s: %s", + moa_variant_id, + feature, + ) continue # Gene is required to form query gene = variant.get("gene") if not gene: - 
logger.debug(f"Variation Normalizer does not support {moa_variant_id}: " - f"{feature} (no gene provided)") + logger.debug( + "Variation Normalizer does not support %s: %s (no gene provided)", + moa_variant_id, + feature, + ) continue moa_gene = self.able_to_normalize["genes"].get(quote(gene)) if not moa_gene: - logger.debug(f"moa.variant:{variant_id} has no gene for " - f"gene, {gene}") + logger.debug( + "moa.variant:%s has no gene for gene, %s", variant_id, gene + ) continue # Form query and run through variation-normalizer @@ -257,12 +277,18 @@ async def _add_protein_consequences(self, variants: List[Dict]) -> None: vrs_variation = await self.vicc_normalizers.normalize_variation([query]) if not vrs_variation: - logger.debug("Variation Normalizer unable to normalize: " - f"moa.variant:{variant_id} using query: {query}") + logger.debug( + "Variation Normalizer unable to normalize: moa.variant: %s using query: %s", + variant_id, + query, + ) continue else: - logger.debug("Variation Normalizer does not support " - f"{moa_variant_id}: {feature}") + logger.debug( + "Variation Normalizer does not support %s: %s", + moa_variant_id, + feature, + ) continue # Create VRS Variation object @@ -273,13 +299,19 @@ async def _add_protein_consequences(self, variants: List[Dict]) -> None: # Add MOA representative coordinate data to extensions coordinates_keys = [ - "chromosome", "start_position", "end_position", "reference_allele", - "alternate_allele", "cdna_change", "protein_change", "exon" + "chromosome", + "start_position", + "end_position", + "reference_allele", + "alternate_allele", + "cdna_change", + "protein_change", + "exon", ] extensions = [ core_models.Extension( name="MOA representative coordinate", - value={k: variant[k] for k in coordinates_keys} + value={k: variant[k] for k in coordinates_keys}, ) ] @@ -290,30 +322,32 @@ async def _add_protein_consequences(self, variants: List[Dict]) -> None: code=str(variant_id), system="https://moalmanac.org/api/features/", ), - relation=core_models.Relation.EXACT_MATCH + relation=core_models.Relation.EXACT_MATCH, ) ] if variant["rsid"]: - mappings.append(core_models.Mapping( - coding=core_models.Coding( - code=variant["rsid"], - system="https://www.ncbi.nlm.nih.gov/snp/" - ), - relation=core_models.Relation.RELATED_MATCH - )) + mappings.append( + core_models.Mapping( + coding=core_models.Coding( + code=variant["rsid"], + system="https://www.ncbi.nlm.nih.gov/snp/", + ), + relation=core_models.Relation.RELATED_MATCH, + ) + ) psc = ProteinSequenceConsequence( id=moa_variant_id, label=feature, definingContext=moa_variation.root, mappings=mappings or None, - extensions=extensions + extensions=extensions, ).model_dump(exclude_none=True) self.able_to_normalize["variations"][variant_id] = { "psc": psc, - "moa_gene": moa_gene + "moa_gene": moa_gene, } self.variations.append(psc) @@ -330,15 +364,16 @@ def _add_genes(self, genes: List[str]) -> None: moa_gene = core_models.Gene( id=f"moa.normalize.gene:{quote(gene)}", label=gene, - extensions=[core_models.Extension( - name="gene_normalizer_id", - value=normalized_gene_id - )] + extensions=[ + core_models.Extension( + name="gene_normalizer_id", value=normalized_gene_id + ) + ], ).model_dump(exclude_none=True) self.able_to_normalize["genes"][quote(gene)] = moa_gene self.genes.append(moa_gene) else: - logger.debug(f"Gene Normalizer unable to normalize: {gene}") + logger.debug("Gene Normalizer unable to normalize: %s", gene) def _add_documents(self, sources: List) -> None: """Create document objects for all MOA sources. 
@@ -354,9 +389,9 @@ def _add_documents(self, sources: List) -> None: core_models.Mapping( coding=core_models.Coding( code=source["nct"], - system="https://clinicaltrials.gov/search?term=" + system="https://clinicaltrials.gov/search?term=", ), - relation=core_models.Relation.EXACT_MATCH + relation=core_models.Relation.EXACT_MATCH, ) ] else: @@ -369,9 +404,9 @@ def _add_documents(self, sources: List) -> None: pmid=source["pmid"] if source["pmid"] else None, doi=source["doi"] if source["doi"] else None, mappings=mappings, - extensions=[core_models.Extension( - name="source_type", value=source["type"] - )] + extensions=[ + core_models.Extension(name="source_type", value=source["type"]) + ], ).model_dump(exclude_none=True) self.able_to_normalize["documents"][source_id] = document self.documents.append(document) @@ -380,7 +415,7 @@ def _get_therapeutic_substitute_group( self, therapeutic_sub_group_id: str, therapies: List[Dict], - therapy_interaction_type: str + therapy_interaction_type: str, ) -> None: """MOA does not support therapeutic substitute group @@ -398,22 +433,24 @@ def _get_therapeutic_agent(self, label: str) -> Optional[Dict]: :return: If able to normalize therapy, returns therapeutic agent represented as a dict """ - therapy_norm_resp, normalized_therapeutic_id = \ - self.vicc_normalizers.normalize_therapy([label]) + ( + therapy_norm_resp, + normalized_therapeutic_id, + ) = self.vicc_normalizers.normalize_therapy([label]) if not normalized_therapeutic_id: - logger.debug(f"Therapy Normalizer unable to normalize: {label}") + logger.debug("Therapy Normalizer unable to normalize: %s", label) return None extensions = [ self._get_therapy_normalizer_ext_data( - normalized_therapeutic_id, - therapy_norm_resp + normalized_therapeutic_id, therapy_norm_resp ), ] - regulatory_approval_extension = \ + regulatory_approval_extension = ( self.vicc_normalizers.get_regulatory_approval_extension(therapy_norm_resp) + ) if regulatory_approval_extension: extensions.append(regulatory_approval_extension) @@ -421,7 +458,7 @@ def _get_therapeutic_agent(self, label: str) -> Optional[Dict]: return core_models.TherapeuticAgent( id=f"moa.{therapy_norm_resp.therapeutic_agent.id}", label=label, - extensions=extensions + extensions=extensions, ).model_dump(exclude_none=True) def _add_disease(self, disease: Dict) -> Optional[Dict]: @@ -448,16 +485,15 @@ def _add_disease(self, disease: Dict) -> Optional[Dict]: vrs_disease = self.able_to_normalize["diseases"].get(disease_id) if vrs_disease: return vrs_disease - else: - vrs_disease = None - if disease_id not in self.unable_to_normalize["diseases"]: - vrs_disease = self._get_disease(disease) - if vrs_disease: - self.able_to_normalize["diseases"][disease_id] = vrs_disease - self.diseases.append(vrs_disease) - else: - self.unable_to_normalize["diseases"].add(disease_id) - return vrs_disease + vrs_disease = None + if disease_id not in self.unable_to_normalize["diseases"]: + vrs_disease = self._get_disease(disease) + if vrs_disease: + self.able_to_normalize["diseases"][disease_id] = vrs_disease + self.diseases.append(vrs_disease) + else: + self.unable_to_normalize["diseases"].add(disease_id) + return vrs_disease def _get_disease(self, disease: Dict) -> Optional[Dict]: """Get core_models.Disease object for a MOA disease @@ -472,14 +508,16 @@ def _get_disease(self, disease: Dict) -> Optional[Dict]: ot_code = disease["oncotree_code"] ot_term = disease["oncotree_term"] if ot_code: - mappings.append(core_models.Mapping( - coding=core_models.Coding( - code=ot_code, - 
system="https://oncotree.mskcc.org/", - label=ot_term - ), - relation=core_models.Relation.EXACT_MATCH - )) + mappings.append( + core_models.Mapping( + coding=core_models.Coding( + code=ot_code, + system="https://oncotree.mskcc.org/", + label=ot_term, + ), + relation=core_models.Relation.EXACT_MATCH, + ) + ) queries.append(f"oncotree:{disease['oncotree_code']}") disease_name = disease["name"] @@ -489,11 +527,13 @@ def _get_disease(self, disease: Dict) -> Optional[Dict]: if disease_name: queries.append(disease_name) - disease_norm_resp, normalized_disease_id = \ - self.vicc_normalizers.normalize_disease(queries) + ( + disease_norm_resp, + normalized_disease_id, + ) = self.vicc_normalizers.normalize_disease(queries) if not normalized_disease_id: - logger.debug(f"Disease Normalizer unable to normalize: {queries}") + logger.debug("Disease Normalizer unable to normalize: %s", queries) return None return core_models.Disease( @@ -502,8 +542,7 @@ def _get_disease(self, disease: Dict) -> Optional[Dict]: mappings=mappings if mappings else None, extensions=[ self._get_disease_normalizer_ext_data( - normalized_disease_id, - disease_norm_resp + normalized_disease_id, disease_norm_resp ), - ] + ], ).model_dump(exclude_none=True) diff --git a/tests/conftest.py b/tests/conftest.py index 25d2edae..81197e35 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,8 +1,8 @@ """Module for pytest fixtures.""" +import json from copy import deepcopy + import pytest -import os -import json from metakb.normalizers import ViccNormalizers @@ -14,10 +14,7 @@ def cetuximab_extensions(): { "type": "Extension", "name": "therapy_normalizer_data", - "value": { - "normalized_id": "rxcui:318341", - "label": "cetuximab" - } + "value": {"normalized_id": "rxcui:318341", "label": "cetuximab"}, }, { "type": "Extension", @@ -32,9 +29,9 @@ def cetuximab_extensions(): "mappings": [ { "coding": {"code": "C3262", "system": "ncit"}, - "relation": "relatedMatch" + "relation": "relatedMatch", } - ] + ], }, { "id": "mesh:D015179", @@ -43,9 +40,9 @@ def cetuximab_extensions(): "mappings": [ { "coding": {"code": "C2956", "system": "ncit"}, - "relation": "relatedMatch" + "relation": "relatedMatch", } - ] + ], }, { "id": "mesh:D006258", @@ -54,9 +51,9 @@ def cetuximab_extensions(): "mappings": [ { "coding": {"code": "C4013", "system": "ncit"}, - "relation": "relatedMatch" + "relation": "relatedMatch", } - ] + ], }, { "id": "mesh:D002294", @@ -65,13 +62,13 @@ def cetuximab_extensions(): "mappings": [ { "coding": {"code": "C2929", "system": "ncit"}, - "relation": "relatedMatch" + "relation": "relatedMatch", } - ] - } - ] - } - } + ], + }, + ], + }, + }, ] @@ -82,10 +79,7 @@ def encorafenib_extensions(): { "type": "Extension", "name": "therapy_normalizer_data", - "value": { - "normalized_id": "rxcui:2049106", - "label": "encorafenib" - } + "value": {"normalized_id": "rxcui:2049106", "label": "encorafenib"}, }, { "type": "Extension", @@ -100,9 +94,9 @@ def encorafenib_extensions(): "mappings": [ { "coding": {"code": "C3224", "system": "ncit"}, - "relation": "relatedMatch" + "relation": "relatedMatch", } - ] + ], }, { "id": "mesh:D009369", @@ -111,13 +105,13 @@ def encorafenib_extensions(): "mappings": [ { "coding": {"code": "C3262", "system": "ncit"}, - "relation": "relatedMatch" + "relation": "relatedMatch", } - ] - } - ] - } - } + ], + }, + ], + }, + }, ] @@ -127,7 +121,7 @@ def civic_mpid33(civic_vid33): return { "id": "civic.mpid:33", "type": "ProteinSequenceConsequence", - "description": "EGFR L858R has long been recognized as a 
functionally significant mutation in cancer, and is one of the most prevalent single mutations in lung cancer. Best described in non-small cell lung cancer (NSCLC), the mutation seems to confer sensitivity to first and second generation TKI's like gefitinib and neratinib. NSCLC patients with this mutation treated with TKI's show increased overall and progression-free survival, as compared to chemotherapy alone. Third generation TKI's are currently in clinical trials that specifically focus on mutant forms of EGFR, a few of which have shown efficacy in treating patients that failed to respond to earlier generation TKI therapies.", # noqa: E501 + "description": "EGFR L858R has long been recognized as a functionally significant mutation in cancer, and is one of the most prevalent single mutations in lung cancer. Best described in non-small cell lung cancer (NSCLC), the mutation seems to confer sensitivity to first and second generation TKI's like gefitinib and neratinib. NSCLC patients with this mutation treated with TKI's show increased overall and progression-free survival, as compared to chemotherapy alone. Third generation TKI's are currently in clinical trials that specifically focus on mutant forms of EGFR, a few of which have shown efficacy in treating patients that failed to respond to earlier generation TKI therapies.", "label": "EGFR L858R", "definingContext": civic_vid33, "members": [ @@ -142,15 +136,12 @@ def civic_mpid33(civic_vid33): "type": "SequenceLocation", "sequenceReference": { "type": "SequenceReference", - "refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul" + "refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul", }, "start": 55191821, - "end": 55191822 + "end": 55191822, }, - "state": { - "type": "LiteralSequenceExpression", - "sequence": "G" - } + "state": {"type": "LiteralSequenceExpression", "sequence": "G"}, } ], "aliases": ["LEU858ARG"], @@ -158,42 +149,42 @@ def civic_mpid33(civic_vid33): { "coding": { "code": "CA126713", - "system": "https://reg.clinicalgenome.org/" + "system": "https://reg.clinicalgenome.org/", }, - "relation": "relatedMatch" + "relation": "relatedMatch", }, { "coding": { "code": "16609", - "system": "https://www.ncbi.nlm.nih.gov/clinvar/variation/" + "system": "https://www.ncbi.nlm.nih.gov/clinvar/variation/", }, - "relation": "relatedMatch" + "relation": "relatedMatch", }, { "coding": { "code": "376282", - "system": "https://www.ncbi.nlm.nih.gov/clinvar/variation/" + "system": "https://www.ncbi.nlm.nih.gov/clinvar/variation/", }, - "relation": "relatedMatch" + "relation": "relatedMatch", }, { "coding": { "code": "376280", - "system": "https://www.ncbi.nlm.nih.gov/clinvar/variation/" + "system": "https://www.ncbi.nlm.nih.gov/clinvar/variation/", }, - "relation": "relatedMatch" + "relation": "relatedMatch", }, { "coding": { "code": "rs121434568", - "system": "https://www.ncbi.nlm.nih.gov/snp/" + "system": "https://www.ncbi.nlm.nih.gov/snp/", }, - "relation": "relatedMatch" + "relation": "relatedMatch", }, { "coding": {"code": "33", "system": "https://civicdb.org/variants/"}, - "relation": "exactMatch" - } + "relation": "exactMatch", + }, ], "extensions": [ { @@ -207,38 +198,35 @@ def civic_mpid33(civic_vid33): "representative_transcript": "ENST00000275493.2", "ensembl_version": 75, "reference_build": "GRCh37", - "type": "coordinates" + "type": "coordinates", }, - "type": "Extension" + "type": "Extension", }, { "name": "CIViC Molecular Profile Score", "value": 379.0, - "type": "Extension" + "type": "Extension", }, { "name": "Variant 
types", "value": [ { "code": "SO:0001583", - "system": "http://www.sequenceontology.org/browser/current_svn/term/", # noqa: E501 + "system": "http://www.sequenceontology.org/browser/current_svn/term/", "label": "missense_variant", - "version": None + "version": None, } ], - "type": "Extension" - } - ] + "type": "Extension", + }, + ], } @pytest.fixture(scope="session") def civic_eid2997_qualifier(civic_gid19): """Create qualifier for civic eid 2997""" - return { - "alleleOrigin": "somatic", - "geneContext": civic_gid19 - } + return {"alleleOrigin": "somatic", "geneContext": civic_gid19} @pytest.fixture(scope="session") @@ -249,25 +237,29 @@ def civic_source592(): "label": "Dungo et al., 2013", "title": "Afatinib: first global approval.", "pmid": 23982599, - "type": "Document" + "type": "Document", } @pytest.fixture(scope="session") def civic_eid2997_study( - civic_mpid33, civic_tid146, civic_did8, civic_eid2997_qualifier, civic_method, - civic_source592 + civic_mpid33, + civic_tid146, + civic_did8, + civic_eid2997_qualifier, + civic_method, + civic_source592, ): """Create CIVIC EID2997 Statement test fixture. Uses TherapeuticAgent.""" return { "id": "civic.eid:2997", "type": "VariantTherapeuticResponseStudy", - "description": "Afatinib, an irreversible inhibitor of the ErbB family of tyrosine kinases has been approved in the US for the first-line treatment of patients with metastatic non-small-cell lung cancer (NSCLC) who have tumours with EGFR exon 19 deletions or exon 21 (L858R) substitution mutations as detected by a US FDA-approved test", # noqa: E501 + "description": "Afatinib, an irreversible inhibitor of the ErbB family of tyrosine kinases has been approved in the US for the first-line treatment of patients with metastatic non-small-cell lung cancer (NSCLC) who have tumours with EGFR exon 19 deletions or exon 21 (L858R) substitution mutations as detected by a US FDA-approved test", "direction": "supports", "strength": { "code": "e000001", "label": "authoritative evidence", - "system": "https://go.osu.edu/evidence-codes" + "system": "https://go.osu.edu/evidence-codes", }, "predicate": "predictsSensitivityTo", "variant": civic_mpid33, @@ -275,7 +267,7 @@ def civic_eid2997_study( "tumorType": civic_did8, "qualifiers": civic_eid2997_qualifier, "specifiedBy": civic_method, - "isReportedIn": [civic_source592] + "isReportedIn": [civic_source592], } @@ -286,32 +278,20 @@ def civic_gid5(): "id": "civic.gid:5", "type": "Gene", "label": "BRAF", - "description": "BRAF mutations are found to be recurrent in many cancer types. Of these, the mutation of valine 600 to glutamic acid (V600E) is the most prevalent. V600E has been determined to be an activating mutation, and cells that harbor it, along with other V600 mutations are sensitive to the BRAF inhibitor dabrafenib. It is also common to use MEK inhibition as a substitute for BRAF inhibitors, and the MEK inhibitor trametinib has seen some success in BRAF mutant melanomas. 
BRAF mutations have also been correlated with poor prognosis in many cancer types, although there is at least one study that questions this conclusion in papillary thyroid cancer.\n\nOncogenic BRAF mutations are divided into three categories that determine their sensitivity to inhibitors.\nClass 1 BRAF mutations (V600) are RAS-independent, signal as monomers and are sensitive to current RAF monomer inhibitors.\nClass 2 BRAF mutations (K601E, K601N, K601T, L597Q, L597V, G469A, G469V, G469R, G464V, G464E, and fusions) are RAS-independent, signaling as constitutive dimers and are resistant to vemurafenib. Such mutants may be sensitive to novel RAF dimer inhibitors or MEK inhibitors.\nClass 3 BRAF mutations (D287H, V459L, G466V, G466E, G466A, S467L, G469E, N581S, N581I, D594N, D594G, D594A, D594H, F595L, G596D, and G596R) with low or absent kinase activity are RAS-dependent and they activate ERK by increasing their binding to activated RAS and wild-type CRAF. Class 3 BRAF mutations coexist with mutations in RAS or NF1 in melanoma may be treated with MEK inhibitors. In epithelial tumors such as CRC or NSCLC may be effectively treated with combinations that include inhibitors of receptor tyrosine kinase.", # noqa: E501 + "description": "BRAF mutations are found to be recurrent in many cancer types. Of these, the mutation of valine 600 to glutamic acid (V600E) is the most prevalent. V600E has been determined to be an activating mutation, and cells that harbor it, along with other V600 mutations are sensitive to the BRAF inhibitor dabrafenib. It is also common to use MEK inhibition as a substitute for BRAF inhibitors, and the MEK inhibitor trametinib has seen some success in BRAF mutant melanomas. BRAF mutations have also been correlated with poor prognosis in many cancer types, although there is at least one study that questions this conclusion in papillary thyroid cancer.\n\nOncogenic BRAF mutations are divided into three categories that determine their sensitivity to inhibitors.\nClass 1 BRAF mutations (V600) are RAS-independent, signal as monomers and are sensitive to current RAF monomer inhibitors.\nClass 2 BRAF mutations (K601E, K601N, K601T, L597Q, L597V, G469A, G469V, G469R, G464V, G464E, and fusions) are RAS-independent, signaling as constitutive dimers and are resistant to vemurafenib. Such mutants may be sensitive to novel RAF dimer inhibitors or MEK inhibitors.\nClass 3 BRAF mutations (D287H, V459L, G466V, G466E, G466A, S467L, G469E, N581S, N581I, D594N, D594G, D594A, D594H, F595L, G596D, and G596R) with low or absent kinase activity are RAS-dependent and they activate ERK by increasing their binding to activated RAS and wild-type CRAF. Class 3 BRAF mutations coexist with mutations in RAS or NF1 in melanoma may be treated with MEK inhibitors. 
In epithelial tumors such as CRC or NSCLC may be effectively treated with combinations that include inhibitors of receptor tyrosine kinase.", "mappings": [ { "coding": { "code": "ncbigene:673", - "system": "https://www.ncbi.nlm.nih.gov/gene/" + "system": "https://www.ncbi.nlm.nih.gov/gene/", }, - "relation": "exactMatch" + "relation": "exactMatch", } ], - "aliases": [ - "B-RAF1", - "B-raf", - "BRAF", - "BRAF-1", - "BRAF1", - "NS7", - "RAFB1" - ], + "aliases": ["B-RAF1", "B-raf", "BRAF", "BRAF-1", "BRAF1", "NS7", "RAFB1"], "extensions": [ - { - "type": "Extension", - "name": "gene_normalizer_id", - "value": "hgnc:1097" - } - ] + {"type": "Extension", "name": "gene_normalizer_id", "value": "hgnc:1097"} + ], } @@ -331,21 +311,12 @@ def civic_vid12(): "type": "SequenceReference", }, "start": 599, - "end": 600 - }, - "state": { - "sequence": "E", - "type": "LiteralSequenceExpression" + "end": 600, }, + "state": {"sequence": "E", "type": "LiteralSequenceExpression"}, "expressions": [ - { - "syntax": "hgvs.p", - "value": "NP_004324.2:p.Val600Glu" - }, - { - "syntax": "hgvs.c", - "value": "NM_004333.4:c.1799T>A" - }, + {"syntax": "hgvs.p", "value": "NP_004324.2:p.Val600Glu"}, + {"syntax": "hgvs.c", "value": "NM_004333.4:c.1799T>A"}, { "syntax": "hgvs.c", "value": "ENST00000288602.6:c.1799T>A", @@ -353,8 +324,8 @@ def civic_vid12(): { "syntax": "hgvs.g", "value": "NC_000007.13:g.140453136A>T", - } - ] + }, + ], } @@ -364,7 +335,7 @@ def civic_mpid12(civic_vid12): return { "id": "civic.mpid:12", "type": "ProteinSequenceConsequence", - "description": "BRAF V600E has been shown to be recurrent in many cancer types. It is one of the most widely studied variants in cancer. This variant is correlated with poor prognosis in certain cancer types, including colorectal cancer and papillary thyroid cancer. The targeted therapeutic dabrafenib has been shown to be effective in clinical trials with an array of BRAF mutations and cancer types. Dabrafenib has also shown to be effective when combined with the MEK inhibitor trametinib in colorectal cancer and melanoma. However, in patients with TP53, CDKN2A and KRAS mutations, dabrafenib resistance has been reported. Ipilimumab, regorafenib, vemurafenib, and a number of combination therapies have been successful in treating V600E mutations. However, cetuximab and panitumumab have been largely shown to be ineffective without supplementary treatment.", # noqa: E501 + "description": "BRAF V600E has been shown to be recurrent in many cancer types. It is one of the most widely studied variants in cancer. This variant is correlated with poor prognosis in certain cancer types, including colorectal cancer and papillary thyroid cancer. The targeted therapeutic dabrafenib has been shown to be effective in clinical trials with an array of BRAF mutations and cancer types. Dabrafenib has also shown to be effective when combined with the MEK inhibitor trametinib in colorectal cancer and melanoma. However, in patients with TP53, CDKN2A and KRAS mutations, dabrafenib resistance has been reported. Ipilimumab, regorafenib, vemurafenib, and a number of combination therapies have been successful in treating V600E mutations. 
However, cetuximab and panitumumab have been largely shown to be ineffective without supplementary treatment.", "label": "BRAF V600E", "definingContext": civic_vid12, "members": [ @@ -379,55 +350,48 @@ def civic_mpid12(civic_vid12): "type": "SequenceLocation", "sequenceReference": { "type": "SequenceReference", - "refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul" + "refgetAccession": "SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul", }, "start": 140753335, - "end": 140753336 + "end": 140753336, }, - "state": { - "type": "LiteralSequenceExpression", - "sequence": "T" - } + "state": {"type": "LiteralSequenceExpression", "sequence": "T"}, } ], - "aliases": [ - "VAL600GLU", - "V640E", - "VAL640GLU" - ], + "aliases": ["VAL600GLU", "V640E", "VAL640GLU"], "mappings": [ { "coding": { "code": "CA123643", - "system": "https://reg.clinicalgenome.org/" + "system": "https://reg.clinicalgenome.org/", }, - "relation": "relatedMatch" + "relation": "relatedMatch", }, { "coding": { "code": "13961", - "system": "https://www.ncbi.nlm.nih.gov/clinvar/variation/" + "system": "https://www.ncbi.nlm.nih.gov/clinvar/variation/", }, - "relation": "relatedMatch" + "relation": "relatedMatch", }, { "coding": { "code": "376069", - "system": "https://www.ncbi.nlm.nih.gov/clinvar/variation/" + "system": "https://www.ncbi.nlm.nih.gov/clinvar/variation/", }, - "relation": "relatedMatch" + "relation": "relatedMatch", }, { "coding": { "code": "rs113488022", - "system": "https://www.ncbi.nlm.nih.gov/snp/" + "system": "https://www.ncbi.nlm.nih.gov/snp/", }, - "relation": "relatedMatch" + "relation": "relatedMatch", }, { "coding": {"code": "12", "system": "https://civicdb.org/variants/"}, - "relation": "exactMatch" - } + "relation": "exactMatch", + }, ], "extensions": [ { @@ -441,28 +405,28 @@ def civic_mpid12(civic_vid12): "representative_transcript": "ENST00000288602.6", "ensembl_version": 75, "reference_build": "GRCh37", - "type": "coordinates" + "type": "coordinates", }, - "type": "Extension" + "type": "Extension", }, { "name": "CIViC Molecular Profile Score", "value": 1363.5, - "type": "Extension" + "type": "Extension", }, { "name": "Variant types", "value": [ { "code": "SO:0001583", - "system": "http://www.sequenceontology.org/browser/current_svn/term/", # noqa: E501 + "system": "http://www.sequenceontology.org/browser/current_svn/term/", "label": "missense_variant", - "version": None + "version": None, } ], - "type": "Extension" - } - ] + "type": "Extension", + }, + ], } @@ -482,21 +446,12 @@ def civic_vid33(): "type": "SequenceReference", }, "start": 857, - "end": 858 - }, - "state": { - "sequence": "R", - "type": "LiteralSequenceExpression" + "end": 858, }, + "state": {"sequence": "R", "type": "LiteralSequenceExpression"}, "expressions": [ - { - "syntax": "hgvs.p", - "value": "NP_005219.2:p.Leu858Arg" - }, - { - "syntax": "hgvs.c", - "value": "ENST00000275493.2:c.2573T>G" - }, + {"syntax": "hgvs.p", "value": "NP_005219.2:p.Leu858Arg"}, + {"syntax": "hgvs.c", "value": "ENST00000275493.2:c.2573T>G"}, { "syntax": "hgvs.c", "value": "NM_005228.4:c.2573T>G", @@ -504,8 +459,8 @@ def civic_vid33(): { "syntax": "hgvs.g", "value": "NC_000007.13:g.55259515T>G", - } - ] + }, + ], } @@ -516,26 +471,17 @@ def civic_gid19(): "id": "civic.gid:19", "type": "Gene", "label": "EGFR", - "description": "EGFR is widely recognized for its importance in cancer. Amplification and mutations have been shown to be driving events in many cancer types. 
Its role in non-small cell lung cancer, glioblastoma and basal-like breast cancers has spurred many research and drug development efforts. Tyrosine kinase inhibitors have shown efficacy in EGFR amplfied tumors, most notably gefitinib and erlotinib. Mutations in EGFR have been shown to confer resistance to these drugs, particularly the variant T790M, which has been functionally characterized as a resistance marker for both of these drugs. The later generation TKI's have seen some success in treating these resistant cases, and targeted sequencing of the EGFR locus has become a common practice in treatment of non-small cell lung cancer. Overproduction of ligands is another possible mechanism of activation of EGFR. ERBB ligands include EGF, TGF-a, AREG, EPG, BTC, HB-EGF, EPR and NRG1-4 (for detailed information please refer to the respective ligand section).", # noqa: E501 + "description": "EGFR is widely recognized for its importance in cancer. Amplification and mutations have been shown to be driving events in many cancer types. Its role in non-small cell lung cancer, glioblastoma and basal-like breast cancers has spurred many research and drug development efforts. Tyrosine kinase inhibitors have shown efficacy in EGFR amplfied tumors, most notably gefitinib and erlotinib. Mutations in EGFR have been shown to confer resistance to these drugs, particularly the variant T790M, which has been functionally characterized as a resistance marker for both of these drugs. The later generation TKI's have seen some success in treating these resistant cases, and targeted sequencing of the EGFR locus has become a common practice in treatment of non-small cell lung cancer. Overproduction of ligands is another possible mechanism of activation of EGFR. ERBB ligands include EGF, TGF-a, AREG, EPG, BTC, HB-EGF, EPR and NRG1-4 (for detailed information please refer to the respective ligand section).", "mappings": [ { "coding": { "code": "ncbigene:1956", - "system": "https://www.ncbi.nlm.nih.gov/gene/" + "system": "https://www.ncbi.nlm.nih.gov/gene/", }, - "relation": "exactMatch" + "relation": "exactMatch", } ], - "aliases": [ - "EGFR", - "ERBB", - "ERBB1", - "ERRP", - "HER1", - "NISBD2", - "PIG61", - "mENA" - ], + "aliases": ["EGFR", "ERBB", "ERBB1", "ERRP", "HER1", "NISBD2", "PIG61", "mENA"], } @@ -550,15 +496,15 @@ def civic_tid146(): { "coding": { "code": "C66940", - "system": "https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&code=" # noqa: E501 + "system": "https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&code=", }, - "relation": "exactMatch" + "relation": "exactMatch", } ], "aliases": [ "BIBW2992", "BIBW 2992", - "(2e)-N-(4-(3-Chloro-4-Fluoroanilino)-7-(((3s)-Oxolan-3-yl)Oxy)Quinoxazolin-6-yl)-4-(Dimethylamino)But-2-Enamide" # noqa: E501 + "(2e)-N-(4-(3-Chloro-4-Fluoroanilino)-7-(((3s)-Oxolan-3-yl)Oxy)Quinoxazolin-6-yl)-4-(Dimethylamino)But-2-Enamide", ], "extensions": [ { @@ -574,27 +520,24 @@ def civic_tid146(): "mappings": [ { "coding": {"code": "C2926", "system": "ncit"}, - "relation": "relatedMatch" + "relation": "relatedMatch", } - ] + ], }, { "id": "hemonc:25316", "type": "Disease", "label": "Non-small cell lung cancer squamous", - } - ] - } + }, + ], + }, }, { "type": "Extension", "name": "therapy_normalizer_data", - "value": { - "normalized_id": "rxcui:1430438", - "label": "afatinib" - } - } - ] + "value": {"normalized_id": "rxcui:1430438", "label": "afatinib"}, + }, + ], } @@ -609,9 +552,9 @@ def civic_did8(): { "coding": { "code": 
"DOID:3908", - "system": "https://www.disease-ontology.org/" + "system": "https://www.disease-ontology.org/", }, - "relation": "exactMatch" + "relation": "exactMatch", } ], "extensions": [ @@ -621,10 +564,10 @@ def civic_did8(): "value": { "normalized_id": "ncit:C2926", "label": "Lung Non-Small Cell Carcinoma", - "mondo_id": "0005233" - } + "mondo_id": "0005233", + }, } - ] + ], } @@ -635,7 +578,7 @@ def pmid_23982599(): "id": "pmid:23982599", "type": "Document", "label": "Dungo et al., 2013", - "description": "Afatinib: first global approval." + "description": "Afatinib: first global approval.", } @@ -650,9 +593,9 @@ def civic_tid28(): { "coding": { "code": "C1857", - "system": "https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&code=" # noqa: E501 + "system": "https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&code=", }, - "relation": "exactMatch" + "relation": "exactMatch", } ], "aliases": [ @@ -665,16 +608,13 @@ def civic_tid28(): "MoAb E7.6.3", "Monoclonal Antibody ABX-EGF", "Monoclonal Antibody E7.6.3", - "Vectibix" + "Vectibix", ], "extensions": [ { "type": "Extension", "name": "therapy_normalizer_data", - "value": { - "normalized_id": "rxcui:263034", - "label": "panitumumab" - } + "value": {"normalized_id": "rxcui:263034", "label": "panitumumab"}, }, { "type": "Extension", @@ -689,9 +629,9 @@ def civic_tid28(): "mappings": [ { "coding": {"code": "C3262", "system": "ncit"}, - "relation": "relatedMatch" + "relation": "relatedMatch", } - ] + ], }, { "id": "mesh:D015179", @@ -700,14 +640,14 @@ def civic_tid28(): "mappings": [ { "coding": {"code": "C2956", "system": "ncit"}, - "relation": "relatedMatch" + "relation": "relatedMatch", } - ] - } - ] - } - } - ] + ], + }, + ], + }, + }, + ], } @@ -722,9 +662,9 @@ def civic_tid16(cetuximab_extensions): { "coding": { "code": "C1723", - "system": "https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&code=" # noqa: E501 + "system": "https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&code=", }, - "relation": "exactMatch" + "relation": "exactMatch", } ], "aliases": [ @@ -735,9 +675,9 @@ def civic_tid16(cetuximab_extensions): "Chimeric MoAb C225", "Chimeric Monoclonal Antibody C225", "Erbitux", - "IMC-C225" + "IMC-C225", ], - "extensions": cetuximab_extensions + "extensions": cetuximab_extensions, } @@ -752,9 +692,9 @@ def civic_tsg(civic_tid16, civic_tid28): { "type": "Extension", "name": "civic_therapy_interaction_type", - "value": "SUBSTITUTES" + "value": "SUBSTITUTES", } - ] + ], } @@ -769,18 +709,13 @@ def civic_tid483(encorafenib_extensions): { "coding": { "code": "C98283", - "system": "https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&code=" # noqa: E501 + "system": "https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&code=", }, - "relation": "exactMatch" + "relation": "exactMatch", } ], - "aliases": [ - "Braftovi", - "LGX 818", - "LGX-818", - "LGX818" - ], - "extensions": encorafenib_extensions + "aliases": ["Braftovi", "LGX 818", "LGX-818", "LGX818"], + "extensions": encorafenib_extensions, } @@ -795,9 +730,9 @@ def civic_ct(civic_tid483, civic_tid16): { "type": "Extension", "name": "civic_therapy_interaction_type", - "value": "COMBINATION" + "value": "COMBINATION", } - ] + ], } @@ -812,9 +747,9 @@ def civic_did11(): { "coding": { "code": "DOID:9256", - "system": "https://www.disease-ontology.org/" + "system": "https://www.disease-ontology.org/", }, - "relation": 
"exactMatch" + "relation": "exactMatch", } ], "extensions": [ @@ -824,50 +759,41 @@ def civic_did11(): "value": { "normalized_id": "ncit:C4978", "label": "Malignant Colorectal Neoplasm", - "mondo_id": "0005575" - } + "mondo_id": "0005575", + }, } - ] + ], } @pytest.fixture(scope="session") -def civic_eid816_study( - civic_mpid12, - civic_tsg, - civic_did11, - civic_gid5, - civic_method -): +def civic_eid816_study(civic_mpid12, civic_tsg, civic_did11, civic_gid5, civic_method): """Create CIVIC EID816 study test fixture. Uses TherapeuticSubstituteGroup.""" return { "id": "civic.eid:816", "type": "VariantTherapeuticResponseStudy", - "description": "This meta-analysis of 7 randomized control trials evaluating overall survival (OS) (8 for progression free survival) could not definitely state that survival benefit of anti-EGFR monoclonal antibodies is limited to patients with wild type BRAF. In other words, the authors believe that there is insufficient data to justify the exclusion of anti-EGFR monoclonal antibody therapy for patients with mutant BRAF. In these studies, mutant BRAF specifically meant the V600E mutation.", # noqa: E501 + "description": "This meta-analysis of 7 randomized control trials evaluating overall survival (OS) (8 for progression free survival) could not definitely state that survival benefit of anti-EGFR monoclonal antibodies is limited to patients with wild type BRAF. In other words, the authors believe that there is insufficient data to justify the exclusion of anti-EGFR monoclonal antibody therapy for patients with mutant BRAF. In these studies, mutant BRAF specifically meant the V600E mutation.", "direction": "refutes", "strength": { "code": "e000005", "label": "clinical cohort evidence", - "system": "https://go.osu.edu/evidence-codes" + "system": "https://go.osu.edu/evidence-codes", }, "predicate": "predictsResistanceTo", "variant": civic_mpid12, "therapeutic": civic_tsg, "tumorType": civic_did11, - "qualifiers": { - "alleleOrigin": "somatic", - "geneContext": civic_gid5 - }, + "qualifiers": {"alleleOrigin": "somatic", "geneContext": civic_gid5}, "specifiedBy": civic_method, "isReportedIn": [ { "id": "civic.source:548", "label": "Rowland et al., 2015", - "title": "Meta-analysis of BRAF mutation as a predictive biomarker of benefit from anti-EGFR monoclonal antibody therapy for RAS wild-type metastatic colorectal cancer.", # noqa: E501 + "title": "Meta-analysis of BRAF mutation as a predictive biomarker of benefit from anti-EGFR monoclonal antibody therapy for RAS wild-type metastatic colorectal cancer.", "pmid": 25989278, - "type": "Document" + "type": "Document", } - ] + ], } @@ -883,31 +809,28 @@ def civic_eid9851_study( return { "id": "civic.eid:9851", "type": "VariantTherapeuticResponseStudy", - "description": "The open-label phase 3 BEACON CRC trial included 665 patients with BRAF V600E-mutated metastatic CRC. Patients were randomly assigned in a 1:1:1 ratio to receive encorafenib, binimetinib, and cetuximab (triplet-therapy group); encorafenib and cetuximab (doublet-therapy group); or the investigators\u2019 choice of either cetuximab and irinotecan or cetuximab and FOLFIRI. The median overall survival was 8.4 months (95% CI, 7.5 to 11.0) in the doublet-therapy group and 5.4 months (95% CI, 4.8 to 6.6) in the control group, with a significantly lower risk of death compared to the control group (hazard ratio for death doublet-group vs. control, 0.60; 95% CI, 0.45 to 0.79; P<0.001). 
The confirmed response rate was 26% (95% CI, 18 to 35) in the triplet-therapy group, 20% in the doublet-therapy group (95% CI 13 to 29) and 2% (95% CI, 0 to 7) in the control group (doublet group vs. control P<0.001). Median PFS was 4.2 months (95% CI, 3.7 to 5.4) in the doublet-therapy group, and 1.5 months (95% CI, 1.5 to 1.7) in the control group (hazard ratio for disease progression doublet-group vs control, 0.40; 95% CI, 0.31 to 0.52, P<0.001).", # noqa: E501 + "description": "The open-label phase 3 BEACON CRC trial included 665 patients with BRAF V600E-mutated metastatic CRC. Patients were randomly assigned in a 1:1:1 ratio to receive encorafenib, binimetinib, and cetuximab (triplet-therapy group); encorafenib and cetuximab (doublet-therapy group); or the investigators\u2019 choice of either cetuximab and irinotecan or cetuximab and FOLFIRI. The median overall survival was 8.4 months (95% CI, 7.5 to 11.0) in the doublet-therapy group and 5.4 months (95% CI, 4.8 to 6.6) in the control group, with a significantly lower risk of death compared to the control group (hazard ratio for death doublet-group vs. control, 0.60; 95% CI, 0.45 to 0.79; P<0.001). The confirmed response rate was 26% (95% CI, 18 to 35) in the triplet-therapy group, 20% in the doublet-therapy group (95% CI 13 to 29) and 2% (95% CI, 0 to 7) in the control group (doublet group vs. control P<0.001). Median PFS was 4.2 months (95% CI, 3.7 to 5.4) in the doublet-therapy group, and 1.5 months (95% CI, 1.5 to 1.7) in the control group (hazard ratio for disease progression doublet-group vs control, 0.40; 95% CI, 0.31 to 0.52, P<0.001).", "direction": "supports", "strength": { "code": "e000001", "label": "authoritative evidence", - "system": "https://go.osu.edu/evidence-codes" + "system": "https://go.osu.edu/evidence-codes", }, "predicate": "predictsSensitivityTo", "variant": civic_mpid12, "therapeutic": civic_ct, "tumorType": civic_did11, - "qualifiers": { - "alleleOrigin": "somatic", - "geneContext": civic_gid5 - }, + "qualifiers": {"alleleOrigin": "somatic", "geneContext": civic_gid5}, "specifiedBy": civic_method, "isReportedIn": [ { "id": "civic.source:3025", "label": "Kopetz et al., 2019", - "title": "Encorafenib, Binimetinib, and Cetuximab in BRAF V600E-Mutated Colorectal Cancer.", # noqa: E501 + "title": "Encorafenib, Binimetinib, and Cetuximab in BRAF V600E-Mutated Colorectal Cancer.", "pmid": 31566309, - "type": "Document" + "type": "Document", } - ] + ], } @@ -916,7 +839,7 @@ def civic_eid1409_statement(): """Create test fixture for CIViC Evidence 1406.""" return { "id": "civic.eid:1409", - "description": "Phase 3 randomized clinical trial comparing vemurafenib with dacarbazine in 675 patients with previously untreated, metastatic melanoma with the BRAF V600E mutation. At 6 months, overall survival was 84% (95% confidence interval [CI], 78 to 89) in the vemurafenib group and 64% (95% CI, 56 to 73) in the dacarbazine group. A relative reduction of 63% in the risk of death and of 74% in the risk of either death or disease progression was observed with vemurafenib as compared with dacarbazine (P<0.001 for both comparisons).", # noqa: E501 + "description": "Phase 3 randomized clinical trial comparing vemurafenib with dacarbazine in 675 patients with previously untreated, metastatic melanoma with the BRAF V600E mutation. At 6 months, overall survival was 84% (95% confidence interval [CI], 78 to 89) in the vemurafenib group and 64% (95% CI, 56 to 73) in the dacarbazine group. 
A relative reduction of 63% in the risk of death and of 74% in the risk of either death or disease progression was observed with vemurafenib as compared with dacarbazine (P<0.001 for both comparisons).", "direction": "supports", "evidence_level": "civic.evidence_level:A", "proposition": "proposition:wsW_PurZodw_qHg1Iw8iAR1CUQte1CLA", @@ -926,7 +849,7 @@ def civic_eid1409_statement(): "disease_descriptor": "civic.did:206", "method": "method:1", "supported_by": ["pmid:21639808"], - "type": "Statement" + "type": "Statement", } @@ -935,7 +858,7 @@ def civic_aid6_statement(): """Create CIViC AID 6 test fixture.""" return { "id": "civic.aid:6", - "description": "L858R is among the most common sensitizing EGFR mutations in NSCLC, and is assessed via DNA mutational analysis, including Sanger sequencing and next generation sequencing methods. Tyrosine kinase inhibitor afatinib is FDA approved, and is recommended (category 1) by NCCN guidelines along with erlotinib, gefitinib and osimertinib as first line systemic therapy in NSCLC with sensitizing EGFR mutation.", # noqa: E501 + "description": "L858R is among the most common sensitizing EGFR mutations in NSCLC, and is assessed via DNA mutational analysis, including Sanger sequencing and next generation sequencing methods. Tyrosine kinase inhibitor afatinib is FDA approved, and is recommended (category 1) by NCCN guidelines along with erlotinib, gefitinib and osimertinib as first line systemic therapy in NSCLC with sensitizing EGFR mutation.", "direction": "supports", "evidence_level": "amp_asco_cap_2017_level:1A", "proposition": "proposition:Zfp_VG0uvxwteCcJYO6_AJv1KDmJlFjs", @@ -945,12 +868,15 @@ def civic_aid6_statement(): "disease_descriptor": "civic.did:8", "method": "method:2", "supported_by": [ - "document:9WsQBGXOmTFRXBUanTaIec8Gvgg8bsMA", "civic.eid:2997", - "civic.eid:2629", "civic.eid:982", - "civic.eid:968", "civic.eid:883", - "civic.eid:879" + "document:9WsQBGXOmTFRXBUanTaIec8Gvgg8bsMA", + "civic.eid:2997", + "civic.eid:2629", + "civic.eid:982", + "civic.eid:968", + "civic.eid:883", + "civic.eid:879", ], - "type": "Statement" + "type": "Statement", } @@ -960,9 +886,9 @@ def civic_aid6_document(): return { "id": "document:9WsQBGXOmTFRXBUanTaIec8Gvgg8bsMA", "document_id": "https://www.nccn.org/professionals/" - "physician_gls/default.aspx", + "physician_gls/default.aspx", "label": "NCCN Guidelines: Non-Small Cell Lung Cancer version 3.2018", - "type": "Document" + "type": "Document", } @@ -972,7 +898,7 @@ def civic_eid2_statement(): return { "id": "civic.eid:2", "type": "Statement", - "description": "GIST tumors harboring PDGFRA D842V mutation are more likely to be benign than malignant.", # noqa: E501 + "description": "GIST tumors harboring PDGFRA D842V mutation are more likely to be benign than malignant.", "direction": "supports", "evidence_level": "civic.evidence_level:B", "proposition": "proposition:KVuJMXiPm-oK4vvijE9Cakvucayay3jE", @@ -980,7 +906,7 @@ def civic_eid2_statement(): "variation_descriptor": "civic.vid:99", "disease_descriptor": "civic.did:2", "method": "method:1", - "supported_by": ["pmid:15146165"] + "supported_by": ["pmid:15146165"], } @@ -992,7 +918,7 @@ def civic_eid2_proposition(): "type": "diagnostic_proposition", "predicate": "is_diagnostic_exclusion_criterion_for", "subject": "ga4gh:VA.bjWVYvXPaPbIRAfZvE0Uw_P-i36PGkAz", - "object_qualifier": "ncit:C3868" + "object_qualifier": "ncit:C3868", } @@ -1003,7 +929,7 @@ def civic_vid99(): "id": "civic.vid:99", "type": "VariationDescriptor", "label": "D842V", - 
"description": "PDGFRA D842 mutations are characterized broadly as imatinib resistance mutations. This is most well characterized in gastrointestinal stromal tumors, but other cell lines containing these mutations have been shown to be resistant as well. Exogenous expression of the A842V mutation resulted in constitutive tyrosine phosphorylation of PDGFRA in the absence of ligand in 293T cells and cytokine-independent proliferation of the IL-3-dependent Ba/F3 cell line, both evidence that this is an activating mutation. In imatinib resistant cell lines, a number of other therapeutics have demonstrated efficacy. These include; crenolanib, sirolimus, and midostaurin (PKC412).", # noqa: E501 + "description": "PDGFRA D842 mutations are characterized broadly as imatinib resistance mutations. This is most well characterized in gastrointestinal stromal tumors, but other cell lines containing these mutations have been shown to be resistant as well. Exogenous expression of the A842V mutation resulted in constitutive tyrosine phosphorylation of PDGFRA in the absence of ligand in 293T cells and cytokine-independent proliferation of the IL-3-dependent Ba/F3 cell line, both evidence that this is an activating mutation. In imatinib resistant cell lines, a number of other therapeutics have demonstrated efficacy. These include; crenolanib, sirolimus, and midostaurin (PKC412).", "variation_id": "ga4gh:VA.bjWVYvXPaPbIRAfZvE0Uw_P-i36PGkAz", "variation": { "_id": "ga4gh:VA.bjWVYvXPaPbIRAfZvE0Uw_P-i36PGkAz", @@ -1012,25 +938,16 @@ def civic_vid99(): "interval": { "start": {"value": 841, "type": "Number"}, "end": {"value": 842, "type": "Number"}, - "type": "SequenceInterval" + "type": "SequenceInterval", }, "sequence_id": "ga4gh:SQ.XpQn9sZLGv_GU3uiWO7YHq9-_alGjrVX", - "type": "SequenceLocation" - }, - "state": { - "sequence": "V", - "type": "LiteralSequenceExpression" + "type": "SequenceLocation", }, - "type": "Allele" + "state": {"sequence": "V", "type": "LiteralSequenceExpression"}, + "type": "Allele", }, - "xrefs": [ - "clinvar:13543", - "caid:CA123194", - "dbsnp:121908585" - ], - "alternate_labels": [ - "ASP842VAL" - ], + "xrefs": ["clinvar:13543", "caid:CA123194", "dbsnp:121908585"], + "alternate_labels": ["ASP842VAL"], "extensions": [ { "name": "civic_representative_coordinate", @@ -1042,14 +959,14 @@ def civic_vid99(): "variant_bases": "T", "representative_transcript": "ENST00000257290.5", "ensembl_version": 75, - "reference_build": "GRCh37" + "reference_build": "GRCh37", }, - "type": "Extension" + "type": "Extension", }, { "name": "civic_actionability_score", "value": "100.5", - "type": "Extension" + "type": "Extension", }, { "name": "variant_group", @@ -1057,37 +974,37 @@ def civic_vid99(): { "id": "civic.variant_group:1", "label": "Imatinib Resistance", - "description": "While imatinib has shown to be incredibly successful in treating philadelphia chromosome positive CML, patients that have shown primary or secondary resistance to the drug have been observed to harbor T315I and E255K ABL kinase domain mutations. These mutations, among others, have been observed both in primary refractory disease and acquired resistance. In gastrointestinal stromal tumors (GIST), PDGFRA 842 mutations have also been shown to confer resistance to imatinib. 
", # noqa: E501 - "type": "variant_group" + "description": "While imatinib has shown to be incredibly successful in treating philadelphia chromosome positive CML, patients that have shown primary or secondary resistance to the drug have been observed to harbor T315I and E255K ABL kinase domain mutations. These mutations, among others, have been observed both in primary refractory disease and acquired resistance. In gastrointestinal stromal tumors (GIST), PDGFRA 842 mutations have also been shown to confer resistance to imatinib. ", + "type": "variant_group", } ], - "type": "Extension" - } + "type": "Extension", + }, ], "structural_type": "SO:0001583", "expressions": [ { "syntax": "hgvs.c", "value": "NM_006206.4:c.2525A>T", - "type": "Expression" + "type": "Expression", }, { "syntax": "hgvs.p", "value": "NP_006197.1:p.Asp842Val", - "type": "Expression" + "type": "Expression", }, { "syntax": "hgvs.c", "value": "ENST00000257290.5:c.2525A>T", - "type": "Expression" + "type": "Expression", }, { "syntax": "hgvs.g", "value": "NC_000004.11:g.55152093A>T", - "type": "Expression" - } + "type": "Expression", + }, ], - "gene_context": "civic.gid:38" + "gene_context": "civic.gid:38", } @@ -1099,9 +1016,7 @@ def civic_did2(): "type": "DiseaseDescriptor", "label": "Gastrointestinal Stromal Tumor", "disease_id": "ncit:C3868", - "xrefs": [ - "DOID:9253" - ] + "xrefs": ["DOID:9253"], } @@ -1112,17 +1027,10 @@ def civic_gid38(): "id": "civic.gid:38", "type": "GeneDescriptor", "label": "PDGFRA", - "description": "Commonly mutated in GI tract tumors, PDGFR family genes (mutually exclusive to KIT mutations) are a hallmark of gastrointestinal stromal tumors. Gene fusions involving the PDGFRA kinase domain are highly correlated with eosinophilia, and the WHO classifies myeloid and lymphoid neoplasms with these characteristics as a distinct disorder. Mutations in the 842 region of PDGFRA have been often found to confer resistance to the tyrosine kinase inhibitor, imatinib.", # noqa: E501 + "description": "Commonly mutated in GI tract tumors, PDGFR family genes (mutually exclusive to KIT mutations) are a hallmark of gastrointestinal stromal tumors. Gene fusions involving the PDGFRA kinase domain are highly correlated with eosinophilia, and the WHO classifies myeloid and lymphoid neoplasms with these characteristics as a distinct disorder. 
Mutations in the 842 region of PDGFRA have been often found to confer resistance to the tyrosine kinase inhibitor, imatinib.", "gene_id": "hgnc:8803", - "alternate_labels": [ - "PDGFRA", - "PDGFR2", - "PDGFR-2", - "CD140A" - ], - "xrefs": [ - "ncbigene:5156" - ] + "alternate_labels": ["PDGFRA", "PDGFR2", "PDGFR-2", "CD140A"], + "xrefs": ["ncbigene:5156"], } @@ -1131,7 +1039,7 @@ def civic_eid74_statement(): """Create a test fixture for CIViC EID74 statement.""" return { "id": "civic.eid:74", - "description": "In patients with medullary carcinoma, the presence of RET M918T mutation is associated with increased probability of lymph node metastases.", # noqa: E501 + "description": "In patients with medullary carcinoma, the presence of RET M918T mutation is associated with increased probability of lymph node metastases.", "direction": "supports", "evidence_level": "civic.evidence_level:B", "proposition": "proposition:Vyzbpg-s6mw27yJfYBFxGyQeuEJacP4l", @@ -1140,7 +1048,7 @@ def civic_eid74_statement(): "disease_descriptor": "civic.did:15", "method": "method:1", "supported_by": ["pmid:18073307"], - "type": "Statement" + "type": "Statement", } @@ -1152,7 +1060,7 @@ def civic_eid74_proposition(): "type": "diagnostic_proposition", "predicate": "is_diagnostic_inclusion_criterion_for", "subject": "ga4gh:VA.GweduWrfxV58YnSvUBfHPGOA-KCH_iIl", - "object_qualifier": "ncit:C3879" + "object_qualifier": "ncit:C3879", } @@ -1163,7 +1071,7 @@ def civic_vid113(): "id": "civic.vid:113", "type": "VariationDescriptor", "label": "M918T", - "description": "RET M819T is the most common somatically acquired mutation in medullary thyroid cancer (MTC). While there currently are no RET-specific inhibiting agents, promiscuous kinase inhibitors have seen some success in treating RET overactivity. Data suggests however, that the M918T mutation may lead to drug resistance, especially against the VEGFR-inhibitor motesanib. It has also been suggested that RET M819T leads to more aggressive MTC with a poorer prognosis.", # noqa: E501 + "description": "RET M819T is the most common somatically acquired mutation in medullary thyroid cancer (MTC). While there currently are no RET-specific inhibiting agents, promiscuous kinase inhibitors have seen some success in treating RET overactivity. Data suggests however, that the M918T mutation may lead to drug resistance, especially against the VEGFR-inhibitor motesanib. 
It has also been suggested that RET M819T leads to more aggressive MTC with a poorer prognosis.", "variation_id": "ga4gh:VA.GweduWrfxV58YnSvUBfHPGOA-KCH_iIl", "variation": { "_id": "ga4gh:VA.GweduWrfxV58YnSvUBfHPGOA-KCH_iIl", @@ -1172,25 +1080,16 @@ def civic_vid113(): "interval": { "end": {"value": 918, "type": "Number"}, "start": {"value": 917, "type": "Number"}, - "type": "SequenceInterval" + "type": "SequenceInterval", }, "sequence_id": "ga4gh:SQ.jMu9-ItXSycQsm4hyABeW_UfSNRXRVnl", - "type": "SequenceLocation" - }, - "state": { - "sequence": "T", - "type": "LiteralSequenceExpression" + "type": "SequenceLocation", }, - "type": "Allele" + "state": {"sequence": "T", "type": "LiteralSequenceExpression"}, + "type": "Allele", }, - "xrefs": [ - "clinvar:13919", - "caid:CA009082", - "dbsnp:74799832" - ], - "alternate_labels": [ - "MET918THR" - ], + "xrefs": ["clinvar:13919", "caid:CA009082", "dbsnp:74799832"], + "alternate_labels": ["MET918THR"], "extensions": [ { "name": "civic_representative_coordinate", @@ -1202,52 +1101,48 @@ def civic_vid113(): "variant_bases": "C", "representative_transcript": "ENST00000355710.3", "ensembl_version": 75, - "reference_build": "GRCh37" + "reference_build": "GRCh37", }, - "type": "Extension" - }, - { - "name": "civic_actionability_score", - "value": "86", - "type": "Extension" + "type": "Extension", }, + {"name": "civic_actionability_score", "value": "86", "type": "Extension"}, { "name": "variant_group", "value": [ { "id": "civic.variant_group:6", "label": "Motesanib Resistance", - "description": "RET activation is a common oncogenic marker of medullary thyroid carcinoma. Treatment of these patients with the targeted therapeutic motesanib has shown to be effective. However, the missense mutations C634W and M918T have shown to confer motesanib resistance in cell lines. ", # noqa: E501 - "type": "variant_group" + "description": "RET activation is a common oncogenic marker of medullary thyroid carcinoma. Treatment of these patients with the targeted therapeutic motesanib has shown to be effective. However, the missense mutations C634W and M918T have shown to confer motesanib resistance in cell lines. ", + "type": "variant_group", } ], - "type": "Extension" - } + "type": "Extension", + }, ], "structural_type": "SO:0001583", "expressions": [ { "syntax": "hgvs.c", "value": "NM_020975.4:c.2753T>C", - "type": "Expression" + "type": "Expression", }, { "syntax": "hgvs.p", "value": "NP_065681.1:p.Met918Thr", - "type": "Expression" + "type": "Expression", }, { "syntax": "hgvs.c", "value": "ENST00000355710.3:c.2753T>C", - "type": "Expression" + "type": "Expression", }, { "syntax": "hgvs.g", "value": "NC_000010.10:g.43617416T>C", - "type": "Expression" - } + "type": "Expression", + }, ], - "gene_context": "civic.gid:42" + "gene_context": "civic.gid:42", } @@ -1259,9 +1154,7 @@ def civic_did15(): "type": "DiseaseDescriptor", "label": "Thyroid Gland Medullary Carcinoma", "disease_id": "ncit:C3879", - "xrefs": [ - "DOID:3973" - ] + "xrefs": ["DOID:3973"], } @@ -1272,7 +1165,7 @@ def civic_gid42(): "id": "civic.gid:42", "type": "GeneDescriptor", "label": "RET", - "description": "RET mutations and the RET fusion RET-PTC lead to activation of this tyrosine kinase receptor and are associated with thyroid cancers. RET point mutations are the most common mutations identified in medullary thyroid cancer (MTC) with germline and somatic mutations in RET associated with hereditary and sporadic forms, respectively. 
The most common somatic form mutation is M918T (exon 16) and a variety of other mutations effecting exons 10, 11 and 15 have been described. The prognostic significance of these mutations have been hotly debated in the field, however, data suggests that some RET mutation may confer drug resistence. No RET-specific agents are currently clinically available but several promiscuous kinase inhibitors that target RET, among others, have been approved for MTC treatment.", # noqa: E501 + "description": "RET mutations and the RET fusion RET-PTC lead to activation of this tyrosine kinase receptor and are associated with thyroid cancers. RET point mutations are the most common mutations identified in medullary thyroid cancer (MTC) with germline and somatic mutations in RET associated with hereditary and sporadic forms, respectively. The most common somatic form mutation is M918T (exon 16) and a variety of other mutations effecting exons 10, 11 and 15 have been described. The prognostic significance of these mutations have been hotly debated in the field, however, data suggests that some RET mutation may confer drug resistence. No RET-specific agents are currently clinically available but several promiscuous kinase inhibitors that target RET, among others, have been approved for MTC treatment.", "gene_id": "hgnc:9967", "alternate_labels": [ "RET", @@ -1283,11 +1176,9 @@ def civic_gid42(): "MEN2A", "HSCR1", "CDHR16", - "CDHF12" + "CDHF12", ], - "xrefs": [ - "ncbigene:5979" - ] + "xrefs": ["ncbigene:5979"], } @@ -1296,7 +1187,7 @@ def civic_aid9_statement(): """Create a test fixture for CIViC AID9 statement.""" return { "id": "civic.aid:9", - "description": "ACVR1 G328V mutations occur within the kinase domain, leading to activation of downstream signaling. Exclusively seen in high-grade pediatric gliomas, supporting diagnosis of diffuse intrinsic pontine glioma.", # noqa: E501 + "description": "ACVR1 G328V mutations occur within the kinase domain, leading to activation of downstream signaling. 
Exclusively seen in high-grade pediatric gliomas, supporting diagnosis of diffuse intrinsic pontine glioma.", "direction": "supports", "evidence_level": "amp_asco_cap_2017_level:2C", "proposition": "proposition:Pjri4dU2VaEKcdKtVkoAUJ8bHFXnW2My", @@ -1304,9 +1195,8 @@ def civic_aid9_statement(): "variation_descriptor": "civic.vid:1686", "disease_descriptor": "civic.did:2950", "method": "method:2", - "supported_by": ["civic.eid:4846", - "civic.eid:6955"], - "type": "Statement" + "supported_by": ["civic.eid:4846", "civic.eid:6955"], + "type": "Statement", } @@ -1318,7 +1208,7 @@ def civic_aid9_proposition(): "predicate": "is_diagnostic_inclusion_criterion_for", "subject": "ga4gh:VA.yuvNtv-SpNOzcGsKsNnnK0n026rbfp6T", "object_qualifier": "DOID:0080684", - "type": "diagnostic_proposition" + "type": "diagnostic_proposition", } @@ -1337,25 +1227,16 @@ def civic_vid1686(): "interval": { "end": {"value": 328, "type": "Number"}, "start": {"value": 327, "type": "Number"}, - "type": "SequenceInterval" + "type": "SequenceInterval", }, "sequence_id": "ga4gh:SQ.6CnHhDq_bDCsuIBf0AzxtKq_lXYM7f0m", - "type": "SequenceLocation" - }, - "state": { - "sequence": "V", - "type": "LiteralSequenceExpression" + "type": "SequenceLocation", }, - "type": "Allele" + "state": {"sequence": "V", "type": "LiteralSequenceExpression"}, + "type": "Allele", }, - "xrefs": [ - "clinvar:376363", - "caid:CA16602802", - "dbsnp:387906589" - ], - "alternate_labels": [ - "GLY328VAL" - ], + "xrefs": ["clinvar:376363", "caid:CA16602802", "dbsnp:387906589"], + "alternate_labels": ["GLY328VAL"], "extensions": [ { "name": "civic_representative_coordinate", @@ -1367,51 +1248,43 @@ def civic_vid1686(): "variant_bases": "A", "representative_transcript": "ENST00000434821.1", "ensembl_version": 75, - "reference_build": "GRCh37" + "reference_build": "GRCh37", }, - "type": "Extension" - }, - { - "name": "civic_actionability_score", - "value": "30", - "type": "Extension" + "type": "Extension", }, + {"name": "civic_actionability_score", "value": "30", "type": "Extension"}, { "name": "variant_group", "value": [ { "id": "civic.variant_group:23", "label": "ACVR1 kinase domain mutation", - "type": "variant_group" + "type": "variant_group", } ], - "type": "Extension" - } + "type": "Extension", + }, ], "structural_type": "SO:0001583", "expressions": [ - { - "syntax": "hgvs.c", - "value": "NM_001105.4:c.983G>T", - "type": "Expression" - }, + {"syntax": "hgvs.c", "value": "NM_001105.4:c.983G>T", "type": "Expression"}, { "syntax": "hgvs.p", "value": "NP_001096.1:p.Gly328Val", - "type": "Expression" + "type": "Expression", }, { "syntax": "hgvs.g", "value": "NC_000002.11:g.158622516C>A", - "type": "Expression" + "type": "Expression", }, { "syntax": "hgvs.c", "value": "ENST00000434821.1:c.983G>T", - "type": "Expression" - } + "type": "Expression", + }, ], - "gene_context": "civic.gid:154" + "gene_context": "civic.gid:154", } @@ -1423,9 +1296,7 @@ def civic_did2950(): "type": "DiseaseDescriptor", "label": "Diffuse Midline Glioma, H3 K27M-mutant", "disease_id": "DOID:0080684", - "xrefs": [ - "DOID:0080684" - ] + "xrefs": ["DOID:0080684"], } @@ -1445,11 +1316,9 @@ def civic_gid154(): "ALK2", "ACVRLK2", "ACVR1A", - "ACTRI" + "ACTRI", ], - "xrefs": [ - "ncbigene:90" - ] + "xrefs": ["ncbigene:90"], } @@ -1458,7 +1327,7 @@ def civic_eid26_statement(): """Create a test fixture for CIViC EID26 statement.""" return { "id": "civic.eid:26", - "description": "In acute myloid leukemia patients, D816 mutation is associated with earlier relapse and poorer prognosis than 
wildtype KIT.", # noqa: E501 + "description": "In acute myloid leukemia patients, D816 mutation is associated with earlier relapse and poorer prognosis than wildtype KIT.", "direction": "supports", "evidence_level": "civic.evidence_level:B", "proposition": "proposition:_HXqJtIo6MSmwagQUSOot4wdKE7O4DyN", @@ -1467,7 +1336,7 @@ def civic_eid26_statement(): "disease_descriptor": "civic.did:3", "method": "method:1", "supported_by": ["pmid:16384925"], - "type": "Statement" + "type": "Statement", } @@ -1479,7 +1348,7 @@ def civic_eid26_proposition(): "predicate": "is_prognostic_of_worse_outcome_for", "subject": "ga4gh:VA.QSLb0bR-CRIFfKIENdHhcuUZwW3IS1aP", "object_qualifier": "ncit:C3171", - "type": "prognostic_proposition" + "type": "prognostic_proposition", } @@ -1490,7 +1359,7 @@ def civic_vid65(): "id": "civic.vid:65", "type": "VariationDescriptor", "label": "D816V", - "description": "KIT D816V is a mutation observed in acute myeloid leukemia (AML). This variant has been linked to poorer prognosis and worse outcome in AML patients.", # noqa: E501 + "description": "KIT D816V is a mutation observed in acute myeloid leukemia (AML). This variant has been linked to poorer prognosis and worse outcome in AML patients.", "variation_id": "ga4gh:VA.QSLb0bR-CRIFfKIENdHhcuUZwW3IS1aP", "variation": { "_id": "ga4gh:VA.QSLb0bR-CRIFfKIENdHhcuUZwW3IS1aP", @@ -1499,25 +1368,16 @@ def civic_vid65(): "interval": { "end": {"value": 820, "type": "Number"}, "start": {"value": 819, "type": "Number"}, - "type": "SequenceInterval" + "type": "SequenceInterval", }, "sequence_id": "ga4gh:SQ.TcMVFj5kDODDWpiy1d_1-3_gOf4BYaAB", - "type": "SequenceLocation" - }, - "state": { - "sequence": "V", - "type": "LiteralSequenceExpression" + "type": "SequenceLocation", }, - "type": "Allele" + "state": {"sequence": "V", "type": "LiteralSequenceExpression"}, + "type": "Allele", }, - "xrefs": [ - "clinvar:13852", - "caid:CA123513", - "dbsnp:121913507" - ], - "alternate_labels": [ - "ASP816VAL" - ], + "xrefs": ["clinvar:13852", "caid:CA123513", "dbsnp:121913507"], + "alternate_labels": ["ASP816VAL"], "extensions": [ { "name": "civic_representative_coordinate", @@ -1529,51 +1389,47 @@ def civic_vid65(): "variant_bases": "T", "representative_transcript": "ENST00000288135.5", "ensembl_version": 75, - "reference_build": "GRCh37" + "reference_build": "GRCh37", }, - "type": "Extension" - }, - { - "name": "civic_actionability_score", - "value": "67", - "type": "Extension" + "type": "Extension", }, + {"name": "civic_actionability_score", "value": "67", "type": "Extension"}, { "name": "variant_group", "value": [ { "id": "civic.variant_group:2", "label": "KIT Exon 17", - "type": "variant_group" + "type": "variant_group", } ], - "type": "Extension" - } + "type": "Extension", + }, ], "structural_type": "SO:0001583", "expressions": [ { "syntax": "hgvs.c", "value": "NM_000222.2:c.2447A>T", - "type": "Expression" + "type": "Expression", }, { "syntax": "hgvs.p", "value": "NP_000213.1:p.Asp816Val", - "type": "Expression" + "type": "Expression", }, { "syntax": "hgvs.c", "value": "ENST00000288135.5:c.2447A>T", - "type": "Expression" + "type": "Expression", }, { "syntax": "hgvs.g", "value": "NC_000004.11:g.55599321A>T", - "type": "Expression" - } + "type": "Expression", + }, ], - "gene_context": "civic.gid:29" + "gene_context": "civic.gid:29", } @@ -1585,9 +1441,7 @@ def civic_did3(): "type": "DiseaseDescriptor", "label": "Acute Myeloid Leukemia", "disease_id": "ncit:C3171", - "xrefs": [ - "DOID:9119" - ] + "xrefs": ["DOID:9119"], } @@ -1598,19 +1452,10 @@ 
def civic_gid29(): "id": "civic.gid:29", "type": "GeneDescriptor", "label": "KIT", - "description": "c-KIT activation has been shown to have oncogenic activity in gastrointestinal stromal tumors (GISTs), melanomas, lung cancer, and other tumor types. The targeted therapeutics nilotinib and sunitinib have shown efficacy in treating KIT overactive patients, and are in late-stage trials in melanoma and GIST. KIT overactivity can be the result of many genomic events from genomic amplification to overexpression to missense mutations. Missense mutations have been shown to be key players in mediating clinical response and acquired resistance in patients being treated with these targeted therapeutics.", # noqa: E501 + "description": "c-KIT activation has been shown to have oncogenic activity in gastrointestinal stromal tumors (GISTs), melanomas, lung cancer, and other tumor types. The targeted therapeutics nilotinib and sunitinib have shown efficacy in treating KIT overactive patients, and are in late-stage trials in melanoma and GIST. KIT overactivity can be the result of many genomic events from genomic amplification to overexpression to missense mutations. Missense mutations have been shown to be key players in mediating clinical response and acquired resistance in patients being treated with these targeted therapeutics.", "gene_id": "hgnc:6342", - "alternate_labels": [ - "MASTC", - "KIT", - "SCFR", - "PBT", - "CD117", - "C-Kit" - ], - "xrefs": [ - "ncbigene:3815" - ] + "alternate_labels": ["MASTC", "KIT", "SCFR", "PBT", "CD117", "C-Kit"], + "xrefs": ["ncbigene:3815"], } @@ -1619,7 +1464,7 @@ def civic_eid1756_statement(): """Create test fixture for CIViC EID1756 statement.""" return { "id": "civic.eid:1756", - "description": "Study of 1817 PCa cases and 2026 cancer free controls to clarify the association of (MTHFR)c.677C>T (and c.1298A>C ) of pancreatic cancer risk in a population of Han Chinese in Shanghai. Results indicated a lower risk for the heterozygous CT genotype and homozygous TT genotype carriers of (MTHFR)c.677C>T which had a significantly lower risk of developing pancreatic cancer compared with the wild-type CC genotype.", # noqa: E501 + "description": "Study of 1817 PCa cases and 2026 cancer free controls to clarify the association of (MTHFR)c.677C>T (and c.1298A>C ) of pancreatic cancer risk in a population of Han Chinese in Shanghai. 
Results indicated a lower risk for the heterozygous CT genotype and homozygous TT genotype carriers of (MTHFR)c.677C>T which had a significantly lower risk of developing pancreatic cancer compared with the wild-type CC genotype.", "direction": "supports", "evidence_level": "civic.evidence_level:B", "proposition": "proposition:cDLAt3AJPrHQPQ--JpKU4MkU528_kE-a", @@ -1628,7 +1473,7 @@ def civic_eid1756_statement(): "disease_descriptor": "civic.did:556", "method": "method:1", "supported_by": ["pmid:27819322"], - "type": "Statement" + "type": "Statement", } @@ -1640,7 +1485,7 @@ def civic_eid1756_proposition(): "predicate": "is_prognostic_of_better_outcome_for", "subject": "ga4gh:VA.Nq7ozfH2X6m1PGr_n38E-F0NZ7I9UASP", "object_qualifier": "ncit:C9005", - "type": "prognostic_proposition" + "type": "prognostic_proposition", } @@ -1659,26 +1504,16 @@ def civic_vid258(): "interval": { "end": {"value": 222, "type": "Number"}, "start": {"value": 221, "type": "Number"}, - "type": "SequenceInterval" + "type": "SequenceInterval", }, "sequence_id": "ga4gh:SQ.4RSETawLfMkNpQBPepa7Uf9ItHAEJUde", - "type": "SequenceLocation" - }, - "state": { - "sequence": "V", - "type": "LiteralSequenceExpression" + "type": "SequenceLocation", }, - "type": "Allele" + "state": {"sequence": "V", "type": "LiteralSequenceExpression"}, + "type": "Allele", }, - "xrefs": [ - "clinvar:3520", - "caid:CA170990", - "dbsnp:1801133" - ], - "alternate_labels": [ - "C677T", - "ALA222VAL" - ], + "xrefs": ["clinvar:3520", "caid:CA170990", "dbsnp:1801133"], + "alternate_labels": ["C677T", "ALA222VAL"], "extensions": [ { "name": "civic_representative_coordinate", @@ -1690,40 +1525,32 @@ def civic_vid258(): "variant_bases": "A", "representative_transcript": "ENST00000376592.1", "ensembl_version": 75, - "reference_build": "GRCh37" + "reference_build": "GRCh37", }, - "type": "Extension" + "type": "Extension", }, - { - "name": "civic_actionability_score", - "value": "55", - "type": "Extension" - } + {"name": "civic_actionability_score", "value": "55", "type": "Extension"}, ], "structural_type": "SO:0001583", "expressions": [ - { - "syntax": "hgvs.c", - "value": "NM_005957.4:c.665C>T", - "type": "Expression" - }, + {"syntax": "hgvs.c", "value": "NM_005957.4:c.665C>T", "type": "Expression"}, { "syntax": "hgvs.p", "value": "NP_005948.3:p.Ala222Val", - "type": "Expression" + "type": "Expression", }, { "syntax": "hgvs.c", "value": "ENST00000376592.1:c.665G>A", - "type": "Expression" + "type": "Expression", }, { "syntax": "hgvs.g", "value": "NC_000001.10:g.11856378G>A", - "type": "Expression" - } + "type": "Expression", + }, ], - "gene_context": "civic.gid:3672" + "gene_context": "civic.gid:3672", } @@ -1735,9 +1562,7 @@ def civic_did556(): "type": "DiseaseDescriptor", "label": "Pancreatic Cancer", "disease_id": "ncit:C9005", - "xrefs": [ - "DOID:1793" - ] + "xrefs": ["DOID:1793"], } @@ -1749,12 +1574,8 @@ def civic_gid3672(): "type": "GeneDescriptor", "label": "MTHFR", "gene_id": "hgnc:7436", - "alternate_labels": [ - "MTHFR" - ], - "xrefs": [ - "ncbigene:4524" - ] + "alternate_labels": ["MTHFR"], + "xrefs": ["ncbigene:4524"], } @@ -1765,7 +1586,7 @@ def pmid_15146165(): "id": "pmid:15146165", "label": "Lasota et al., 2004, Lab. Invest.", "type": "Document", - "description": "A great majority of GISTs with PDGFRA mutations represent gastric tumors of low or no malignant potential." 
# noqa: E501 + "description": "A great majority of GISTs with PDGFRA mutations represent gastric tumors of low or no malignant potential.", } @@ -1776,7 +1597,7 @@ def pmid_18073307(): "type": "Document", "id": "pmid:18073307", "label": "Elisei et al., 2008, J. Clin. Endocrinol. Metab.", - "description": "Prognostic significance of somatic RET oncogene mutations in sporadic medullary thyroid cancer: a 10-year follow-up study." # noqa: E501 + "description": "Prognostic significance of somatic RET oncogene mutations in sporadic medullary thyroid cancer: a 10-year follow-up study.", } @@ -1786,8 +1607,8 @@ def pmid_16384925(): return { "id": "pmid:16384925", "label": "Cairoli et al., 2006, Blood", - "description": "Prognostic impact of c-KIT mutations in core binding factor leukemias: an Italian retrospective study.", # noqa: E501 - "type": "Document" + "description": "Prognostic impact of c-KIT mutations in core binding factor leukemias: an Italian retrospective study.", + "type": "Document", } @@ -1798,37 +1619,38 @@ def pmid_27819322(): "type": "Document", "id": "pmid:27819322", "label": "Wu et al., 2016, Sci Rep", - "description": "MTHFR c.677C>T Inhibits Cell Proliferation and Decreases Prostate Cancer Susceptibility in the Han Chinese Population in Shanghai.", # noqa: E501 - "xrefs": ["pmc:PMC5098242"] + "description": "MTHFR c.677C>T Inhibits Cell Proliferation and Decreases Prostate Cancer Susceptibility in the Han Chinese Population in Shanghai.", + "xrefs": ["pmc:PMC5098242"], } @pytest.fixture(scope="session") def moa_aid66_study( - moa_vid66, moa_abl1, moa_imatinib, moa_chronic_myelogenous_leukemia, moa_method, - moa_source44 + moa_vid66, + moa_abl1, + moa_imatinib, + moa_chronic_myelogenous_leukemia, + moa_method, + moa_source44, ): """Create a Variant Therapeutic Response Study test fixture for MOA Assertion 66.""" return { "id": "moa.assertion:66", - "description": "T315I mutant ABL1 in p210 BCR-ABL cells resulted in retained high levels of phosphotyrosine at increasing concentrations of inhibitor STI-571, whereas wildtype appropriately received inhibition.", # noqa: E501 + "description": "T315I mutant ABL1 in p210 BCR-ABL cells resulted in retained high levels of phosphotyrosine at increasing concentrations of inhibitor STI-571, whereas wildtype appropriately received inhibition.", "direction": "none", "strength": { "code": "e000009", "label": "preclinical evidence", - "system": "https://go.osu.edu/evidence-codes" + "system": "https://go.osu.edu/evidence-codes", }, "predicate": "predictsResistanceTo", "variant": moa_vid66, "therapeutic": moa_imatinib, "tumorType": moa_chronic_myelogenous_leukemia, - "qualifiers": { - "alleleOrigin": "somatic", - "geneContext": moa_abl1 - }, + "qualifiers": {"alleleOrigin": "somatic", "geneContext": moa_abl1}, "specifiedBy": moa_method, "isReportedIn": [moa_source44], - "type": "VariantTherapeuticResponseStudy" + "type": "VariantTherapeuticResponseStudy", } @@ -1849,15 +1671,12 @@ def moa_vid66(): "type": "SequenceLocation", "sequenceReference": { "type": "SequenceReference", - "refgetAccession": "SQ.dmFigTG-0fY6I54swb7PoDuxCeT6O3Wg" + "refgetAccession": "SQ.dmFigTG-0fY6I54swb7PoDuxCeT6O3Wg", }, "start": 314, - "end": 315 + "end": 315, }, - "state": { - "type": "LiteralSequenceExpression", - "sequence": "I" - } + "state": {"type": "LiteralSequenceExpression", "sequence": "I"}, }, "extensions": [ { @@ -1870,26 +1689,26 @@ def moa_vid66(): "alternate_allele": "T", "cdna_change": "c.944C>T", "protein_change": "p.T315I", - "exon": "5" + "exon": 
"5", }, - "type": "Extension" + "type": "Extension", } ], "mappings": [ { "coding": { "system": "https://moalmanac.org/api/features/", - "code": "66" + "code": "66", }, - "relation": "exactMatch" + "relation": "exactMatch", }, { "coding": { "system": "https://www.ncbi.nlm.nih.gov/snp/", - "code": "rs121913459" + "code": "rs121913459", }, - "relation": "relatedMatch" - } + "relation": "relatedMatch", + }, ], } @@ -1902,12 +1721,8 @@ def moa_abl1(): "type": "Gene", "label": "ABL1", "extensions": [ - { - "type": "Extension", - "name": "gene_normalizer_id", - "value": "hgnc:76" - } - ] + {"type": "Extension", "name": "gene_normalizer_id", "value": "hgnc:76"} + ], } @@ -1932,9 +1747,9 @@ def moa_imatinib(): "mappings": [ { "coding": {"code": "C9235", "system": "ncit"}, - "relation": "relatedMatch" + "relation": "relatedMatch", } - ] + ], }, { "id": "hemonc:582", @@ -1943,9 +1758,9 @@ def moa_imatinib(): "mappings": [ { "coding": {"code": "C3174", "system": "ncit"}, - "relation": "relatedMatch" + "relation": "relatedMatch", } - ] + ], }, { "id": "hemonc:24309", @@ -1954,9 +1769,9 @@ def moa_imatinib(): "mappings": [ { "coding": {"code": "C3167", "system": "ncit"}, - "relation": "relatedMatch" + "relation": "relatedMatch", } - ] + ], }, { "id": "hemonc:634", @@ -1965,9 +1780,9 @@ def moa_imatinib(): "mappings": [ { "coding": {"code": "C3247", "system": "ncit"}, - "relation": "relatedMatch" + "relation": "relatedMatch", } - ] + ], }, { "id": "hemonc:602", @@ -1976,14 +1791,14 @@ def moa_imatinib(): "mappings": [ { "coding": {"code": "C3868", "system": "ncit"}, - "relation": "relatedMatch" + "relation": "relatedMatch", } - ] + ], }, { "id": "hemonc:33893", "type": "Disease", - "label": "Chronic myelogenous leukemia pediatric" + "label": "Chronic myelogenous leukemia pediatric", }, { "id": "hemonc:667", @@ -1992,9 +1807,9 @@ def moa_imatinib(): "mappings": [ { "coding": {"code": "C9306", "system": "ncit"}, - "relation": "relatedMatch" + "relation": "relatedMatch", } - ] + ], }, { "id": "hemonc:616", @@ -2003,22 +1818,19 @@ def moa_imatinib(): "mappings": [ { "coding": {"code": "C27038", "system": "ncit"}, - "relation": "relatedMatch" + "relation": "relatedMatch", } - ] - } - ] - } + ], + }, + ], + }, }, { "type": "Extension", "name": "therapy_normalizer_data", - "value": { - "normalized_id": "rxcui:282388", - "label": "imatinib" - } - } - ] + "value": {"normalized_id": "rxcui:282388", "label": "imatinib"}, + }, + ], } @@ -2036,8 +1848,8 @@ def moa_chronic_myelogenous_leukemia(): "value": { "normalized_id": "ncit:C3174", "label": "Chronic Myelogenous Leukemia, BCR-ABL1 Positive", - "mondo_id": "0011996" - } + "mondo_id": "0011996", + }, } ], "mappings": [ @@ -2045,11 +1857,11 @@ def moa_chronic_myelogenous_leukemia(): "coding": { "label": "Chronic Myelogenous Leukemia", "system": "https://oncotree.mskcc.org/", - "code": "CML" + "code": "CML", }, - "relation": "exactMatch" + "relation": "exactMatch", } - ] + ], } @@ -2061,11 +1873,11 @@ def civic_method(): "label": "CIViC Curation SOP (2019)", "isReportedIn": { "label": "Danos et al., 2019, Genome Med.", - "title": "Standard operating procedure for curation and clinical interpretation of variants in cancer", # noqa: E501 + "title": "Standard operating procedure for curation and clinical interpretation of variants in cancer", "doi": "10.1186/s13073-019-0687-x", - "pmid": 31779674 + "pmid": 31779674, }, - "type": "Method" + "type": "Method", } @@ -2077,11 +1889,11 @@ def moa_method(): "label": "MOAlmanac (2021)", "isReportedIn": { "label": "Reardon, B., 
Moore, N.D., Moore, N.S. et al.", - "title": "Integrating molecular profiles into clinical frameworks through the Molecular Oncology Almanac to prospectively guide precision oncology", # noqa: E501 + "title": "Integrating molecular profiles into clinical frameworks through the Molecular Oncology Almanac to prospectively guide precision oncology", "doi": "10.1038/s43018-021-00243-3", - "pmid": 35121878 + "pmid": 35121878, }, - "type": "Method" + "type": "Method", } @@ -2090,14 +1902,11 @@ def method3(): """Create test fixture for method:3.""" return { "id": "method:3", - "label": "Standards and guidelines for the interpretation of sequence variants: a joint consensus recommendation of the American College of Medical Genetics and Genomics and the Association for Molecular Pathology", # noqa: E501 + "label": "Standards and guidelines for the interpretation of sequence variants: a joint consensus recommendation of the American College of Medical Genetics and Genomics and the Association for Molecular Pathology", "url": "https://pubmed.ncbi.nlm.nih.gov/25741868/", - "version": { - "year": 2015, - "month": 5 - }, + "version": {"year": 2015, "month": 5}, "type": "Method", - "authors": "Richards S, Aziz N, Bale S, et al." + "authors": "Richards S, Aziz N, Bale S, et al.", } @@ -2106,15 +1915,11 @@ def method4(): """Create a test fixture for MOA method:4.""" return { "id": "method:4", - "label": "Clinical interpretation of integrative molecular profiles to guide precision cancer medicine", # noqa: E501 + "label": "Clinical interpretation of integrative molecular profiles to guide precision cancer medicine", "url": "https://www.biorxiv.org/content/10.1101/2020.09.22.308833v1", "type": "Method", - "version": { - "year": 2020, - "month": 9, - "day": 22 - }, - "authors": "Reardon, B., Moore, N.D., Moore, N. et al." + "version": {"year": 2020, "month": 9, "day": 22}, + "authors": "Reardon, B., Moore, N.D., Moore, N. et al.", } @@ -2130,17 +1935,13 @@ def moa_source44(): return { "id": "moa.source:44", "extensions": [ - { - "type": "Extension", - "name": "source_type", - "value": "Journal" - } + {"type": "Extension", "name": "source_type", "value": "Journal"} ], "type": "Document", - "title": "Gorre, Mercedes E., et al. Clinical resistance to STI-571 cancer therapy caused by BCR-ABL gene mutation or amplification. Science 293.5531 (2001): 876-880.", # noqa: E501 + "title": "Gorre, Mercedes E., et al. Clinical resistance to STI-571 cancer therapy caused by BCR-ABL gene mutation or amplification. 
Science 293.5531 (2001): 876-880.", "url": "https://doi.org/10.1126/science.1062538", "doi": "10.1126/science.1062538", - "pmid": 11423618 + "pmid": 11423618, } @@ -2165,34 +1966,39 @@ def _dict_check(expected_d: dict, actual_d: dict, is_cdm: bool = False) -> None: if isinstance(v, dict): if v.get("name") in { "therapy_normalizer_data", - "disease_normalizer_data" + "disease_normalizer_data", }: updated_ext = v.copy() - normalizer_data_type = v["name"].split("_normalizer_data")[0] # noqa: E501 - updated_ext["name"] = f"{normalizer_data_type}_normalizer_id" # noqa: E501 + normalizer_data_type = v["name"].split("_normalizer_data")[ + 0 + ] + updated_ext[ + "name" + ] = f"{normalizer_data_type}_normalizer_id" updated_ext["value"] = v["value"]["normalized_id"] expected_l.append(json.dumps(updated_ext, sort_keys=True)) continue - else: - new_extensions = [] - extensions = v.get("extensions") or [] - for ext in extensions: - if ext.get("name") in { - "therapy_normalizer_data", - "disease_normalizer_data" - }: - normalizer_data_type = ext["name"].split("_normalizer_data")[0] # noqa: E501 - new_extensions.append( - { - "name": f"{normalizer_data_type}_normalizer_id", # noqa: E501 - "type": "Extension", - "value": ext["value"]["normalized_id"] - } - ) - else: - new_extensions.append(ext) - if extensions: - v["extensions"] = new_extensions + new_extensions = [] + extensions = v.get("extensions") or [] + for ext in extensions: + if ext.get("name") in { + "therapy_normalizer_data", + "disease_normalizer_data", + }: + normalizer_data_type = ext["name"].split( + "_normalizer_data" + )[0] + new_extensions.append( + { + "name": f"{normalizer_data_type}_normalizer_id", + "type": "Extension", + "value": ext["value"]["normalized_id"], + } + ) + else: + new_extensions.append(ext) + if extensions: + v["extensions"] = new_extensions expected_l.append(json.dumps(v, sort_keys=True)) assert set(actual_l) == set(expected_l), k else: @@ -2208,6 +2014,7 @@ def assertion_checks(): :param is_cdm: Whether checks are for transformers (CDM) or query handler. 
CDM have extra fields that are not exposed to the query handler """ + def _check(actual_data: list, test_data: list, is_cdm: bool = False) -> None: assert len(actual_data) == len(test_data) for expected in test_data: @@ -2221,18 +2028,19 @@ def _check(actual_data: list, test_data: list, is_cdm: bool = False) -> None: continue assert found_match, f"Did not find {expected['id']} in response" + return _check @pytest.fixture(scope="session") def check_transformed_cdm(assertion_checks): """Test fixture to compare CDM transformations.""" - def check_transformed_cdm( - data, studies, transformed_file - ): + + def check_transformed_cdm(data, studies, transformed_file): """Test that transform to CDM works correctly.""" assertion_checks(data["studies"], studies, is_cdm=True) - os.remove(transformed_file) + transformed_file.unlink() + return check_transformed_cdm diff --git a/tests/unit/database/test_database.py b/tests/unit/database/test_database.py index 5dbdbfee..c646346a 100644 --- a/tests/unit/database/test_database.py +++ b/tests/unit/database/test_database.py @@ -26,34 +26,39 @@ def graph(): @pytest.fixture(scope="module") def get_node_by_id(graph: Graph): """Return node by its ID""" + def _get_node(node_id: str): query = f"MATCH (n {{id: '{node_id}'}}) RETURN (n)" with graph.driver.session() as s: record = s.run(query).single(strict=True) return record[0] + return _get_node @pytest.fixture(scope="module") def check_unique_property(graph: Graph): """Verify that nodes satisfy uniqueness property""" - def _check_function(label: str, property: str): + + def _check_function(label: str, prop: str): query = f""" MATCH (x:{label}) - WITH x.{property} AS {property}, COUNT(x) AS x_count + WITH x.{prop} AS {prop}, COUNT(x) AS x_count WHERE x_count > 1 - RETURN COUNT({property}) + RETURN COUNT({prop}) """ with graph.driver.session() as s: record = s.run(query).single() assert record.values()[0] == 0 + return _check_function @pytest.fixture(scope="module") def get_node_labels(graph: Graph): """Get node labels""" + def _get_labels_function(parent_label: str): query = f""" MATCH (n:{parent_label}) @@ -62,20 +67,20 @@ def _get_labels_function(parent_label: str): with graph.driver.session() as s: record = s.run(query).single() return record.values()[0] + return _get_labels_function @pytest.fixture(scope="module") def check_node_labels(get_node_labels: callable): """Check node labels match expected""" + def _check_function( - node_label: str, - expected: List[Set[str]], - expected_num_labels: int + node_label: str, expected: List[Set[str]], expected_num_labels: int ): node_labels = get_node_labels(node_label) assert len(node_labels) == expected_num_labels - node_labels_set = list(set(x) for x in node_labels) + node_labels_set = [set(x) for x in node_labels] for e in expected: assert e in node_labels_set @@ -85,6 +90,7 @@ def _check_function( @pytest.fixture(scope="module") def check_study_relation(graph: Graph): """Check that node is used in a study.""" + def _check_function(value_label: str): query = f""" MATCH (d:{value_label}) @@ -96,6 +102,7 @@ def _check_function(value_label: str): with graph.driver.session() as s: record = s.run(query).single() assert record.values()[0] == 0 + return _check_function @@ -104,9 +111,15 @@ def check_relation_count(graph: Graph): """Check that the quantity of relationships from one Node type to another are within a certain range. 
""" - def _check_function(self_label: str, other_label: str, relation: str, - min: int = 1, max: Optional[int] = 1, - direction: Optional[str] = "out"): + + def _check_function( + self_label: str, + other_label: str, + relation: str, + min_rels: int = 1, + max_rels: Optional[int] = 1, + direction: Optional[str] = "out", + ): if direction == "out": rel_query = f"-[:{relation}]->" elif direction == "in": @@ -114,24 +127,27 @@ def _check_function(self_label: str, other_label: str, relation: str, elif direction is None: rel_query = f"-[:{relation}]-" else: - raise ValueError("direction must be 'out', 'in' or None") + msg = "direction must be 'out', 'in' or None" + raise ValueError(msg) query = f""" MATCH (s:{self_label}) OPTIONAL MATCH (s){rel_query}(d:{other_label}) WITH s, COUNT(d) as d_count - WHERE d_count < {min} - {f"OR d_count > {max}" if max is not None else ""} + WHERE d_count < {min_rels} + {f"OR d_count > {max_rels}" if max_rels is not None else ""} RETURN COUNT(s) """ with graph.driver.session() as s: record = s.run(query).single() assert record.values()[0] == 0 + return _check_function @pytest.fixture(scope="module") def check_extension_props(): """Check that node extension properties match expected""" + def _check_function( node: Node, fixture_extensions: List[Dict], ext_names: Set[str] ): @@ -149,6 +165,7 @@ def _check_function( assert node[ext["name"]] == ext["value"] checked.add(ext["name"]) assert checked == ext_names + return _check_function @@ -157,10 +174,15 @@ def check_node_props(): """Check that node properties match expected. For extensions, use `check_extension_props` """ + def _check_function( - node: Node, fixture: Dict, expected_keys: Set[str], - extension_names: Set[str] = set() + node: Node, + fixture: Dict, + expected_keys: Set[str], + extension_names: Optional[Set[str]] = None, ): + if extension_names is None: + extension_names = set() assert node.keys() == expected_keys for k in expected_keys - extension_names: if k == "mappings": @@ -169,6 +191,7 @@ def _check_function( assert set(node[k]) == set(fixture[k]) else: assert node[k] == fixture[k] + return _check_function @@ -179,7 +202,7 @@ def test_gene_rules( get_node_by_id, civic_gid5, check_node_props, - check_extension_props + check_extension_props, ): """Verify property and relationship rules for Gene nodes.""" check_unique_property("Gene", "id") @@ -194,29 +217,43 @@ def test_gene_rules( extension_names = {"gene_normalizer_id"} check_extension_props(gene, civic_gid5["extensions"], extension_names) expected_keys = { - "gene_normalizer_id", "label", "id", "description", "mappings", "type", - "aliases" + "gene_normalizer_id", + "label", + "id", + "description", + "mappings", + "type", + "aliases", } check_node_props(gene, civic_gid5, expected_keys, extension_names) def test_variation_rules( - graph, check_unique_property, + graph, + check_unique_property, check_relation_count, get_node_by_id, check_node_labels, - civic_vid12 + civic_vid12, ): """Verify property and relationship rules for Variation nodes.""" check_unique_property("Variation", "id") # members dont have defining context check_relation_count( - "Variation", "CategoricalVariation", "HAS_DEFINING_CONTEXT", direction="in", - min=0, max=None + "Variation", + "CategoricalVariation", + "HAS_DEFINING_CONTEXT", + direction="in", + min=0, + max=None, ) check_relation_count( - "Variation", "CategoricalVariation", "HAS_MEMBERS", min=0, max=None, - direction="in" + "Variation", + "CategoricalVariation", + "HAS_MEMBERS", + min=0, + max=None, + direction="in", 
) expected_labels = [{"Variation", "Allele"}] @@ -238,8 +275,14 @@ def test_variation_rules( v = get_node_by_id(civic_vid12["id"]) assert set(v.keys()) == { - "id", "label", "digest", "state", "expression_hgvs_p", "expression_hgvs_c", - "expression_hgvs_g", "type" + "id", + "label", + "digest", + "state", + "expression_hgvs_p", + "expression_hgvs_c", + "expression_hgvs_g", + "type", } assert v["type"] == "Allele" @@ -267,7 +310,7 @@ def test_categorical_variation_rules( check_relation_count, check_node_labels, get_node_by_id, - civic_mpid12 + civic_mpid12, ): """Verify property and relationship rules for Categorical Variation nodes.""" check_unique_property("CategoricalVariation", "id") @@ -291,7 +334,7 @@ def test_categorical_variation_rules( "civic_representative_coordinate", "mappings", "variant_types", - "type" + "type", } assert cv["type"] == civic_mpid12["type"] assert cv["label"] == civic_mpid12["label"] @@ -308,12 +351,12 @@ def test_categorical_variation_rules( "chromosome", "start", "stop", - "type" + "type", } mappings = json.loads(cv["mappings"]) for m in mappings: - assert m["coding"] and isinstance(m["coding"], dict) - assert m["relation"] and isinstance(m["relation"], str) + assert isinstance(m["coding"], dict) + assert isinstance(m["relation"], str) variant_types = json.loads(cv["variant_types"]) for vt in variant_types: @@ -321,10 +364,7 @@ def test_categorical_variation_rules( def test_location_rules( - check_unique_property, - check_relation_count, - check_node_labels, - get_node_by_id + check_unique_property, check_relation_count, check_node_labels, get_node_by_id ): """Verify property and relationship rules for Location nodes.""" check_unique_property("Location", "id") @@ -339,11 +379,16 @@ def test_location_rules( loc_digest = "7qyw-4VUk3oCczBuoaF_8vGQo19dM_mk" loc = get_node_by_id(f"ga4gh:SL.{loc_digest}") assert set(loc.keys()) == { - "id", "digest", "sequence_reference", "start", "end", "type" + "id", + "digest", + "sequence_reference", + "start", + "end", + "type", } assert json.loads(loc["sequence_reference"]) == { "type": "SequenceReference", - "refgetAccession": "SQ.vyo55F6mA6n2LgN4cagcdRzOuh38V4mE" + "refgetAccession": "SQ.vyo55F6mA6n2LgN4cagcdRzOuh38V4mE", } assert loc["start"] == 766 assert loc["end"] == 769 @@ -360,15 +405,19 @@ def test_therapeutic_procedure_rules( check_node_props, check_extension_props, civic_ct, - civic_tsg + civic_tsg, ): """Verify property and relationship rules for Therapeutic Procedure nodes.""" check_unique_property("TherapeuticProcedure", "id") # min is 0 because TherapeuticAgent may not be attached to study directly, but # through CombinationTherapy and TherapeuticSubstituteGroup check_relation_count( - "TherapeuticProcedure", "Study", "HAS_THERAPEUTIC", min=0, max=None, - direction="in" + "TherapeuticProcedure", + "Study", + "HAS_THERAPEUTIC", + min=0, + max=None, + direction="in", ) check_relation_count( "CombinationTherapy", "TherapeuticAgent", "HAS_COMPONENTS", max=None @@ -380,14 +429,17 @@ def test_therapeutic_procedure_rules( "TherapeuticSubstituteGroup", "TherapeuticAgent", "HAS_SUBSTITUTES", max=None ) check_relation_count( - "TherapeuticSubstituteGroup", "Study", "HAS_THERAPEUTIC", max=None, - direction="in" + "TherapeuticSubstituteGroup", + "Study", + "HAS_THERAPEUTIC", + max=None, + direction="in", ) expected_node_labels = [ {"TherapeuticProcedure", "TherapeuticAgent"}, {"TherapeuticProcedure", "CombinationTherapy"}, - {"TherapeuticProcedure", "TherapeuticSubstituteGroup"} + {"TherapeuticProcedure", 
"TherapeuticSubstituteGroup"}, ] check_node_labels("TherapeuticProcedure", expected_node_labels, 3) @@ -396,8 +448,13 @@ def test_therapeutic_procedure_rules( extension_names = {"therapy_normalizer_id", "regulatory_approval"} check_extension_props(ta, civic_tid146["extensions"], extension_names) expected_keys = { - "id", "label", "aliases", "therapy_normalizer_id", "regulatory_approval", - "mappings", "type" + "id", + "label", + "aliases", + "therapy_normalizer_id", + "regulatory_approval", + "mappings", + "type", } check_node_props(ta, civic_tid146, expected_keys, extension_names) @@ -423,7 +480,7 @@ def test_condition_rules( get_node_by_id, civic_did8, check_node_props, - check_extension_props + check_extension_props, ): """Verify property and relationship rules for condition nodes.""" check_unique_property("Condition", "id") @@ -448,7 +505,7 @@ def test_study_rules( check_node_labels, get_node_by_id, civic_eid2997_study, - check_node_props + check_node_props, ): """Verify property and relationship rules for Study nodes.""" check_unique_property("Study", "id") @@ -476,10 +533,17 @@ def test_study_rules( study = get_node_by_id(civic_eid2997_study["id"]) expected_keys = { - "id", "description", "direction", "predicate", "alleleOrigin", "type" + "id", + "description", + "direction", + "predicate", + "alleleOrigin", + "type", } civic_eid2997_study_cp = civic_eid2997_study.copy() - civic_eid2997_study_cp["alleleOrigin"] = civic_eid2997_study_cp["qualifiers"]["alleleOrigin"] # noqa: E501 + civic_eid2997_study_cp["alleleOrigin"] = civic_eid2997_study_cp["qualifiers"][ + "alleleOrigin" + ] check_node_props(study, civic_eid2997_study_cp, expected_keys) @@ -491,7 +555,7 @@ def test_document_rules( get_node_by_id, moa_source44, check_node_props, - check_extension_props + check_extension_props, ): """Verify property and relationship rules for Document nodes.""" check_unique_property("Document", "id") @@ -536,7 +600,7 @@ def test_method_rules( check_relation_count, get_node_by_id, civic_method, - check_node_props + check_node_props, ): """Verify property and relationship rules for Method nodes.""" check_unique_property("Method", "id") diff --git a/tests/unit/harvesters/moa/test_moa_assertions.py b/tests/unit/harvesters/moa/test_moa_assertions.py index 9a04ec72..88c02d9a 100644 --- a/tests/unit/harvesters/moa/test_moa_assertions.py +++ b/tests/unit/harvesters/moa/test_moa_assertions.py @@ -1,10 +1,11 @@ """Test MOAlmanac assertions""" + import json +from unittest.mock import patch import pytest -from mock import patch -from metakb import PROJECT_ROOT # noqa: I202 +from metakb import PROJECT_ROOT from metakb.harvesters import MoaHarvester @@ -14,11 +15,11 @@ def assertion165(): return { "id": 165, "context": "Resistance to BRAFi monotherapy", - "description": "Administration of bevacizumab in a dabrafenib-resistant melanoma cancer cell line (A375R) counteracted the tumor growth stimulating effect of administering dabrafenib post-resistance. This study suggests that a regime which combines BRAFi with bevacizumab or inhibitors of PI3K/Akt/mTOR may be more effective than BRAFi monotherapy in the setting of resistance.", # noqa: E501 + "description": "Administration of bevacizumab in a dabrafenib-resistant melanoma cancer cell line (A375R) counteracted the tumor growth stimulating effect of administering dabrafenib post-resistance. 
This study suggests that a regime which combines BRAFi with bevacizumab or inhibitors of PI3K/Akt/mTOR may be more effective than BRAFi monotherapy in the setting of resistance.", "disease": { "name": "Melanoma", "oncotree_code": "MEL", - "oncotree_term": "Melanoma" + "oncotree_term": "Melanoma", }, "therapy_name": "Dabrafenib + Bevacizumab", "therapy_type": "Targeted therapy", @@ -44,30 +45,28 @@ def assertion165(): "rsid": "rs113488022", "start_position": "140453136", "variant_annotation": "Missense", - "feature": "BRAF p.V600E (Missense)" - } + "feature": "BRAF p.V600E (Missense)", + }, } @patch.object(MoaHarvester, "_get_all_variants") @patch.object(MoaHarvester, "_get_all_assertions") -def test_assertion_170(test_get_all_assertions, test_get_all_variants, - assertion165): +def test_assertion_170(test_get_all_assertions, test_get_all_variants, assertion165): """Test moa harvester works correctly for assertions.""" - with open(f"{PROJECT_ROOT}/tests/data/" - f"harvesters/moa/assertions.json") as f: + with ( + PROJECT_ROOT / "tests" / "data" / "harvesters/moa/assertions.json" + ).open() as f: data = json.load(f) test_get_all_assertions.return_value = data - with open(f"{PROJECT_ROOT}/tests/data/" - f"harvesters/moa/variants.json") as f: + with (PROJECT_ROOT / "tests" / "data" / "harvesters/moa/variants.json").open() as f: data = json.load(f) test_get_all_variants.return_value = data assertion_resp = MoaHarvester()._get_all_assertions() _, variants_list = MoaHarvester().harvest_variants() - assertions = MoaHarvester().harvest_assertions( - assertion_resp, variants_list) + assertions = MoaHarvester().harvest_assertions(assertion_resp, variants_list) actual = None for a in assertions: diff --git a/tests/unit/harvesters/moa/test_moa_harvest.py b/tests/unit/harvesters/moa/test_moa_harvest.py index 59d6ada0..1520025d 100644 --- a/tests/unit/harvesters/moa/test_moa_harvest.py +++ b/tests/unit/harvesters/moa/test_moa_harvest.py @@ -1,14 +1,14 @@ """Test MOAlmanac Harvester.""" -from metakb.harvesters import MoaHarvester + from metakb import APP_ROOT -import os +from metakb.harvesters import MoaHarvester def test_harvest(): """Test MOAlmanac harvest method.""" - fn = 'test_moa_harvester.json' + fn = "test_moa_harvester.json" assert MoaHarvester().harvest(filename=fn) - file_path = APP_ROOT / 'data' / 'moa' / 'harvester' / fn + file_path = APP_ROOT / "data" / "moa" / "harvester" / fn assert file_path.exists() - os.remove(file_path) + file_path.unlink() assert not file_path.exists() diff --git a/tests/unit/harvesters/moa/test_moa_source.py b/tests/unit/harvesters/moa/test_moa_source.py index 30b0603d..e87c55ed 100644 --- a/tests/unit/harvesters/moa/test_moa_source.py +++ b/tests/unit/harvesters/moa/test_moa_source.py @@ -1,10 +1,10 @@ """Test MOAlmanac source""" import json +from unittest.mock import patch -from mock import patch import pytest -from metakb import PROJECT_ROOT # noqa: I202 +from metakb import PROJECT_ROOT from metakb.harvesters import MoaHarvester @@ -26,15 +26,16 @@ def source68(): "nct": "NCT01673854", "pmid": 27532019, "url": "https://doi.org/10.1186/s40425-016-0148-7", - "citation": "Amin A, Lawson DH, Salama AK, et al. Phase II study of vemurafenib followed by ipilimumab in patients with previously untreated BRAF-mutated metastatic melanoma. J Immunother Cancer. 2016;4:44." # noqa: E501 + "citation": "Amin A, Lawson DH, Salama AK, et al. Phase II study of vemurafenib followed by ipilimumab in patients with previously untreated BRAF-mutated metastatic melanoma. 
J Immunother Cancer. 2016;4:44.", } @patch.object(MoaHarvester, "_get_all_assertions") def test_source68(test_get_all_assertions, source68): """Test moa harvester works correctly for evidence.""" - with open(f"{PROJECT_ROOT}/tests/data/" - f"harvesters/moa/assertions.json") as f: + with ( + PROJECT_ROOT / "tests" / "data" / "harvesters/moa/assertions.json" + ).open() as f: data = json.load(f) test_get_all_assertions.return_value = data diff --git a/tests/unit/harvesters/test_base_class.py b/tests/unit/harvesters/test_base_class.py index 41bfd308..0bc4e294 100644 --- a/tests/unit/harvesters/test_base_class.py +++ b/tests/unit/harvesters/test_base_class.py @@ -1,13 +1,13 @@ -"""This module tests the Harvester base class.""" -from metakb.harvesters import base +"""Tests the Harvester base class.""" import pytest +from metakb.harvesters import base + -@pytest.fixture(scope='module') +@pytest.fixture(scope="module") def bh(): """Create a base Harvester fixture for testing.""" - bh = base.Harvester() - return bh + return base.Harvester() def test_base_harvester_harvest_not_implemented(bh): diff --git a/tests/unit/harvesters/test_civic_harvester.py b/tests/unit/harvesters/test_civic_harvester.py index 08be15c8..2b5300ed 100644 --- a/tests/unit/harvesters/test_civic_harvester.py +++ b/tests/unit/harvesters/test_civic_harvester.py @@ -1,15 +1,13 @@ """Test CIViC Harvester class""" -import os import json import pytest -from metakb import PROJECT_ROOT, APP_ROOT +from metakb import APP_ROOT, PROJECT_ROOT from metakb.harvesters import CivicHarvester - TEST_DATA_PATH = PROJECT_ROOT / "tests" / "data" / "harvesters" / "civic" -TEST_CIVICPY_CACHE_PATH = list(sorted(TEST_DATA_PATH.glob("civicpy_cache_*.pkl")))[-1] +TEST_CIVICPY_CACHE_PATH = sorted(TEST_DATA_PATH.glob("civicpy_cache_*.pkl"))[-1] @pytest.fixture(scope="module") @@ -51,45 +49,45 @@ def harvested_assertions(harvester): @pytest.fixture(scope="module") def civic_variant_12(): """Create test fixture for CIViC Variant 12""" - with open(TEST_DATA_PATH / "civic_variant_12.json", "r") as f: + with (TEST_DATA_PATH / "civic_variant_12.json").open() as f: return json.load(f) @pytest.fixture(scope="module") def civic_molecular_profile_12(): """Create test fixture for CIViC Molecular Profile 12""" - with open(TEST_DATA_PATH / "civic_molecular_profile_12.json", "r") as f: + with (TEST_DATA_PATH / "civic_molecular_profile_12.json").open() as f: return json.load(f) @pytest.fixture(scope="module") def civic_gene_5(): """Create test fixture for CIViC Gene 5""" - with open(TEST_DATA_PATH / "civic_gene_5.json", "r") as f: + with (TEST_DATA_PATH / "civic_gene_5.json").open() as f: return json.load(f) @pytest.fixture(scope="module") def civic_eid_3017(): """Create test fixture for CIViC EID 3017""" - with open(TEST_DATA_PATH / "civic_eid_3017.json", "r") as f: + with (TEST_DATA_PATH / "civic_eid_3017.json").open() as f: return json.load(f) @pytest.fixture(scope="module") def civic_aid_7(): """Create test fixture for CIViC AID 7""" - with open(TEST_DATA_PATH / "civic_aid_7.json", "r") as f: + with (TEST_DATA_PATH / "civic_aid_7.json").open() as f: return json.load(f) def test_harvest(harvester): """Test that CIViC harvest method works correctly""" - fn = 'test_civic_harvester.json' + fn = "test_civic_harvester.json" assert harvester.harvest(filename=fn) file_path = APP_ROOT / "data" / "civic" / "harvester" / fn assert file_path.exists() - os.remove(file_path) + file_path.unlink() assert not file_path.exists() @@ -139,7 +137,9 @@ def 
test_civic_evidence(harvested_evidence, civic_eid_3017): elif e["id"] == 6178: assert e["assertion_ids"] == [12, 7] checked.append(e["id"]) - assert len(checked) == 2, f"Expected to check CIViC Evidence Items 3017 and 6178, but only checked {checked}" # noqa: E501 + assert ( + len(checked) == 2 + ), f"Expected to check CIViC Evidence Items 3017 and 6178, but only checked {checked}" assert checked, "CIViC Evidence Item 3017 not in harvested evidence" diff --git a/tests/unit/setup/test_minimal_setup.py b/tests/unit/setup/test_minimal_setup.py index a665cf45..6b3ce292 100644 --- a/tests/unit/setup/test_minimal_setup.py +++ b/tests/unit/setup/test_minimal_setup.py @@ -1,4 +1,4 @@ -"""This module tests basic project setup.""" +"""Tests basic project setup.""" import sys diff --git a/tests/unit/test_search_studies.py b/tests/unit/test_search_studies.py index 18123cab..a0ab689e 100644 --- a/tests/unit/test_search_studies.py +++ b/tests/unit/test_search_studies.py @@ -32,7 +32,7 @@ def find_and_check_study( resp: SearchStudiesService, expected_study: Dict, assertion_checks: callable, - should_find_match: bool = True + should_find_match: bool = True, ): """Check that expected study is or is not in response""" if should_find_match: diff --git a/tests/unit/transform/test_civic_transform_diagnostic.py b/tests/unit/transform/test_civic_transform_diagnostic.py index 0aa30ed9..a89ff8af 100644 --- a/tests/unit/transform/test_civic_transform_diagnostic.py +++ b/tests/unit/transform/test_civic_transform_diagnostic.py @@ -1,42 +1,42 @@ """Test CIViC Transformation to common data model for prognostic.""" +import json + import pytest import pytest_asyncio -from metakb.transform.civic import CivicTransform + from metakb import PROJECT_ROOT -import json +from metakb.transform.civic import CivicTransform DATA_DIR = PROJECT_ROOT / "tests" / "data" / "transform" / "diagnostic" FILENAME = "civic_cdm.json" @pytest_asyncio.fixture(scope="module") -@pytest.mark.asyncio +@pytest.mark.asyncio() async def data(normalizers): """Create a CIViC Transform test fixture.""" harvester_path = DATA_DIR / "civic_harvester.json" - c = CivicTransform(data_dir=DATA_DIR, harvester_path=harvester_path, - normalizers=normalizers) + c = CivicTransform( + data_dir=DATA_DIR, harvester_path=harvester_path, normalizers=normalizers + ) await c.transform() c.create_json(transform_dir=DATA_DIR, filename=FILENAME) - with open(DATA_DIR / FILENAME, "r") as f: - data = json.load(f) - return data + with (DATA_DIR / FILENAME).open() as f: + return json.load(f) @pytest.fixture(scope="module") -def statements(civic_eid2_statement, civic_eid74_statement, - civic_aid9_statement): +def statements(civic_eid2_statement, civic_eid74_statement, civic_aid9_statement): """Create test fixture for statements.""" return [civic_eid2_statement, civic_eid74_statement, civic_aid9_statement] @pytest.fixture(scope="module") -def propositions(civic_eid2_proposition, civic_eid74_proposition, - civic_aid9_proposition): +def propositions( + civic_eid2_proposition, civic_eid74_proposition, civic_aid9_proposition +): """Create test fixture for proposition.""" - return [ - civic_eid2_proposition, civic_eid74_proposition, civic_aid9_proposition - ] + return [civic_eid2_proposition, civic_eid74_proposition, civic_aid9_proposition] @pytest.fixture(scope="module") @@ -64,17 +64,39 @@ def documents(pmid_15146165, pmid_18073307): @pytest.mark.skip(reason="Will be resolved in issue-241") -def test_civic_cdm(data, statements, propositions, variation_descriptors, - gene_descriptors, 
disease_descriptors, - civic_methods, documents, check_statement, - check_proposition, check_variation_descriptor, - check_descriptor, check_document, check_method, - check_transformed_cdm): +def test_civic_cdm( + data, + statements, + propositions, + variation_descriptors, + gene_descriptors, + disease_descriptors, + civic_methods, + documents, + check_statement, + check_proposition, + check_variation_descriptor, + check_descriptor, + check_document, + check_method, + check_transformed_cdm, +): """Test that civic transform works correctly.""" check_transformed_cdm( - data, statements, propositions, variation_descriptors, - gene_descriptors, disease_descriptors, None, - civic_methods, documents, check_statement, check_proposition, - check_variation_descriptor, check_descriptor, check_document, - check_method, DATA_DIR / FILENAME + data, + statements, + propositions, + variation_descriptors, + gene_descriptors, + disease_descriptors, + None, + civic_methods, + documents, + check_statement, + check_proposition, + check_variation_descriptor, + check_descriptor, + check_document, + check_method, + DATA_DIR / FILENAME, ) diff --git a/tests/unit/transform/test_civic_transform_prognostic.py b/tests/unit/transform/test_civic_transform_prognostic.py index 577cc946..75e41261 100644 --- a/tests/unit/transform/test_civic_transform_prognostic.py +++ b/tests/unit/transform/test_civic_transform_prognostic.py @@ -1,26 +1,28 @@ """Test CIViC Transformation to common data model for prognostic.""" +import json + import pytest import pytest_asyncio -from metakb.transform.civic import CivicTransform + from metakb import PROJECT_ROOT -import json +from metakb.transform.civic import CivicTransform DATA_DIR = PROJECT_ROOT / "tests" / "data" / "transform" / "prognostic" FILENAME = "civic_cdm.json" @pytest_asyncio.fixture(scope="module") -@pytest.mark.asyncio +@pytest.mark.asyncio() async def data(normalizers): """Create a CIViC Transform test fixture.""" harvester_path = DATA_DIR / "civic_harvester.json" - c = CivicTransform(data_dir=DATA_DIR, harvester_path=harvester_path, - normalizers=normalizers) + c = CivicTransform( + data_dir=DATA_DIR, harvester_path=harvester_path, normalizers=normalizers + ) await c.transform() c.create_json(transform_dir=DATA_DIR, filename=FILENAME) - with open(DATA_DIR / FILENAME, "r") as f: - data = json.load(f) - return data + with (DATA_DIR / FILENAME).open() as f: + return json.load(f) @pytest.fixture(scope="module") @@ -60,17 +62,39 @@ def documents(pmid_16384925, pmid_27819322): @pytest.mark.skip(reason="Will be resolved in issue-242") -def test_civic_cdm(data, statements, propositions, variation_descriptors, - gene_descriptors, disease_descriptors, - civic_methods, documents, check_statement, - check_proposition, check_variation_descriptor, - check_descriptor, check_document, check_method, - check_transformed_cdm): +def test_civic_cdm( + data, + statements, + propositions, + variation_descriptors, + gene_descriptors, + disease_descriptors, + civic_methods, + documents, + check_statement, + check_proposition, + check_variation_descriptor, + check_descriptor, + check_document, + check_method, + check_transformed_cdm, +): """Test that civic transform works correctly.""" check_transformed_cdm( - data, statements, propositions, variation_descriptors, - gene_descriptors, disease_descriptors, None, - civic_methods, documents, check_statement, check_proposition, - check_variation_descriptor, check_descriptor, check_document, - check_method, DATA_DIR / FILENAME + data, + statements, + 
propositions, + variation_descriptors, + gene_descriptors, + disease_descriptors, + None, + civic_methods, + documents, + check_statement, + check_proposition, + check_variation_descriptor, + check_descriptor, + check_document, + check_method, + DATA_DIR / FILENAME, ) diff --git a/tests/unit/transform/test_civic_transform_therapeutic.py b/tests/unit/transform/test_civic_transform_therapeutic.py index 81d0b5e7..d1f99cd5 100644 --- a/tests/unit/transform/test_civic_transform_therapeutic.py +++ b/tests/unit/transform/test_civic_transform_therapeutic.py @@ -1,27 +1,28 @@ """Test CIViC Transformation to common data model for Therapeutic Response.""" +import json + import pytest import pytest_asyncio -from metakb.transform.civic import CivicTransform -from metakb import PROJECT_ROOT -import json +from metakb import PROJECT_ROOT +from metakb.transform.civic import CivicTransform DATA_DIR = PROJECT_ROOT / "tests" / "data" / "transform" / "therapeutic" FILENAME = "civic_cdm.json" @pytest_asyncio.fixture(scope="module") -@pytest.mark.asyncio +@pytest.mark.asyncio() async def data(normalizers): """Create a CIViC Transform test fixture.""" harvester_path = DATA_DIR / "civic_harvester.json" - c = CivicTransform(data_dir=DATA_DIR, harvester_path=harvester_path, - normalizers=normalizers) + c = CivicTransform( + data_dir=DATA_DIR, harvester_path=harvester_path, normalizers=normalizers + ) await c.transform() c.create_json(transform_dir=DATA_DIR, filename=FILENAME) - with open(DATA_DIR / FILENAME, "r") as f: - data = json.load(f) - return data + with (DATA_DIR / FILENAME).open() as f: + return json.load(f) @pytest.fixture(scope="module") @@ -32,6 +33,4 @@ def studies(civic_eid2997_study, civic_eid816_study, civic_eid9851_study): def test_civic_cdm(data, studies, check_transformed_cdm): """Test that civic transform works correctly.""" - check_transformed_cdm( - data, studies, DATA_DIR / FILENAME - ) + check_transformed_cdm(data, studies, DATA_DIR / FILENAME) diff --git a/tests/unit/transform/test_moa_transform.py b/tests/unit/transform/test_moa_transform.py index 9156fea5..00686e73 100644 --- a/tests/unit/transform/test_moa_transform.py +++ b/tests/unit/transform/test_moa_transform.py @@ -1,26 +1,28 @@ """Test MOA Transformation to common data model""" +import json + import pytest import pytest_asyncio -from metakb.transform.moa import MoaTransform + from metakb import PROJECT_ROOT -import json +from metakb.transform.moa import MoaTransform DATA_DIR = PROJECT_ROOT / "tests" / "data" / "transform" FILENAME = "moa_cdm.json" @pytest_asyncio.fixture(scope="module") -@pytest.mark.asyncio +@pytest.mark.asyncio() async def data(normalizers): """Create a MOA Transform test fixture.""" harvester_path = DATA_DIR / "moa_harvester.json" - moa = MoaTransform(data_dir=DATA_DIR, harvester_path=harvester_path, - normalizers=normalizers) + moa = MoaTransform( + data_dir=DATA_DIR, harvester_path=harvester_path, normalizers=normalizers + ) await moa.transform() moa.create_json(transform_dir=DATA_DIR, filename=FILENAME) - with open(DATA_DIR / FILENAME, "r") as f: - data = json.load(f) - return data + with (DATA_DIR / FILENAME).open() as f: + return json.load(f) @pytest.fixture(scope="module") @@ -39,15 +41,12 @@ def moa_vid145(): "type": "SequenceLocation", "sequenceReference": { "type": "SequenceReference", - "refgetAccession": "SQ.cQvw4UsHHRRlogxbWCB8W-mKD4AraM9y" + "refgetAccession": "SQ.cQvw4UsHHRRlogxbWCB8W-mKD4AraM9y", }, "start": 599, - "end": 600 + "end": 600, }, - "state": { - "type": "LiteralSequenceExpression", - 
"sequence": "E" - } + "state": {"type": "LiteralSequenceExpression", "sequence": "E"}, }, "extensions": [ { @@ -60,26 +59,26 @@ def moa_vid145(): "alternate_allele": "T", "cdna_change": "c.1799T>A", "protein_change": "p.V600E", - "exon": "15" + "exon": "15", }, - "type": "Extension" + "type": "Extension", } ], "mappings": [ { "coding": { "system": "https://moalmanac.org/api/features/", - "code": "145" + "code": "145", }, - "relation": "exactMatch" + "relation": "exactMatch", }, { "coding": { "system": "https://www.ncbi.nlm.nih.gov/snp/", - "code": "rs113488022" + "code": "rs113488022", }, - "relation": "relatedMatch" - } + "relation": "relatedMatch", + }, ], } @@ -91,7 +90,7 @@ def moa_cetuximab(cetuximab_extensions): "id": "moa.normalize.therapy.rxcui:318341", "type": "TherapeuticAgent", "label": "Cetuximab", - "extensions": cetuximab_extensions + "extensions": cetuximab_extensions, } @@ -102,27 +101,22 @@ def moa_encorafenib(encorafenib_extensions): "id": "moa.normalize.therapy.rxcui:2049106", "type": "TherapeuticAgent", "label": "Encorafenib", - "extensions": encorafenib_extensions + "extensions": encorafenib_extensions, } @pytest.fixture(scope="module") -def moa_aid155_study( - moa_vid145, - moa_cetuximab, - moa_encorafenib, - moa_method -): +def moa_aid155_study(moa_vid145, moa_cetuximab, moa_encorafenib, moa_method): """Create MOA AID 155 study test fixture. Uses CombinationTherapy.""" return { "id": "moa.assertion:155", "type": "VariantTherapeuticResponseStudy", - "description": "The U.S. Food and Drug Administration (FDA) granted regular approval to encorafenib in combination with cetuximab for the treatment of adult patients with metastatic colorectal cancer (CRC) with BRAF V600E mutation, as detected by an FDA-approved test, after prior therapy.", # noqa: E501 + "description": "The U.S. Food and Drug Administration (FDA) granted regular approval to encorafenib in combination with cetuximab for the treatment of adult patients with metastatic colorectal cancer (CRC) with BRAF V600E mutation, as detected by an FDA-approved test, after prior therapy.", "direction": "none", "strength": { "code": "e000002", "label": "FDA recognized evidence", - "system": "https://go.osu.edu/evidence-codes" + "system": "https://go.osu.edu/evidence-codes", }, "predicate": "predictsSensitivityTo", "variant": moa_vid145, @@ -134,9 +128,9 @@ def moa_aid155_study( { "type": "Extension", "name": "moa_therapy_type", - "value": "Targeted therapy" + "value": "Targeted therapy", } - ] + ], }, "tumorType": { "id": "moa.normalize.disease.ncit:C5105", @@ -149,8 +143,8 @@ def moa_aid155_study( "value": { "normalized_id": "ncit:C5105", "label": "Colorectal Adenocarcinoma", - "mondo_id": "0005008" - } + "mondo_id": "0005008", + }, } ], "mappings": [ @@ -158,11 +152,11 @@ def moa_aid155_study( "coding": { "label": "Colorectal Adenocarcinoma", "system": "https://oncotree.mskcc.org/", - "code": "COADREAD" + "code": "COADREAD", }, - "relation": "exactMatch" + "relation": "exactMatch", } - ] + ], }, "qualifiers": { "alleleOrigin": "somatic", @@ -174,27 +168,23 @@ def moa_aid155_study( { "type": "Extension", "name": "gene_normalizer_id", - "value": "hgnc:1097" + "value": "hgnc:1097", } - ] - } + ], + }, }, "specifiedBy": moa_method, "isReportedIn": [ { "id": "moa.source:63", "extensions": [ - { - "type": "Extension", - "name": "source_type", - "value": "FDA" - } + {"type": "Extension", "name": "source_type", "value": "FDA"} ], "type": "Document", - "title": "Array BioPharma Inc. Braftovi (encorafenib) [package insert]. U.S. 
Food and Drug Administration website. www.accessdata.fda.gov/drugsatfda_docs/label/2020/210496s006lbl.pdf. Revised April 2020. Accessed October 15, 2020.", # noqa: E501 - "url": "https://www.accessdata.fda.gov/drugsatfda_docs/label/2020/210496s006lbl.pdf", # noqa: E501 + "title": "Array BioPharma Inc. Braftovi (encorafenib) [package insert]. U.S. Food and Drug Administration website. www.accessdata.fda.gov/drugsatfda_docs/label/2020/210496s006lbl.pdf. Revised April 2020. Accessed October 15, 2020.", + "url": "https://www.accessdata.fda.gov/drugsatfda_docs/label/2020/210496s006lbl.pdf", } - ] + ], } @@ -206,6 +196,4 @@ def studies(moa_aid66_study, moa_aid155_study): def test_moa_cdm(data, studies, check_transformed_cdm): """Test that moa transform works correctly.""" - check_transformed_cdm( - data, studies, DATA_DIR / FILENAME - ) + check_transformed_cdm(data, studies, DATA_DIR / FILENAME) From d4d23b25ea98ac6bd3e9c0e910549ffb125fc6af Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Fri, 29 Mar 2024 14:58:48 -0400 Subject: [PATCH 11/13] exclude stuff that should be excluded --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 653ad361..5fec92eb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -76,6 +76,7 @@ branch = true [tool.ruff] src = ["src"] +exclude = ["docs/*", "analysis/*", "codebuild/*"] [tool.ruff.lint] select = [ From d9e5ad53bb96abbfe554f01d8597b54feec6ce64 Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Fri, 29 Mar 2024 15:02:18 -0400 Subject: [PATCH 12/13] format --- .../harvester/civic_harvester_example.py | 59 +++++----- .../transform/civic_transform_example.py | 109 ++++++++++-------- analysis/graph/db_helper.py | 6 +- .../harvester/moa_harvester_example.py | 50 ++++---- .../transform/moa_transform_example.py | 77 +++++++------ codebuild/deploy_eb_env.py | 58 +++++----- codebuild/deploy_eb_env_dev.py | 58 +++++----- codebuild/terminate_eb_env.py | 30 ++--- codebuild/terminate_eb_env_dev.py | 30 ++--- 9 files changed, 252 insertions(+), 225 deletions(-) diff --git a/analysis/civic/examples/harvester/civic_harvester_example.py b/analysis/civic/examples/harvester/civic_harvester_example.py index 4f9b9676..f79a70b9 100644 --- a/analysis/civic/examples/harvester/civic_harvester_example.py +++ b/analysis/civic/examples/harvester/civic_harvester_example.py @@ -8,32 +8,35 @@ def create_evidence_examples(data): """Create five CIViC evidence examples.""" evidence_items = list() - for i in range(len(data['evidence'])): - if data['evidence'][i]['assertions']: - evidence_items.append(data['evidence'][i]) + for i in range(len(data["evidence"])): + if data["evidence"][i]["assertions"]: + evidence_items.append(data["evidence"][i]) if len(evidence_items) == 6: break for evidence_item in evidence_items: - variant_id = evidence_item['variant_id'] - gene_id = evidence_item['gene_id'] - assertions = evidence_item['assertions'] + variant_id = evidence_item["variant_id"] + gene_id = evidence_item["gene_id"] + assertions = evidence_item["assertions"] - for v in data['variants']: - if v['id'] == variant_id: + for v in data["variants"]: + if v["id"] == variant_id: variant = v - for g in data['genes']: - if g['id'] == gene_id: + for g in data["genes"]: + if g["id"] == gene_id: gene = g - with open(f"{PROJECT_ROOT}/analysis/civic/examples/harvester/" - f"{evidence_item['name']}.json", 'w+') as f: + with open( + f"{PROJECT_ROOT}/analysis/civic/examples/harvester/" + f"{evidence_item['name']}.json", + "w+", + ) as f: example = { - 
'EVIDENCE': evidence_item, - 'GENE': gene, - 'VARIANT': variant, - 'ASSERTIONS': assertions + "EVIDENCE": evidence_item, + "GENE": gene, + "VARIANT": variant, + "ASSERTIONS": assertions, } json.dump(example, f, indent=4) @@ -45,26 +48,30 @@ def create_variant_examples(data): """ variants_ids = [12, 1, 221, 190] variants = list() - for i in range(len(data['variants'])): - if data['variants'][i]['id'] in variants_ids: - variants.append(data['variants'][i]) + for i in range(len(data["variants"])): + if data["variants"][i]["id"] in variants_ids: + variants.append(data["variants"][i]) for variant in variants: - with open(f"{PROJECT_ROOT}/analysis/civic/examples/harvester/" - f"{variant['name'].lower()}.json", 'w+') as f: - variant['evidence_items'] = variant['evidence_items'][0] + with open( + f"{PROJECT_ROOT}/analysis/civic/examples/harvester/" + f"{variant['name'].lower()}.json", + "w+", + ) as f: + variant["evidence_items"] = variant["evidence_items"][0] f.write(json.dumps(variant, indent=4)) -if __name__ == '__main__': +if __name__ == "__main__": c = CivicHarvester() c.harvest() - latest = sorted((APP_ROOT / "data" / "civic" / "harvester").glob("civic_harvester_*.json"))[-1] # noqa: E501 + latest = sorted( + (APP_ROOT / "data" / "civic" / "harvester").glob("civic_harvester_*.json") + )[-1] # noqa: E501 with open(latest, "r") as f: civic_data = json.load(f) - civic_ex_dir =\ - PROJECT_ROOT / 'analysis' / 'civic' / 'examples' / 'harvester' + civic_ex_dir = PROJECT_ROOT / "analysis" / "civic" / "examples" / "harvester" civic_ex_dir.mkdir(exist_ok=True, parents=True) create_evidence_examples(civic_data) diff --git a/analysis/civic/examples/transform/civic_transform_example.py b/analysis/civic/examples/transform/civic_transform_example.py index 57fc4cb2..ca1c4c7d 100644 --- a/analysis/civic/examples/transform/civic_transform_example.py +++ b/analysis/civic/examples/transform/civic_transform_example.py @@ -8,21 +8,22 @@ def create_civic_example(civic_data): """Create CIViC transform examples from list of evidence items.""" ex = { - 'statements': [], - 'propositions': [], - 'variation_descriptors': [], - 'gene_descriptors': [], - 'therapy_descriptors': [], - 'disease_descriptors': [], - 'methods': [], - 'documents': [] + "statements": [], + "propositions": [], + "variation_descriptors": [], + "gene_descriptors": [], + "therapy_descriptors": [], + "disease_descriptors": [], + "methods": [], + "documents": [], } supported_by_statement_ids = set() - for s in civic_data['statements']: - if s['id'] == 'civic.aid:6': - supported_by_statement_ids = \ - {s for s in s['supported_by'] if s.startswith('civic.eid')} - supported_by_statement_ids.add(s['id']) + for s in civic_data["statements"]: + if s["id"] == "civic.aid:6": + supported_by_statement_ids = { + s for s in s["supported_by"] if s.startswith("civic.eid") + } + supported_by_statement_ids.add(s["id"]) break proposition_ids = set() @@ -32,56 +33,66 @@ def create_civic_example(civic_data): gids = set() methods = set() documents = set() - for s in civic_data['statements']: - if s['id'] in supported_by_statement_ids: - ex['statements'].append(s) - proposition_ids.add(s['proposition']) - vids.add(s['variation_descriptor']) - tids.add(s['therapy_descriptor']) - dids.add(s['disease_descriptor']) - methods.add(s['method']) - documents.update({d for d in s['supported_by'] if - not d.startswith('civic.eid')}) + for s in civic_data["statements"]: + if s["id"] in supported_by_statement_ids: + ex["statements"].append(s) + proposition_ids.add(s["proposition"]) + 
vids.add(s["variation_descriptor"]) + tids.add(s["therapy_descriptor"]) + dids.add(s["disease_descriptor"]) + methods.add(s["method"]) + documents.update( + {d for d in s["supported_by"] if not d.startswith("civic.eid")} + ) - for p in civic_data['propositions']: - if p['id'] in proposition_ids: - ex['propositions'].append(p) + for p in civic_data["propositions"]: + if p["id"] in proposition_ids: + ex["propositions"].append(p) - for v in civic_data['variation_descriptors']: - if v['id'] in vids: - ex['variation_descriptors'].append(v) - gids.add(v['gene_context']) + for v in civic_data["variation_descriptors"]: + if v["id"] in vids: + ex["variation_descriptors"].append(v) + gids.add(v["gene_context"]) - for t in civic_data['therapy_descriptors']: - if t['id'] in tids: - ex['therapy_descriptors'].append(t) + for t in civic_data["therapy_descriptors"]: + if t["id"] in tids: + ex["therapy_descriptors"].append(t) - for d in civic_data['disease_descriptors']: - if d['id'] in dids: - ex['disease_descriptors'].append(d) + for d in civic_data["disease_descriptors"]: + if d["id"] in dids: + ex["disease_descriptors"].append(d) - for g in civic_data['gene_descriptors']: - if g['id'] in gids: - ex['gene_descriptors'].append(g) + for g in civic_data["gene_descriptors"]: + if g["id"] in gids: + ex["gene_descriptors"].append(g) - for m in civic_data['methods']: - if m['id'] in methods: - ex['methods'].append(m) + for m in civic_data["methods"]: + if m["id"] in methods: + ex["methods"].append(m) - for d in civic_data['documents']: - if d['id'] in documents: - ex['documents'].append(d) + for d in civic_data["documents"]: + if d["id"] in documents: + ex["documents"].append(d) - with open(PROJECT_ROOT / "analysis" / "civic" / "examples" / # noqa: W504 - "transform" / "civic_cdm_example.json", 'w+') as f2: + with open( + PROJECT_ROOT + / "analysis" + / "civic" + / "examples" # noqa: W504 + / "transform" + / "civic_cdm_example.json", + "w+", + ) as f2: json.dump(ex, f2, indent=4) -if __name__ == '__main__': +if __name__ == "__main__": civic = CivicTransform() civic.transform() civic.create_json() - latest = sorted((APP_ROOT / "data" / "civic" / "transform").glob("civic_cdm_*.json"))[-1] # noqa: E501 + latest = sorted( + (APP_ROOT / "data" / "civic" / "transform").glob("civic_cdm_*.json") + )[-1] # noqa: E501 with open(latest, "r") as f: civic_data = json.load(f) create_civic_example(civic_data) diff --git a/analysis/graph/db_helper.py b/analysis/graph/db_helper.py index 3985457b..d9ad3d53 100644 --- a/analysis/graph/db_helper.py +++ b/analysis/graph/db_helper.py @@ -7,13 +7,13 @@ g = Graph(uri="bolt://localhost:7687", credentials=("neo4j", "admin")) g.clear() -fpath = APP_ROOT / 'data' / 'civic' / 'transform' / 'civic_cdm.json' -with open(fpath, 'r') as f: +fpath = APP_ROOT / "data" / "civic" / "transform" / "civic_cdm.json" +with open(fpath, "r") as f: items = json.load(f) count = 0 for item in items: - if 'assertion' in item.keys(): + if "assertion" in item.keys(): continue else: g.add_transformed_data(item) diff --git a/analysis/moa/examples/harvester/moa_harvester_example.py b/analysis/moa/examples/harvester/moa_harvester_example.py index 0b17db0c..8ff93992 100644 --- a/analysis/moa/examples/harvester/moa_harvester_example.py +++ b/analysis/moa/examples/harvester/moa_harvester_example.py @@ -9,29 +9,28 @@ def create_assertion_examples(data): """Create five MOAlmanac assertion examples.""" assertions = [] for i in [0, 69, 599, 699, 759]: - if data['assertions'][i]['source_ids']: - 
assertions.append(data['assertions'][i]) + if data["assertions"][i]["source_ids"]: + assertions.append(data["assertions"][i]) for assertion in assertions: - source_id = assertion['source_ids'] - for s in data['sources']: - if s['id'] == source_id: + source_id = assertion["source_ids"] + for s in data["sources"]: + if s["id"] == source_id: source = s break - feature_id = assertion['variant']['id'] - for v in data['variants']: - if v['id'] == feature_id: + feature_id = assertion["variant"]["id"] + for v in data["variants"]: + if v["id"] == feature_id: variant = v break - with open(f"{PROJECT_ROOT}/analysis/moa/examples/harvester/" - f"assertion {assertion['id']}.json", 'w+') as f: - example = { - 'ASSERTIONS': assertion, - 'SOURCES': source, - 'VARIANTS': variant - } + with open( + f"{PROJECT_ROOT}/analysis/moa/examples/harvester/" + f"assertion {assertion['id']}.json", + "w+", + ) as f: + example = {"ASSERTIONS": assertion, "SOURCES": source, "VARIANTS": variant} json.dump(example, f, indent=4) print(f"Created JSON for evidence: assertion {assertion['id']}") @@ -44,24 +43,29 @@ def create_variant_examples(data): """ variants_ids = [1, 147, 551, 701] variants = [] - for i in range(len(data['variants'])): - if data['variants'][i]['id'] in variants_ids: - variants.append(data['variants'][i]) + for i in range(len(data["variants"])): + if data["variants"][i]["id"] in variants_ids: + variants.append(data["variants"][i]) for variant in variants: - with open(f"{PROJECT_ROOT}/analysis/moa/examples/harvester/" - f"{variant['feature'].lower()}.json", 'w+') as f: + with open( + f"{PROJECT_ROOT}/analysis/moa/examples/harvester/" + f"{variant['feature'].lower()}.json", + "w+", + ) as f: f.write(json.dumps(variant, indent=4)) print(f"Created JSON for variant: {variant['feature']}") f.close() -if __name__ == '__main__': +if __name__ == "__main__": moa = MoaHarvester() moa.harvest() - latest = sorted((APP_ROOT / "data" / "moa" / "harvester").glob("moa_harvester_*.json"))[-1] # noqa: E501 + latest = sorted( + (APP_ROOT / "data" / "moa" / "harvester").glob("moa_harvester_*.json") + )[-1] # noqa: E501 with open(latest, "r") as f: moa_data = json.load(f) - moa_ex_dir = PROJECT_ROOT / 'analysis' / 'moa' / 'examples' + moa_ex_dir = PROJECT_ROOT / "analysis" / "moa" / "examples" moa_ex_dir.mkdir(exist_ok=True, parents=True) create_assertion_examples(moa_data) create_variant_examples(moa_data) diff --git a/analysis/moa/examples/transform/moa_transform_example.py b/analysis/moa/examples/transform/moa_transform_example.py index 0f8be71e..3ea54e02 100644 --- a/analysis/moa/examples/transform/moa_transform_example.py +++ b/analysis/moa/examples/transform/moa_transform_example.py @@ -7,7 +7,7 @@ def create_moa_example(moa_data): """Create MOA transform examples from list of evidence items.""" - assertion_id = ['moa.assertion:71', 'moa.assertion:188'] + assertion_id = ["moa.assertion:71", "moa.assertion:188"] ex = {} proposition = None var_des = None @@ -18,55 +18,60 @@ def create_moa_example(moa_data): doc = None for asst_id in assertion_id: - for statement in moa_data['statements']: - if statement['id'] == asst_id: - ex['statements'] = [statement] - proposition = statement['proposition'] - var_des = statement['variation_descriptor'] - t_des = statement['therapy_descriptor'] - d_des = statement['disease_descriptor'] - method = statement['method'] - doc = statement['supported_by'][0] + for statement in moa_data["statements"]: + if statement["id"] == asst_id: + ex["statements"] = [statement] + proposition = 
statement["proposition"] + var_des = statement["variation_descriptor"] + t_des = statement["therapy_descriptor"] + d_des = statement["disease_descriptor"] + method = statement["method"] + doc = statement["supported_by"][0] - for p in moa_data['propositions']: - if p['id'] == proposition: - ex['propositions'] = [p] + for p in moa_data["propositions"]: + if p["id"] == proposition: + ex["propositions"] = [p] - for v in moa_data['variation_descriptors']: - if v['id'] == var_des: - ex['variation_descriptors'] = [v] - g_des = v['gene_context'] + for v in moa_data["variation_descriptors"]: + if v["id"] == var_des: + ex["variation_descriptors"] = [v] + g_des = v["gene_context"] - for g in moa_data['gene_descriptors']: - if g['id'] == g_des: - ex['gene_descriptors'] = [g] + for g in moa_data["gene_descriptors"]: + if g["id"] == g_des: + ex["gene_descriptors"] = [g] - for t in moa_data['therapy_descriptors']: - if t['id'] == t_des: - ex['therapy_descriptors'] = [t] + for t in moa_data["therapy_descriptors"]: + if t["id"] == t_des: + ex["therapy_descriptors"] = [t] - for d in moa_data['disease_descriptors']: - if d['id'] == d_des: - ex['disease_descriptors'] = [d] + for d in moa_data["disease_descriptors"]: + if d["id"] == d_des: + ex["disease_descriptors"] = [d] - for m in moa_data['methods']: - if m['id'] == method: - ex['methods'] = [m] + for m in moa_data["methods"]: + if m["id"] == method: + ex["methods"] = [m] - for d in moa_data['documents']: - if d['id'] == doc: - ex['documents'] = [d] + for d in moa_data["documents"]: + if d["id"] == doc: + ex["documents"] = [d] - with open(f"{PROJECT_ROOT}/analysis/moa/examples/transform/" - f"{ex['statements'][0]['id']}.json", 'w+') as f: + with open( + f"{PROJECT_ROOT}/analysis/moa/examples/transform/" + f"{ex['statements'][0]['id']}.json", + "w+", + ) as f: json.dump(ex, f, indent=4) -if __name__ == '__main__': +if __name__ == "__main__": moa = MoaTransform() moa.transform() moa.create_json() - latest = sorted((APP_ROOT / "data" / "moa" / "transform").glob("moa_cdm_*.json"))[-1] # noqa: E501 + latest = sorted((APP_ROOT / "data" / "moa" / "transform").glob("moa_cdm_*.json"))[ + -1 + ] # noqa: E501 with open(latest, "r") as f: moa_data = json.load(f) create_moa_example(moa_data) diff --git a/codebuild/deploy_eb_env.py b/codebuild/deploy_eb_env.py index 6b16667c..35e33bb5 100644 --- a/codebuild/deploy_eb_env.py +++ b/codebuild/deploy_eb_env.py @@ -1,50 +1,48 @@ """Module for deploying MetaKB EB environment.""" import boto3 import time -elasticbeanstalk = boto3.client('elasticbeanstalk') -servicecatalog = boto3.client('servicecatalog') + +elasticbeanstalk = boto3.client("elasticbeanstalk") +servicecatalog = boto3.client("servicecatalog") terminate_time = 12 eb_app_name = "metakb" eb_env_name = "metakb-staging-env" sc_product_id = "prod-m4b65t5jgmcm4" -print(f'Launching new Service Catalog Product for staging environment: ' - f'{eb_app_name}') -sc_product_artifacts =\ - servicecatalog.list_provisioning_artifacts(ProductId=sc_product_id) -for artifact in sc_product_artifacts['ProvisioningArtifactDetails']: - if artifact['Active']: - provisioning_artifact_id = artifact['Id'] +print( + f"Launching new Service Catalog Product for staging environment: " f"{eb_app_name}" +) +sc_product_artifacts = servicecatalog.list_provisioning_artifacts( + ProductId=sc_product_id +) +for artifact in sc_product_artifacts["ProvisioningArtifactDetails"]: + if artifact["Active"]: + provisioning_artifact_id = artifact["Id"] try: eb_provisioned_product = 
servicecatalog.provision_product( ProductId=sc_product_id, ProvisioningArtifactId=provisioning_artifact_id, ProvisionedProductName=eb_env_name, ProvisioningParameters=[ - { - 'Key': 'Env', - 'Value': eb_app_name - }, - { - 'Key': 'EnvType', - 'Value': 'staging' - }, - { - 'Key': 'TerminateTime', - 'Value': str(terminate_time) - } - ]) - eb_provisioned_product_Id = \ - eb_provisioned_product['RecordDetail']['ProvisionedProductId'] + {"Key": "Env", "Value": eb_app_name}, + {"Key": "EnvType", "Value": "staging"}, + {"Key": "TerminateTime", "Value": str(terminate_time)}, + ], + ) + eb_provisioned_product_Id = eb_provisioned_product["RecordDetail"][ + "ProvisionedProductId" + ] product_status = servicecatalog.describe_provisioned_product( - Id=eb_provisioned_product_Id) - eb_provisioned_product_status =\ - product_status['ProvisionedProductDetail']['Status'] + Id=eb_provisioned_product_Id + ) + eb_provisioned_product_status = product_status["ProvisionedProductDetail"]["Status"] while eb_provisioned_product_status == "UNDER_CHANGE": time.sleep(10) product_status = servicecatalog.describe_provisioned_product( - Id=eb_provisioned_product_Id) - eb_provisioned_product_status = \ - product_status['ProvisionedProductDetail']['Status'] + Id=eb_provisioned_product_Id + ) + eb_provisioned_product_status = product_status["ProvisionedProductDetail"][ + "Status" + ] print(eb_provisioned_product_status) except: # noqa: E722 print("The EB environment is already running...") diff --git a/codebuild/deploy_eb_env_dev.py b/codebuild/deploy_eb_env_dev.py index d8d51023..9d6fe452 100644 --- a/codebuild/deploy_eb_env_dev.py +++ b/codebuild/deploy_eb_env_dev.py @@ -1,50 +1,48 @@ """Module for deploying MetaKB EB environment.""" import boto3 import time -elasticbeanstalk = boto3.client('elasticbeanstalk') -servicecatalog = boto3.client('servicecatalog') + +elasticbeanstalk = boto3.client("elasticbeanstalk") +servicecatalog = boto3.client("servicecatalog") terminate_time = 12 eb_app_name = "metakb" eb_env_name = "metakb-dev-env" sc_product_id = "prod-m4b65t5jgmcm4" -print(f'Launching new Service Catalog Product for staging environment: ' - f'{eb_app_name}') -sc_product_artifacts =\ - servicecatalog.list_provisioning_artifacts(ProductId=sc_product_id) -for artifact in sc_product_artifacts['ProvisioningArtifactDetails']: - if artifact['Active']: - provisioning_artifact_id = artifact['Id'] +print( + f"Launching new Service Catalog Product for staging environment: " f"{eb_app_name}" +) +sc_product_artifacts = servicecatalog.list_provisioning_artifacts( + ProductId=sc_product_id +) +for artifact in sc_product_artifacts["ProvisioningArtifactDetails"]: + if artifact["Active"]: + provisioning_artifact_id = artifact["Id"] try: eb_provisioned_product = servicecatalog.provision_product( ProductId=sc_product_id, ProvisioningArtifactId=provisioning_artifact_id, ProvisionedProductName=eb_env_name, ProvisioningParameters=[ - { - 'Key': 'Env', - 'Value': eb_app_name - }, - { - 'Key': 'EnvType', - 'Value': 'dev' - }, - { - 'Key': 'TerminateTime', - 'Value': str(terminate_time) - } - ]) - eb_provisioned_product_Id = \ - eb_provisioned_product['RecordDetail']['ProvisionedProductId'] + {"Key": "Env", "Value": eb_app_name}, + {"Key": "EnvType", "Value": "dev"}, + {"Key": "TerminateTime", "Value": str(terminate_time)}, + ], + ) + eb_provisioned_product_Id = eb_provisioned_product["RecordDetail"][ + "ProvisionedProductId" + ] product_status = servicecatalog.describe_provisioned_product( - Id=eb_provisioned_product_Id) - 
eb_provisioned_product_status =\ - product_status['ProvisionedProductDetail']['Status'] + Id=eb_provisioned_product_Id + ) + eb_provisioned_product_status = product_status["ProvisionedProductDetail"]["Status"] while eb_provisioned_product_status == "UNDER_CHANGE": time.sleep(10) product_status = servicecatalog.describe_provisioned_product( - Id=eb_provisioned_product_Id) - eb_provisioned_product_status = \ - product_status['ProvisionedProductDetail']['Status'] + Id=eb_provisioned_product_Id + ) + eb_provisioned_product_status = product_status["ProvisionedProductDetail"][ + "Status" + ] print(eb_provisioned_product_status) except: # noqa: E722 print("The EB environment is already running...") diff --git a/codebuild/terminate_eb_env.py b/codebuild/terminate_eb_env.py index 5d955318..477235af 100644 --- a/codebuild/terminate_eb_env.py +++ b/codebuild/terminate_eb_env.py @@ -2,28 +2,30 @@ import boto3 import json import time -client = boto3.client('lambda') -servicecatalog = boto3.client('servicecatalog') + +client = boto3.client("lambda") +servicecatalog = boto3.client("servicecatalog") eb_env_name = "metakb-staging-env" data = {"sc_provisioned_name": eb_env_name} -client.invoke(FunctionName='igm-inf-terminate-provisioned-product', - Payload=json.dumps(data)) +client.invoke( + FunctionName="igm-inf-terminate-provisioned-product", Payload=json.dumps(data) +) time.sleep(10) -provisioned_product =\ - servicecatalog.describe_provisioned_product(Name=eb_env_name) -eb_provisioned_product_Id = \ - provisioned_product['ProvisionedProductDetail']['Id'] +provisioned_product = servicecatalog.describe_provisioned_product(Name=eb_env_name) +eb_provisioned_product_Id = provisioned_product["ProvisionedProductDetail"]["Id"] product_status = servicecatalog.describe_provisioned_product( - Id=eb_provisioned_product_Id) -eb_provisioned_product_status = \ - product_status['ProvisionedProductDetail']['Status'] + Id=eb_provisioned_product_Id +) +eb_provisioned_product_status = product_status["ProvisionedProductDetail"]["Status"] while eb_provisioned_product_status == "UNDER_CHANGE": time.sleep(10) try: product_status = servicecatalog.describe_provisioned_product( - Id=eb_provisioned_product_Id) - eb_provisioned_product_status = \ - product_status['ProvisionedProductDetail']['Status'] + Id=eb_provisioned_product_Id + ) + eb_provisioned_product_status = product_status["ProvisionedProductDetail"][ + "Status" + ] except: # noqa: E722 eb_provisioned_product_status = "PRODUCT NOT FOUND" print(eb_provisioned_product_status) diff --git a/codebuild/terminate_eb_env_dev.py b/codebuild/terminate_eb_env_dev.py index 2e7bbd6d..6715c9af 100644 --- a/codebuild/terminate_eb_env_dev.py +++ b/codebuild/terminate_eb_env_dev.py @@ -2,28 +2,30 @@ import boto3 import json import time -client = boto3.client('lambda') -servicecatalog = boto3.client('servicecatalog') + +client = boto3.client("lambda") +servicecatalog = boto3.client("servicecatalog") eb_env_name = "metakb-dev-env" data = {"sc_provisioned_name": eb_env_name} -client.invoke(FunctionName='igm-inf-terminate-provisioned-product', - Payload=json.dumps(data)) +client.invoke( + FunctionName="igm-inf-terminate-provisioned-product", Payload=json.dumps(data) +) time.sleep(10) -provisioned_product =\ - servicecatalog.describe_provisioned_product(Name=eb_env_name) -eb_provisioned_product_Id = \ - provisioned_product['ProvisionedProductDetail']['Id'] +provisioned_product = servicecatalog.describe_provisioned_product(Name=eb_env_name) +eb_provisioned_product_Id = 
provisioned_product["ProvisionedProductDetail"]["Id"] product_status = servicecatalog.describe_provisioned_product( - Id=eb_provisioned_product_Id) -eb_provisioned_product_status = \ - product_status['ProvisionedProductDetail']['Status'] + Id=eb_provisioned_product_Id +) +eb_provisioned_product_status = product_status["ProvisionedProductDetail"]["Status"] while eb_provisioned_product_status == "UNDER_CHANGE": time.sleep(10) try: product_status = servicecatalog.describe_provisioned_product( - Id=eb_provisioned_product_Id) - eb_provisioned_product_status = \ - product_status['ProvisionedProductDetail']['Status'] + Id=eb_provisioned_product_Id + ) + eb_provisioned_product_status = product_status["ProvisionedProductDetail"][ + "Status" + ] except: # noqa: E722 eb_provisioned_product_status = "PRODUCT NOT FOUND" print(eb_provisioned_product_status) From baee4a51f52b0d12dbac9a90760eeff3fceda875 Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Fri, 29 Mar 2024 15:04:01 -0400 Subject: [PATCH 13/13] typos --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 5fec92eb..5affbe44 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,8 +25,8 @@ requires-python = ">=3.8" description = "A search interface for cancer variant interpretations assembled by aggregating and harmonizing across multiple cancer variant interpretation knowledgebases." license = {file = "LICENSE"} dependencies = [ - "ga4gh.vrs==~=2.0.0a5", - "gene-normalizer[etl]==~=0.3.0-dev1", + "ga4gh.vrs~=2.0.0a5", + "gene-normalizer[etl]~=0.3.0-dev1", "variation-normalizer~=0.8.2", "disease-normalizer[etl]~=0.4.0.dev3", "thera-py[etl]~=0.5.0.dev3",