From 628ed62ce26a66914f921db9005e2467eea2e4b5 Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Wed, 30 Oct 2024 07:36:09 -0400 Subject: [PATCH] chore!: rename transform class and data dir to transformers close #393 --- ...civic.rst => metakb.transformers.base.rst} | 8 +++---- ....moa.rst => metakb.transformers.civic.rst} | 8 +++---- ...m.base.rst => metakb.transformers.moa.rst} | 8 +++---- docs/source/reference/index.rst | 6 ++--- src/metakb/cli.py | 24 ++++++++++--------- src/metakb/transform/__init__.py | 4 ---- .../{transform => transformers}/README.md | 6 ++--- src/metakb/transformers/__init__.py | 4 ++++ .../{transform => transformers}/base.py | 18 +++++++------- .../{transform => transformers}/civic.py | 8 +++---- src/metakb/{transform => transformers}/moa.py | 8 +++---- tests/conftest.py | 2 +- .../diagnostic/civic_harvester.json | 2 +- .../moa_harvester.json | 0 .../prognostic/civic_harvester.json | 2 +- .../therapeutic/civic_harvester.json | 0 .../test_civic_transformer_diagnostic.py} | 10 ++++---- .../test_civic_transformer_prognostic.py} | 12 +++++----- .../test_civic_transformer_therapeutic.py} | 12 +++++----- .../test_moa_transformer.py} | 20 ++++++++-------- 20 files changed, 82 insertions(+), 80 deletions(-) rename docs/source/reference/api/{metakb.transform.civic.rst => metakb.transformers.base.rst} (55%) rename docs/source/reference/api/{metakb.transform.moa.rst => metakb.transformers.civic.rst} (54%) rename docs/source/reference/api/{metakb.transform.base.rst => metakb.transformers.moa.rst} (56%) delete mode 100644 src/metakb/transform/__init__.py rename src/metakb/{transform => transformers}/README.md (92%) create mode 100644 src/metakb/transformers/__init__.py rename src/metakb/{transform => transformers}/base.py (97%) rename src/metakb/{transform => transformers}/civic.py (99%) rename src/metakb/{transform => transformers}/moa.py (99%) rename tests/data/{transform => transformers}/diagnostic/civic_harvester.json (99%) rename 
tests/data/{transform => transformers}/moa_harvester.json (100%) rename tests/data/{transform => transformers}/prognostic/civic_harvester.json (99%) rename tests/data/{transform => transformers}/therapeutic/civic_harvester.json (100%) rename tests/unit/{transform/test_civic_transform_diagnostic.py => transformers/test_civic_transformer_diagnostic.py} (92%) rename tests/unit/{transform/test_civic_transform_prognostic.py => transformers/test_civic_transformer_prognostic.py} (89%) rename tests/unit/{transform/test_civic_transform_therapeutic.py => transformers/test_civic_transformer_therapeutic.py} (76%) rename tests/unit/{transform/test_moa_transform.py => transformers/test_moa_transformer.py} (92%) diff --git a/docs/source/reference/api/metakb.transform.civic.rst b/docs/source/reference/api/metakb.transformers.base.rst similarity index 55% rename from docs/source/reference/api/metakb.transform.civic.rst rename to docs/source/reference/api/metakb.transformers.base.rst index 27bd8c62..ce10af18 100644 --- a/docs/source/reference/api/metakb.transform.civic.rst +++ b/docs/source/reference/api/metakb.transformers.base.rst @@ -1,8 +1,8 @@ -metakb.transform.civic -====================== +metakb.transformers.base +======================== -.. automodule:: metakb.transform.civic +.. 
automodule:: metakb.transformers.base :members: :undoc-members: :special-members: __init__ - :exclude-members: model_fields, model_config, model_computed_fields \ No newline at end of file + :exclude-members: model_fields, model_config, model_computed_fields diff --git a/docs/source/reference/api/metakb.transform.moa.rst b/docs/source/reference/api/metakb.transformers.civic.rst similarity index 54% rename from docs/source/reference/api/metakb.transform.moa.rst rename to docs/source/reference/api/metakb.transformers.civic.rst index 22b682f3..c4f37c18 100644 --- a/docs/source/reference/api/metakb.transform.moa.rst +++ b/docs/source/reference/api/metakb.transformers.civic.rst @@ -1,8 +1,8 @@ -metakb.transform.moa -==================== +metakb.transformers.civic +========================= -.. automodule:: metakb.transform.moa +.. automodule:: metakb.transformers.civic :members: :undoc-members: :special-members: __init__ - :exclude-members: model_fields, model_config, model_computed_fields \ No newline at end of file + :exclude-members: model_fields, model_config, model_computed_fields diff --git a/docs/source/reference/api/metakb.transform.base.rst b/docs/source/reference/api/metakb.transformers.moa.rst similarity index 56% rename from docs/source/reference/api/metakb.transform.base.rst rename to docs/source/reference/api/metakb.transformers.moa.rst index 0a1a9ad6..d2d70913 100644 --- a/docs/source/reference/api/metakb.transform.base.rst +++ b/docs/source/reference/api/metakb.transformers.moa.rst @@ -1,8 +1,8 @@ -metakb.transform.base -===================== +metakb.transformers.moa +======================= -.. automodule:: metakb.transform.base +.. 
automodule:: metakb.transformers.moa :members: :undoc-members: :special-members: __init__ - :exclude-members: model_fields, model_config, model_computed_fields \ No newline at end of file + :exclude-members: model_fields, model_config, model_computed_fields diff --git a/docs/source/reference/index.rst b/docs/source/reference/index.rst index aa58d6ff..0d5e9402 100644 --- a/docs/source/reference/index.rst +++ b/docs/source/reference/index.rst @@ -60,6 +60,6 @@ Transformers :toctree: api/ :template: module_summary.rst - metakb.transform.base - metakb.transform.civic - metakb.transform.moa + metakb.transformers.base + metakb.transformers.civic + metakb.transformers.moa diff --git a/src/metakb/cli.py b/src/metakb/cli.py index f9a81635..f31fa6ba 100644 --- a/src/metakb/cli.py +++ b/src/metakb/cli.py @@ -34,7 +34,7 @@ ) from metakb.normalizers import check_normalizers as check_normalizer_health from metakb.schemas.app import SourceName -from metakb.transform import CivicTransform, MoaTransform +from metakb.transformers import CivicTransformer, MoaTransformer _logger = logging.getLogger(__name__) @@ -451,12 +451,12 @@ def load_cdm( for src in sorted([s.value for s in SourceName]): pattern = f"{src}_cdm_{version}.json" - globbed = (APP_ROOT / "data" / src / "transform").glob(pattern) + globbed = (APP_ROOT / "data" / src / "transformers").glob(pattern) try: path = sorted(globbed)[-1] except IndexError as e: - msg = f"No valid transform file found matching pattern: {pattern}" + msg = f"No valid transformation file found matching pattern: {pattern}" raise FileNotFoundError(msg) from e load_from_json(path, driver) @@ -534,12 +534,12 @@ async def update( sources = tuple(SourceName) for src in sorted([s.value for s in sources]): pattern = f"{src}_cdm_*.json" - globbed = (APP_ROOT / "data" / src / "transform").glob(pattern) + globbed = (APP_ROOT / "data" / src / "transformers").glob(pattern) try: path = sorted(globbed)[-1] except IndexError as e: - msg = f"No valid transform file 
found matching pattern: {pattern}" + msg = f"No valid transformation files found matching pattern: {pattern}" raise FileNotFoundError(msg) from e load_from_json(path, driver) @@ -621,19 +621,21 @@ async def _transform_source( :param output_directory: custom directory to store output to -- use source defaults if not given """ - transform_sources = { - SourceName.CIVIC: CivicTransform, - SourceName.MOA: MoaTransform, + transformer_sources = { + SourceName.CIVIC: CivicTransformer, + SourceName.MOA: MoaTransformer, } _echo_info(f"Transforming {source.as_print_case()}...") start = timer() - transformer: CivicTransform | MoaTransform = transform_sources[source]( + transformer: CivicTransformer | MoaTransformer = transformer_sources[source]( normalizers=normalizer_handler, harvester_path=harvest_file ) harvested_data = transformer.extract_harvested_data() await transformer.transform(harvested_data) end = timer() - _echo_info(f"{source.as_print_case()} transform finished in {(end - start):.2f} s.") + _echo_info( + f"{source.as_print_case()} transformation finished in {(end - start):.2f} s." 
+ ) output_file = ( output_directory / f"{source.value}_cdm_{_current_date_string()}.json" if output_directory @@ -715,7 +717,7 @@ def _retrieve_s3_cdms() -> str: with tmp_path.open("wb") as f: file.Object().download_fileobj(f) - cdm_dir = APP_ROOT / "data" / source / "transform" + cdm_dir = APP_ROOT / "data" / source / "transformers" cdm_zip = ZipFile(tmp_path, "r") cdm_zip.extract(f"{source}_cdm_{newest_version}.json", cdm_dir) diff --git a/src/metakb/transform/__init__.py b/src/metakb/transform/__init__.py deleted file mode 100644 index b39fe1bb..00000000 --- a/src/metakb/transform/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -"""Transformations for sources.""" - -from .civic import CivicTransform # noqa: F401 -from .moa import MoaTransform # noqa: F401 diff --git a/src/metakb/transform/README.md b/src/metakb/transformers/README.md similarity index 92% rename from src/metakb/transform/README.md rename to src/metakb/transformers/README.md index 1170919c..7ebbacc4 100644 --- a/src/metakb/transform/README.md +++ b/src/metakb/transformers/README.md @@ -1,9 +1,9 @@ # Transformations -We take the harvested JSON from each source and transform this to our common data model. +We take the harvested JSON from each source and transform this to our common data model. ### Using the transformation modules -The VICC normalizers must first be installed. +The VICC normalizers must first be installed. 
``` pip install thera-py @@ -34,4 +34,4 @@ python3 -m gene.cli --normalizer="hgnc" [disease-normalizer](https://github.com/cancervariants/disease-normalization) ``` python3 -m disease.cli --update_all --update_merged -``` \ No newline at end of file +``` diff --git a/src/metakb/transformers/__init__.py b/src/metakb/transformers/__init__.py new file mode 100644 index 00000000..c60a5e5e --- /dev/null +++ b/src/metakb/transformers/__init__.py @@ -0,0 +1,4 @@ +"""Transformations for sources.""" + +from .civic import CivicTransformer # noqa: F401 +from .moa import MoaTransformer # noqa: F401 diff --git a/src/metakb/transform/base.py b/src/metakb/transformers/base.py similarity index 97% rename from src/metakb/transform/base.py rename to src/metakb/transformers/base.py index 84933cb3..eeb1f520 100644 --- a/src/metakb/transform/base.py +++ b/src/metakb/transformers/base.py @@ -1,4 +1,4 @@ -"""A module for the Transform base class.""" +"""A module for the Transformer base class.""" import datetime import json @@ -111,7 +111,7 @@ class TransformedData(BaseModel): documents: list[Document] = [] -class Transform(ABC): +class Transformer(ABC): """A base class for transforming harvester data.""" _methods: ClassVar[list[Method]] = [ @@ -234,13 +234,13 @@ def __init__( harvester_path: Path | None = None, normalizers: ViccNormalizers | None = None, ) -> None: - """Initialize Transform base class. + """Initialize Transformer base class. 
:param Path data_dir: Path to source data directory :param Optional[Path] harvester_path: Path to previously harvested data :param ViccNormalizers normalizers: normalizer collection instance """ - self.name = self.__class__.__name__.lower().split("transform")[0] + self.name = self.__class__.__name__.lower().split("transformer")[0] self.data_dir = data_dir / self.name self.harvester_path = harvester_path @@ -362,7 +362,7 @@ def _get_combination_therapy( Combination Therapy """ components = [] - source_name = type(self).__name__.lower().replace("transform", "") + source_name = type(self).__name__.lower().replace("transformer", "") for therapy in therapies: if source_name == SourceName.MOA: @@ -516,15 +516,15 @@ def create_json(self, cdm_filepath: Path | None = None) -> None: :param cdm_filepath: Path to the JSON file where the CDM data will be stored. If not provided, will use the default path of - ``/data//transform/_cdm_YYYYMMDD.json`` + ``/data//transformers/_cdm_YYYYMMDD.json`` """ if not cdm_filepath: - transform_dir = self.data_dir / "transform" - transform_dir.mkdir(exist_ok=True, parents=True) + transformers_dir = self.data_dir / "transformers" + transformers_dir.mkdir(exist_ok=True, parents=True) today = datetime.datetime.strftime( datetime.datetime.now(tz=datetime.timezone.utc), DATE_FMT ) - cdm_filepath = transform_dir / f"{self.name}_cdm_{today}.json" + cdm_filepath = transformers_dir / f"{self.name}_cdm_{today}.json" with cdm_filepath.open("w+") as f: json.dump(self.processed_data.model_dump(exclude_none=True), f, indent=2) diff --git a/src/metakb/transform/civic.py b/src/metakb/transformers/civic.py similarity index 99% rename from src/metakb/transform/civic.py rename to src/metakb/transformers/civic.py index 40688da3..547f923d 100644 --- a/src/metakb/transform/civic.py +++ b/src/metakb/transformers/civic.py @@ -26,11 +26,11 @@ VariantTherapeuticResponseStudyPredicate, _VariantOncogenicityStudyQualifier, ) -from metakb.transform.base import ( +from 
metakb.transformers.base import ( CivicEvidenceLevel, MethodId, TherapeuticProcedureType, - Transform, + Transformer, ) _logger = logging.getLogger(__name__) @@ -99,7 +99,7 @@ class SourcePrefix(str, Enum): ASH = "ASH" -class CivicTransform(Transform): +class CivicTransformer(Transformer): """A class for transforming CIViC to the common data model.""" def __init__( @@ -108,7 +108,7 @@ def __init__( harvester_path: Path | None = None, normalizers: ViccNormalizers | None = None, ) -> None: - """Initialize CIViC Transform class. + """Initialize CIViC Transformer class. :param data_dir: Path to source data directory :param harvester_path: Path to previously harvested CIViC data diff --git a/src/metakb/transform/moa.py b/src/metakb/transformers/moa.py similarity index 99% rename from src/metakb/transform/moa.py rename to src/metakb/transformers/moa.py index e6716a35..6a195b0a 100644 --- a/src/metakb/transform/moa.py +++ b/src/metakb/transformers/moa.py @@ -25,17 +25,17 @@ VariantTherapeuticResponseStudyPredicate, _VariantOncogenicityStudyQualifier, ) -from metakb.transform.base import ( +from metakb.transformers.base import ( MethodId, MoaEvidenceLevel, TherapeuticProcedureType, - Transform, + Transformer, ) logger = logging.getLogger(__name__) -class MoaTransform(Transform): +class MoaTransformer(Transformer): """A class for transforming MOA resources to common data model.""" def __init__( @@ -44,7 +44,7 @@ def __init__( harvester_path: Path | None = None, normalizers: ViccNormalizers | None = None, ) -> None: - """Initialize MOAlmanac Transform class. + """Initialize MOAlmanac Transformer class. 
:param data_dir: Path to source data directory :param harvester_path: Path to previously harvested MOA data diff --git a/tests/conftest.py b/tests/conftest.py index bccec9a9..f8991935 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -13,7 +13,7 @@ TEST_DATA_DIR = Path(__file__).resolve().parents[0] / "data" TEST_HARVESTERS_DIR = TEST_DATA_DIR / "harvesters" -TEST_TRANSFORM_DIR = TEST_DATA_DIR / "transform" +TEST_TRANSFORMERS_DIR = TEST_DATA_DIR / "transformers" def pytest_addoption(parser): diff --git a/tests/data/transform/diagnostic/civic_harvester.json b/tests/data/transformers/diagnostic/civic_harvester.json similarity index 99% rename from tests/data/transform/diagnostic/civic_harvester.json rename to tests/data/transformers/diagnostic/civic_harvester.json index 5684411b..46b8e9e8 100644 --- a/tests/data/transform/diagnostic/civic_harvester.json +++ b/tests/data/transformers/diagnostic/civic_harvester.json @@ -433,4 +433,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/tests/data/transform/moa_harvester.json b/tests/data/transformers/moa_harvester.json similarity index 100% rename from tests/data/transform/moa_harvester.json rename to tests/data/transformers/moa_harvester.json diff --git a/tests/data/transform/prognostic/civic_harvester.json b/tests/data/transformers/prognostic/civic_harvester.json similarity index 99% rename from tests/data/transform/prognostic/civic_harvester.json rename to tests/data/transformers/prognostic/civic_harvester.json index 0cfaaaf5..bfb0cd6f 100644 --- a/tests/data/transform/prognostic/civic_harvester.json +++ b/tests/data/transformers/prognostic/civic_harvester.json @@ -257,4 +257,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/tests/data/transform/therapeutic/civic_harvester.json b/tests/data/transformers/therapeutic/civic_harvester.json similarity index 100% rename from tests/data/transform/therapeutic/civic_harvester.json rename to tests/data/transformers/therapeutic/civic_harvester.json diff --git 
a/tests/unit/transform/test_civic_transform_diagnostic.py b/tests/unit/transformers/test_civic_transformer_diagnostic.py similarity index 92% rename from tests/unit/transform/test_civic_transform_diagnostic.py rename to tests/unit/transformers/test_civic_transformer_diagnostic.py index ab1dc427..2ac0f82b 100644 --- a/tests/unit/transform/test_civic_transform_diagnostic.py +++ b/tests/unit/transformers/test_civic_transformer_diagnostic.py @@ -4,19 +4,19 @@ import pytest import pytest_asyncio -from tests.conftest import TEST_TRANSFORM_DIR +from tests.conftest import TEST_TRANSFORMERS_DIR -from metakb.transform.civic import CivicTransform +from metakb.transformers.civic import CivicTransformer -DATA_DIR = TEST_TRANSFORM_DIR / "diagnostic" +DATA_DIR = TEST_TRANSFORMERS_DIR / "diagnostic" FILENAME = "civic_cdm.json" @pytest_asyncio.fixture(scope="module") async def data(normalizers): - """Create a CIViC Transform test fixture.""" + """Create a CIViC Transformer test fixture.""" harvester_path = DATA_DIR / "civic_harvester.json" - c = CivicTransform( + c = CivicTransformer( data_dir=DATA_DIR, harvester_path=harvester_path, normalizers=normalizers ) await c.transform() diff --git a/tests/unit/transform/test_civic_transform_prognostic.py b/tests/unit/transformers/test_civic_transformer_prognostic.py similarity index 89% rename from tests/unit/transform/test_civic_transform_prognostic.py rename to tests/unit/transformers/test_civic_transformer_prognostic.py index 46038098..651257a9 100644 --- a/tests/unit/transform/test_civic_transform_prognostic.py +++ b/tests/unit/transformers/test_civic_transformer_prognostic.py @@ -4,19 +4,19 @@ import pytest import pytest_asyncio -from tests.conftest import TEST_TRANSFORM_DIR +from tests.conftest import TEST_TRANSFORMERS_DIR -from metakb.transform.civic import CivicTransform +from metakb.transformers.civic import CivicTransformer -DATA_DIR = TEST_TRANSFORM_DIR / "prognostic" +DATA_DIR = TEST_TRANSFORMERS_DIR / "prognostic" FILENAME = 
"civic_cdm.json" @pytest_asyncio.fixture(scope="module") async def data(normalizers): - """Create a CIViC Transform test fixture.""" + """Create a CIViC Transformer test fixture.""" harvester_path = DATA_DIR / "civic_harvester.json" - c = CivicTransform( + c = CivicTransformer( data_dir=DATA_DIR, harvester_path=harvester_path, normalizers=normalizers ) await c.transform() @@ -79,7 +79,7 @@ def test_civic_cdm( check_method, check_transformed_cdm, ): - """Test that civic transform works correctly.""" + """Test that civic transformation works correctly.""" check_transformed_cdm( data, statements, diff --git a/tests/unit/transform/test_civic_transform_therapeutic.py b/tests/unit/transformers/test_civic_transformer_therapeutic.py similarity index 76% rename from tests/unit/transform/test_civic_transform_therapeutic.py rename to tests/unit/transformers/test_civic_transformer_therapeutic.py index a33ec053..2292455b 100644 --- a/tests/unit/transform/test_civic_transform_therapeutic.py +++ b/tests/unit/transformers/test_civic_transformer_therapeutic.py @@ -4,19 +4,19 @@ import pytest import pytest_asyncio -from tests.conftest import TEST_TRANSFORM_DIR +from tests.conftest import TEST_TRANSFORMERS_DIR -from metakb.transform.civic import CivicTransform +from metakb.transformers.civic import CivicTransformer -DATA_DIR = TEST_TRANSFORM_DIR / "therapeutic" +DATA_DIR = TEST_TRANSFORMERS_DIR / "therapeutic" FILENAME = "civic_cdm.json" @pytest_asyncio.fixture(scope="module") async def data(normalizers): - """Create a CIViC Transform test fixture.""" + """Create a CIViC Transformer test fixture.""" harvester_path = DATA_DIR / "civic_harvester.json" - c = CivicTransform( + c = CivicTransformer( data_dir=DATA_DIR, harvester_path=harvester_path, normalizers=normalizers ) harvested_data = c.extract_harvested_data() @@ -33,5 +33,5 @@ def studies(civic_eid2997_study, civic_eid816_study, civic_eid9851_study): def test_civic_cdm(data, studies, check_transformed_cdm): - """Test that civic 
transform works correctly.""" + """Test that civic transformation works correctly.""" check_transformed_cdm(data, studies, DATA_DIR / FILENAME) diff --git a/tests/unit/transform/test_moa_transform.py b/tests/unit/transformers/test_moa_transformer.py similarity index 92% rename from tests/unit/transform/test_moa_transform.py rename to tests/unit/transformers/test_moa_transformer.py index e2c1b5ac..5aaa617e 100644 --- a/tests/unit/transform/test_moa_transform.py +++ b/tests/unit/transformers/test_moa_transformer.py @@ -4,26 +4,26 @@ import pytest import pytest_asyncio -from tests.conftest import TEST_TRANSFORM_DIR +from tests.conftest import TEST_TRANSFORMERS_DIR -from metakb.transform.moa import MoaTransform +from metakb.transformers.moa import MoaTransformer FILENAME = "moa_cdm.json" @pytest_asyncio.fixture(scope="module") async def data(normalizers): - """Create a MOA Transform test fixture.""" - harvester_path = TEST_TRANSFORM_DIR / "moa_harvester.json" - moa = MoaTransform( - data_dir=TEST_TRANSFORM_DIR, + """Create a MOA Transformer test fixture.""" + harvester_path = TEST_TRANSFORMERS_DIR / "moa_harvester.json" + moa = MoaTransformer( + data_dir=TEST_TRANSFORMERS_DIR, harvester_path=harvester_path, normalizers=normalizers, ) harvested_data = moa.extract_harvested_data() await moa.transform(harvested_data) - moa.create_json(cdm_filepath=TEST_TRANSFORM_DIR / FILENAME) - with (TEST_TRANSFORM_DIR / FILENAME).open() as f: + moa.create_json(cdm_filepath=TEST_TRANSFORMERS_DIR / FILENAME) + with (TEST_TRANSFORMERS_DIR / FILENAME).open() as f: return json.load(f) @@ -195,5 +195,5 @@ def studies(moa_aid66_study, moa_aid155_study): def test_moa_cdm(data, studies, check_transformed_cdm): - """Test that moa transform works correctly.""" - check_transformed_cdm(data, studies, TEST_TRANSFORM_DIR / FILENAME) + """Test that moa transformation works correctly.""" + check_transformed_cdm(data, studies, TEST_TRANSFORMERS_DIR / FILENAME)