Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore!: rename transform class and dirs to transformers #396

Merged
merged 2 commits into from
Nov 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
metakb.transform.civic
======================
metakb.transformers.base
========================

.. automodule:: metakb.transform.civic
.. automodule:: metakb.transformers.base
:members:
:undoc-members:
:special-members: __init__
:exclude-members: model_fields, model_config, model_computed_fields
:exclude-members: model_fields, model_config, model_computed_fields
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
metakb.transform.moa
====================
metakb.transformers.civic
=========================

.. automodule:: metakb.transform.moa
.. automodule:: metakb.transformers.civic
:members:
:undoc-members:
:special-members: __init__
:exclude-members: model_fields, model_config, model_computed_fields
:exclude-members: model_fields, model_config, model_computed_fields
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
metakb.transform.base
=====================
metakb.transformers.moa
=======================

.. automodule:: metakb.transform.base
.. automodule:: metakb.transformers.moa
:members:
:undoc-members:
:special-members: __init__
:exclude-members: model_fields, model_config, model_computed_fields
:exclude-members: model_fields, model_config, model_computed_fields
6 changes: 3 additions & 3 deletions docs/source/reference/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,6 @@ Transformers
:toctree: api/
:template: module_summary.rst

metakb.transform.base
metakb.transform.civic
metakb.transform.moa
metakb.transformers.base
metakb.transformers.civic
metakb.transformers.moa
24 changes: 13 additions & 11 deletions src/metakb/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
)
from metakb.normalizers import check_normalizers as check_normalizer_health
from metakb.schemas.app import SourceName
from metakb.transform import CivicTransform, MoaTransform
from metakb.transformers import CivicTransformer, MoaTransformer

_logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -451,12 +451,12 @@ def load_cdm(

for src in sorted([s.value for s in SourceName]):
pattern = f"{src}_cdm_{version}.json"
globbed = (APP_ROOT / "data" / src / "transform").glob(pattern)
globbed = (APP_ROOT / "data" / src / "transformers").glob(pattern)

try:
path = sorted(globbed)[-1]
except IndexError as e:
msg = f"No valid transform file found matching pattern: {pattern}"
msg = f"No valid transformation file found matching pattern: {pattern}"
raise FileNotFoundError(msg) from e

load_from_json(path, driver)
Expand Down Expand Up @@ -534,12 +534,12 @@ async def update(
sources = tuple(SourceName)
for src in sorted([s.value for s in sources]):
pattern = f"{src}_cdm_*.json"
globbed = (APP_ROOT / "data" / src / "transform").glob(pattern)
globbed = (APP_ROOT / "data" / src / "transformers").glob(pattern)

try:
path = sorted(globbed)[-1]
except IndexError as e:
msg = f"No valid transform file found matching pattern: {pattern}"
msg = f"No valid transformation files found matching pattern: {pattern}"
raise FileNotFoundError(msg) from e

load_from_json(path, driver)
Expand Down Expand Up @@ -621,19 +621,21 @@ async def _transform_source(
:param output_directory: custom directory to store output to -- use source defaults
if not given
"""
transform_sources = {
SourceName.CIVIC: CivicTransform,
SourceName.MOA: MoaTransform,
transformer_sources = {
SourceName.CIVIC: CivicTransformer,
SourceName.MOA: MoaTransformer,
}
_echo_info(f"Transforming {source.as_print_case()}...")
start = timer()
transformer: CivicTransform | MoaTransform = transform_sources[source](
transformer: CivicTransformer | MoaTransformer = transformer_sources[source](
normalizers=normalizer_handler, harvester_path=harvest_file
)
harvested_data = transformer.extract_harvested_data()
await transformer.transform(harvested_data)
end = timer()
_echo_info(f"{source.as_print_case()} transform finished in {(end - start):.2f} s.")
_echo_info(
f"{source.as_print_case()} transformation finished in {(end - start):.2f} s."
)
output_file = (
output_directory / f"{source.value}_cdm_{_current_date_string()}.json"
if output_directory
Expand Down Expand Up @@ -715,7 +717,7 @@ def _retrieve_s3_cdms() -> str:
with tmp_path.open("wb") as f:
file.Object().download_fileobj(f)

cdm_dir = APP_ROOT / "data" / source / "transform"
cdm_dir = APP_ROOT / "data" / source / "transformers"
cdm_zip = ZipFile(tmp_path, "r")
cdm_zip.extract(f"{source}_cdm_{newest_version}.json", cdm_dir)

Expand Down
4 changes: 0 additions & 4 deletions src/metakb/transform/__init__.py

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# Transformations
We take the harvested JSON from each source and transform this to our common data model.
We take the harvested JSON from each source and transform this to our common data model.


### Using the transformation modules
The VICC normalizers must first be installed.
The VICC normalizers must first be installed.

```
pip install thera-py
Expand Down Expand Up @@ -34,4 +34,4 @@ python3 -m gene.cli --normalizer="hgnc"
[disease-normalizer](https://github.com/cancervariants/disease-normalization)
```
python3 -m disease.cli --update_all --update_merged
```
```
4 changes: 4 additions & 0 deletions src/metakb/transformers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
"""Transformations for sources."""

from .civic import CivicTransformer # noqa: F401
from .moa import MoaTransformer # noqa: F401
18 changes: 9 additions & 9 deletions src/metakb/transform/base.py → src/metakb/transformers/base.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""A module for the Transform base class."""
"""A module for the Transformer base class."""

import datetime
import json
Expand Down Expand Up @@ -111,7 +111,7 @@ class TransformedData(BaseModel):
documents: list[Document] = []


class Transform(ABC):
class Transformer(ABC):
"""A base class for transforming harvester data."""

_methods: ClassVar[list[Method]] = [
Expand Down Expand Up @@ -234,13 +234,13 @@ def __init__(
harvester_path: Path | None = None,
normalizers: ViccNormalizers | None = None,
) -> None:
"""Initialize Transform base class.
"""Initialize Transformer base class.

:param Path data_dir: Path to source data directory
:param Optional[Path] harvester_path: Path to previously harvested data
:param ViccNormalizers normalizers: normalizer collection instance
"""
self.name = self.__class__.__name__.lower().split("transform")[0]
self.name = self.__class__.__name__.lower().split("transformer")[0]
self.data_dir = data_dir / self.name
self.harvester_path = harvester_path

Expand Down Expand Up @@ -362,7 +362,7 @@ def _get_combination_therapy(
Combination Therapy
"""
components = []
source_name = type(self).__name__.lower().replace("transform", "")
source_name = type(self).__name__.lower().replace("transformer", "")

for therapy in therapies:
if source_name == SourceName.MOA:
Expand Down Expand Up @@ -516,15 +516,15 @@ def create_json(self, cdm_filepath: Path | None = None) -> None:

:param cdm_filepath: Path to the JSON file where the CDM data will be
stored. If not provided, will use the default path of
``<APP_ROOT>/data/<src_name>/transform/<src_name>_cdm_YYYYMMDD.json``
``<APP_ROOT>/data/<src_name>/transformers/<src_name>_cdm_YYYYMMDD.json``
"""
if not cdm_filepath:
transform_dir = self.data_dir / "transform"
transform_dir.mkdir(exist_ok=True, parents=True)
transformers_dir = self.data_dir / "transformers"
transformers_dir.mkdir(exist_ok=True, parents=True)
today = datetime.datetime.strftime(
datetime.datetime.now(tz=datetime.timezone.utc), DATE_FMT
)
cdm_filepath = transform_dir / f"{self.name}_cdm_{today}.json"
cdm_filepath = transformers_dir / f"{self.name}_cdm_{today}.json"

with cdm_filepath.open("w+") as f:
json.dump(self.processed_data.model_dump(exclude_none=True), f, indent=2)
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@
VariantTherapeuticResponseStudyPredicate,
_VariantOncogenicityStudyQualifier,
)
from metakb.transform.base import (
from metakb.transformers.base import (
CivicEvidenceLevel,
MethodId,
TherapeuticProcedureType,
Transform,
Transformer,
)

_logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -99,7 +99,7 @@ class SourcePrefix(str, Enum):
ASH = "ASH"


class CivicTransform(Transform):
class CivicTransformer(Transformer):
"""A class for transforming CIViC to the common data model."""

def __init__(
Expand All @@ -108,7 +108,7 @@ def __init__(
harvester_path: Path | None = None,
normalizers: ViccNormalizers | None = None,
) -> None:
"""Initialize CIViC Transform class.
"""Initialize CIViC Transformer class.

:param data_dir: Path to source data directory
:param harvester_path: Path to previously harvested CIViC data
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,17 @@
VariantTherapeuticResponseStudyPredicate,
_VariantOncogenicityStudyQualifier,
)
from metakb.transform.base import (
from metakb.transformers.base import (
MethodId,
MoaEvidenceLevel,
TherapeuticProcedureType,
Transform,
Transformer,
)

logger = logging.getLogger(__name__)


class MoaTransform(Transform):
class MoaTransformer(Transformer):
"""A class for transforming MOA resources to common data model."""

def __init__(
Expand All @@ -44,7 +44,7 @@ def __init__(
harvester_path: Path | None = None,
normalizers: ViccNormalizers | None = None,
) -> None:
"""Initialize MOAlmanac Transform class.
"""Initialize MOAlmanac Transformer class.

:param data_dir: Path to source data directory
:param harvester_path: Path to previously harvested MOA data
Expand Down
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

TEST_DATA_DIR = Path(__file__).resolve().parents[0] / "data"
TEST_HARVESTERS_DIR = TEST_DATA_DIR / "harvesters"
TEST_TRANSFORM_DIR = TEST_DATA_DIR / "transform"
TEST_TRANSFORMERS_DIR = TEST_DATA_DIR / "transformers"


def pytest_addoption(parser):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -433,4 +433,4 @@
]
}
]
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -257,4 +257,4 @@
]
}
]
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,19 @@

import pytest
import pytest_asyncio
from tests.conftest import TEST_TRANSFORM_DIR
from tests.conftest import TEST_TRANSFORMERS_DIR

from metakb.transform.civic import CivicTransform
from metakb.transformers.civic import CivicTransformer

DATA_DIR = TEST_TRANSFORM_DIR / "diagnostic"
DATA_DIR = TEST_TRANSFORMERS_DIR / "diagnostic"
FILENAME = "civic_cdm.json"


@pytest_asyncio.fixture(scope="module")
async def data(normalizers):
"""Create a CIViC Transform test fixture."""
"""Create a CIViC Transformer test fixture."""
harvester_path = DATA_DIR / "civic_harvester.json"
c = CivicTransform(
c = CivicTransformer(
data_dir=DATA_DIR, harvester_path=harvester_path, normalizers=normalizers
)
await c.transform()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,19 @@

import pytest
import pytest_asyncio
from tests.conftest import TEST_TRANSFORM_DIR
from tests.conftest import TEST_TRANSFORMERS_DIR

from metakb.transform.civic import CivicTransform
from metakb.transformers.civic import CivicTransformer

DATA_DIR = TEST_TRANSFORM_DIR / "prognostic"
DATA_DIR = TEST_TRANSFORMERS_DIR / "prognostic"
FILENAME = "civic_cdm.json"


@pytest_asyncio.fixture(scope="module")
async def data(normalizers):
"""Create a CIViC Transform test fixture."""
"""Create a CIViC Transformer test fixture."""
harvester_path = DATA_DIR / "civic_harvester.json"
c = CivicTransform(
c = CivicTransformer(
data_dir=DATA_DIR, harvester_path=harvester_path, normalizers=normalizers
)
await c.transform()
Expand Down Expand Up @@ -79,7 +79,7 @@ def test_civic_cdm(
check_method,
check_transformed_cdm,
):
"""Test that civic transform works correctly."""
"""Test that civic transformation works correctly."""
check_transformed_cdm(
data,
statements,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,19 @@

import pytest
import pytest_asyncio
from tests.conftest import TEST_TRANSFORM_DIR
from tests.conftest import TEST_TRANSFORMERS_DIR

from metakb.transform.civic import CivicTransform
from metakb.transformers.civic import CivicTransformer

DATA_DIR = TEST_TRANSFORM_DIR / "therapeutic"
DATA_DIR = TEST_TRANSFORMERS_DIR / "therapeutic"
FILENAME = "civic_cdm.json"


@pytest_asyncio.fixture(scope="module")
async def data(normalizers):
"""Create a CIViC Transform test fixture."""
"""Create a CIViC Transformer test fixture."""
harvester_path = DATA_DIR / "civic_harvester.json"
c = CivicTransform(
c = CivicTransformer(
data_dir=DATA_DIR, harvester_path=harvester_path, normalizers=normalizers
)
harvested_data = c.extract_harvested_data()
Expand All @@ -33,5 +33,5 @@ def studies(civic_eid2997_study, civic_eid816_study, civic_eid9851_study):


def test_civic_cdm(data, studies, check_transformed_cdm):
"""Test that civic transform works correctly."""
"""Test that civic transformation works correctly."""
check_transformed_cdm(data, studies, DATA_DIR / FILENAME)
Loading
Loading