diff --git a/cumulus/loaders/i2b2/loader.py b/cumulus/loaders/i2b2/loader.py index 10e72845..31899084 100644 --- a/cumulus/loaders/i2b2/loader.py +++ b/cumulus/loaders/i2b2/loader.py @@ -4,11 +4,10 @@ import os import tempfile from functools import partial -from typing import Callable, Iterable, Iterator, List, TypeVar +from typing import Callable, Iterable, List, TypeVar from fhirclient.models.resource import Resource -from cumulus import common from cumulus.loaders.base import Loader from cumulus.loaders.i2b2 import extract, schema, transform from cumulus.loaders.i2b2.oracle import extract as oracle_extract @@ -113,30 +112,18 @@ def _loop(self, i2b2_entries: Iterable[schema.Dimension], to_fhir: I2b2ToFhirCal # ################################################################################################################### - @staticmethod - def _extract_csv_files(extractor: CsvToI2b2Callable, csv_files: Iterable[str]) -> Iterator[schema.Dimension]: - """Generator method that lazily loads a list of input csv files""" - for csv_file in csv_files: - for entry in extractor(csv_file): - yield entry - - def _extract_csv_dir(self, folder: str, extractor: CsvToI2b2Callable) -> Iterator[schema.Dimension]: - """Generator method that lazily loads all input csv files in the given folder""" - csv_files = common.list_csv(folder) - return self._extract_csv_files(extractor, csv_files) - def _load_all_from_csv(self, resources: List[str]) -> tempfile.TemporaryDirectory: path = self.root.path return self._load_all_with_extractors( resources, - conditions=partial(self._extract_csv_dir, os.path.join(path, 'csv_diagnosis'), - extract.extract_csv_observation_facts), - observations=partial(self._extract_csv_dir, os.path.join(path, 'csv_lab'), - extract.extract_csv_observation_facts), - documentreferences=partial(self._extract_csv_dir, os.path.join(path, 'csv_note'), - extract.extract_csv_observation_facts), - patients=partial(self._extract_csv_dir, os.path.join(path, 'csv_patient'), extract.extract_csv_patients), - encounters=partial(self._extract_csv_dir, os.path.join(path, 'csv_visit'), extract.extract_csv_visits), + conditions=partial(extract.extract_csv_observation_facts, + os.path.join(path, 'observation_fact_diagnosis.csv')), + observations=partial(extract.extract_csv_observation_facts, + os.path.join(path, 'observation_fact_lab_views.csv')), + documentreferences=partial(extract.extract_csv_observation_facts, + os.path.join(path, 'observation_fact_notes.csv')), + patients=partial(extract.extract_csv_patients, os.path.join(path, 'patient_dimension.csv')), + encounters=partial(extract.extract_csv_visits, os.path.join(path, 'visit_dimension.csv')), ) ################################################################################################################### diff --git a/tests/data/simple/i2b2-input/csv_diagnosis/diagnosis1.csv b/tests/data/simple/i2b2-input/csv_diagnosis/diagnosis1.csv deleted file mode 100644 index 391351f1..00000000 --- a/tests/data/simple/i2b2-input/csv_diagnosis/diagnosis1.csv +++ /dev/null @@ -1,2 +0,0 @@ -"ENCOUNTER_NUM","PATIENT_NUM","CONCEPT_CD","PROVIDER_ID","START_DATE","MODIFIER_CD","INSTANCE_NUM","VALTYPE_CD","TVAL_CHAR","NVAL_NUM","VALUEFLAG_CD","QUANTITY_NUM","UNITS_CD","END_DATE","LOCATION_CD","OBSERVATION_BLOB","CONFIDENCE_NUM","UPDATE_DATE","DOWNLOAD_DATE","IMPORT_DATE","SOURCESYSTEM_CD","UPLOAD_ID","TEXT_SEARCH_INDEX" -21,312345,ICD10:R40.2412,"51",2010-03-01 10:00:00.000,ICD:S,41,T,Final,,@,,,,,"",,,,2021-03-20 00:00:00.000,DIAG_EPIC_ADT,, diff --git a/tests/data/simple/i2b2-input/csv_patient/patient1.csv b/tests/data/simple/i2b2-input/csv_patient/patient1.csv deleted file mode 100644 index 7a6d635d..00000000 --- a/tests/data/simple/i2b2-input/csv_patient/patient1.csv +++ /dev/null @@ -1,2 +0,0 @@ -"PATIENT_NUM","VITAL_STATUS_CD","BIRTH_DATE","DEATH_DATE","SEX_CD","AGE_IN_YEARS_NUM","LANGUAGE_CD","RACE_CD","MARITAL_STATUS_CD","RELIGION_CD","ZIP_CD","STATECITYZIP_PATH","INCOME_CD","PATIENT_BLOB","UPDATE_DATE","DOWNLOAD_DATE","IMPORT_DATE","SOURCESYSTEM_CD","UPLOAD_ID","PCP_PROVIDER_ID" -323456,"0",1982-10-16 12:00:00.000,,M,39,English,White,Single,Other,"02139",,,[NULL],,,,,,"1111111" diff --git a/tests/data/simple/i2b2-input/csv_diagnosis/diagnosis2.csv b/tests/data/simple/i2b2-input/observation_fact_diagnosis.csv similarity index 78% rename from tests/data/simple/i2b2-input/csv_diagnosis/diagnosis2.csv rename to tests/data/simple/i2b2-input/observation_fact_diagnosis.csv index 3a6c8d9f..1e20544c 100644 --- a/tests/data/simple/i2b2-input/csv_diagnosis/diagnosis2.csv +++ b/tests/data/simple/i2b2-input/observation_fact_diagnosis.csv @@ -1,2 +1,3 @@ "ENCOUNTER_NUM","PATIENT_NUM","CONCEPT_CD","PROVIDER_ID","START_DATE","MODIFIER_CD","INSTANCE_NUM","VALTYPE_CD","TVAL_CHAR","NVAL_NUM","VALUEFLAG_CD","QUANTITY_NUM","UNITS_CD","END_DATE","LOCATION_CD","OBSERVATION_BLOB","CONFIDENCE_NUM","UPDATE_DATE","DOWNLOAD_DATE","IMPORT_DATE","SOURCESYSTEM_CD","UPLOAD_ID","TEXT_SEARCH_INDEX" +21,312345,ICD10:R40.2412,"51",2010-03-01 10:00:00.000,ICD:S,41,T,Final,,@,,,,,"",,,,2021-03-20 00:00:00.000,DIAG_EPIC_ADT,, 212,3123456,ICD10:R40.2412,"512",2010-03-02 10:00:00.000,ICD:S,412,T,Final,,@,,,,,"",,,,2021-03-21 00:00:00.000,DIAG_EPIC_ADT,, diff --git a/tests/data/simple/i2b2-input/csv_lab/lab1.csv b/tests/data/simple/i2b2-input/observation_fact_lab_views.csv similarity index 100% rename from tests/data/simple/i2b2-input/csv_lab/lab1.csv rename to tests/data/simple/i2b2-input/observation_fact_lab_views.csv diff --git a/tests/data/simple/i2b2-input/csv_note/note1.csv b/tests/data/simple/i2b2-input/observation_fact_notes.csv similarity index 100% rename from tests/data/simple/i2b2-input/csv_note/note1.csv rename to tests/data/simple/i2b2-input/observation_fact_notes.csv diff --git a/tests/data/simple/i2b2-input/csv_patient/patient2.csv b/tests/data/simple/i2b2-input/patient_dimension.csv similarity index 79% rename from tests/data/simple/i2b2-input/csv_patient/patient2.csv rename to tests/data/simple/i2b2-input/patient_dimension.csv index 403dea01..eba811f3 100644 --- a/tests/data/simple/i2b2-input/csv_patient/patient2.csv +++ b/tests/data/simple/i2b2-input/patient_dimension.csv @@ -1,2 +1,3 @@ "PATIENT_NUM","VITAL_STATUS_CD","BIRTH_DATE","DEATH_DATE","SEX_CD","AGE_IN_YEARS_NUM","LANGUAGE_CD","RACE_CD","MARITAL_STATUS_CD","RELIGION_CD","ZIP_CD","STATECITYZIP_PATH","INCOME_CD","PATIENT_BLOB","UPDATE_DATE","DOWNLOAD_DATE","IMPORT_DATE","SOURCESYSTEM_CD","UPLOAD_ID","PCP_PROVIDER_ID" +323456,"0",1982-10-16 12:00:00.000,,M,39,English,White,Single,Other,"02139",,,[NULL],,,,,,"1111111" 3234567,"0",1983-10-16 12:00:00.000,,M,39,English,White,Single,Other,"02139",,,[NULL],,,,,,"1111111" diff --git a/tests/data/simple/i2b2-input/csv_visit/visit1.csv b/tests/data/simple/i2b2-input/visit_dimension.csv similarity index 100% rename from tests/data/simple/i2b2-input/csv_visit/visit1.csv rename to tests/data/simple/i2b2-input/visit_dimension.csv diff --git a/tests/test_etl.py b/tests/test_etl.py index 4b1d1fe4..ce0c9a12 100644 --- a/tests/test_etl.py +++ b/tests/test_etl.py @@ -438,7 +438,7 @@ def path_for_checksum(self, checksum): def test_stores_cached_json(self): self.run_etl(output_format='parquet') - notes_csv_path = os.path.join(self.input_path, 'csv_note', 'note1.csv') + notes_csv_path = os.path.join(self.input_path, 'observation_fact_notes.csv') facts = extract.extract_csv_observation_facts(notes_csv_path) for index, checksum in enumerate(self.expected_checksums):