Skip to content

Commit

Permalink
Merge pull request #106 from smart-on-fhir/mikix/i2b2-tweaks
Browse files Browse the repository at this point in the history
i2b2: change the expected csv filenames
  • Loading branch information
mikix authored Dec 21, 2022
2 parents 92e5168 + bd86b8d commit e000c23
Show file tree
Hide file tree
Showing 9 changed files with 12 additions and 27 deletions.
31 changes: 9 additions & 22 deletions cumulus/loaders/i2b2/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,10 @@
import os
import tempfile
from functools import partial
from typing import Callable, Iterable, Iterator, List, TypeVar
from typing import Callable, Iterable, List, TypeVar

from fhirclient.models.resource import Resource

from cumulus import common
from cumulus.loaders.base import Loader
from cumulus.loaders.i2b2 import extract, schema, transform
from cumulus.loaders.i2b2.oracle import extract as oracle_extract
Expand Down Expand Up @@ -113,30 +112,18 @@ def _loop(self, i2b2_entries: Iterable[schema.Dimension], to_fhir: I2b2ToFhirCal
#
###################################################################################################################

@staticmethod
def _extract_csv_files(extractor: CsvToI2b2Callable, csv_files: Iterable[str]) -> Iterator[schema.Dimension]:
    """
    Lazily parse a sequence of csv files into i2b2 entries.

    :param extractor: callable that parses one csv file into i2b2 dimension entries
    :param csv_files: paths of the csv files to parse, consumed in order
    :returns: a generator chaining together every entry from every file
    """
    for csv_path in csv_files:
        # Delegate straight to the per-file generator rather than looping manually
        yield from extractor(csv_path)

def _extract_csv_dir(self, folder: str, extractor: CsvToI2b2Callable) -> Iterator[schema.Dimension]:
    """
    Lazily parse every csv file found in the given folder.

    :param folder: directory to scan for csv input files
    :param extractor: callable that parses one csv file into i2b2 dimension entries
    :returns: a generator over the entries of all csv files in the folder
    """
    return self._extract_csv_files(extractor, common.list_csv(folder))

def _load_all_from_csv(self, resources: List[str]) -> tempfile.TemporaryDirectory:
    """
    Load the requested resource types from an i2b2 csv dump folder.

    Each resource type is read lazily from a fixed, well-known csv filename
    inside the input folder (matching the standard i2b2 table names, e.g.
    ``patient_dimension.csv``).

    NOTE(review): the flattened diff had left both the old per-directory
    extractor arguments and the new per-file ones in this call, which made
    every keyword argument a duplicate (a SyntaxError). Only the new,
    post-change argument set is kept here.

    :param resources: which FHIR resource types to load
    :returns: result of ``_load_all_with_extractors`` (a temp dir of output)
    """
    path = self.root.path
    return self._load_all_with_extractors(
        resources,
        conditions=partial(extract.extract_csv_observation_facts,
                           os.path.join(path, 'observation_fact_diagnosis.csv')),
        observations=partial(extract.extract_csv_observation_facts,
                             os.path.join(path, 'observation_fact_lab_views.csv')),
        documentreferences=partial(extract.extract_csv_observation_facts,
                                   os.path.join(path, 'observation_fact_notes.csv')),
        patients=partial(extract.extract_csv_patients, os.path.join(path, 'patient_dimension.csv')),
        encounters=partial(extract.extract_csv_visits, os.path.join(path, 'visit_dimension.csv')),
    )

###################################################################################################################
Expand Down
2 changes: 0 additions & 2 deletions tests/data/simple/i2b2-input/csv_diagnosis/diagnosis1.csv

This file was deleted.

2 changes: 0 additions & 2 deletions tests/data/simple/i2b2-input/csv_patient/patient1.csv

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
"ENCOUNTER_NUM","PATIENT_NUM","CONCEPT_CD","PROVIDER_ID","START_DATE","MODIFIER_CD","INSTANCE_NUM","VALTYPE_CD","TVAL_CHAR","NVAL_NUM","VALUEFLAG_CD","QUANTITY_NUM","UNITS_CD","END_DATE","LOCATION_CD","OBSERVATION_BLOB","CONFIDENCE_NUM","UPDATE_DATE","DOWNLOAD_DATE","IMPORT_DATE","SOURCESYSTEM_CD","UPLOAD_ID","TEXT_SEARCH_INDEX"
21,312345,ICD10:R40.2412,"51",2010-03-01 10:00:00.000,ICD:S,41,T,Final,,@,,,,,"",,,,2021-03-20 00:00:00.000,DIAG_EPIC_ADT,,
212,3123456,ICD10:R40.2412,"512",2010-03-02 10:00:00.000,ICD:S,412,T,Final,,@,,,,,"",,,,2021-03-21 00:00:00.000,DIAG_EPIC_ADT,,
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
"PATIENT_NUM","VITAL_STATUS_CD","BIRTH_DATE","DEATH_DATE","SEX_CD","AGE_IN_YEARS_NUM","LANGUAGE_CD","RACE_CD","MARITAL_STATUS_CD","RELIGION_CD","ZIP_CD","STATECITYZIP_PATH","INCOME_CD","PATIENT_BLOB","UPDATE_DATE","DOWNLOAD_DATE","IMPORT_DATE","SOURCESYSTEM_CD","UPLOAD_ID","PCP_PROVIDER_ID"
323456,"0",1982-10-16 12:00:00.000,,M,39,English,White,Single,Other,"02139",,,[NULL],,,,,,"1111111"
3234567,"0",1983-10-16 12:00:00.000,,M,39,English,White,Single,Other,"02139",,,[NULL],,,,,,"1111111"
2 changes: 1 addition & 1 deletion tests/test_etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@ def path_for_checksum(self, checksum):
def test_stores_cached_json(self):
self.run_etl(output_format='parquet')

notes_csv_path = os.path.join(self.input_path, 'csv_note', 'note1.csv')
notes_csv_path = os.path.join(self.input_path, 'observation_fact_notes.csv')
facts = extract.extract_csv_observation_facts(notes_csv_path)

for index, checksum in enumerate(self.expected_checksums):
Expand Down

0 comments on commit e000c23

Please sign in to comment.