Skip to content

Commit

Permalink
Allow output file to be zipped (#56)
Browse files Browse the repository at this point in the history
Allow output file to be compressed using a user-specified format
  • Loading branch information
pipliggins authored Jun 28, 2024
1 parent d803afa commit 7c56d21
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 0 deletions.
15 changes: 15 additions & 0 deletions fhirflat/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import argparse
import hashlib
import os
import shutil
import timeit
import warnings
from datetime import datetime
Expand Down Expand Up @@ -431,6 +432,7 @@ def convert_data_to_flat(
mapping_files_types: tuple[dict, dict] | None = None,
sheet_id: str | None = None,
subject_id="subjid",
compress_format: None | str = None,
):
"""
Takes raw clinical data (currently assumed to be a one-row-per-patient format like
Expand Down Expand Up @@ -458,6 +460,8 @@ def convert_data_to_flat(
be named by resource, and contain the mapping for that resource.
subject_id: str
The name of the column containing the subject ID in the data file.
compress_format: optional str
If the output folder should be zipped, and if so with what format.
"""

if not mapping_files_types and not sheet_id:
Expand Down Expand Up @@ -536,6 +540,9 @@ def convert_data_to_flat(
)

write_metadata(*generate_metadata(folder_name), Path(folder_name) / "fhirflat.toml")
if compress_format:
shutil.make_archive(folder_name, compress_format, folder_name)
shutil.rmtree(folder_name)


def main():
Expand Down Expand Up @@ -567,6 +574,13 @@ def main():
default="subjid",
)

parser.add_argument(
"-c",
"--compress",
help="Compress the output folder using this format",
choices=["zip", "tar", "gztar", "bztar", "xztar"],
)

args = parser.parse_args()

convert_data_to_flat(
Expand All @@ -576,6 +590,7 @@ def main():
folder_name=args.output,
sheet_id=args.sheet_id,
subject_id=args.subject_id,
compress_format=args.compress,
)


Expand Down
21 changes: 21 additions & 0 deletions tests/test_ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -998,6 +998,27 @@ def test_convert_data_to_flat_local_mapping():
shutil.rmtree(output_folder)


def test_convert_data_to_flat_local_mapping_zipped():
    """Check that ``compress_format="zip"`` leaves only a zip archive behind.

    Runs ``convert_data_to_flat`` with a local Encounter mapping and asks for
    the output to be compressed. The archive is expected next to the requested
    output folder (``<folder_name>.zip``) and the uncompressed folder itself is
    expected to be gone afterwards.
    """
    output_folder = "tests/ingestion_output"
    archive_path = f"{output_folder}.zip"
    mappings = {
        Encounter: "tests/dummy_data/encounter_dummy_mapping.csv",
    }
    resource_types = {"Encounter": "one-to-one"}

    try:
        convert_data_to_flat(
            "tests/dummy_data/combined_dummy_data.csv",
            folder_name=output_folder,
            date_format="%Y-%m-%d",
            timezone="Brazil/East",
            mapping_files_types=(mappings, resource_types),
            compress_format="zip",
        )

        assert os.path.exists(archive_path)
        # Compression replaces the folder: the raw output must not linger.
        assert not os.path.exists(output_folder)
    finally:
        # Always clean up, even when the conversion or an assertion fails, so
        # leftover artefacts cannot leak into other tests. Both removals are
        # guarded so a missing path never masks the original failure.
        if os.path.exists(archive_path):
            os.remove(archive_path)
        shutil.rmtree(output_folder, ignore_errors=True)


def test_ingest_to_flat_validation_errors():
df = pd.DataFrame(
{
Expand Down

0 comments on commit 7c56d21

Please sign in to comment.