From 8760f5162093edecf2fdd44b83e4051b5440131b Mon Sep 17 00:00:00 2001
From: Kevin Schaper
Date: Thu, 3 Oct 2024 13:13:16 -0700
Subject: [PATCH] Add option to split command that removes prefixes in filenames generated from field values

---
 src/koza/cli_utils.py | 4 ++++
 src/koza/main.py      | 3 ++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/koza/cli_utils.py b/src/koza/cli_utils.py
index 01d2f4b..e62b7e9 100644
--- a/src/koza/cli_utils.py
+++ b/src/koza/cli_utils.py
@@ -130,6 +130,7 @@ def _check_row_count(type: Literal["node", "edge"]):
 def split_file(file: str,
                fields: str,
                format: OutputFormat = OutputFormat.tsv,
+               remove_prefixes: bool = False,
                output_dir: str = "./output"):
     db = duckdb.connect(":memory:")
 
@@ -146,6 +147,9 @@ def split_file(file: str,
     list_of_value_dicts = [dict(zip(keys, v)) for v in values]
 
     def clean_value_for_filename(value):
+        if remove_prefixes and ':' in value:
+            value = value.split(":")[-1]
+
         return value.replace("biolink:", "").replace(" ", "_").replace(":", "_")
 
     def generate_filename_from_row(row):
diff --git a/src/koza/main.py b/src/koza/main.py
index 9997801..fa4a27b 100755
--- a/src/koza/main.py
+++ b/src/koza/main.py
@@ -69,10 +69,11 @@ def validate(
 def split(
     file: str = typer.Argument(..., help="Path to the source kgx file to be split"),
     fields: str = typer.Argument(..., help="Comma separated list of fields to split on"),
+    remove_prefixes: bool = typer.Option(False, help="Remove prefixes from the file names for values from the specified fields. (e.g, NCBIGene:9606 becomes 9606"),
     output_dir: str = typer.Option(default="output", help="Path to output directory"),
 ):
     """Split a file by fields"""
-    split_file(file, fields, output_dir=output_dir)
+    split_file(file, fields,remove_prefixes=remove_prefixes, output_dir=output_dir)
 
 if __name__ == "__main__":
     typer_app()