diff --git a/chart_review/__init__.py b/chart_review/__init__.py index 2b9210a..d32c983 100644 --- a/chart_review/__init__.py +++ b/chart_review/__init__.py @@ -1,3 +1,3 @@ """Chart Review public entry point""" -__version__ = "1.3.0" +__version__ = "2.0.0" diff --git a/chart_review/cli.py b/chart_review/cli.py index 88a71de..0fd1203 100644 --- a/chart_review/cli.py +++ b/chart_review/cli.py @@ -3,23 +3,18 @@ import argparse import sys -import chart_review -from chart_review.commands import accuracy, info +from chart_review.commands import accuracy, default, ids, labels def define_parser() -> argparse.ArgumentParser: """Fills out an argument parser with all the CLI options.""" parser = argparse.ArgumentParser() + default.make_subparser(parser) - parser.add_argument( - "--version", - action="version", - version=f"chart-review {chart_review.__version__}", - ) - - subparsers = parser.add_subparsers(required=True) - accuracy.make_subparser(subparsers.add_parser("accuracy")) - info.make_subparser(subparsers.add_parser("info")) + subparsers = parser.add_subparsers() + accuracy.make_subparser(subparsers.add_parser("accuracy", help="calculate F1 and Kappa scores")) + ids.make_subparser(subparsers.add_parser("ids", help="map Label Studio IDs to FHIR IDs")) + labels.make_subparser(subparsers.add_parser("labels", help="show label usage by annotator")) return parser diff --git a/chart_review/cli_utils.py b/chart_review/cli_utils.py index 7e441bd..ff63584 100644 --- a/chart_review/cli_utils.py +++ b/chart_review/cli_utils.py @@ -2,18 +2,30 @@ import argparse +from chart_review import cohort, config -def add_project_args(parser: argparse.ArgumentParser) -> None: + +def add_project_args(parser: argparse.ArgumentParser, is_global: bool = False) -> None: group = parser.add_argument_group("configuration") group.add_argument( "--project-dir", - default=".", + "-p", + default=None if is_global else argparse.SUPPRESS, metavar="DIR", help=( - "Directory holding project files, " + 
"directory holding project files, " "like labelstudio-export.json (default: current dir)" ), ) group.add_argument( - "--config", "-c", metavar="PATH", help="Config file (default: [project-dir]/config.yaml)" + "--config", + "-c", + default=None if is_global else argparse.SUPPRESS, + metavar="PATH", + help="config file (default: [project-dir]/config.yaml)", ) + + +def get_cohort_reader(args: argparse.Namespace) -> cohort.CohortReader: + proj_config = config.ProjectConfig(project_dir=args.project_dir, config_path=args.config) + return cohort.CohortReader(proj_config) diff --git a/chart_review/commands/default.py b/chart_review/commands/default.py new file mode 100644 index 0000000..17da27d --- /dev/null +++ b/chart_review/commands/default.py @@ -0,0 +1,47 @@ +"""Methods for showing config & calculated setup info.""" + +import argparse + +import rich +import rich.box +import rich.table + +import chart_review +from chart_review import cli_utils, console_utils + + +def print_info(args: argparse.Namespace) -> None: + """Show project information on the console.""" + reader = cli_utils.get_cohort_reader(args) + console = rich.get_console() + + # Charts + chart_table = rich.table.Table( + "Annotator", + "Chart Count", + "Chart IDs", + box=rich.box.ROUNDED, + ) + for annotator in sorted(reader.note_range): + notes = reader.note_range[annotator] + chart_table.add_row( + annotator, + str(len(notes)), + console_utils.pretty_note_range(notes), + ) + + console.print(chart_table) + console_utils.print_ignored_charts(reader) + + console.print() + console.print("Pass --help to see more options.") + + +def make_subparser(parser: argparse.ArgumentParser) -> None: + cli_utils.add_project_args(parser, is_global=True) + parser.add_argument( + "--version", + action="version", + version=f"chart-review {chart_review.__version__}", + ) + parser.set_defaults(func=print_info) diff --git a/chart_review/commands/ids.py b/chart_review/commands/ids.py new file mode 100644 index 0000000..780f934 --- 
/dev/null +++ b/chart_review/commands/ids.py @@ -0,0 +1,50 @@ +import argparse +import csv +import sys + +from chart_review import cli_utils + + +def make_subparser(parser: argparse.ArgumentParser) -> None: + cli_utils.add_project_args(parser) + parser.set_defaults(func=print_ids) + + +def print_ids(args: argparse.Namespace) -> None: + """ + Prints a mapping of all project IDs. + + Currently, this writes a CSV file to stdout. In the future, this could get fancier. + At the time of writing, it wasn't clear how to present the information in a way that is + sensible to a casual console user - so I went with the more technical-oriented CSV file. + """ + reader = cli_utils.get_cohort_reader(args) + + writer = csv.writer(sys.stdout) + writer.writerow(["chart_id", "original_fhir_id", "anonymized_fhir_id"]) + + # IDS + for chart in reader.ls_export: + chart_id = str(chart["id"]) + chart_data = chart.get("data", {}) + printed = False + + # Grab encounters first + orig_id = f"Encounter/{chart_data['enc_id']}" if "enc_id" in chart_data else "" + anon_id = f"Encounter/{chart_data['anon_id']}" if "anon_id" in chart_data else "" + if orig_id or anon_id: + writer.writerow([chart_id, orig_id, anon_id]) + printed = True + + # Now each DocRef ID + for orig_id, anon_id in chart_data.get("docref_mappings", {}).items(): + writer.writerow( + [chart_id, f"DocumentReference/{orig_id}", f"DocumentReference/{anon_id}"] + ) + printed = True + + if not printed: + # Guarantee that every Chart ID shows up at least once - so it's clearer that the + # chart ID is included in the Label Studio export but that it does not have any + # IDs mapped to it. 
+ writer.writerow([chart_id, None, None]) diff --git a/chart_review/commands/info.py b/chart_review/commands/info.py deleted file mode 100644 index 71fd1b3..0000000 --- a/chart_review/commands/info.py +++ /dev/null @@ -1,154 +0,0 @@ -"""Methods for showing config & calculated setup info.""" - -import argparse -import csv -import sys - -import rich -import rich.box -import rich.table -import rich.text -import rich.tree - -from chart_review import cli_utils, cohort, config, console_utils, types - - -def print_info(reader: cohort.CohortReader) -> None: - """ - Show project information on the console. - - :param reader: the cohort configuration - """ - console = rich.get_console() - - # Charts - chart_table = rich.table.Table( - "Annotator", - "Chart Count", - "Chart IDs", - box=rich.box.ROUNDED, - ) - for annotator in sorted(reader.note_range): - notes = reader.note_range[annotator] - chart_table.add_row( - annotator, - str(len(notes)), - console_utils.pretty_note_range(notes), - ) - - console.print(chart_table) - print_ignored_charts(reader) - - -def print_ids(reader: cohort.CohortReader) -> None: - """ - Prints a mapping of all project IDs. - - Currently, this writes a CSV file to stdout. In the future, this could get fancier. - At the time of writing, it wasn't clear how to present the information in a way that - sensible to a casual console user - so I went with the more technical-oriented CSV file. 
- """ - writer = csv.writer(sys.stdout) - writer.writerow(["chart_id", "original_fhir_id", "anonymized_fhir_id"]) - - # IDS - for chart in reader.ls_export: - chart_id = str(chart["id"]) - chart_data = chart.get("data", {}) - printed = False - - # Grab encounters first - orig_id = f"Encounter/{chart_data['enc_id']}" if "enc_id" in chart_data else "" - anon_id = f"Encounter/{chart_data['anon_id']}" if "anon_id" in chart_data else "" - if orig_id or anon_id: - writer.writerow([chart_id, orig_id, anon_id]) - printed = True - - # Now each DocRef ID - for orig_id, anon_id in chart_data.get("docref_mappings", {}).items(): - writer.writerow( - [chart_id, f"DocumentReference/{orig_id}", f"DocumentReference/{anon_id}"] - ) - printed = True - - if not printed: - # Guarantee that every Chart ID shows up at least once - so it's clearer that the - # chart ID is included in the Label Studio export but that it does not have any - # IDs mapped to it. - writer.writerow([chart_id, None, None]) - - -def print_labels(reader: cohort.CohortReader) -> None: - """ - Show label information on the console. 
- - :param reader: the cohort configuration - """ - # Calculate all label counts for each annotator - label_names = sorted(reader.class_labels, key=str.casefold) - label_notes: dict[str, dict[str, types.NoteSet]] = {} # annotator -> label -> note IDs - any_annotator_note_sets: dict[str, types.NoteSet] = {} - for annotator, mentions in reader.annotations.mentions.items(): - label_notes[annotator] = {} - for name in label_names: - note_ids = {note_id for note_id, labels in mentions.items() if name in labels} - label_notes[annotator][name] = note_ids - any_annotator_note_sets.setdefault(name, types.NoteSet()).update(note_ids) - - label_table = rich.table.Table( - "Annotator", - "Chart Count", - "Label", - box=rich.box.ROUNDED, - ) - - # First add summary entries, for counts across the union of all annotators - for name in label_names: - count = str(len(any_annotator_note_sets.get(name, {}))) - label_table.add_row(rich.text.Text("Any", style="italic"), count, name) - - # Now do each annotator as their own little boxed section - for annotator in sorted(label_notes.keys(), key=str.casefold): - label_table.add_section() - for name, note_set in label_notes[annotator].items(): - count = str(len(note_set)) - label_table.add_row(annotator, count, name) - - rich.get_console().print(label_table) - print_ignored_charts(reader) - - -def print_ignored_charts(reader: cohort.CohortReader): - """Prints a line about ignored charts, suitable for underlying a table""" - if not reader.ignored_notes: - return - - ignored_count = len(reader.ignored_notes) - chart_word = "chart" if ignored_count == 1 else "charts" - pretty_ranges = console_utils.pretty_note_range(reader.ignored_notes) - rich.get_console().print( - f" Ignoring {ignored_count} {chart_word} ({pretty_ranges})", - highlight=False, - style="italic", - ) - - -def make_subparser(parser: argparse.ArgumentParser) -> None: - cli_utils.add_project_args(parser) - mode = parser.add_mutually_exclusive_group() - mode.add_argument( - 
"--ids", action="store_true", help="Prints a CSV of ID mappings (chart & FHIR IDs)" - ) - mode.add_argument("--labels", action="store_true", help="Prints label info and usage") - parser.set_defaults(func=run_info) - - -def run_info(args: argparse.Namespace) -> None: - proj_config = config.ProjectConfig(args.project_dir, config_path=args.config) - reader = cohort.CohortReader(proj_config) - if args.ids: - print_ids(reader) - elif args.labels: - print_labels(reader) - else: - print_info(reader) diff --git a/chart_review/commands/labels.py b/chart_review/commands/labels.py new file mode 100644 index 0000000..29ed9f7 --- /dev/null +++ b/chart_review/commands/labels.py @@ -0,0 +1,51 @@ +import argparse + +import rich +import rich.box +import rich.table +import rich.text + +from chart_review import cli_utils, console_utils, types + + +def make_subparser(parser: argparse.ArgumentParser) -> None: + cli_utils.add_project_args(parser) + parser.set_defaults(func=print_labels) + + +def print_labels(args: argparse.Namespace) -> None: + """Show label information on the console.""" + reader = cli_utils.get_cohort_reader(args) + + # Calculate all label counts for each annotator + label_names = sorted(reader.class_labels, key=str.casefold) + label_notes: dict[str, dict[str, types.NoteSet]] = {} # annotator -> label -> note IDs + any_annotator_note_sets: dict[str, types.NoteSet] = {} + for annotator, mentions in reader.annotations.mentions.items(): + label_notes[annotator] = {} + for name in label_names: + note_ids = {note_id for note_id, labels in mentions.items() if name in labels} + label_notes[annotator][name] = note_ids + any_annotator_note_sets.setdefault(name, types.NoteSet()).update(note_ids) + + label_table = rich.table.Table( + "Annotator", + "Chart Count", + "Label", + box=rich.box.ROUNDED, + ) + + # First add summary entries, for counts across the union of all annotators + for name in label_names: + count = str(len(any_annotator_note_sets.get(name, {}))) + 
label_table.add_row(rich.text.Text("Any", style="italic"), count, name) + + # Now do each annotator as their own little boxed section + for annotator in sorted(label_notes.keys(), key=str.casefold): + label_table.add_section() + for name, note_set in label_notes[annotator].items(): + count = str(len(note_set)) + label_table.add_row(annotator, count, name) + + rich.get_console().print(label_table) + console_utils.print_ignored_charts(reader) diff --git a/chart_review/config.py b/chart_review/config.py index 0e3d2c2..e9c4aa2 100644 --- a/chart_review/config.py +++ b/chart_review/config.py @@ -4,6 +4,7 @@ import sys from typing import Iterable, Optional, Union +import rich.console import yaml from chart_review import types @@ -13,12 +14,25 @@ class ProjectConfig: _NUMBER_REGEX = re.compile(r"\d+") _RANGE_REGEX = re.compile(r"\d+-\d+") - def __init__(self, project_dir: str, config_path: Optional[str] = None): + def __init__(self, project_dir: Optional[str] = None, config_path: Optional[str] = None): """ :param project_dir: str like /opt/labelstudio/study_name """ - self.project_dir = project_dir - self._data = self._load_config(config_path) + self.project_dir = project_dir or "." + try: + self._data = self._load_config(config_path) + except FileNotFoundError as exc: + # Be very helpful - this is likely the user's first experience with this project. + stderr = rich.console.Console(stderr=True) + stderr.print(exc, style="bold red", highlight=False) + stderr.print() + stderr.print("This does not appear to be a chart-review project folder.") + stderr.print( + "See https://docs.smarthealthit.org/cumulus/chart-review/ to set up your project." 
+ ) + stderr.print() + stderr.print("Or pass --help for usage info.") + sys.exit(2) # ** Annotators ** # Internally, we're often dealing with numeric ID as the primary annotator identifier, diff --git a/chart_review/console_utils.py b/chart_review/console_utils.py index c7e47fd..320d179 100644 --- a/chart_review/console_utils.py +++ b/chart_review/console_utils.py @@ -1,6 +1,8 @@ """Helper methods for printing to the console.""" -from chart_review import types +import rich + +from chart_review import cohort, types def pretty_note_range(notes: types.NoteSet) -> str: @@ -34,3 +36,18 @@ def end_range() -> None: end_range() return ", ".join(ranges) + + +def print_ignored_charts(reader: cohort.CohortReader): + """Prints a line about ignored charts, suitable for underlying a table""" + if not reader.ignored_notes: + return + + ignored_count = len(reader.ignored_notes) + chart_word = "chart" if ignored_count == 1 else "charts" + pretty_ranges = pretty_note_range(reader.ignored_notes) + rich.get_console().print( + f" Ignoring {ignored_count} {chart_word} ({pretty_ranges})", + highlight=False, + style="italic", + ) diff --git a/docs/accuracy.md b/docs/accuracy.md index 440a07b..7aa47bb 100644 --- a/docs/accuracy.md +++ b/docs/accuracy.md @@ -31,16 +31,6 @@ F1 Sens Spec PPV NPV Kappa TP FN TN FP Label ## Options -### `--config=PATH` - -Use this to point to a secondary (non-default) config file. -Useful if you have multiple label setups (e.g. one grouped into a binary label and one not). - -### `--project-dir=DIR` - -Use this to run `chart-review` outside of your project dir. -Config files, external annotations, etc will be looked for in that directory. 
- ### `--save` Use this to write a JSON and CSV file to the project directory, diff --git a/docs/ids.md b/docs/ids.md new file mode 100644 index 0000000..7a41ea4 --- /dev/null +++ b/docs/ids.md @@ -0,0 +1,35 @@ +--- +title: IDs Command +parent: Chart Review +nav_order: 6 +# audience: lightly technical folks +# type: how-to +--- + +# The IDs Command + +The `ids` command prints a mapping of chart & FHIR IDs to the console, in CSV format. +Redirect the output to a file to save it to disk. + +This is helpful when you are juggling anonymous IDs from Cumulus's Athena database +as well as original IDs from your EHR, on top of the Label Studio chart IDs. + +{: .note } +FHIR IDs could be considered PHI depending on how the EHR generates them. +Exercise appropriate caution when sharing the output of this command. + +## Examples + +```shell +$ chart-review ids > ids.csv +``` + +```shell +$ chart-review ids +chart_id,original_fhir_id,anonymized_fhir_id +1,Encounter/E123,Encounter/170a37476339af6f31ed7b1b0bbb4f11d5daacd79bf9f490d49f93742acfd2bd +1,DocumentReference/D123,DocumentReference/331ab320fe6264535a408aa1a7ecf1465fc0631580af5f3010bfecf71c99d141 +2,Encounter/E898,Encounter/8b0bd207147989492801b7c14eebc015564ab73a07bdabdf9aefc3425eeba982 +2,DocumentReference/D898,DocumentReference/b5e329b752067eca1584f9cd132f40c637d8a9ebd6f2a599794f9436fb83c2eb +2,DocumentReference/D899,DocumentReference/605338cd18c2617864db23fd5fd956f3e806af2021ffa6d11c34cac998eb3b6d +``` diff --git a/docs/info.md b/docs/info.md deleted file mode 100644 index 599b424..0000000 --- a/docs/info.md +++ /dev/null @@ -1,95 +0,0 @@ ---- -title: Info Command -parent: Chart Review -nav_order: 6 -# audience: lightly technical folks -# type: how-to ---- - -# The Info Command - -The `info` command will print information about your current project. - -This is helpful to examine the computed list of chart ID ranges or labels. 
- -## Example - -```shell -$ chart-review info -╭──────────┬─────────────┬──────────╮ -│Annotator │ Chart Count │ Chart IDs│ -├──────────┼─────────────┼──────────┤ -│jane │ 3 │ 1, 3–4 │ -│jill │ 4 │ 1–4 │ -│john │ 3 │ 1–2, 4 │ -╰──────────┴─────────────┴──────────╯ -``` - -## Options - -### `--ids` - -Prints a mapping of chart & FHIR IDs to the console, in CSV format. -Redirect the output to a file to save it to disk. - -This is helpful when you are juggling anonymous IDs from Cumulus's Athena database -as well as original IDs from your EHR, on top of the Label Studio chart IDs. - -{: .note } -FHIR IDs could be considered PHI depending on how the EHR generates them. -Exercise appropriate caution when sharing the output of this command. - -#### Examples - -```shell -$ chart-review info --ids > ids.csv -``` - -```shell -$ chart-review info --ids -chart_id,original_fhir_id,anonymized_fhir_id -1,Encounter/E123,Encounter/170a37476339af6f31ed7b1b0bbb4f11d5daacd79bf9f490d49f93742acfd2bd -1,DocumentReference/D123,DocumentReference/331ab320fe6264535a408aa1a7ecf1465fc0631580af5f3010bfecf71c99d141 -2,Encounter/E898,Encounter/8b0bd207147989492801b7c14eebc015564ab73a07bdabdf9aefc3425eeba982 -2,DocumentReference/D898,DocumentReference/b5e329b752067eca1584f9cd132f40c637d8a9ebd6f2a599794f9436fb83c2eb -2,DocumentReference/D899,DocumentReference/605338cd18c2617864db23fd5fd956f3e806af2021ffa6d11c34cac998eb3b6d -``` - -### `--labels` - -Prints some statistics on the project labels and how often each annotator used each label. 
- - #### Example - - ```shell -$ chart-review info --labels -╭───────────┬─────────────┬──────────╮ -│ Annotator │ Chart Count │ Label │ -├───────────┼─────────────┼──────────┤ -│ Any │ 2 │ Cough │ -│ Any │ 3 │ Fatigue │ -│ Any │ 3 │ Headache │ -├───────────┼─────────────┼──────────┤ -│ jane │ 1 │ Cough │ -│ jane │ 2 │ Fatigue │ -│ jane │ 2 │ Headache │ -├───────────┼─────────────┼──────────┤ -│ jill │ 2 │ Cough │ -│ jill │ 3 │ Fatigue │ -│ jill │ 0 │ Headache │ -├───────────┼─────────────┼──────────┤ -│ john │ 1 │ Cough │ -│ john │ 2 │ Fatigue │ -│ john │ 2 │ Headache │ -╰───────────┴─────────────┴──────────╯ -``` - -### `--config=PATH` - -Use this to point to a secondary (non-default) config file. -Useful if you have multiple label setups (e.g. one grouped into a binary label and one not). - -### `--project-dir=DIR` - -Use this to run `chart-review` outside of your project dir. -Config files, external annotations, etc will be looked for in that directory. diff --git a/docs/labels.md b/docs/labels.md new file mode 100644 index 0000000..a547794 --- /dev/null +++ b/docs/labels.md @@ -0,0 +1,37 @@ +--- +title: Labels Command +parent: Chart Review +nav_order: 7 +# audience: lightly technical folks +# type: how-to +--- + +# The Labels Command + +The `labels` command prints some statistics on the project labels +and how often each annotator used each label. 
+ +## Example + +```shell +$ chart-review labels +╭───────────┬─────────────┬──────────╮ +│ Annotator │ Chart Count │ Label │ +├───────────┼─────────────┼──────────┤ +│ Any │ 2 │ Cough │ +│ Any │ 3 │ Fatigue │ +│ Any │ 3 │ Headache │ +├───────────┼─────────────┼──────────┤ +│ jane │ 1 │ Cough │ +│ jane │ 2 │ Fatigue │ +│ jane │ 2 │ Headache │ +├───────────┼─────────────┼──────────┤ +│ jill │ 2 │ Cough │ +│ jill │ 3 │ Fatigue │ +│ jill │ 0 │ Headache │ +├───────────┼─────────────┼──────────┤ +│ john │ 1 │ Cough │ +│ john │ 2 │ Fatigue │ +│ john │ 2 │ Headache │ +╰───────────┴─────────────┴──────────╯ +``` diff --git a/docs/setup.md b/docs/setup.md index b3a2136..b80088f 100644 --- a/docs/setup.md +++ b/docs/setup.md @@ -21,7 +21,6 @@ nav_order: 1 ## Run Chart Review -The only current command is `accuracy`, -which will print agreement statistics between two annotators. +Run `chart-review` in your project directory for some basic chart info. -Read more about it in its own [accuracy command documentation](accuracy.md). \ No newline at end of file +Or run `chart-review --help` for a list of commands. diff --git a/tests/test_cli.py b/tests/test_cli.py index f8fab0a..89aa2fb 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -20,3 +20,37 @@ def test_version(self): version = chart_review.__version__ self.assertEqual(f"chart-review {version}\n", stdout.getvalue()) + + def test_default_info(self): + stdout = self.run_cli("--project-dir", f"{self.DATA_DIR}/cold") + + self.assertEqual( + """╭───────────┬─────────────┬───────────╮ +│ Annotator │ Chart Count │ Chart IDs │ +├───────────┼─────────────┼───────────┤ +│ jane │ 3 │ 1, 3–4 │ +│ jill │ 4 │ 1–4 │ +│ john │ 3 │ 1–2, 4 │ +╰───────────┴─────────────┴───────────╯ + +Pass --help to see more options. 
+""", + stdout, + ) + + def test_default_info_ignored(self): + stdout = self.run_cli("--project-dir", f"{self.DATA_DIR}/ignore") + + self.assertEqual( + """╭───────────┬─────────────┬───────────╮ +│ Annotator │ Chart Count │ Chart IDs │ +├───────────┼─────────────┼───────────┤ +│ adam │ 2 │ 1–2 │ +│ allison │ 2 │ 1–2 │ +╰───────────┴─────────────┴───────────╯ + Ignoring 3 charts (3–5) + +Pass --help to see more options. +""", + stdout, + ) diff --git a/tests/test_ids.py b/tests/test_ids.py new file mode 100644 index 0000000..5ad1e8b --- /dev/null +++ b/tests/test_ids.py @@ -0,0 +1,115 @@ +"""Tests for commands/ids.py""" + +import tempfile + +from chart_review import common +from tests import base + + +class TestIDs(base.TestCase): + """Test case for the top-level ids code""" + + def test_ids_quoted(self): + """Verify that we quote the output when needed""" + with tempfile.TemporaryDirectory() as tmpdir: + common.write_json(f"{tmpdir}/config.json", {}) + common.write_json( + f"{tmpdir}/labelstudio-export.json", + [ + { + "id": 1, + "data": { + "enc_id": "Orig,\\ 'Enc", + "anon_id": 'Anon "Enc', + }, + }, + ], + ) + stdout = self.run_cli("ids", "--project-dir", tmpdir) + + lines = stdout.splitlines() + self.assertEqual(2, len(lines)) + self.assertEqual('1,"Encounter/Orig,\\ \'Enc","Encounter/Anon ""Enc"', lines[1]) + + def test_ids_sources(self): + """Verify that we pull IDs from all the places""" + with tempfile.TemporaryDirectory() as tmpdir: + common.write_json( + f"{tmpdir}/config.json", + { + "annotators": {"carl": 1}, + }, + ) + common.write_json( + f"{tmpdir}/labelstudio-export.json", + [ + { + "id": 1, + "annotations": [ + { + "completed_by": 1, + "result": [ + { + "value": { + "labels": [ + "My Label", + ], + }, + }, + ], + }, + ], + # only orig encounter, no docref mappings + "data": { + "enc_id": "Orig", + }, + }, + { + "id": 2, + # only anon encounter, a single docref mapping + "data": { + "anon_id": "Anon", + "docref_mappings": { + "Orig": "Anon", + }, + 
}, + }, + { + "id": 3, + # no encounter info, multiple docref mappings + "data": { + "docref_mappings": { + "Orig1": "Anon1", + "Orig2": "Anon2", + }, + }, + }, + { + "id": 4, + # full encounter info, no docref mappings + "data": { + "enc_id": "a", + "anon_id": "b", + }, + }, + { + "id": 5, + # no metadata at all + }, + ], + ) + stdout = self.run_cli("ids", "--project-dir", tmpdir) + + self.assertEqual( + [ + "chart_id,original_fhir_id,anonymized_fhir_id", + "1,Encounter/Orig,", + "2,,Encounter/Anon", + "2,DocumentReference/Orig,DocumentReference/Anon", + "3,DocumentReference/Orig1,DocumentReference/Anon1", + "3,DocumentReference/Orig2,DocumentReference/Anon2", + "4,Encounter/a,Encounter/b", + "5,,", + ], + stdout.splitlines(), + ) diff --git a/tests/test_info.py b/tests/test_info.py deleted file mode 100644 index c2d4143..0000000 --- a/tests/test_info.py +++ /dev/null @@ -1,224 +0,0 @@ -"""Tests for commands/info.py""" - -import tempfile - -from chart_review import common -from tests import base - - -class TestInfo(base.TestCase): - """Test case for the top-level info code""" - - def test_info(self): - stdout = self.run_cli("info", "--project-dir", f"{self.DATA_DIR}/cold") - - self.assertEqual( - """╭───────────┬─────────────┬───────────╮ -│ Annotator │ Chart Count │ Chart IDs │ -├───────────┼─────────────┼───────────┤ -│ jane │ 3 │ 1, 3–4 │ -│ jill │ 4 │ 1–4 │ -│ john │ 3 │ 1–2, 4 │ -╰───────────┴─────────────┴───────────╯ -""", - stdout, - ) - - def test_info_ignored(self): - stdout = self.run_cli("info", "--project-dir", f"{self.DATA_DIR}/ignore") - - self.assertEqual( - """╭───────────┬─────────────┬───────────╮ -│ Annotator │ Chart Count │ Chart IDs │ -├───────────┼─────────────┼───────────┤ -│ adam │ 2 │ 1–2 │ -│ allison │ 2 │ 1–2 │ -╰───────────┴─────────────┴───────────╯ - Ignoring 3 charts (3–5) -""", - stdout, - ) - - def test_ids_quoted(self): - """Verify that we quote the output when needed""" - with tempfile.TemporaryDirectory() as tmpdir: - 
common.write_json(f"{tmpdir}/config.json", {}) - common.write_json( - f"{tmpdir}/labelstudio-export.json", - [ - { - "id": 1, - "data": { - "enc_id": "Orig,\\ 'Enc", - "anon_id": 'Anon "Enc', - }, - }, - ], - ) - stdout = self.run_cli("info", "--ids", "--project-dir", tmpdir) - - lines = stdout.splitlines() - self.assertEqual(2, len(lines)) - self.assertEqual('1,"Encounter/Orig,\\ \'Enc","Encounter/Anon ""Enc"', lines[1]) - - def test_ids_sources(self): - """Verify that we pull IDs from all the places""" - with tempfile.TemporaryDirectory() as tmpdir: - common.write_json( - f"{tmpdir}/config.json", - { - "annotators": {"carl": 1}, - }, - ) - common.write_json( - f"{tmpdir}/labelstudio-export.json", - [ - { - "id": 1, - "annotations": [ - { - "completed_by": 1, - "result": [ - { - "value": { - "labels": [ - "My Label", - ], - }, - }, - ], - }, - ], - # only orig encounter, no docref mappings - "data": { - "enc_id": "Orig", - }, - }, - { - "id": 2, - # only anon encounter, a single docref mapping - "data": { - "anon_id": "Anon", - "docref_mappings": { - "Orig": "Anon", - }, - }, - }, - { - "id": 3, - # no encounter info, multiple docref mappings - "data": { - "docref_mappings": { - "Orig1": "Anon1", - "Orig2": "Anon2", - }, - }, - }, - { - "id": 4, - # full encounter info, no docref mappings - "data": { - "enc_id": "a", - "anon_id": "b", - }, - }, - { - "id": 5, - # no metadata at all - }, - ], - ) - stdout = self.run_cli("info", "--ids", "--project-dir", tmpdir) - - self.assertEqual( - [ - "chart_id,original_fhir_id,anonymized_fhir_id", - "1,Encounter/Orig,", - "2,,Encounter/Anon", - "2,DocumentReference/Orig,DocumentReference/Anon", - "3,DocumentReference/Orig1,DocumentReference/Anon1", - "3,DocumentReference/Orig2,DocumentReference/Anon2", - "4,Encounter/a,Encounter/b", - "5,,", - ], - stdout.splitlines(), - ) - - def test_labels(self): - stdout = self.run_cli("info", "--project-dir", f"{self.DATA_DIR}/cold", "--labels") - - self.assertEqual( - 
"""╭───────────┬─────────────┬──────────╮ -│ Annotator │ Chart Count │ Label │ -├───────────┼─────────────┼──────────┤ -│ Any │ 2 │ Cough │ -│ Any │ 3 │ Fatigue │ -│ Any │ 3 │ Headache │ -├───────────┼─────────────┼──────────┤ -│ jane │ 1 │ Cough │ -│ jane │ 2 │ Fatigue │ -│ jane │ 2 │ Headache │ -├───────────┼─────────────┼──────────┤ -│ jill │ 2 │ Cough │ -│ jill │ 3 │ Fatigue │ -│ jill │ 0 │ Headache │ -├───────────┼─────────────┼──────────┤ -│ john │ 1 │ Cough │ -│ john │ 2 │ Fatigue │ -│ john │ 2 │ Headache │ -╰───────────┴─────────────┴──────────╯ -""", - stdout, - ) - - def test_labels_grouped(self): - """Verify that we only show final grouped labels, not intermediate ones""" - with tempfile.TemporaryDirectory() as tmpdir: - common.write_json( - f"{tmpdir}/config.json", - { - "labels": ["fever", "rash", "recent"], - "grouped-labels": {"symptoms": ["fever", "rash"]}, - }, - ) - common.write_json( - f"{tmpdir}/labelstudio-export.json", - [], - ) - stdout = self.run_cli("info", "--labels", "--project-dir", tmpdir) - - self.assertEqual( - """╭───────────┬─────────────┬──────────╮ -│ Annotator │ Chart Count │ Label │ -├───────────┼─────────────┼──────────┤ -│ Any │ 0 │ recent │ -│ Any │ 0 │ symptoms │ -╰───────────┴─────────────┴──────────╯ -""", - stdout, - ) - - def test_labels_ignored(self): - """Verify that we show info on ignored notes""" - with tempfile.TemporaryDirectory() as tmpdir: - common.write_json( - f"{tmpdir}/config.json", - { - "ignore": [3, 4, 6], - }, - ) - common.write_json( - f"{tmpdir}/labelstudio-export.json", - [ - {"id": 3}, - {"id": 4}, - {"id": 5}, - {"id": 6}, - ], - ) - stdout = self.run_cli("info", "--labels", "--project-dir", tmpdir) - - self.assertEqual( - "Ignoring 3 charts (3–4, 6)", - stdout.splitlines()[-1].strip(), - ) diff --git a/tests/test_labels.py b/tests/test_labels.py new file mode 100644 index 0000000..d957feb --- /dev/null +++ b/tests/test_labels.py @@ -0,0 +1,89 @@ +"""Tests for commands/labels.py""" + +import 
tempfile + +from chart_review import common +from tests import base + + +class TestLabels(base.TestCase): + """Test case for the top-level labels code""" + + def test_labels(self): + stdout = self.run_cli("--project-dir", f"{self.DATA_DIR}/cold", "labels") + + self.assertEqual( + """╭───────────┬─────────────┬──────────╮ +│ Annotator │ Chart Count │ Label │ +├───────────┼─────────────┼──────────┤ +│ Any │ 2 │ Cough │ +│ Any │ 3 │ Fatigue │ +│ Any │ 3 │ Headache │ +├───────────┼─────────────┼──────────┤ +│ jane │ 1 │ Cough │ +│ jane │ 2 │ Fatigue │ +│ jane │ 2 │ Headache │ +├───────────┼─────────────┼──────────┤ +│ jill │ 2 │ Cough │ +│ jill │ 3 │ Fatigue │ +│ jill │ 0 │ Headache │ +├───────────┼─────────────┼──────────┤ +│ john │ 1 │ Cough │ +│ john │ 2 │ Fatigue │ +│ john │ 2 │ Headache │ +╰───────────┴─────────────┴──────────╯ +""", + stdout, + ) + + def test_labels_grouped(self): + """Verify that we only show final grouped labels, not intermediate ones""" + with tempfile.TemporaryDirectory() as tmpdir: + common.write_json( + f"{tmpdir}/config.json", + { + "labels": ["fever", "rash", "recent"], + "grouped-labels": {"symptoms": ["fever", "rash"]}, + }, + ) + common.write_json( + f"{tmpdir}/labelstudio-export.json", + [], + ) + stdout = self.run_cli("labels", "--project-dir", tmpdir) + + self.assertEqual( + """╭───────────┬─────────────┬──────────╮ +│ Annotator │ Chart Count │ Label │ +├───────────┼─────────────┼──────────┤ +│ Any │ 0 │ recent │ +│ Any │ 0 │ symptoms │ +╰───────────┴─────────────┴──────────╯ +""", + stdout, + ) + + def test_labels_ignored(self): + """Verify that we show info on ignored notes""" + with tempfile.TemporaryDirectory() as tmpdir: + common.write_json( + f"{tmpdir}/config.json", + { + "ignore": [3, 4, 6], + }, + ) + common.write_json( + f"{tmpdir}/labelstudio-export.json", + [ + {"id": 3}, + {"id": 4}, + {"id": 5}, + {"id": 6}, + ], + ) + stdout = self.run_cli("labels", "--project-dir", tmpdir) + + self.assertEqual( + "Ignoring 3 charts 
(3–4, 6)", + stdout.splitlines()[-1].strip(), + )