feat!: reorganize CLI commands

"info" is no more - it's been split up and moved.
- Default "info" output has been moved to the default "chart-review" mode without any subcommand.
- "info --ids" has been moved to a top-level "ids" subcommand.
- "info --labels" has been moved to a top-level "labels" subcommand.
smart-on-fhir · Jun 24, 2024 · 064d2a4 · 064d2a4
1 parent f049df3
commit 064d2a4
Show file tree

Hide file tree

Showing 18 changed files with 518 additions and 506 deletions.
diff --git a/chart_review/__init__.py b/chart_review/__init__.py
@@ -1,3 +1,3 @@
 """Chart Review public entry point"""
 
-__version__ = "1.3.0"
+__version__ = "2.0.0"
diff --git a/chart_review/cli.py b/chart_review/cli.py
@@ -3,23 +3,18 @@
 import argparse
 import sys
 
-import chart_review
-from chart_review.commands import accuracy, info
+from chart_review.commands import accuracy, default, ids, labels
 
 
 def define_parser() -> argparse.ArgumentParser:
     """Fills out an argument parser with all the CLI options."""
     parser = argparse.ArgumentParser()
+    default.make_subparser(parser)
 
-    parser.add_argument(
-        "--version",
-        action="version",
-        version=f"chart-review {chart_review.__version__}",
-    )
-
-    subparsers = parser.add_subparsers(required=True)
-    accuracy.make_subparser(subparsers.add_parser("accuracy"))
-    info.make_subparser(subparsers.add_parser("info"))
+    subparsers = parser.add_subparsers()
+    accuracy.make_subparser(subparsers.add_parser("accuracy", help="calculate F1 and Kappa scores"))
+    ids.make_subparser(subparsers.add_parser("ids", help="map Label Studio IDs to FHIR IDs"))
+    labels.make_subparser(subparsers.add_parser("labels", help="show label usage by annotator"))
 
     return parser
 

diff --git a/chart_review/cli_utils.py b/chart_review/cli_utils.py
@@ -2,18 +2,30 @@
 
 import argparse
 
+from chart_review import cohort, config
 
-def add_project_args(parser: argparse.ArgumentParser) -> None:
+
+def add_project_args(parser: argparse.ArgumentParser, is_global: bool = False) -> None:
     group = parser.add_argument_group("configuration")
     group.add_argument(
         "--project-dir",
-        default=".",
+        "-p",
+        default=None if is_global else argparse.SUPPRESS,
         metavar="DIR",
         help=(
-            "Directory holding project files, "
+            "directory holding project files, "
             "like labelstudio-export.json (default: current dir)"
         ),
     )
     group.add_argument(
-        "--config", "-c", metavar="PATH", help="Config file (default: [project-dir]/config.yaml)"
+        "--config",
+        "-c",
+        default=None if is_global else argparse.SUPPRESS,
+        metavar="PATH",
+        help="config file (default: [project-dir]/config.yaml)",
     )
+
+
+def get_cohort_reader(args: argparse.Namespace) -> cohort.CohortReader:
+    proj_config = config.ProjectConfig(project_dir=args.project_dir, config_path=args.config)
+    return cohort.CohortReader(proj_config)
diff --git a/chart_review/commands/default.py b/chart_review/commands/default.py
@@ -0,0 +1,47 @@
+"""Methods for showing config & calculated setup info."""
+
+import argparse
+
+import rich
+import rich.box
+import rich.table
+
+import chart_review
+from chart_review import cli_utils, console_utils
+
+
+def print_info(args: argparse.Namespace) -> None:
+    """Show project information on the console."""
+    reader = cli_utils.get_cohort_reader(args)
+    console = rich.get_console()
+
+    # Charts
+    chart_table = rich.table.Table(
+        "Annotator",
+        "Chart Count",
+        "Chart IDs",
+        box=rich.box.ROUNDED,
+    )
+    for annotator in sorted(reader.note_range):
+        notes = reader.note_range[annotator]
+        chart_table.add_row(
+            annotator,
+            str(len(notes)),
+            console_utils.pretty_note_range(notes),
+        )
+
+    console.print(chart_table)
+    console_utils.print_ignored_charts(reader)
+
+    console.print()
+    console.print("Pass --help to see more options.")
+
+
+def make_subparser(parser: argparse.ArgumentParser) -> None:
+    cli_utils.add_project_args(parser, is_global=True)
+    parser.add_argument(
+        "--version",
+        action="version",
+        version=f"chart-review {chart_review.__version__}",
+    )
+    parser.set_defaults(func=print_info)
diff --git a/chart_review/commands/ids.py b/chart_review/commands/ids.py
@@ -0,0 +1,50 @@
+import argparse
+import csv
+import sys
+
+from chart_review import cli_utils
+
+
+def make_subparser(parser: argparse.ArgumentParser) -> None:
+    cli_utils.add_project_args(parser)
+    parser.set_defaults(func=print_ids)
+
+
+def print_ids(args: argparse.Namespace) -> None:
+    """
+    Prints a mapping of all project IDs.
+
+    Currently, this writes a CSV file to stdout. In the future, this could get fancier.
+    At the time of writing, it wasn't clear how to present the information in a way that is
+    sensible to a casual console user - so I went with the more technically oriented CSV file.
+    """
+    reader = cli_utils.get_cohort_reader(args)
+
+    writer = csv.writer(sys.stdout)
+    writer.writerow(["chart_id", "original_fhir_id", "anonymized_fhir_id"])
+
+    # IDS
+    for chart in reader.ls_export:
+        chart_id = str(chart["id"])
+        chart_data = chart.get("data", {})
+        printed = False
+
+        # Grab encounters first
+        orig_id = f"Encounter/{chart_data['enc_id']}" if "enc_id" in chart_data else ""
+        anon_id = f"Encounter/{chart_data['anon_id']}" if "anon_id" in chart_data else ""
+        if orig_id or anon_id:
+            writer.writerow([chart_id, orig_id, anon_id])
+            printed = True
+
+        # Now each DocRef ID
+        for orig_id, anon_id in chart_data.get("docref_mappings", {}).items():
+            writer.writerow(
+                [chart_id, f"DocumentReference/{orig_id}", f"DocumentReference/{anon_id}"]
+            )
+            printed = True
+
+        if not printed:
+            # Guarantee that every Chart ID shows up at least once - so it's clearer that the
+            # chart ID is included in the Label Studio export but that it does not have any
+            # IDs mapped to it.
+            writer.writerow([chart_id, None, None])
diff --git a/chart_review/commands/info.py b/chart_review/commands/info.py
diff --git a/chart_review/commands/labels.py b/chart_review/commands/labels.py
@@ -0,0 +1,51 @@
+import argparse
+
+import rich
+import rich.box
+import rich.table
+import rich.text
+
+from chart_review import cli_utils, console_utils, types
+
+
+def make_subparser(parser: argparse.ArgumentParser) -> None:
+    cli_utils.add_project_args(parser)
+    parser.set_defaults(func=print_labels)
+
+
+def print_labels(args: argparse.Namespace) -> None:
+    """Show label information on the console."""
+    reader = cli_utils.get_cohort_reader(args)
+
+    # Calculate all label counts for each annotator
+    label_names = sorted(reader.class_labels, key=str.casefold)
+    label_notes: dict[str, dict[str, types.NoteSet]] = {}  # annotator -> label -> note IDs
+    any_annotator_note_sets: dict[str, types.NoteSet] = {}
+    for annotator, mentions in reader.annotations.mentions.items():
+        label_notes[annotator] = {}
+        for name in label_names:
+            note_ids = {note_id for note_id, labels in mentions.items() if name in labels}
+            label_notes[annotator][name] = note_ids
+            any_annotator_note_sets.setdefault(name, types.NoteSet()).update(note_ids)
+
+    label_table = rich.table.Table(
+        "Annotator",
+        "Chart Count",
+        "Label",
+        box=rich.box.ROUNDED,
+    )
+
+    # First add summary entries, for counts across the union of all annotators
+    for name in label_names:
+        count = str(len(any_annotator_note_sets.get(name, {})))
+        label_table.add_row(rich.text.Text("Any", style="italic"), count, name)
+
+    # Now do each annotator as their own little boxed section
+    for annotator in sorted(label_notes.keys(), key=str.casefold):
+        label_table.add_section()
+        for name, note_set in label_notes[annotator].items():
+            count = str(len(note_set))
+            label_table.add_row(annotator, count, name)
+
+    rich.get_console().print(label_table)
+    console_utils.print_ignored_charts(reader)