From 3206698ad3ecc89c967f60d26818c7e50e561684 Mon Sep 17 00:00:00 2001
From: Michael Terry
Date: Mon, 24 Jun 2024 14:03:05 -0400
Subject: [PATCH] feat: add --csv flag to all existing commands

- Switch default `ids` output to be a table
- Hide the `accuracy --save` option now that there is a more obvious
  alternative in `--csv`. It's not clear that the JSON side of `--save`
  has any users, and I'd rather just add a `--json` output option down
  the road if we want that back. Plus, `--save` had the surprising
  behavior of "OK now there's a file on your disk" without the option of
  providing a filename or redirecting output.
---
 chart_review/cli_utils.py         |  40 +++++++++++
 chart_review/commands/accuracy.py | 112 +++++++++++++++---------------
 chart_review/commands/ids.py      |  22 +++---
 chart_review/commands/labels.py   |  15 ++--
 docs/accuracy.md                  |  47 +++++++++----
 docs/ids.md                       |  64 ++++++++++++++++-
 docs/index.md                     |  15 ++++
 docs/labels.md                    |  28 ++++++++
 tests/base.py                     |   3 +-
 tests/test_accuracy.py            |  85 +++++++++++++++++------
 tests/test_cli.py                 |  44 +++++++-----
 tests/test_ids.py                 |  69 +++++++++---------
 tests/test_labels.py              |  29 +++++++-
 13 files changed, 406 insertions(+), 167 deletions(-)

diff --git a/chart_review/cli_utils.py b/chart_review/cli_utils.py
index ff63584..dac3f6d 100644
--- a/chart_review/cli_utils.py
+++ b/chart_review/cli_utils.py
@@ -1,6 +1,11 @@
 """Helper methods for CLI parsing."""
 
 import argparse
+import csv
+import sys
+
+import rich.box
+import rich.table
 
 from chart_review import cohort, config
 
@@ -26,6 +31,41 @@ def add_project_args(parser: argparse.ArgumentParser, is_global: bool = False) -
     )
 
 
+def add_output_args(parser: argparse.ArgumentParser):
+    """Adds standard output options; returns the exclusive option group so you can add custom output arguments"""
+    group = parser.add_argument_group("output")
+    exclusive = group.add_mutually_exclusive_group()
+    exclusive.add_argument("--csv", action="store_true", help="print results in CSV format")
+    return exclusive
+
+
 def get_cohort_reader(args: argparse.Namespace) -> cohort.CohortReader:
     proj_config = config.ProjectConfig(project_dir=args.project_dir, config_path=args.config)
     return cohort.CohortReader(proj_config)
+
+
+def create_table(*headers) -> rich.table.Table:
+    """
+    Creates a table with standard chart-review formatting.
+
+    You can use your own table formatting if you have particular needs,
+    but this should be your default table creator.
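+
+    A minimal usage sketch (illustrative only):
+
+        table = create_table("Chart ID", "Label")
+        table.add_row("1", "Cough")
+        rich.get_console().print(table)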
+    """
+    table = rich.table.Table(box=rich.box.ROUNDED)
+    for header in headers:
+        table.add_column(header, overflow="fold")
+    return table
+
+
+def print_table_as_csv(table: rich.table.Table) -> None:
+    """Prints a Rich table as a CSV to stdout"""
+    writer = csv.writer(sys.stdout)
+
+    # First the headers
+    headers = [str(col.header).lower().replace(" ", "_") for col in table.columns]
+    writer.writerow(headers)
+
+    # And then each row
+    cells_by_row = zip(*[col.cells for col in table.columns])
+    for row in cells_by_row:
+        writer.writerow(row)
diff --git a/chart_review/commands/accuracy.py b/chart_review/commands/accuracy.py
index 94d80d3..477a547 100644
--- a/chart_review/commands/accuracy.py
+++ b/chart_review/commands/accuracy.py
@@ -8,27 +8,29 @@
 import rich.table
 import rich.text
 
-from chart_review import agree, cli_utils, cohort, common, config, console_utils
+from chart_review import agree, cli_utils, common, console_utils
 
 
-def accuracy(
-    reader: cohort.CohortReader,
-    truth: str,
-    annotator: str,
-    save: bool = False,
-    verbose: bool = False,
-) -> None:
+def make_subparser(parser: argparse.ArgumentParser) -> None:
+    cli_utils.add_project_args(parser)
+    output_group = cli_utils.add_output_args(parser)
+    output_group.add_argument("--save", action="store_true", help=argparse.SUPPRESS)
+    parser.add_argument("--verbose", action="store_true", help="show each chart’s labels")
+    parser.add_argument("truth_annotator")
+    parser.add_argument("annotator")
+    parser.set_defaults(func=print_accuracy)
+
+
+def print_accuracy(args: argparse.Namespace) -> None:
     """
     High-level accuracy calculation between two annotators.
 
-    The results will be written to the project directory.
-
-    :param reader: the cohort configuration
-    :param truth: the truth annotator
-    :param annotator: the other annotator to compare against truth
-    :param save: whether to write the results to disk vs just printing them
-    :param verbose: whether to print per-chart/per-label classifications
+    Prints the results to the console by default,
+    or writes them to the project directory with --save.
     """
+    reader = cli_utils.get_cohort_reader(args)
+    truth = args.truth_annotator
+    annotator = args.annotator
+
     if truth not in reader.note_range:
         print(f"Unrecognized annotator '{truth}'")
         return
@@ -48,64 +50,60 @@ def accuracy(
         matrices[label] = reader.confusion_matrix(truth, annotator, note_range, label)
 
     # Now score them
-    scores = agree.score_matrix(matrices[None])
+    scores = {None: agree.score_matrix(matrices[None])}
     for label in labels:
         scores[label] = agree.score_matrix(matrices[label])
 
     console = rich.get_console()
-    note_count = len(note_range)
-    chart_word = "chart" if note_count == 1 else "charts"
-    pretty_ranges = f" ({console_utils.pretty_note_range(note_range)})" if note_count > 0 else ""
-    console.print(f"Comparing {note_count} {chart_word}{pretty_ranges}")
-    console.print(f"Truth: {truth}")
-    console.print(f"Annotator: {annotator}")
-
-    console.print()
-    if save:
-        # Write the results out to disk
-        output_stem = os.path.join(reader.project_dir, f"accuracy-{truth}-{annotator}")
-        common.write_json(f"{output_stem}.json", scores)
-        console.print(f"Wrote {output_stem}.json")
-        common.write_text(f"{output_stem}.csv", agree.csv_table(scores, reader.class_labels))
-        console.print(f"Wrote {output_stem}.csv")
-    else:
-        # Print the results out to the console
-        rich_table = rich.table.Table(*agree.csv_header(), "Label", box=None, pad_edge=False)
-        rich_table.add_row(*agree.csv_row_score(scores), "*")
-        for label in labels:
-            rich_table.add_row(*agree.csv_row_score(scores[label]), label)
-        console.print(rich_table)
-
-        if verbose:
+    if 
args.verbose: # Print a table of each chart/label combo - useful for reviewing where an annotator # went wrong. - verbose_table = rich.table.Table( - "Chart ID", "Label", "Classification", box=rich.box.ROUNDED - ) + table = cli_utils.create_table("Chart ID", "Label", "Classification") for note_id in sorted(note_range): - verbose_table.add_section() + table.add_section() for label in labels: for classification in ["TN", "TP", "FN", "FP"]: if {note_id: label} in matrices[label][classification]: style = "bold" if classification[0] == "F" else None # highlight errors class_text = rich.text.Text(classification, style=style) - verbose_table.add_row(str(note_id), label, class_text) + table.add_row(str(note_id), label, class_text) break - console.print() - console.print(verbose_table) + else: + # Normal F1/Kappa scores + table = rich.table.Table(*agree.csv_header(), "Label", box=None, pad_edge=False) + table.add_row(*agree.csv_row_score(scores[None]), "*") + for label in labels: + table.add_row(*agree.csv_row_score(scores[label]), label) + if args.csv: + cli_utils.print_table_as_csv(table) + return -def make_subparser(parser: argparse.ArgumentParser) -> None: - cli_utils.add_project_args(parser) - parser.add_argument("--save", action="store_true", help="Write stats to CSV & JSON files") - parser.add_argument("--verbose", action="store_true", help="Explain each chart’s labels") - parser.add_argument("truth_annotator") - parser.add_argument("annotator") - parser.set_defaults(func=run_accuracy) + # OK we aren't printing a CSV file to stdout, so we can include a bit more explanation + # as a little header to the real results. + note_count = len(note_range) + chart_word = "chart" if note_count == 1 else "charts" + pretty_ranges = f" ({console_utils.pretty_note_range(note_range)})" if note_count > 0 else "" + console.print(f"Comparing {note_count} {chart_word}{pretty_ranges}") + console.print(f"Truth: {truth}") + console.print(f"Annotator: {annotator}") + console.print() + if args.save: # deprecated/hidden since 2.0, but still supported for now + output_stem = os.path.join(reader.project_dir, f"accuracy-{truth}-{annotator}") -def run_accuracy(args: argparse.Namespace) -> None: - proj_config = config.ProjectConfig(args.project_dir, config_path=args.config) - reader = cohort.CohortReader(proj_config) - accuracy(reader, args.truth_annotator, args.annotator, save=args.save, verbose=args.verbose) + # JSON: Historically, this has been formatted with the global label results intermixed + # with the specific label names, so reproduce that historical formatting here. + # Note: this could bite us if the user ever has a label like "Kappa", which is why the + # above code avoids intermixing, but we'll keep this as-is for now. 
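+        # For example (shape only, keys illustrative):
+        #   {None: {"F1": ..., "Kappa": ...}, "Cough": {...}}
+        # flattens into:
+        #   {"F1": ..., "Kappa": ..., "Cough": {...}}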
+ scores.update(scores[None]) + del scores[None] + common.write_json(f"{output_stem}.json", scores) + console.print(f"Wrote {output_stem}.json") + + # CSV: we should really use a .tsv suffix here, but keeping .csv for historical reasons + common.write_text(f"{output_stem}.csv", agree.csv_table(scores, reader.class_labels)) + console.print(f"Wrote {output_stem}.csv") + else: + console.print(table) diff --git a/chart_review/commands/ids.py b/chart_review/commands/ids.py index 780f934..2808e31 100644 --- a/chart_review/commands/ids.py +++ b/chart_review/commands/ids.py @@ -1,12 +1,13 @@ import argparse -import csv -import sys + +import rich.table from chart_review import cli_utils def make_subparser(parser: argparse.ArgumentParser) -> None: cli_utils.add_project_args(parser) + cli_utils.add_output_args(parser) parser.set_defaults(func=print_ids) @@ -20,10 +21,8 @@ def print_ids(args: argparse.Namespace) -> None: """ reader = cli_utils.get_cohort_reader(args) - writer = csv.writer(sys.stdout) - writer.writerow(["chart_id", "original_fhir_id", "anonymized_fhir_id"]) + table = cli_utils.create_table("Chart ID", "Original FHIR ID", "Anonymized FHIR ID") - # IDS for chart in reader.ls_export: chart_id = str(chart["id"]) chart_data = chart.get("data", {}) @@ -33,18 +32,21 @@ def print_ids(args: argparse.Namespace) -> None: orig_id = f"Encounter/{chart_data['enc_id']}" if "enc_id" in chart_data else "" anon_id = f"Encounter/{chart_data['anon_id']}" if "anon_id" in chart_data else "" if orig_id or anon_id: - writer.writerow([chart_id, orig_id, anon_id]) + table.add_row(chart_id, orig_id, anon_id) printed = True # Now each DocRef ID for orig_id, anon_id in chart_data.get("docref_mappings", {}).items(): - writer.writerow( - [chart_id, f"DocumentReference/{orig_id}", f"DocumentReference/{anon_id}"] - ) + table.add_row(chart_id, f"DocumentReference/{orig_id}", f"DocumentReference/{anon_id}") printed = True if not printed: # Guarantee that every Chart ID shows up at least once - so it's clearer that the # chart ID is included in the Label Studio export but that it does not have any # IDs mapped to it. 
-            writer.writerow([chart_id, None, None])
+            table.add_row(chart_id, None, None)
+
+    if args.csv:
+        cli_utils.print_table_as_csv(table)
+    else:
+        rich.get_console().print(table)
diff --git a/chart_review/commands/labels.py b/chart_review/commands/labels.py
index 29ed9f7..82a417d 100644
--- a/chart_review/commands/labels.py
+++ b/chart_review/commands/labels.py
@@ -10,6 +10,7 @@
 
 def make_subparser(parser: argparse.ArgumentParser) -> None:
     cli_utils.add_project_args(parser)
+    cli_utils.add_output_args(parser)
     parser.set_defaults(func=print_labels)
 
 
@@ -28,12 +29,7 @@ def print_labels(args: argparse.Namespace) -> None:
             label_notes[annotator][name] = note_ids
             any_annotator_note_sets.setdefault(name, types.NoteSet()).update(note_ids)
 
-    label_table = rich.table.Table(
-        "Annotator",
-        "Chart Count",
-        "Label",
-        box=rich.box.ROUNDED,
-    )
+    label_table = cli_utils.create_table("Annotator", "Chart Count", "Label")
 
     # First add summary entries, for counts across the union of all annotators
     for name in label_names:
@@ -47,5 +43,8 @@ def print_labels(args: argparse.Namespace) -> None:
             count = str(len(note_set))
             label_table.add_row(annotator, count, name)
 
-    rich.get_console().print(label_table)
-    console_utils.print_ignored_charts(reader)
+    if args.csv:
+        cli_utils.print_table_as_csv(label_table)
+    else:
+        rich.get_console().print(label_table)
+        console_utils.print_ignored_charts(reader)
diff --git a/docs/accuracy.md b/docs/accuracy.md
index 7aa47bb..46aea41 100644
--- a/docs/accuracy.md
+++ b/docs/accuracy.md
@@ -31,13 +31,7 @@ F1     Sens   Spec   PPV    NPV    Kappa  TP  FN  TN  FP  Label
 
 ## Options
 
-### `--save`
-
-Use this to write a JSON and CSV file to the project directory,
-rather than printing to the console.
-Useful for passing results around in a machine-parsable format.
-
-### `--verbose`
+### --verbose
 
 Use this to also print out a table of per-chart/per-label classifications.
 This is helpful for investigating where specifically the two annotators agreed or not.
@@ -50,12 +44,6 @@ Comparing 3 charts (1, 3–4)
 Truth: jill
 Annotator: jane
 
-F1     Sens   Spec   PPV    NPV    Kappa  TP  FN  TN  FP  Label
-0.667  0.75   0.6    0.6    0.75   0.341  3   1   3   2   *
-0.667  0.5    1.0    1.0    0.5    0.4    1   1   1   0   Cough
-1.0    1.0    1.0    1.0    1.0    1.0    2   0   1   0   Fatigue
-0      0      0      0      0      0      0   0   1   2   Headache
-
 ╭──────────┬──────────┬────────────────╮
 │ Chart ID │ Label    │ Classification │
 ├──────────┼──────────┼────────────────┤
 │ 1        │ Cough    │ TP             │
 │ 1        │ Fatigue  │ TP             │
 │ 1        │ Headache │ FP             │
 ├──────────┼──────────┼────────────────┤
 │ 3        │ Cough    │ TN             │
 │ 3        │ Fatigue  │ TN             │
 │ 3        │ Headache │ TN             │
 ├──────────┼──────────┼────────────────┤
 │ 4        │ Cough    │ FN             │
 │ 4        │ Fatigue  │ TP             │
 │ 4        │ Headache │ FP             │
 ╰──────────┴──────────┴────────────────╯
-```
\ No newline at end of file
+```
+
+### --csv
+
+Print the accuracy results in a machine-parseable CSV format.
+
+Can be used in both the default and verbose modes.
+
+#### Examples
+
+```shell
+$ chart-review accuracy jill jane --csv
+f1,sens,spec,ppv,npv,kappa,tp,fn,tn,fp,label
+0.667,0.75,0.6,0.6,0.75,0.341,3,1,3,2,*
+0.667,0.5,1.0,1.0,0.5,0.4,1,1,1,0,Cough
+1.0,1.0,1.0,1.0,1.0,1.0,2,0,1,0,Fatigue
+0,0,0,0,0,0,0,0,1,2,Headache
+```
+
+```shell
+$ chart-review accuracy jill jane --verbose --csv
+chart_id,label,classification
+1,Cough,TP
+1,Fatigue,TP
+1,Headache,FP
+3,Cough,TN
+3,Fatigue,TN
+3,Headache,TN
+4,Cough,FN
+4,Fatigue,TP
+4,Headache,FP
+```
diff --git a/docs/ids.md b/docs/ids.md
index 7a41ea4..2c73e1a 100644
--- a/docs/ids.md
+++ b/docs/ids.md
@@ -18,14 +18,72 @@ as well as original IDs from your EHR, on top of the Label Studio chart IDs.
 FHIR IDs could be considered PHI depending on how the EHR generates them.
 Exercise appropriate caution when sharing the output of this command.
-## Examples +## Example + + +```shell +$ chart-review ids +╭──────────┬────────────────────────┬──────────────────────────────────────────╮ +│ Chart ID │ Original FHIR ID │ Anonymized FHIR ID │ +├──────────┼────────────────────────┼──────────────────────────────────────────┤ +│ 1 │ Encounter/E123 │ Encounter/170a37476339af6f31ed7b1b0bbb4f │ +│ │ │ 11d5daacd79bf9f490d49f93742acfd2bd │ +│ 1 │ DocumentReference/D123 │ DocumentReference/331ab320fe6264535a408a │ +│ │ │ a1a7ecf1465fc0631580af5f3010bfecf71c99d1 │ +│ │ │ 41 │ +│ 2 │ Encounter/E898 │ Encounter/8b0bd207147989492801b7c14eebc0 │ +│ │ │ 15564ab73a07bdabdf9aefc3425eeba982 │ +│ 2 │ DocumentReference/D898 │ DocumentReference/b5e329b752067eca1584f9 │ +│ │ │ cd132f40c637d8a9ebd6f2a599794f9436fb83c2 │ +│ │ │ eb │ +│ 2 │ DocumentReference/D899 │ DocumentReference/605338cd18c2617864db23 │ +│ │ │ fd5fd956f3e806af2021ffa6d11c34cac998eb3b │ +│ │ │ 6d │ +╰──────────┴────────────────────────┴──────────────────────────────────────────╯ +``` + +## Options + +### --csv + +Print the IDs in a machine-parseable CSV format. + +Because IDs can be quite long, this is also probably the best approach for searching +for a given ID, since line wraps won't appear in the middle of your ID. + +#### Examples ```shell -$ chart-review ids > ids.csv +$ chart-review ids --csv > ids.csv ``` ```shell -$ chart-review ids +$ chart-review ids --csv chart_id,original_fhir_id,anonymized_fhir_id 1,Encounter/E123,Encounter/170a37476339af6f31ed7b1b0bbb4f11d5daacd79bf9f490d49f93742acfd2bd 1,DocumentReference/D123,DocumentReference/331ab320fe6264535a408aa1a7ecf1465fc0631580af5f3010bfecf71c99d141 diff --git a/docs/index.md b/docs/index.md index e1ecdce..032fa65 100644 --- a/docs/index.md +++ b/docs/index.md @@ -51,6 +51,21 @@ chart-review --help Read the [first-time setup docs](setup.md) for more. +## Example + +```shell +$ chart-review +╭───────────┬─────────────┬───────────╮ +│ Annotator │ Chart Count │ Chart IDs │ +├───────────┼─────────────┼───────────┤ +│ jane │ 3 │ 1, 3–4 │ +│ jill │ 4 │ 1–4 │ +│ john │ 3 │ 1–2, 4 │ +╰───────────┴─────────────┴───────────╯ + +Pass --help to see more options. +``` + ## Source Code Chart Review is open source. If you'd like to browse its code or contribute changes yourself, diff --git a/docs/labels.md b/docs/labels.md index a547794..5f21c87 100644 --- a/docs/labels.md +++ b/docs/labels.md @@ -35,3 +35,31 @@ $ chart-review labels │ john │ 2 │ Headache │ ╰───────────┴─────────────┴──────────╯ ``` + +## Options + +### --csv + +Print the labels in a machine-parseable CSV format. 
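+
+For example, this makes it easy to filter with ordinary shell tools
+(a sketch; any CSV-aware tool works):
+
+```shell
+$ chart-review labels --csv | grep jane
+jane,1,Cough
+jane,2,Fatigue
+jane,2,Headache
+```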
+ +#### Examples +```shell +$ chart-review labels --csv > labels.csv +``` + +```shell +$ chart-review labels --csv +annotator,chart_count,label +Any,2,Cough +Any,3,Fatigue +Any,3,Headache +jane,1,Cough +jane,2,Fatigue +jane,2,Headache +jill,2,Cough +jill,3,Fatigue +jill,0,Headache +john,1,Cough +john,2,Fatigue +john,2,Headache +``` diff --git a/tests/base.py b/tests/base.py index 28927fa..e74259c 100644 --- a/tests/base.py +++ b/tests/base.py @@ -18,7 +18,8 @@ def setUp(self): self.maxDiff = None @staticmethod - def run_cli(*args) -> str: + def run_cli(*args, path: str) -> str: + args = ("--project-dir", path) + args with TestCase.capture_stdout() as stdout: cli.main_cli(list(args)) return stdout.getvalue() diff --git a/tests/test_accuracy.py b/tests/test_accuracy.py index b51cb7e..5a71197 100644 --- a/tests/test_accuracy.py +++ b/tests/test_accuracy.py @@ -10,10 +10,41 @@ class TestAccuracy(base.TestCase): """Test case for the top-level accuracy code""" - def test_accuracy(self): + def test_default_output(self): + stdout = self.run_cli("accuracy", "jill", "jane", path=f"{self.DATA_DIR}/cold") + + self.assertEqual( + """Comparing 3 charts (1, 3–4) +Truth: jill +Annotator: jane + +F1 Sens Spec PPV NPV Kappa TP FN TN FP Label +0.667 0.75 0.6 0.6 0.75 0.341 3 1 3 2 * +0.667 0.5 1.0 1.0 0.5 0.4 1 1 1 0 Cough +1.0 1.0 1.0 1.0 1.0 1.0 2 0 1 0 Fatigue +0 0 0 0 0 0 0 0 1 2 Headache +""", # noqa: W291 + stdout, + ) + + def test_csv(self): + stdout = self.run_cli("accuracy", "--csv", "jill", "jane", path=f"{self.DATA_DIR}/cold") + + self.assertEqual( + [ + "f1,sens,spec,ppv,npv,kappa,tp,fn,tn,fp,label", + "0.667,0.75,0.6,0.6,0.75,0.341,3,1,3,2,*", + "0.667,0.5,1.0,1.0,0.5,0.4,1,1,1,0,Cough", + "1.0,1.0,1.0,1.0,1.0,1.0,2,0,1,0,Fatigue", + "0,0,0,0,0,0,0,0,1,2,Headache", + ], + stdout.splitlines(), + ) + + def test_save(self): with tempfile.TemporaryDirectory() as tmpdir: shutil.copytree(f"{self.DATA_DIR}/cold", tmpdir, dirs_exist_ok=True) - self.run_cli("accuracy", "--project-dir", tmpdir, "--save", "jill", "jane") + self.run_cli("accuracy", "--save", "jill", "jane", path=tmpdir) accuracy_json = common.read_json(f"{tmpdir}/accuracy-jill-jane.json") self.assertEqual( @@ -79,21 +110,21 @@ def test_accuracy(self): accuracy_csv, ) + def test_save_and_csv_conflict(self): + """Verify that --save and --csv can't run together""" + with self.assertRaises(SystemExit) as cm: + self.run_cli( + "accuracy", "--save", "--csv", "jill", "jane", path=f"{self.DATA_DIR}/cold" + ) + self.assertEqual(2, cm.exception.code) + def test_verbose(self): - output = self.run_cli( - "accuracy", "--project-dir", f"{self.DATA_DIR}/cold", "--verbose", "jill", "jane" - ) + output = self.run_cli("accuracy", "--verbose", "jill", "jane", path=f"{self.DATA_DIR}/cold") self.assertEqual( """Comparing 3 charts (1, 3–4) Truth: jill Annotator: jane -F1 Sens Spec PPV NPV Kappa TP FN TN FP Label -0.667 0.75 0.6 0.6 0.75 0.341 3 1 3 2 * -0.667 0.5 1.0 1.0 0.5 0.4 1 1 1 0 Cough -1.0 1.0 1.0 1.0 1.0 1.0 2 0 1 0 Fatigue -0 0 0 0 0 0 0 0 1 2 Headache - ╭──────────┬──────────┬────────────────╮ │ Chart ID │ Label │ Classification │ ├──────────┼──────────┼────────────────┤ @@ -113,15 +144,23 @@ def test_verbose(self): output, ) - def test_custom_config(self): - with tempfile.TemporaryDirectory() as tmpdir: - shutil.copy(f"{self.DATA_DIR}/cold/labelstudio-export.json", tmpdir) - self.run_cli( - "accuracy", - "--project-dir", - tmpdir, - "-c", - f"{self.DATA_DIR}/cold/config.yaml", - "jane", - "john", - ) # just confirm it doesn't error out + def 
test_verbose_csv(self):
+        """Verify we can also print verbose results in CSV format"""
+        stdout = self.run_cli(
+            "accuracy", "--verbose", "--csv", "jill", "jane", path=f"{self.DATA_DIR}/cold"
+        )
+        self.assertEqual(
+            [
+                "chart_id,label,classification",
+                "1,Cough,TP",
+                "1,Fatigue,TP",
+                "1,Headache,FP",
+                "3,Cough,TN",
+                "3,Fatigue,TN",
+                "3,Headache,TN",
+                "4,Cough,FN",
+                "4,Fatigue,TP",
+                "4,Headache,FP",
+            ],
+            stdout.splitlines(),
+        )
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 89aa2fb..ca2a726 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -1,5 +1,8 @@
 """Tests for cli.py"""
 
+import shutil
+import tempfile
+
 import chart_review
 from chart_review import cli
 from tests import base
@@ -8,6 +11,21 @@
 class TestCommandLine(base.TestCase):
     """Test case for the CLI entry point"""
 
+    def assert_cold_output(self, output):
+        self.assertEqual(
+            """╭───────────┬─────────────┬───────────╮
+│ Annotator │ Chart Count │ Chart IDs │
+├───────────┼─────────────┼───────────┤
+│ jane      │ 3           │ 1, 3–4    │
+│ jill      │ 4           │ 1–4       │
+│ john      │ 3           │ 1–2, 4    │
+╰───────────┴─────────────┴───────────╯
+
+Pass --help to see more options.
+""",
+            output,
+        )
+
     def test_version(self):
         # Manually capture stdout (rather than helper self.run_cli) because --version actually
         # exits the program, and we have to handle the exception rather than just grabbing the
@@ -22,24 +40,11 @@ def test_version(self):
         self.assertEqual(f"chart-review {version}\n", stdout.getvalue())
 
     def test_default_info(self):
-        stdout = self.run_cli("--project-dir", f"{self.DATA_DIR}/cold")
-
-        self.assertEqual(
-            """╭───────────┬─────────────┬───────────╮
-│ Annotator │ Chart Count │ Chart IDs │
-├───────────┼─────────────┼───────────┤
-│ jane      │ 3           │ 1, 3–4    │
-│ jill      │ 4           │ 1–4       │
-│ john      │ 3           │ 1–2, 4    │
-╰───────────┴─────────────┴───────────╯
-
-Pass --help to see more options.
-""", - stdout, - ) + stdout = self.run_cli(path=f"{self.DATA_DIR}/cold") + self.assert_cold_output(stdout) def test_default_info_ignored(self): - stdout = self.run_cli("--project-dir", f"{self.DATA_DIR}/ignore") + stdout = self.run_cli(path=f"{self.DATA_DIR}/ignore") self.assertEqual( """╭───────────┬─────────────┬───────────╮ @@ -54,3 +59,10 @@ def test_default_info_ignored(self): """, stdout, ) + + def test_custom_config(self): + with tempfile.TemporaryDirectory() as tmpdir: + shutil.copy(f"{self.DATA_DIR}/cold/labelstudio-export.json", tmpdir) + # mostly confirm it doesn't just error out + stdout = self.run_cli(f"--config={self.DATA_DIR}/cold/config.yaml", path=tmpdir) + self.assert_cold_output(stdout) diff --git a/tests/test_ids.py b/tests/test_ids.py index 5ad1e8b..78118d8 100644 --- a/tests/test_ids.py +++ b/tests/test_ids.py @@ -9,29 +9,7 @@ class TestIDs(base.TestCase): """Test case for the top-level ids code""" - def test_ids_quoted(self): - """Verify that we quote the output when needed""" - with tempfile.TemporaryDirectory() as tmpdir: - common.write_json(f"{tmpdir}/config.json", {}) - common.write_json( - f"{tmpdir}/labelstudio-export.json", - [ - { - "id": 1, - "data": { - "enc_id": "Orig,\\ 'Enc", - "anon_id": 'Anon "Enc', - }, - }, - ], - ) - stdout = self.run_cli("ids", "--project-dir", tmpdir) - - lines = stdout.splitlines() - self.assertEqual(2, len(lines)) - self.assertEqual('1,"Encounter/Orig,\\ \'Enc","Encounter/Anon ""Enc"', lines[1]) - - def test_ids_sources(self): + def test_ids(self): """Verify that we pull IDs from all the places""" with tempfile.TemporaryDirectory() as tmpdir: common.write_json( @@ -98,18 +76,45 @@ def test_ids_sources(self): }, ], ) - stdout = self.run_cli("ids", "--project-dir", tmpdir) + stdout = self.run_cli("ids", path=tmpdir) self.assertEqual( [ - "chart_id,original_fhir_id,anonymized_fhir_id", - "1,Encounter/Orig,", - "2,,Encounter/Anon", - "2,DocumentReference/Orig,DocumentReference/Anon", - "3,DocumentReference/Orig1,DocumentReference/Anon1", - "3,DocumentReference/Orig2,DocumentReference/Anon2", - "4,Encounter/a,Encounter/b", - "5,,", + "╭──────────┬─────────────────────────┬─────────────────────────╮", + "│ Chart ID │ Original FHIR ID │ Anonymized FHIR ID │", + "├──────────┼─────────────────────────┼─────────────────────────┤", + "│ 1 │ Encounter/Orig │ │", + "│ 2 │ │ Encounter/Anon │", + "│ 2 │ DocumentReference/Orig │ DocumentReference/Anon │", + "│ 3 │ DocumentReference/Orig1 │ DocumentReference/Anon1 │", + "│ 3 │ DocumentReference/Orig2 │ DocumentReference/Anon2 │", + "│ 4 │ Encounter/a │ Encounter/b │", + "│ 5 │ │ │", + "╰──────────┴─────────────────────────┴─────────────────────────╯", ], stdout.splitlines(), ) + + def test_ids_csv(self): + """Verify that we can print CSV output""" + with tempfile.TemporaryDirectory() as tmpdir: + common.write_json(f"{tmpdir}/config.json", {}) + common.write_json( + f"{tmpdir}/labelstudio-export.json", + [ + { + "id": 1, + "data": { + # Verify that we quote correctly + "enc_id": "Orig,\\ 'Enc", + "anon_id": 'Anon "Enc', + }, + }, + ], + ) + stdout = self.run_cli("ids", "--csv", path=tmpdir) + + lines = stdout.splitlines() + self.assertEqual(2, len(lines)) + self.assertEqual("chart_id,original_fhir_id,anonymized_fhir_id", lines[0]) + self.assertEqual('1,"Encounter/Orig,\\ \'Enc","Encounter/Anon ""Enc"', lines[1]) diff --git a/tests/test_labels.py b/tests/test_labels.py index d957feb..7163f3d 100644 --- a/tests/test_labels.py +++ b/tests/test_labels.py @@ -10,7 +10,7 @@ class 
TestLabels(base.TestCase):
     """Test case for the top-level labels code"""
 
     def test_labels(self):
-        stdout = self.run_cli("--project-dir", f"{self.DATA_DIR}/cold", "labels")
+        stdout = self.run_cli("labels", path=f"{self.DATA_DIR}/cold")
 
         self.assertEqual(
             """╭───────────┬─────────────┬──────────╮
@@ -50,7 +50,7 @@ def test_labels_grouped(self):
             f"{tmpdir}/labelstudio-export.json",
             [],
         )
-        stdout = self.run_cli("labels", "--project-dir", tmpdir)
+        stdout = self.run_cli("labels", path=tmpdir)
 
         self.assertEqual(
             """╭───────────┬─────────────┬──────────╮
@@ -81,9 +81,32 @@ def test_labels_ignored(self):
                     {"id": 6},
                 ],
             )
-            stdout = self.run_cli("labels", "--project-dir", tmpdir)
+            stdout = self.run_cli("labels", path=tmpdir)
 
         self.assertEqual(
             "Ignoring 3 charts (3–4, 6)",
             stdout.splitlines()[-1].strip(),
         )
+
+    def test_labels_csv(self):
+        """Verify that we can print in CSV format"""
+        stdout = self.run_cli("labels", "--csv", path=f"{self.DATA_DIR}/cold")
+
+        self.assertEqual(
+            [
+                "annotator,chart_count,label",
+                "Any,2,Cough",
+                "Any,3,Fatigue",
+                "Any,3,Headache",
+                "jane,1,Cough",
+                "jane,2,Fatigue",
+                "jane,2,Headache",
+                "jill,2,Cough",
+                "jill,3,Fatigue",
+                "jill,0,Headache",
+                "john,1,Cough",
+                "john,2,Fatigue",
+                "john,2,Headache",
+            ],
+            stdout.splitlines(),
+        )