Skip to content

Commit

Permalink
Merge pull request #47 from smart-on-fhir/mikix/mentions
Browse files Browse the repository at this point in the history
feat: add `mentions` command to show text for each label
  • Loading branch information
mikix authored Jun 25, 2024
2 parents a8b989b + b333a4d commit a8d9848
Show file tree
Hide file tree
Showing 10 changed files with 364 additions and 82 deletions.
3 changes: 2 additions & 1 deletion chart_review/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import argparse
import sys

from chart_review.commands import accuracy, default, ids, labels
from chart_review.commands import accuracy, default, ids, labels, mentions


def define_parser() -> argparse.ArgumentParser:
Expand All @@ -15,6 +15,7 @@ def define_parser() -> argparse.ArgumentParser:
accuracy.make_subparser(subparsers.add_parser("accuracy", help="calculate F1 and Kappa scores"))
ids.make_subparser(subparsers.add_parser("ids", help="map Label Studio IDs to FHIR IDs"))
labels.make_subparser(subparsers.add_parser("labels", help="show label usage by annotator"))
mentions.make_subparser(subparsers.add_parser("mentions", help="show each mention of a label"))

return parser

Expand Down
8 changes: 3 additions & 5 deletions chart_review/cohort.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,9 @@ def __init__(self, proj_config: config.ProjectConfig):
# Calculate the final set of note ranges for each annotator
self.note_range, self.ignored_notes = self._collect_note_ranges(self.ls_export)

# Remove any ignored notes from the mentions table, for ease of consuming code
for mentions in self.annotations.mentions.values():
for note in self.ignored_notes:
if note in mentions:
del mentions[note]
# Remove any ignored notes from the annotations, for ease of consuming code
for note in self.ignored_notes:
self.annotations.remove(note)

def _collect_note_ranges(
self, exported_json: list[dict]
Expand Down
6 changes: 3 additions & 3 deletions chart_review/commands/labels.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,19 @@ def print_labels(args: argparse.Namespace) -> None:
label_notes[annotator][name] = note_ids
any_annotator_note_sets.setdefault(name, types.NoteSet()).update(note_ids)

label_table = cli_utils.create_table("Annotator", "Chart Count", "Label")
label_table = cli_utils.create_table("Annotator", "Label", "Chart Count")

# First add summary entries, for counts across the union of all annotators
for name in label_names:
count = str(len(any_annotator_note_sets.get(name, {})))
label_table.add_row(rich.text.Text("Any", style="italic"), count, name)
label_table.add_row(rich.text.Text("Any", style="italic"), name, count)

# Now do each annotator as their own little boxed section
for annotator in sorted(label_notes.keys(), key=str.casefold):
label_table.add_section()
for name, note_set in label_notes[annotator].items():
count = str(len(note_set))
label_table.add_row(annotator, count, name)
label_table.add_row(annotator, name, count)

if args.csv:
cli_utils.print_table_as_csv(label_table)
Expand Down
38 changes: 38 additions & 0 deletions chart_review/commands/mentions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import argparse

import rich
import rich.box
import rich.table
import rich.text

from chart_review import cli_utils, console_utils, types


def make_subparser(parser: argparse.ArgumentParser) -> None:
    """
    Configure the `mentions` sub-command on the given parser.

    Registers the shared project and output arguments,
    then sets `print_mentions` as the function to run for this command.
    """
    for add_args in (cli_utils.add_project_args, cli_utils.add_output_args):
        add_args(parser)
    parser.set_defaults(func=print_mentions)


def print_mentions(args: argparse.Namespace) -> None:
    """
    Print Label Studio export's mentions (text associated with the label).

    One row is emitted per (annotator, chart, labeled text, label) combination.
    Annotators are listed case-insensitively in alphabetical order, each in their own
    table section, and a labeled text's labels are listed in that same order.
    Labels that are not among the project's labels are left out.

    With `args.csv` set, only the CSV form of the table is printed.
    Otherwise the table is printed to the console, followed by the ignored-charts note.
    """
    reader = cli_utils.get_cohort_reader(args)
    annotations = reader.annotations
    by_annotator = annotations.original_text_mentions

    table = cli_utils.create_table("Annotator", "Chart ID", "Mention", "Label")

    for annotator in sorted(by_annotator, key=str.casefold):
        # Each annotator gets their own little boxed section
        table.add_section()
        for note_id, labeled_texts in by_annotator[annotator].items():
            chart_id = str(note_id)
            for labeled_text in labeled_texts:
                for label in sorted(labeled_text.labels, key=str.casefold):
                    if label not in annotations.labels:
                        continue  # not one of the project's labels
                    table.add_row(annotator, chart_id, labeled_text.text, label)

    if args.csv:
        cli_utils.print_table_as_csv(table)
        return

    rich.get_console().print(table)
    console_utils.print_ignored_charts(reader)
10 changes: 9 additions & 1 deletion chart_review/console_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,15 @@ def end_range() -> None:


def print_ignored_charts(reader: cohort.CohortReader):
"""Prints a line about ignored charts, suitable for underlying a table"""
"""
Prints a line about ignored charts, suitable for underlying a table.
It's recommended that any CLI command that shows individual chart IDs
call this for their normal output view (i.e. not a formatted view like --csv).
For commands that just show aggregate chart numbers,
use your judgement if it helps or is just confusing extra info.
"""
if not reader.ignored_notes:
return

Expand Down
9 changes: 9 additions & 0 deletions chart_review/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,12 @@ class ProjectAnnotations:
original_text_mentions: dict[str, dict[int, list[LabeledText]]] = dataclasses.field(
default_factory=dict
)

def remove(self, chart_id: int):
    """
    Forget the given chart ID, for every annotator.

    The chart is dropped from both the `mentions` and `original_text_mentions` tables.
    Annotators that never saw the chart are left untouched.
    """
    for per_annotator in (self.mentions, self.original_text_mentions):
        for charts in per_annotator.values():
            # pop() with a default is a no-op when the chart is absent
            charts.pop(chart_id, None)
66 changes: 33 additions & 33 deletions docs/labels.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,32 +8,32 @@ nav_order: 7

# The Labels Command

The `labels` prints some statistics on the project labels
The `labels` command prints some statistics on the project labels
and how often each annotator used each label.

## Example

```shell
$ chart-review labels
╭───────────┬───────────────────────╮
│ Annotator │ Chart Count │ Label
├───────────┼───────────────────────┤
│ Any │ 2 │ Cough
│ Any │ 3 │ Fatigue
│ Any │ 3 │ Headache
├───────────┼───────────────────────┤
│ jane │ 1 │ Cough
│ jane │ 2 │ Fatigue
│ jane │ 2 │ Headache
├───────────┼───────────────────────┤
│ jill │ 2 │ Cough
│ jill │ 3 │ Fatigue
│ jill │ 0 │ Headache
├───────────┼───────────────────────┤
│ john │ 1 │ Cough
│ john │ 2 │ Fatigue
│ john │ 2 │ Headache
╰───────────┴───────────────────────╯
╭───────────┬──────────┬─────────────╮
│ Annotator │ Label    │ Chart Count │
├───────────┼──────────┼─────────────┤
│ Any       │ Cough    │ 2           │
│ Any       │ Fatigue  │ 3           │
│ Any       │ Headache │ 3           │
├───────────┼──────────┼─────────────┤
│ jane      │ Cough    │ 1           │
│ jane      │ Fatigue  │ 2           │
│ jane      │ Headache │ 2           │
├───────────┼──────────┼─────────────┤
│ jill      │ Cough    │ 2           │
│ jill      │ Fatigue  │ 3           │
│ jill      │ Headache │ 0           │
├───────────┼──────────┼─────────────┤
│ john      │ Cough    │ 1           │
│ john      │ Fatigue  │ 2           │
│ john      │ Headache │ 2           │
╰───────────┴──────────┴─────────────╯
```

## Options
Expand All @@ -49,17 +49,17 @@ $ chart-review labels --csv > labels.csv

```shell
$ chart-review labels --csv
annotator,chart_count,label
Any,2,Cough
Any,3,Fatigue
Any,3,Headache
jane,1,Cough
jane,2,Fatigue
jane,2,Headache
jill,2,Cough
jill,3,Fatigue
jill,0,Headache
john,1,Cough
john,2,Fatigue
john,2,Headache
annotator,label,chart_count
Any,Cough,2
Any,Fatigue,3
Any,Headache,3
jane,Cough,1
jane,Fatigue,2
jane,Headache,2
jill,Cough,2
jill,Fatigue,3
jill,Headache,0
john,Cough,1
john,Fatigue,2
john,Headache,2
```
70 changes: 70 additions & 0 deletions docs/mentions.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
---
title: Mentions Command
parent: Chart Review
nav_order: 8
# audience: lightly technical folks
# type: how-to
---

# The Mentions Command

The `mentions` command prints each time a piece of text was labeled
and with what label.

## Example

```shell
$ chart-review mentions
╭───────────┬──────────┬─────────┬──────────╮
│ Annotator │ Chart ID │ Mention │ Label    │
├───────────┼──────────┼─────────┼──────────┤
│ jane      │ 1        │ achoo   │ Cough    │
│ jane      │ 1        │ sigh    │ Headache │
│ jane      │ 1        │ sigh    │ Fatigue  │
│ jane      │ 4        │ sleepy  │ Fatigue  │
│ jane      │ 4        │ pain    │ Headache │
├───────────┼──────────┼─────────┼──────────┤
│ jill      │ 1        │ achoo   │ Cough    │
│ jill      │ 1        │ sigh    │ Fatigue  │
│ jill      │ 2        │ ouch    │ Fatigue  │
│ jill      │ 4        │ sleepy  │ Fatigue  │
│ jill      │ 4        │ pain    │ Cough    │
├───────────┼──────────┼─────────┼──────────┤
│ john      │ 1        │ achoo   │ Cough    │
│ john      │ 1        │ sigh    │ Fatigue  │
│ john      │ 2        │ ouch    │ Headache │
│ john      │ 4        │ sleepy  │ Fatigue  │
│ john      │ 4        │ pain    │ Headache │
╰───────────┴──────────┴─────────┴──────────╯
```

## Options

### --csv

Print the mentions in a machine-parseable CSV format.

#### Examples
```shell
$ chart-review mentions --csv > mentions.csv
```

```shell
$ chart-review mentions --csv
annotator,chart_id,mention,label
jane,1,achoo,Cough
jane,1,sigh,Headache
jane,1,sigh,Fatigue
jane,4,sleepy,Fatigue
jane,4,pain,Headache
jill,1,achoo,Cough
jill,1,sigh,Fatigue
jill,2,ouch,Fatigue
jill,4,sleepy,Fatigue
jill,4,pain,Cough
john,1,achoo,Cough
john,1,sigh,Fatigue
john,2,ouch,Headache
john,4,sleepy,Fatigue
john,4,pain,Headache
```
74 changes: 35 additions & 39 deletions tests/test_labels.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,25 +13,25 @@ def test_labels(self):
stdout = self.run_cli("labels", path=f"{self.DATA_DIR}/cold")

self.assertEqual(
"""╭───────────┬───────────────────────╮
│ Annotator │ Chart Count │ Label
├───────────┼───────────────────────┤
│ Any │ 2 │ Cough
│ Any │ 3 │ Fatigue
│ Any │ 3 │ Headache
├───────────┼───────────────────────┤
│ jane │ 1 │ Cough
│ jane │ 2 │ Fatigue
│ jane │ 2 │ Headache
├───────────┼───────────────────────┤
│ jill │ 2 │ Cough
│ jill │ 3 │ Fatigue
│ jill │ 0 │ Headache
├───────────┼───────────────────────┤
│ john │ 1 │ Cough
│ john │ 2 │ Fatigue
│ john │ 2 │ Headache
╰───────────┴───────────────────────╯
"""╭───────────┬───────────────────────╮
│ Annotator │ Label │ Chart Count
├───────────┼───────────────────────┤
│ Any │ Cough │ 2
│ Any │ Fatigue │ 3
│ Any │ Headache │ 3
├───────────┼───────────────────────┤
│ jane │ Cough │ 1
│ jane │ Fatigue │ 2
│ jane │ Headache │ 2
├───────────┼───────────────────────┤
│ jill │ Cough │ 2
│ jill │ Fatigue │ 3
│ jill │ Headache │ 0
├───────────┼───────────────────────┤
│ john │ Cough │ 1
│ john │ Fatigue │ 2
│ john │ Headache │ 2
╰───────────┴───────────────────────╯
""",
stdout,
)
Expand All @@ -53,12 +53,12 @@ def test_labels_grouped(self):
stdout = self.run_cli("labels", path=tmpdir)

self.assertEqual(
"""╭───────────┬───────────────────────╮
│ Annotator │ Chart Count │ Label
├───────────┼───────────────────────┤
│ Any │ 0 │ recent
│ Any │ 0 │ symptoms
╰───────────┴───────────────────────╯
"""╭───────────┬───────────────────────╮
│ Annotator │ Label │ Chart Count
├───────────┼───────────────────────┤
│ Any │ recent │ 0
│ Any │ symptoms │ 0
╰───────────┴───────────────────────╯
""",
stdout,
)
Expand Down Expand Up @@ -90,23 +90,19 @@ def test_labels_ignored(self):

def test_labels_csv(self):
"""Verify that can print in CSV format"""
stdout = self.run_cli("labels", "--csv", path=f"{self.DATA_DIR}/cold")
stdout = self.run_cli("labels", "--csv", path=f"{self.DATA_DIR}/external")

self.assertEqual(
[
"annotator,chart_count,label",
"Any,2,Cough",
"Any,3,Fatigue",
"Any,3,Headache",
"jane,1,Cough",
"jane,2,Fatigue",
"jane,2,Headache",
"jill,2,Cough",
"jill,3,Fatigue",
"jill,0,Headache",
"john,1,Cough",
"john,2,Fatigue",
"john,2,Headache",
"annotator,label,chart_count",
"Any,happy,1",
"Any,sad,1",
"human,happy,1",
"human,sad,1",
"icd10-doc,happy,1",
"icd10-doc,sad,1",
"icd10-enc,happy,1",
"icd10-enc,sad,1",
],
stdout.splitlines(),
)
Loading

0 comments on commit a8d9848

Please sign in to comment.