From 766967e40360bd3baefbf45063a3ecd7eb1f50be Mon Sep 17 00:00:00 2001
From: Michael Terry
Date: Wed, 26 Jun 2024 15:29:04 -0400
Subject: [PATCH] feat: add new `frequency` command to show term frequencies

This shows a count of each text/label combo, with a combined total
for all annotators. It also flags when the same text has been labelled
differently.

Also:
- Delete some unused code, notably the old term_freq code.
- For the `mentions` command, don't show duplicate mentions - they're
  just noise.
- For the `mentions` command, sort output by text (rather than by the
  order the text appears in the document, which makes less sense now
  that we're removing duplicates)
---
 chart_review/agree.py                   |  51 -------
 chart_review/cli.py                     |   5 +-
 chart_review/cohort.py                  |  57 +------
 chart_review/commands/default.py        |   2 +-
 chart_review/commands/frequency.py      |  77 ++++++++++
 chart_review/commands/labels.py         |   2 +-
 chart_review/commands/mentions.py       |  19 ++-
 chart_review/common.py                  |  11 --
 chart_review/term_freq.py               | 100 ------------
 docs/frequency.md                       |  93 +++++++++++
 docs/ids.md                             |   2 +-
 docs/labels.md                          |   2 +-
 docs/mentions.md                        |   2 +-
 tests/data/cold/labelstudio-export.json |  24 +++
 tests/test_frequency.py                 | 195 ++++++++++++++++++++++++
 tests/test_mentions.py                  |  60 +++++++-
 tests/test_term_freq.py                 |  28 ----
 17 files changed, 465 insertions(+), 265 deletions(-)
 create mode 100644 chart_review/commands/frequency.py
 delete mode 100644 chart_review/term_freq.py
 create mode 100644 docs/frequency.md
 create mode 100644 tests/test_frequency.py
 delete mode 100644 tests/test_term_freq.py

diff --git a/chart_review/agree.py b/chart_review/agree.py
index c6611f8..3bee7fb 100644
--- a/chart_review/agree.py
+++ b/chart_review/agree.py
@@ -151,38 +151,6 @@ def score_matrix(matrix: dict, sig_digits=3) -> dict:
     }
 
 
-def avg_scores(first: dict, second: dict, sig_digits=3) -> dict:
-    merged = {}
-    for header in csv_header():
-        added = first[header] + second[header]
-        if header in ["TP", "FP", "FN", "TN"]:
-            merged[header] = added
-        else:
-            merged[header] = round(added / 2, sig_digits)
-    return merged
-
-
-def score_reviewer(
-    annotations: types.ProjectAnnotations,
-    truth: str,
-    annotator: str,
-    note_range: Collection[int],
-    labels: Iterable[str] = None,
-) -> dict:
-    """
-    Score reliability of an annotator against a truth annotator.
-
-    :param annotations: prepared map of annotators and mentions
-    :param truth: annotator to use as the ground truth
-    :param annotator: another annotator to compare with truth
-    :param note_range: collection of LabelStudio document ID
-    :param labels: (optional) set of labels to score
-    :return: dict, keys f1, precision, recall and vals= %score
-    """
-    truth_matrix = confusion_matrix(annotations, truth, annotator, note_range, labels=labels)
-    return score_matrix(truth_matrix)
-
-
 def csv_table(score: dict, class_labels: types.LabelSet):
     table = list()
     table.append(csv_header(False, True))
@@ -229,22 +197,3 @@ def csv_row_score(
         row.append(pick_label if pick_label else "*")
 
     return "\t".join(row)
-
-
-def true_prevalence(prevalence_apparent: float, sensitivity: float, specificity: float):
-    """
-    See paper: "The apparent prevalence, the true prevalence"
-    https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9195606
-
-    Using Eq. 4.
it can be calculated: - True prevalence = (Apparent prevalence + Sp - 1)/(Se + Sp - 1) - - :param prevalence_apparent: estimated prevalence, concretely: - the %NLP labled positives / cohort - - :param: sensitivity: of the class label (where prevalence was measured) - :param: specificity: of the class label (where prevalence was measured) - - :return: float adjusted prevalence - """ - return round((prevalence_apparent + specificity - 1) / (sensitivity + specificity - 1), 5) diff --git a/chart_review/cli.py b/chart_review/cli.py index 88507b9..3ff518d 100644 --- a/chart_review/cli.py +++ b/chart_review/cli.py @@ -3,7 +3,7 @@ import argparse import sys -from chart_review.commands import accuracy, default, ids, labels, mentions +from chart_review.commands import accuracy, default, frequency, ids, labels, mentions def define_parser() -> argparse.ArgumentParser: @@ -13,6 +13,9 @@ def define_parser() -> argparse.ArgumentParser: subparsers = parser.add_subparsers() accuracy.make_subparser(subparsers.add_parser("accuracy", help="calculate F1 and Kappa scores")) + frequency.make_subparser( + subparsers.add_parser("frequency", help="show counts of each text mention") + ) ids.make_subparser(subparsers.add_parser("ids", help="map Label Studio IDs to FHIR IDs")) labels.make_subparser(subparsers.add_parser("labels", help="show label usage by annotator")) mentions.make_subparser(subparsers.add_parser("mentions", help="show each mention of a label")) diff --git a/chart_review/cohort.py b/chart_review/cohort.py index a19071d..2d3f0dd 100644 --- a/chart_review/cohort.py +++ b/chart_review/cohort.py @@ -1,7 +1,7 @@ from typing import Iterable -from chart_review.common import guard_str, guard_iter, guard_in -from chart_review import agree, common, config, errors, external, term_freq, simplify, types +from chart_review.common import guard_iter, guard_in +from chart_review import agree, common, config, errors, external, simplify, types class CohortReader: @@ -84,25 +84,6 @@ def _collect_note_ranges( def class_labels(self): return self.annotations.labels - def calc_term_freq(self, annotator) -> dict: - """ - Calculate Term Frequency of highlighted mentions. - :param annotator: an annotator name - :return: dict key=TERM val= {label, list of chart_id} - """ - return term_freq.calc_term_freq(self.annotations, guard_str(annotator)) - - def calc_label_freq(self, annotator) -> dict: - """ - Calculate Term Frequency of highlighted mentions. 
- :param annotator: an annotator name - :return: dict key=TERM val= {label, list of chart_id} - """ - return term_freq.calc_label_freq(self.calc_term_freq(annotator)) - - def calc_term_label_confusion(self, annotator) -> dict: - return term_freq.calc_term_label_confusion(self.calc_term_freq(annotator)) - def _select_labels(self, label_pick: str = None) -> Iterable[str]: if label_pick: guard_in(label_pick, self.class_labels) @@ -131,37 +112,3 @@ def confusion_matrix( note_range, labels=labels, ) - - def score_reviewer(self, truth: str, annotator: str, note_range, label_pick: str = None): - """ - Score reliability of rater at the level of all symptom *PREVALENCE* - :param truth: annotator to use as the ground truth - :param annotator: another annotator to compare with truth - :param note_range: default= all in corpus - :param label_pick: (optional) of the CLASS_LABEL to score separately - :return: dict, keys f1, precision, recall and vals= %score - """ - labels = self._select_labels(label_pick) - note_range = set(guard_iter(note_range)) - return agree.score_reviewer(self.annotations, truth, annotator, note_range, labels=labels) - - def score_reviewer_table_csv(self, truth: str, annotator: str, note_range) -> str: - table = list() - table.append(agree.csv_header(False, True)) - - score = self.score_reviewer(truth, annotator, note_range) - table.append(agree.csv_row_score(score, as_string=True)) - - for label in self.class_labels: - score = self.score_reviewer(truth, annotator, note_range, label) - table.append(agree.csv_row_score(score, label, as_string=True)) - - return "\n".join(table) + "\n" - - def score_reviewer_table_dict(self, truth, annotator, note_range) -> dict: - table = self.score_reviewer(truth, annotator, note_range) - - for label in self.class_labels: - table[label] = self.score_reviewer(truth, annotator, note_range, label) - - return table diff --git a/chart_review/commands/default.py b/chart_review/commands/default.py index 17da27d..bf1e2b4 100644 --- a/chart_review/commands/default.py +++ b/chart_review/commands/default.py @@ -26,7 +26,7 @@ def print_info(args: argparse.Namespace) -> None: notes = reader.note_range[annotator] chart_table.add_row( annotator, - str(len(notes)), + f"{len(notes):,}", console_utils.pretty_note_range(notes), ) diff --git a/chart_review/commands/frequency.py b/chart_review/commands/frequency.py new file mode 100644 index 0000000..e81a5a6 --- /dev/null +++ b/chart_review/commands/frequency.py @@ -0,0 +1,77 @@ +import argparse + +import rich +import rich.box +import rich.table +import rich.text + +from chart_review import cli_utils, console_utils, types + + +def make_subparser(parser: argparse.ArgumentParser) -> None: + cli_utils.add_project_args(parser) + cli_utils.add_output_args(parser) + parser.set_defaults(func=print_frequency) + + +def print_frequency(args: argparse.Namespace) -> None: + """ + Print counts of each text mention. 
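+
+    Text is compared case-insensitively (with surrounding whitespace stripped),
+    and counts are reported per annotator, plus an "All" roll-up that combines
+    every annotator's mentions.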
+ """ + reader = cli_utils.get_cohort_reader(args) + + frequencies = {} # annotator -> label -> text -> count + all_annotator_frequencies = {} # label -> text -> count + text_labels = {} # text -> labelset (to flag term confusion) + for annotator in reader.annotations.original_text_mentions: + annotator_mentions = reader.annotations.original_text_mentions[annotator] + for labeled_texts in annotator_mentions.values(): + for labeled_text in labeled_texts: + text = (labeled_text.text or "").strip().casefold() + for label in labeled_text.labels: + if label in reader.annotations.labels: + # Count the mention for this annotator + label_to_text = frequencies.setdefault(annotator, {}) + text_to_count = label_to_text.setdefault(label, {}) + text_to_count[text] = text_to_count.get(text, 0) + 1 + + # Count the mention for our running all-annotators total + all_text_to_count = all_annotator_frequencies.setdefault(label, {}) + all_text_to_count[text] = all_text_to_count.get(text, 0) + 1 + + # And finally, add it to our running term-confusion tracker + text_labels.setdefault(text, types.LabelSet()).add(label) + + # Now group up the data into a formatted table + table = cli_utils.create_table("Annotator", "Label", "Mention", "Count") + has_term_confusion = False # whether multiple labels are used for the same text + + # Helper method to add all the info for a single annotator to our table + def add_annotator_to_table(name, label_to_text: dict) -> None: + nonlocal has_term_confusion + table.add_section() + for label in sorted(label_to_text, key=str.casefold): + text_to_count = label_to_text[label] + for text, count in sorted( + text_to_count.items(), key=lambda t: (t[1], t[0]), reverse=True + ): + is_confused = not args.csv and text and len(text_labels[text]) > 1 + if is_confused: + text = rich.text.Text(text + "*", style="bold") + has_term_confusion = True + table.add_row(name, label, text, f"{count:,}") + + # Add each annotator + add_annotator_to_table(rich.text.Text("All", style="italic"), all_annotator_frequencies) + for annotator in sorted(frequencies, key=str.casefold): + add_annotator_to_table(annotator, frequencies[annotator]) + + if args.csv: + cli_utils.print_table_as_csv(table) + else: + rich.get_console().print(table) + console_utils.print_ignored_charts(reader) + if has_term_confusion: + rich.get_console().print( + f" * This text has multiple associated labels.", style="italic" + ) diff --git a/chart_review/commands/labels.py b/chart_review/commands/labels.py index 96a4cd7..1942d7a 100644 --- a/chart_review/commands/labels.py +++ b/chart_review/commands/labels.py @@ -33,7 +33,7 @@ def print_labels(args: argparse.Namespace) -> None: # First add summary entries, for counts across the union of all annotators for name in label_names: - count = str(len(any_annotator_note_sets.get(name, {}))) + count = f"{len(any_annotator_note_sets.get(name, {})):,}" label_table.add_row(rich.text.Text("Any", style="italic"), name, count) # Now do each annotator as their own little boxed section diff --git a/chart_review/commands/mentions.py b/chart_review/commands/mentions.py index 1d8bf9a..49f8c36 100644 --- a/chart_review/commands/mentions.py +++ b/chart_review/commands/mentions.py @@ -5,7 +5,7 @@ import rich.table import rich.text -from chart_review import cli_utils, console_utils, types +from chart_review import cli_utils, console_utils def make_subparser(parser: argparse.ArgumentParser) -> None: @@ -24,12 +24,19 @@ def print_mentions(args: argparse.Namespace) -> None: for annotator in 
sorted(reader.annotations.original_text_mentions, key=str.casefold): table.add_section() - mentions = reader.annotations.original_text_mentions[annotator] - for note_id, labeled_texts in mentions.items(): - for label_text in labeled_texts: - for label in sorted(label_text.labels, key=str.casefold): + annotator_mentions = reader.annotations.original_text_mentions[annotator] + for note_id, labeled_texts in annotator_mentions.items(): + # Gather all combos of text/label (i.e. all mentions) in this note + note_mentions = set() + for labeled_text in labeled_texts: + text = labeled_text.text and labeled_text.text.casefold() + for label in labeled_text.labels: if label in reader.annotations.labels: - table.add_row(annotator, str(note_id), label_text.text, label) + note_mentions.add((text, label)) + + # Now add each mention to the table + for note_mention in sorted(note_mentions, key=lambda m: (m[0], m[1].casefold())): + table.add_row(annotator, str(note_id), note_mention[0], note_mention[1]) if args.csv: cli_utils.print_table_as_csv(table) diff --git a/chart_review/common.py b/chart_review/common.py index 568805f..f23c542 100644 --- a/chart_review/common.py +++ b/chart_review/common.py @@ -104,17 +104,6 @@ def print_line(heading=None) -> None: ############################################################################### # Helper Functions: enum type smoothing ############################################################################### -def guard_str(object) -> str: - if isinstance(object, Enum): - return str(object.name) - elif isinstance(object, EnumMeta): - return str(object.name) - elif isinstance(object, str): - return object - else: - raise Exception(f"expected str|Enum but got {type(object)}") - - def guard_iter(object) -> Iterable: if isinstance(object, Enum): return guard_iter(object.value) diff --git a/chart_review/term_freq.py b/chart_review/term_freq.py deleted file mode 100644 index 4535130..0000000 --- a/chart_review/term_freq.py +++ /dev/null @@ -1,100 +0,0 @@ -from ctakesclient.typesystem import Span - -from chart_review import types - - -def calc_term_freq(annotations: types.ProjectAnnotations, annotator: str) -> dict: - """ - Calculate the frequency of TERMS highlighted for each LABEL (Cough, Dyspnea, etc). - :param annotations: prepared map of mentions - :param annotator: an annotator name - :return: dict key=TERM val= {label, list of chart_id} - """ - original_text_mentions = annotations.original_text_mentions.get(annotator, {}) - - term_freq = {} - for note_id, text_labels in original_text_mentions.items(): - for text_label in text_labels: - if not text_label.text: - continue - - term = text_label.text.upper() - term_counts = term_freq.setdefault(term, {}) - - for label in text_label.labels: - term_label_counts = term_counts.setdefault(label, []) - term_label_counts.append(note_id) - - return term_freq - - -def calc_term_label_confusion(term_freq: dict) -> dict: - """ - Calculate term mentions that have more than one label associated. - Usually due to user error in LabelStudio. 
-
-    @param term_freq: output of 'calc_term_freq'
-    @return: dict filtered by only confusing TERMs
-    """
-    confusing = dict()
-    for term in term_freq.keys():
-        if len(term_freq[term].keys()) > 1:
-            confusing[term] = term_freq[term]
-    return confusing
-
-
-def calc_label_freq(term_freq: dict) -> dict:
-    unique = dict()
-    for term in term_freq.keys():
-        for label in term_freq[term].keys():
-            if label not in unique.keys():
-                unique[label] = dict()
-            if term not in unique[label].keys():
-                unique[label][term] = list()
-            for note_id in term_freq[term][label]:
-                unique[label][term].append(note_id)
-    tf = dict()
-    for label in unique.keys():
-        for term in unique[label].keys():
-            if label not in tf.keys():
-                tf[label] = dict()
-            tf[label][term] = len(unique[label][term])
-    return tf
-
-
-def intersect(span1: Span, span2: Span) -> set:
-    """
-    TODO Refactor to ctakes-client:
-    https://github.com/Machine-Learning-for-Medical-Language/ctakes-client-py/issues/55
-
-    Get char text positions where overlaps exist.
-
-    :param span1: 1st text Span
-    :param span2: 2nd text Span
-    :return: set of CHAR positions (convertible to range or Span)
-    """
-    range1 = range(span1.begin, span1.end)
-    range2 = range(span2.begin, span2.end)
-    return set(range1).intersection(set(range2))
-
-
-def overlaps(span1: Span, span2: Span, min_length=2, max_length=20) -> bool:
-    """
-    TODO Refactor to ctakes-client:
-    https://github.com/Machine-Learning-for-Medical-Language/ctakes-client-py/issues/55
-
-    True/False text overlap exists between two spans of 'highlighted' text.
-
-    :param span1: 1st text Span
-    :param span2: 2nd text Span
-    :param min_length: MIN length of comparison, default 2 chars
-    :param max_length: MAX length of comparison, default 20 chars (or equals)
-    :return: true/false the two spans overlap
-    """
-    shared = intersect(span1, span2)
-    if len(shared) == len(range(span1.begin, span1.end)):
-        return True
-    elif (len(shared) >= min_length) and (len(shared) <= max_length):
-        return True
-    else:
-        return False
diff --git a/docs/frequency.md b/docs/frequency.md
new file mode 100644
index 0000000..ea49318
--- /dev/null
+++ b/docs/frequency.md
@@ -0,0 +1,93 @@
+---
+title: Frequency Command
+parent: Chart Review
+nav_order: 6
+# audience: lightly technical folks
+# type: how-to
+---
+
+# The Frequency Command
+
+The `frequency` command prints the number of times a piece of text was labeled
+a certain way.
+
+It prints a combined count for all annotators as well as a per-annotator breakdown.
+
+Phrases that have been labelled in different ways are flagged for attention,
+because there might be a mistaken label.
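+
+In the table output, flagged phrases get a trailing `*` (the `--csv` output
+described below omits this marker). To find such phrases in a spreadsheet,
+look for the same mention appearing under more than one label. As a sketch,
+assuming you have `pandas` available and saved the output to `frequency.csv`
+as shown in the `--csv` examples below, you could flag them yourself:
+
+```python
+import pandas as pd
+
+df = pd.read_csv("frequency.csv")
+
+# Count how many distinct labels each mention received, across all annotators
+label_counts = df[df["annotator"] == "All"].groupby("mention")["label"].nunique()
+
+# Mentions with more than one label are the ones flagged in the table output
+print(label_counts[label_counts > 1])
+```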
+ +## Example + +```shell +$ chart-review frequency +╭───────────┬──────────┬─────────┬───────╮ +│ Annotator │ Label │ Mention │ Count │ +├───────────┼──────────┼─────────┼───────┤ +│ All │ Cough │ achoo │ 3 │ +│ All │ Cough │ pain* │ 1 │ +│ All │ Fatigue │ sigh* │ 5 │ +│ All │ Fatigue │ sleepy │ 3 │ +│ All │ Fatigue │ ouch* │ 2 │ +│ All │ Headache │ pain* │ 2 │ +│ All │ Headache │ sigh* │ 1 │ +│ All │ Headache │ ouch* │ 1 │ +├───────────┼──────────┼─────────┼───────┤ +│ jane │ Cough │ achoo │ 1 │ +│ jane │ Fatigue │ sigh* │ 2 │ +│ jane │ Fatigue │ sleepy │ 1 │ +│ jane │ Headache │ sigh* │ 1 │ +│ jane │ Headache │ pain* │ 1 │ +├───────────┼──────────┼─────────┼───────┤ +│ jill │ Cough │ pain* │ 1 │ +│ jill │ Cough │ achoo │ 1 │ +│ jill │ Fatigue │ ouch* │ 2 │ +│ jill │ Fatigue │ sleepy │ 1 │ +│ jill │ Fatigue │ sigh* │ 1 │ +├───────────┼──────────┼─────────┼───────┤ +│ john │ Cough │ achoo │ 1 │ +│ john │ Fatigue │ sigh* │ 2 │ +│ john │ Fatigue │ sleepy │ 1 │ +│ john │ Headache │ pain* │ 1 │ +│ john │ Headache │ ouch* │ 1 │ +╰───────────┴──────────┴─────────┴───────╯ + * This text has multiple associated labels. +``` + +## Options + +### --csv + +Print the frequencies in a machine-parseable CSV format. + +#### Examples +```shell +$ chart-review frequency --csv > frequency.csv +``` + +```shell +$ chart-review frequency --csv +annotator,label,mention,count +All,Cough,achoo,3 +All,Cough,pain,1 +All,Fatigue,sigh,5 +All,Fatigue,sleepy,3 +All,Fatigue,ouch,2 +All,Headache,pain,2 +All,Headache,sigh,1 +All,Headache,ouch,1 +jane,Cough,achoo,1 +jane,Fatigue,sigh,2 +jane,Fatigue,sleepy,1 +jane,Headache,sigh,1 +jane,Headache,pain,1 +jill,Cough,pain,1 +jill,Cough,achoo,1 +jill,Fatigue,ouch,2 +jill,Fatigue,sleepy,1 +jill,Fatigue,sigh,1 +john,Cough,achoo,1 +john,Fatigue,sigh,2 +john,Fatigue,sleepy,1 +john,Headache,pain,1 +john,Headache,ouch,1 +``` diff --git a/docs/ids.md b/docs/ids.md index 2c73e1a..8283a3a 100644 --- a/docs/ids.md +++ b/docs/ids.md @@ -1,7 +1,7 @@ --- title: IDs Command parent: Chart Review -nav_order: 6 +nav_order: 7 # audience: lightly technical folks # type: how-to --- diff --git a/docs/labels.md b/docs/labels.md index 13b5f9f..90cd687 100644 --- a/docs/labels.md +++ b/docs/labels.md @@ -1,7 +1,7 @@ --- title: Labels Command parent: Chart Review -nav_order: 7 +nav_order: 8 # audience: lightly technical folks # type: how-to --- diff --git a/docs/mentions.md b/docs/mentions.md index 99369bd..78b9f4b 100644 --- a/docs/mentions.md +++ b/docs/mentions.md @@ -1,7 +1,7 @@ --- title: Mentions Command parent: Chart Review -nav_order: 8 +nav_order: 9 # audience: lightly technical folks # type: how-to --- diff --git a/tests/data/cold/labelstudio-export.json b/tests/data/cold/labelstudio-export.json index aa58cdd..4a8183a 100644 --- a/tests/data/cold/labelstudio-export.json +++ b/tests/data/cold/labelstudio-export.json @@ -39,6 +39,14 @@ ] } }, + { + "value": { + "text": "sigh", + "labels": [ + "Fatigue" + ] + } + }, { "value": { "text": "sigh", @@ -99,6 +107,14 @@ "id": 602, "completed_by": 6, "result": [ + { + "value": { + "text": "ouch", + "labels": [ + "Fatigue" + ] + } + }, { "value": { "text": "ouch", @@ -141,6 +157,14 @@ ] } }, + { + "value": { + "text": "sigh", + "labels": [ + "Fatigue" + ] + } + }, { "value": { "text": "pain", diff --git a/tests/test_frequency.py b/tests/test_frequency.py new file mode 100644 index 0000000..8eef431 --- /dev/null +++ b/tests/test_frequency.py @@ -0,0 +1,195 @@ +"""Tests for commands/frequency.py""" + +import tempfile + +from chart_review import common 
+from tests import base + + +class TestFrequency(base.TestCase): + """Test case for the top-level frequency code""" + + def test_basic_output(self): + stdout = self.run_cli("frequency", path=f"{self.DATA_DIR}/cold") + + self.assertEqual( + """╭───────────┬──────────┬─────────┬───────╮ +│ Annotator │ Label │ Mention │ Count │ +├───────────┼──────────┼─────────┼───────┤ +│ All │ Cough │ achoo │ 3 │ +│ All │ Cough │ pain* │ 1 │ +│ All │ Fatigue │ sigh* │ 5 │ +│ All │ Fatigue │ sleepy │ 3 │ +│ All │ Fatigue │ ouch* │ 2 │ +│ All │ Headache │ pain* │ 2 │ +│ All │ Headache │ sigh* │ 1 │ +│ All │ Headache │ ouch* │ 1 │ +├───────────┼──────────┼─────────┼───────┤ +│ jane │ Cough │ achoo │ 1 │ +│ jane │ Fatigue │ sigh* │ 2 │ +│ jane │ Fatigue │ sleepy │ 1 │ +│ jane │ Headache │ sigh* │ 1 │ +│ jane │ Headache │ pain* │ 1 │ +├───────────┼──────────┼─────────┼───────┤ +│ jill │ Cough │ pain* │ 1 │ +│ jill │ Cough │ achoo │ 1 │ +│ jill │ Fatigue │ ouch* │ 2 │ +│ jill │ Fatigue │ sleepy │ 1 │ +│ jill │ Fatigue │ sigh* │ 1 │ +├───────────┼──────────┼─────────┼───────┤ +│ john │ Cough │ achoo │ 1 │ +│ john │ Fatigue │ sigh* │ 2 │ +│ john │ Fatigue │ sleepy │ 1 │ +│ john │ Headache │ pain* │ 1 │ +│ john │ Headache │ ouch* │ 1 │ +╰───────────┴──────────┴─────────┴───────╯ + * This text has multiple associated labels. +""", + stdout, + ) + + def test_ignored(self): + """Verify that we show info on ignored notes""" + stdout = self.run_cli("frequency", path=f"{self.DATA_DIR}/ignore") + + # Showing empty mentions felt like the most reasonable approach to the edge case of + # "no text in the annotation" - but also disabling the term confusion warning for + # empty mentions. + self.assertEqual( + """╭───────────┬───────┬─────────┬───────╮ +│ Annotator │ Label │ Mention │ Count │ +├───────────┼───────┼─────────┼───────┤ +│ All │ A │ │ 2 │ +│ All │ B │ │ 2 │ +├───────────┼───────┼─────────┼───────┤ +│ adam │ A │ │ 1 │ +│ adam │ B │ │ 1 │ +├───────────┼───────┼─────────┼───────┤ +│ allison │ A │ │ 1 │ +│ allison │ B │ │ 1 │ +╰───────────┴───────┴─────────┴───────╯ + Ignoring 3 charts (3–5) +""", + stdout, + ) + + def test_external(self): + """Verify that we don't show external annotators""" + stdout = self.run_cli("frequency", path=f"{self.DATA_DIR}/external") + + self.assertEqual( + """╭───────────┬───────┬─────────┬───────╮ +│ Annotator │ Label │ Mention │ Count │ +├───────────┼───────┼─────────┼───────┤ +│ All │ happy │ woo │ 1 │ +│ All │ sad │ sigh │ 1 │ +├───────────┼───────┼─────────┼───────┤ +│ human │ happy │ woo │ 1 │ +│ human │ sad │ sigh │ 1 │ +╰───────────┴───────┴─────────┴───────╯ +""", + stdout, + ) + + def test_unused_labels(self): + """Verify that we don't list mentions for labels that aren't in consideration""" + with tempfile.TemporaryDirectory() as tmpdir: + common.write_json( + f"{tmpdir}/config.json", + { + "annotators": {"chris": 1}, + "labels": ["Valid"], + }, + ) + common.write_json( + f"{tmpdir}/labelstudio-export.json", + [ + { + "id": 1, + "annotations": [ + { + "completed_by": 1, + "result": [ + {"value": {"text": "good", "labels": ["Valid"]}}, + {"value": {"text": "bad", "labels": ["Invalid"]}}, + ], + }, + ], + }, + ], + ) + stdout = self.run_cli("frequency", path=tmpdir) + + self.assertEqual( + """╭───────────┬───────┬─────────┬───────╮ +│ Annotator │ Label │ Mention │ Count │ +├───────────┼───────┼─────────┼───────┤ +│ All │ Valid │ good │ 1 │ +├───────────┼───────┼─────────┼───────┤ +│ chris │ Valid │ good │ 1 │ +╰───────────┴───────┴─────────┴───────╯ +""", + stdout, + ) + + def 
test_spaces(self): + """Verify that we handle text with surrounding spaces etc""" + with tempfile.TemporaryDirectory() as tmpdir: + common.write_json( + f"{tmpdir}/config.json", + { + "annotators": {"chris": 1}, + "labels": ["LabelA"], + }, + ) + common.write_json( + f"{tmpdir}/labelstudio-export.json", + [ + { + "id": 1, + "annotations": [ + { + "completed_by": 1, + "result": [ + {"value": {"text": "extra SPACES ", "labels": ["LabelA"]}}, + {"value": {"text": "\nextra spaces", "labels": ["LabelA"]}}, + { + "value": { + "text": " Extra Spaces ", + "labels": ["LabelA"], + } + }, + ], + }, + ], + }, + ], + ) + stdout = self.run_cli("frequency", path=tmpdir) + + self.assertEqual( + """╭───────────┬────────┬──────────────┬───────╮ +│ Annotator │ Label │ Mention │ Count │ +├───────────┼────────┼──────────────┼───────┤ +│ All │ LabelA │ extra spaces │ 3 │ +├───────────┼────────┼──────────────┼───────┤ +│ chris │ LabelA │ extra spaces │ 3 │ +╰───────────┴────────┴──────────────┴───────╯ +""", + stdout, + ) + + def test_csv(self): + """Verify that can print in CSV format""" + stdout = self.run_cli("frequency", "--csv", path=f"{self.DATA_DIR}/external") + + self.assertEqual( + [ + "annotator,label,mention,count", + "All,happy,woo,1", + "All,sad,sigh,1", + "human,happy,woo,1", + "human,sad,sigh,1", + ], + stdout.splitlines(), + ) diff --git a/tests/test_mentions.py b/tests/test_mentions.py index 930553b..a02da62 100644 --- a/tests/test_mentions.py +++ b/tests/test_mentions.py @@ -19,20 +19,21 @@ def test_basic_output(self): │ jane │ 1 │ achoo │ Cough │ │ jane │ 1 │ sigh │ Fatigue │ │ jane │ 1 │ sigh │ Headache │ -│ jane │ 4 │ sleepy │ Fatigue │ │ jane │ 4 │ pain │ Headache │ +│ jane │ 4 │ sigh │ Fatigue │ +│ jane │ 4 │ sleepy │ Fatigue │ ├───────────┼──────────┼─────────┼──────────┤ │ jill │ 1 │ achoo │ Cough │ │ jill │ 1 │ sigh │ Fatigue │ │ jill │ 2 │ ouch │ Fatigue │ -│ jill │ 4 │ sleepy │ Fatigue │ │ jill │ 4 │ pain │ Cough │ +│ jill │ 4 │ sleepy │ Fatigue │ ├───────────┼──────────┼─────────┼──────────┤ │ john │ 1 │ achoo │ Cough │ │ john │ 1 │ sigh │ Fatigue │ │ john │ 2 │ ouch │ Headache │ -│ john │ 4 │ sleepy │ Fatigue │ │ john │ 4 │ pain │ Headache │ +│ john │ 4 │ sleepy │ Fatigue │ ╰───────────┴──────────┴─────────┴──────────╯ """, stdout, @@ -67,8 +68,8 @@ def test_external(self): """╭───────────┬──────────┬─────────┬───────╮ │ Annotator │ Chart ID │ Mention │ Label │ ├───────────┼──────────┼─────────┼───────┤ -│ human │ 1 │ woo │ happy │ │ human │ 1 │ sigh │ sad │ +│ human │ 1 │ woo │ happy │ ╰───────────┴──────────┴─────────┴───────╯ """, stdout, @@ -101,9 +102,9 @@ def test_odd_text(self): """╭───────────┬──────────┬────────────┬───────╮ │ Annotator │ Chart ID │ Mention │ Label │ ├───────────┼──────────┼────────────┼───────┤ -│ chris │ 1 │ Cute Li🦁n │ Cat │ -│ chris │ 1 │ Multi │ Cat │ -│ │ │ Line-on │ │ +│ chris │ 1 │ cute li🦁n │ Cat │ +│ chris │ 1 │ multi │ Cat │ +│ │ │ line-on │ │ ╰───────────┴──────────┴────────────┴───────╯ """, stdout, @@ -144,6 +145,49 @@ def test_unused_labels(self): ├───────────┼──────────┼─────────┼───────┤ │ chris │ 1 │ good │ Valid │ ╰───────────┴──────────┴─────────┴───────╯ +""", + stdout, + ) + + def test_duplicate_mention(self): + """Verify that we don't show two copies of the same information""" + with tempfile.TemporaryDirectory() as tmpdir: + common.write_json( + f"{tmpdir}/config.json", + { + "annotators": {"chris": 1}, + "labels": ["LabelA", "LabelB"], + }, + ) + common.write_json( + f"{tmpdir}/labelstudio-export.json", + [ + { + "id": 1, + 
"annotations": [ + { + "completed_by": 1, + "result": [ + {"value": {"text": "dup", "labels": ["LabelA"]}}, + {"value": {"text": "dup", "labels": ["LabelA"]}}, + {"value": {"text": "new", "labels": ["LabelA"]}}, + {"value": {"text": "new", "labels": ["LabelB"]}}, + ], + }, + ], + }, + ], + ) + stdout = self.run_cli("mentions", path=tmpdir) + + self.assertEqual( + """╭───────────┬──────────┬─────────┬────────╮ +│ Annotator │ Chart ID │ Mention │ Label │ +├───────────┼──────────┼─────────┼────────┤ +│ chris │ 1 │ dup │ LabelA │ +│ chris │ 1 │ new │ LabelA │ +│ chris │ 1 │ new │ LabelB │ +╰───────────┴──────────┴─────────┴────────╯ """, stdout, ) @@ -155,8 +199,8 @@ def test_csv(self): self.assertEqual( [ "annotator,chart_id,mention,label", - "human,1,woo,happy", "human,1,sigh,sad", + "human,1,woo,happy", ], stdout.splitlines(), ) diff --git a/tests/test_term_freq.py b/tests/test_term_freq.py deleted file mode 100644 index ec780b8..0000000 --- a/tests/test_term_freq.py +++ /dev/null @@ -1,28 +0,0 @@ -"""Tests for term_freq.py""" - -from chart_review import term_freq, types -from tests import base - - -class TestMentions(base.TestCase): - """Test case for term frequency calculations""" - - def test_calc_term_freq(self): - annotations = types.ProjectAnnotations( - original_text_mentions={ - "hank": { - 1: [ - types.LabeledText("achoo", {"Cough", "Onomatopoeia"}), - types.LabeledText(None, {"Fever"}), - types.LabeledText("cough", {"Cough"}), - ] - }, - }, - ) - self.assertEqual( - { - "ACHOO": {"Cough": [1], "Onomatopoeia": [1]}, - "COUGH": {"Cough": [1]}, - }, - term_freq.calc_term_freq(annotations, "hank"), - )