diff --git a/chart_review/cohort.py b/chart_review/cohort.py index 06259cb..cf58330 100644 --- a/chart_review/cohort.py +++ b/chart_review/cohort.py @@ -1,5 +1,4 @@ import os -import sys from typing import Iterable, Optional from chart_review.common import guard_str, guard_iter, guard_in diff --git a/chart_review/common.py b/chart_review/common.py index fd0167b..ee041d8 100644 --- a/chart_review/common.py +++ b/chart_review/common.py @@ -2,9 +2,6 @@ from enum import Enum, EnumMeta from typing import Optional from collections.abc import Iterable -import warnings -import functools -import datetime import logging import json diff --git a/chart_review/covid_symptom/__init__.py b/chart_review/covid_symptom/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/chart_review/covid_symptom/config.py b/chart_review/covid_symptom/config.py deleted file mode 100644 index a7e7ce9..0000000 --- a/chart_review/covid_symptom/config.py +++ /dev/null @@ -1,81 +0,0 @@ -from enum import Enum -import ctakesclient -from chart_review import common - -############################################################################### -# COVID Symptom Class Labels -# -# ctakesclient.filesystem.covid_symptoms() - - -CLASS_LABELS = [ - "Congestion or runny nose", - "Cough", - "Diarrhea", - "Dyspnea", - "Fatigue", - "Fever or chills", - "Headache", - "Loss of taste or smell", - "Muscle or body aches", - "Nausea or vomiting", - "Sore throat", -] - -############################################################################### -# Export directory and LabelStudio JSON files - -PROJECT_DIR = "/opt/labelstudio/covid" - -############################################################################### -# -# study-specific LabelStudio "Annotator" users and Labelstudio id "NoteRanges" - - -class Annotator(Enum): - """ - LabelStudio annotator (reviewer) ID - """ - - andy = 2 - amy = 3 - alon = 6 - ctakes = 7 # mcmurry.andy - icd10 = 0 - - -class NoteRange(Enum): - """ - LabelStudio list of ED Notes - """ - - corpus = range(782, 1006) - amy = range(782, 895) - andy = range(895, 1006) - andy_alon = range(979, 1006) - amy_alon = range(864, 891) - alon = set(range(864, 891)).union(set(range(979, 1006))) - icd10_missing = [ - 782, - 791, - 793, - 799, - 811, - 824, - 826, - 828, - 833, - 837, - 859, - 860, - 870, - 877, - 882, - 886, - 921, - 959, - 985, - 986, - 994, - 1004, - ] diff --git a/chart_review/covid_symptom/config.yaml b/chart_review/covid_symptom/config.yaml deleted file mode 100644 index 43158e7..0000000 --- a/chart_review/covid_symptom/config.yaml +++ /dev/null @@ -1,31 +0,0 @@ -# This is a converted version of config.py -# I'm keeping both around for now, as we transition. - -labels: - - Congestion or runny nose - - Cough - - Diarrhea - - Dyspnea - - Fatigue - - Fever or chills - - Headache - - Loss of taste or smell - - Muscle or body aches - - Nausea or vomiting - - Sore throat - -annotators: - andy: 2 - amy: 3 - alon: 6 - ctakes: 7 # mcmurry.andy - icd10: 0 - -ranges: - corpus: 782-1006 - amy: 782-895 - andy: 895-1006 - andy_alon: 979-1006 - amy_alon: 864-891 - alon: [amy_alon, andy_alon] - icd10_missing: [782, 791, 793, 799, 811, 824, 826, 828, 833, 837, 859, 860, 870, 877, 882, 886, 921, 959, 985, 986, 994, 1004] diff --git a/chart_review/covid_symptom/paper.py b/chart_review/covid_symptom/paper.py deleted file mode 100644 index 1a20ad8..0000000 --- a/chart_review/covid_symptom/paper.py +++ /dev/null @@ -1,115 +0,0 @@ -from chart_review.covid_symptom import config -from chart_review.covid_symptom.config import Annotator, NoteRange -from chart_review import agree -from chart_review import cohort -from chart_review import common - - -def table2_accuracy_ctakes(self): - truth = Annotator.amy - annotator = Annotator.andy - - study_cohort = cohort.CohortReader() - - andy = study_cohort.confusion_matrix( - Annotator.andy.name, Annotator.ctakes.name, NoteRange.andy.value - ) - amy = study_cohort.confusion_matrix( - Annotator.amy.name, Annotator.ctakes.name, NoteRange.amy.value - ) - matrix = agree.append_matrix(amy, andy) - table2 = agree.score_matrix(matrix) - - for label in study_cohort.class_labels: - andy = study_cohort.confusion_matrix( - Annotator.andy.name, Annotator.ctakes.name, NoteRange.andy.value, label - ) - amy = study_cohort.confusion_matrix( - Annotator.amy.name, Annotator.ctakes.name, NoteRange.amy.value, label - ) - matrix = agree.append_matrix(amy, andy) - table2[label] = agree.score_matrix(matrix) - - common.write_json(self.getpath(truth, annotator, "table2_ctakes", "json"), table2) - - common.write_text( - self.getpath(truth, annotator, "table2_ctakes", "csv"), - agree.csv_table(table2, study_cohort.class_labels), - ) - - -def table2_accuracy_icd10(self): - truth = Annotator.amy - annotator = Annotator.andy - - andy = self.confusion_matrix(Annotator.andy, Annotator.icd10, NoteRange.andy.value) - amy = self.confusion_matrix(Annotator.amy, Annotator.icd10, NoteRange.amy.value) - matrix = agree.append_matrix(amy, andy) - table2 = agree.score_matrix(matrix) - - common.write_json(self.getpath(truth, annotator, "append_matrix", "json"), matrix) - - for label in self.class_labels: - andy = self.confusion_matrix(Annotator.andy, Annotator.icd10, NoteRange.andy.value, label) - amy = self.confusion_matrix(Annotator.amy, Annotator.icd10, NoteRange.amy.value, label) - matrix = agree.append_matrix(amy, andy) - table2[label] = agree.score_matrix(matrix) - - common.write_json(self.getpath(truth, annotator, "table2_icd10", "json"), table2) - - common.write_text( - self.getpath(truth, annotator, "table2_icd10", "csv"), - agree.csv_table(table2, self.class_labels), - ) - - -def table3_true_prevalence(self): - score = common.read_json(config.path("publish_table2_ctakes.amy.andy.json")) - table3 = [ - "variant_era,covid_symptom,cnt,prevalence_apparent,prevalence_true,sensitivity,specificity" - ] - - with open(config.PREVALENCE_COVID_POS, "r") as f: - for line in f.readlines(): - if not line.startswith("variant_era"): - [variant_era, covid_symptom, cnt, prct] = line.strip().split(",") - prevelance = 0 - if covid_symptom in score.keys(): - sensitivity = score[covid_symptom]["Sens"] - specificity = score[covid_symptom]["Spec"] - prevelance = agree.true_prevalence( - float(prct), float(sensitivity), float(specificity) - ) - table3.append( - f"{variant_era},{covid_symptom},{cnt},{prct}," - f"{prevelance},{sensitivity},{specificity}" - ) - - table3 = "\n".join(table3) + "\n" - filepath = config.path("table3_true_prevalence.csv") - print(filepath) - common.write_text(filepath, table3) - - -def publish_supplement(self): - # Human - self.table(Annotator.amy, Annotator.alon, NoteRange.amy_alon.value, "human") - self.table(Annotator.andy, Annotator.alon, NoteRange.andy_alon.value, "human") - - # NLP - self.score(Annotator.amy, Annotator.ctakes, NoteRange.amy.value, "nlp") - self.table(Annotator.andy, Annotator.ctakes, NoteRange.andy.value, "nlp") - self.table(Annotator.alon, Annotator.ctakes, NoteRange.alon.value, "nlp") - - # ICD10 - icd10_range = set(NoteRange.corpus.value).difference(NoteRange.icd10_missing.value) - icd10_range_andy = icd10_range.intersection(set(NoteRange.andy.value)) - icd10_range_amy = icd10_range.intersection(set(NoteRange.amy.value)) - icd10_range_alon = icd10_range.intersection( - set(list(NoteRange.amy_alon.value) + list(NoteRange.andy_alon.value)) - ) - - self.table(Annotator.amy, Annotator.icd10, icd10_range_amy, "icd10") - self.table(Annotator.andy, Annotator.icd10, icd10_range_andy, "icd10") - self.table(Annotator.alon, Annotator.icd10, icd10_range_alon, "icd10") - self.table(Annotator.ctakes, Annotator.icd10, icd10_range, "icd10") diff --git a/chart_review/external.py b/chart_review/external.py index 945b30f..7a81a9c 100644 --- a/chart_review/external.py +++ b/chart_review/external.py @@ -112,8 +112,8 @@ def merge_external( for row in exported_json: if "docref_mappings" not in row.get("data", {}): sys.exit( - f"Your Label Studio export does not include DocRef/Encounter ID mapping metadata!\n" - f"Consider re-uploading your notes using Cumulus ETL's chart-review command." + "Your Label Studio export does not include DocRef/Encounter ID mapping metadata!\n" + "Consider re-uploading your notes using Cumulus ETL's chart-review command." ) break # just inspect one diff --git a/chart_review/suicide_icd10/__init__.py b/chart_review/suicide_icd10/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/chart_review/suicide_icd10/config.py b/chart_review/suicide_icd10/config.py deleted file mode 100644 index 49452ea..0000000 --- a/chart_review/suicide_icd10/config.py +++ /dev/null @@ -1,41 +0,0 @@ -from enum import Enum - -############################################################################### -# Class labels of suicidality -CLASS_LABELS = ["ideation-present", "action-present", "ideation-past", "action-past"] - -PROJECT_DIR = "/opt/labelstudio/suicide-icd10" - -############################################################################### -# study-specific LabelStudio "Annotator" users and Labelstudio id "NoteRanges" - - -class Annotator(Enum): - """ - LabelStudio annotator (reviewer) ID - """ - - andy = 2 - rena = 3 - alon = 4 - - -class NoteRangeCallibration(Enum): - """ - LabelStudio list of Note IDs - """ - - corpus = range(351, 371) - rena = corpus - andy = corpus - alon = corpus - - -class NoteRangePSMAug14(Enum): - """ - PSM August 14th corpus, 50 notes - """ - - corpus = range(1226, 1277) - rena = corpus - andy = corpus diff --git a/chart_review/suicide_icd10/config.yaml b/chart_review/suicide_icd10/config.yaml deleted file mode 100644 index 4212c57..0000000 --- a/chart_review/suicide_icd10/config.yaml +++ /dev/null @@ -1,19 +0,0 @@ -# This is a converted version of config.py -# I'm keeping both around for now, as we transition. - -labels: - - ideation-present - - action-present - - ideation-past - - action-past - -annotators: - andy: 2 - amy: 3 - alon: 4 - -ranges: - corpus: 1226-1277 - andy: corpus - amy: corpus - alon: corpus diff --git a/tests/test_agree.py b/tests/test_agree.py index 8c62648..6e92f56 100644 --- a/tests/test_agree.py +++ b/tests/test_agree.py @@ -1,7 +1,5 @@ """Tests for agree.py""" -import os -import tempfile import unittest import ddt