asyml · tanyuqian · Mar 4, 2022 · Feb 25, 2022 · Feb 25, 2022 · Feb 25, 2022
diff --git a/examples/mimic_iii/README.md b/examples/mimic_iii/README.md
@@ -10,35 +10,19 @@ In command line, we run
 pip install git+https://[email protected]/asyml/forte-wrappers#egg=forte-wrappers[elastic,spacy]
 ```
 
-## Run indexer
-First, you should start an Elastic Indexer backend.
-
-Second, you can run the following command to parse some files and index them.
+## Run demo
+You can run the following command to parse some files like the MIMIC3 discharge notes.
 ```bash
-python medical_pipeline.py /path/to/mimiciii/1.4/NOTEEVENTS.csv.gz /path_to_sample_output 1000 False
+python medical_pipeline.py /path/to/mimiciii/1.4/NOTEEVENTS.csv.gz /path_to_sample_output 1000 True
 ```
 
-Tha last command line argument is ```singlePack```, it informs the script whether it should run the pipeline on MIMIC3 files located at '/path/to/mimiciii/1.4/NOTEEVENTS.csv.gz' (```False```) or just process a single pack using a single string of discharge note already present in the code (```True```).
-
-Here, we also write out the raw data pack to `/path_to_sample_output`, and only
-index the first 1k notes. Remove the `1000` parameter to index all documents.
-
-After the indexing is done, we are ready with the data processing part. Let's start the GUI.
+The last command-line argument is ```use_mimic3_data```; it tells the script whether it should run the pipeline on MIMIC3 files located at ```/path/to/mimiciii/1.4/NOTEEVENTS.csv.gz``` (```True```) or just process a single pack using a single text file of discharge notes already present in the repository under `sample_data/` (```False```).
+To run the pipeline with ```use_mimic3_data``` set to ```False```, pass the path of the directory containing the text file as the first argument, as in the following command.
 
-## Stave 
-First, set up Stave following the instructions.
-
-Second, create an empty project with the [default ontology](https://github.com/asyml/forte/blob/master/forte/ontology_specs/base_ontology.json),
- now record the project id.
-
-Set up the following environment variables:
 ```bash
-export stave_db_path=[path_to_stave]/simple_backend/db.sqlite3
-export url_stub=http://localhost:3000
-export query_result_project_id=[the project id above]
+python medical_pipeline.py sample_data/ /path_to_sample_output 1000 False
 ```
 
-Now, create another project with default ontology.
-
-Upload the `*.json` file (you can find it in the directory of the README) to the project.
+Here, we also write out the raw data pack to `/path_to_sample_output`, and only
+process the first 1000 notes. Remove the `1000` parameter to process all documents.
 
diff --git a/examples/mimic_iii/config.yml b/examples/mimic_iii/config.yml
@@ -0,0 +1,6 @@
+Spacy:
+  processors: ["sentence", "tokenize", "pos", "ner", "umls_link"]
+  lang: "en_ner_bionlp13cg_md"
+
+Negation:
+  negation_rules_path: ""
diff --git a/examples/mimic_iii/medical_pipeline.py b/examples/mimic_iii/medical_pipeline.py
@@ -1,39 +1,45 @@
 import sys
+import yaml
 from termcolor import colored
 
+from forte.common.configuration import Config
 from forte.data.data_pack import DataPack
-from forte.data.readers import StringReader
+from forte.data.readers import PlainTextReader
 from forte.pipeline import Pipeline
 from forte.processors.writers import PackIdJsonPackWriter
-from ftx.medical.clinical import MedicalEntityMention
+from ftx.medical.clinical_ontology import NegationContext, MedicalEntityMention
 
 from ft.onto.base_ontology import (
     Token,
     Sentence,
     EntityMention,
 )
 from fortex.spacy import SpacyProcessor
-from fortex.elastic import ElasticSearchPackIndexProcessor
 
 from forte_medical.readers.mimic3_note_reader import Mimic3DischargeNoteReader
+from forte_medical.processors.negation_context_analyzer import (
+    NegationContextAnalyzer,
+)
 
 
-def main(input_path: str, output_path: str, max_packs: int = -1, singlePack: bool = True):
+def main(
+    input_path: str,
+    output_path: str,
+    max_packs: int = -1,
+    use_mimic3_reader: bool = True,
+):
     pl = Pipeline[DataPack]()
 
-    if singlePack is True:
-        pl.set_reader(StringReader())
+    if use_mimic3_reader is False:
+        pl.set_reader(PlainTextReader())
     else:
         pl.set_reader(
             Mimic3DischargeNoteReader(), config={"max_num_notes": max_packs}
         )
-    configSpacy = {
-        "processors": ["sentence", "tokenize", "pos", "ner", "umls_link"],
-        "lang": "en_ner_bionlp13cg_md",
-    }
 
-    pl.add(SpacyProcessor(), configSpacy)
-    pl.add(ElasticSearchPackIndexProcessor())
+    config = Config(yaml.safe_load(open("config.yml", "r")), None)
+    pl.add(SpacyProcessor(), config.Spacy)
+    pl.add(NegationContextAnalyzer(), config.Negation)
 
     pl.add(
         PackIdJsonPackWriter(),
@@ -48,26 +54,9 @@ def main(input_path: str, output_path: str, max_packs: int = -1, singlePack: boo
 
     pl.initialize()
 
-    text = (
-        "Dr. Amanda, "
-        "Medical Nutrition Therapy for Hyperlipidemia. "
-        "Referral from: Julie Tester, RD, LD, CNSD "
-        "Diet: General "
-        "Daily Calorie needs (kcals): 1500 calories, assessed as HB + 20 for activity. "
-        "Daily Protein needs: 40 grams, assessed as 1.0 g/kg. "
-        "Pt has been on a 3-day calorie count and has had an average intake of 1100 calories. "
-        "She was instructed to drink 2-3 cans of liquid supplement to help promote weight gain. "
-        "She agrees with the plan and has my number for further assessment. May want a Resting "
-        "Metabolic Rate as well. She takes an aspirin a day for knee pain."
-    )
-
-    if singlePack is True:
-        pack = pl.process(text)
+    packs = pl.process_dataset(input_path)
+    for pack in packs:
         showData(pack)
-    else:
-        packs = pl.process_dataset(input_path)
-        for pack in packs:
-            showData(pack)
 
 
 def showData(pack: DataPack):
@@ -88,11 +77,25 @@ def showData(pack: DataPack):
             for ent in entity.umls_entities:
                 medical_entities.append(ent)
 
+        negation_contexts = [
+            (negation_context.text, negation_context.polarity)
+            for negation_context in pack.get(NegationContext, sentence)
+        ]
+
         print(colored("Tokens:", "red"), tokens, "\n")
-        print(colored("EntityMentions:", "red"), entities, "\n")
-        print(colored("Medical Entity Mentions:", "cyan"), medical_entities, "\n")
+        print(colored("Entity Mentions:", "red"), entities, "\n")
+        print(
+            colored("UMLS Entity Mentions detected:", "cyan"),
+            medical_entities,
+            "\n",
+        )
+        print(
+            colored("Entity Negation Contexts:", "cyan"),
+            negation_contexts,
+            "\n",
+        )
 
         input(colored("Press ENTER to continue...\n", "green"))
 
 
-main(sys.argv[1], sys.argv[2], int(sys.argv[3]), sys.argv[4])
+main(sys.argv[1], sys.argv[2], int(sys.argv[3]), sys.argv[4].lower() == "true")
diff --git a/examples/mimic_iii/sample_data/notes.txt b/examples/mimic_iii/sample_data/notes.txt
@@ -0,0 +1,6 @@
+ADDENDUM:
+RADIOLOGIC STUDIES: Radiologic studies also included a chest CT, which confirmed cavitary lesions in the left lung apex consistent with infectious process/tuberculosis.
+This also moderate-sized left pleural effusion.
+HEAD CT: Head CT showed no intracranial hemorrhage and no mass effect, but old infarction consistent with past medical history.
+ABDOMINAL CT:  Abdominal CT showed no lesions of T10 and sacrum most likely secondary to steoporosis.
+These can be followed by repeat imaging as an outpatient. 
diff --git a/forte_medical/ontology_specs/clinical_ontology.json b/forte_medical/ontology_specs/clinical_ontology.json
@@ -308,6 +308,66 @@
         "entry_name": "ftx.medical.clinical_ontology.DrugLookupWindowAnnotation",
         "parent_entry": "forte.data.ontology.top.Annotation",
         "description": "Similar to LookupWindowAnnotation however, these annotations are restricted to the segments/sections specified in the parameter - sectionOverrideSet - in DrugCNP2LookupWindow"
+      },
+      {
+        "entry_name": "ftx.medical.clinical_ontology.NegationContext",
+        "parent_entry": "forte.data.ontology.top.Annotation",
+        "description": "A span based annotation `NegationContext`, used to represent the negation context of a named entity.",
+        "attributes": [
+          {
+            "name": "polarity",
+            "type": "bool"
+          }
+        ]
+      },
+      {
+        "entry_name": "ftx.medical.clinical_ontology.UMLSConceptLink",
+        "parent_entry": "forte.data.ontology.top.Generics",
+        "description": "A UMLS concept entity, used to represent basic information about a UMLS concept",
+        "attributes": [
+          {
+              "name": "cui",
+              "type": "str"
+          },
+          {
+              "name": "name",
+              "type": "str"
+          },
+          {
+              "name": "definition",
+              "type": "str"
+          },
+          {
+              "name": "tuis",
+              "type": "List",
+              "item_type": "str"
+          },
+          {
+              "name": "aliases",
+              "type": "List",
+              "item_type": "str"
+          },
+          {
+              "name": "score",
+              "type": "str"
+          }
+        ]
+      },
+      {
+        "entry_name": "ftx.medical.clinical_ontology.MedicalEntityMention",
+        "parent_entry": "ft.onto.base_ontology.EntityMention",
+        "description": "A span based annotation class MedicalEntityMention, used to represent an Entity Mention in medical domain",
+        "attributes": [
+          {
+              "name": "umls_link",
+              "type": "str"
+          },
+          {
+              "name": "umls_entities",
+              "type": "List",
+              "item_type": "ftx.medical.clinical_ontology.UMLSConceptLink"
+          }
+        ]
       }
     ]
   }