-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
71b8d78
commit f8dfef6
Showing
3 changed files
with
111 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
Spinal and bulbar muscular atrophy (SBMA) is an inherited motor neuron disease caused by the expansion of a polyglutamine tract within the androgen receptor (AR). SBMA can be caused by this easily. | ||
Keystone plant species such as fig trees are good for the soil. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
import sys | ||
from termcolor import colored | ||
|
||
from forte.data.data_pack import DataPack | ||
from forte.data.readers import PlainTextReader | ||
from forte.pipeline import Pipeline | ||
from forte.processors.writers import PackIdJsonPackWriter | ||
|
||
from ft.onto.base_ontology import ( | ||
Token, | ||
) | ||
from fortex.spacy import SpacyProcessor | ||
|
||
from ftx.medical.clinical_ontology import Hyponym, Abbreviation, Phrase | ||
from fortex.health.processors.scispacy_processor import ( | ||
ScispaCyProcessor, | ||
) | ||
|
||
|
||
def main(
    input_path: str,
    output_path: str,
    max_packs: int = -1,
    use_mimic3_reader: bool = True,
) -> None:
    """Build the ICD-coding pipeline, run it over ``input_path`` and show each pack.

    Args:
        input_path: Path to the MIMIC-III data when ``use_mimic3_reader`` is
            true, otherwise the path to a directory of plain-text notes.
        output_path: Directory handed to the JSON pack writer as ``output_dir``.
        max_packs: Maximum number of notes to read from the MIMIC-III dataset.
            Set to -1 to read all. Only passed to the MIMIC-III reader.
        use_mimic3_reader: Read from the MIMIC-III dataset when true, from
            plain text otherwise.
    """
    pl = Pipeline[DataPack]()

    # Truthiness check instead of ``is False``: a falsy non-bool value (0, None)
    # must select the plain-text reader rather than fall through to MIMIC-III.
    if not use_mimic3_reader:
        pl.set_reader(PlainTextReader())
    else:
        # NOTE(review): Mimic3DischargeNoteReader is not among the imports
        # visible at the top of this file — confirm it is imported, otherwise
        # this line raises NameError.
        pl.set_reader(
            Mimic3DischargeNoteReader(), config={"max_num_notes": max_packs}
        )

    pl.add(
        SpacyProcessor(),
        {"processors": ["sentence"], "lang": "en_ner_bionlp13cg_md"},
    )
    # NOTE(review): ICDCodingProcessor is likewise missing from the visible
    # imports — confirm before running.
    pl.add(
        ICDCodingProcessor(),
        {
            "entry_type": "ft.onto.base_ontology.Document",
            "attribute_name": "classification",
            "multi_class": True,
            # You can use other ICD predictors here.
            "model_name": "AkshatSurolia/ICD-10-Code-Prediction",
            "cuda_devices": -1,
        },
    )
    pl.add(
        PackIdJsonPackWriter(),
        {
            "output_dir": output_path,
            "indent": 2,
            "overwrite": True,
            "drop_record": True,
            "zip_pack": True,
        },
    )

    pl.initialize()

    # Display every processed pack interactively.
    for pack in pl.process_dataset(input_path):
        show_data(pack)
|
||
|
||
def show_data(pack: DataPack):
    """Print each article in ``pack`` together with its predicted ICD code.

    The ICD processor stores its prediction on every article as
    ``icd_code`` (with the coding version in ``icd_version``), so the
    articles are pulled out of the DataPack and those attributes are read.
    After each article the function waits on ``input`` until ENTER is pressed.
    """
    # NOTE(review): MedicalArticle is not among the imports visible in this
    # file — confirm it is imported, otherwise this lookup raises NameError.
    for entry in pack.get(MedicalArticle):
        body = entry.text

        # Predicted code and the ICD revision it belongs to.
        code = entry.icd_code
        version = entry.icd_version

        print(colored("Article:", "red"), body, "\n")
        print(colored(f"ICD-{version} Code:", "cyan"), code, "\n")

        input(colored("Press ENTER to continue...\n", "green"))
|
||
|
||
# Command-line usage (four positional arguments:
# input path, output directory, max number of notes, use-MIMIC3 flag).
#
# Read from MIMIC3:
#   python icd_coding.py /path/to/mimiciii/1.4/NOTEEVENTS.csv.gz /path_to_sample_output 1000 True
#
# Read from sample_data:
#   python icd_coding.py sample_data/ /path_to_sample_output 1000 False
main(
    input_path=sys.argv[1],
    output_path=sys.argv[2],
    max_packs=int(sys.argv[3]),
    # Any spelling of "true" (case-insensitive) selects the MIMIC3 reader.
    use_mimic3_reader=sys.argv[4].lower() == "true",
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
import spacy | ||
from timexy import Timexy | ||
|
||
# Base spaCy pipeline that the "timexy" component is added to.
nlp = spacy.load("en_core_web_sm")

# Optional overrides; each value below matches the documented default.
config = dict(
    kb_id_type="timex3",  # possible values: 'timex3'(default), 'timestamp'
    label="timexy",  # default: 'timexy'
    overwrite=False,  # default: False
)
# Place the component ahead of "ner" in the pipeline.
nlp.add_pipe("timexy", config=config, before="ner")

doc = nlp("Today is the 10.10.2010. I was in Paris for six years. 2 pm 3 days ago")

# One tab-separated line per entity: text, label, kb id.
for ent in doc.ents:
    print(ent.text, ent.label_, ent.kb_id_, sep="\t")