From 1e0a20eb54e43ca9a88e23319e3586bd16553ff2 Mon Sep 17 00:00:00 2001 From: nikhilranjan7 Date: Wed, 19 Oct 2022 14:00:57 +0400 Subject: [PATCH 01/12] start example --- .../sample_data/notes.txt | 2 + .../medical_text_understanding/scispacy.py | 91 +++++++++++++++++++ fortex/health/processors/test.py | 18 ++++ 3 files changed, 111 insertions(+) create mode 100644 examples/medical_text_understanding/sample_data/notes.txt create mode 100644 examples/medical_text_understanding/scispacy.py create mode 100644 fortex/health/processors/test.py diff --git a/examples/medical_text_understanding/sample_data/notes.txt b/examples/medical_text_understanding/sample_data/notes.txt new file mode 100644 index 00000000..96ff5feb --- /dev/null +++ b/examples/medical_text_understanding/sample_data/notes.txt @@ -0,0 +1,2 @@ +Spinal and bulbar muscular atrophy (SBMA) is an inherited motor neuron disease caused by the expansion of a polyglutamine tract within the androgen receptor (AR). SBMA can be caused by this easily. +Keystone plant species such as fig trees are good for the soil. \ No newline at end of file diff --git a/examples/medical_text_understanding/scispacy.py b/examples/medical_text_understanding/scispacy.py new file mode 100644 index 00000000..f503b60e --- /dev/null +++ b/examples/medical_text_understanding/scispacy.py @@ -0,0 +1,91 @@ +import sys +from termcolor import colored + +from forte.data.data_pack import DataPack +from forte.data.readers import PlainTextReader +from forte.pipeline import Pipeline +from forte.processors.writers import PackIdJsonPackWriter + +from ft.onto.base_ontology import ( + Token, +) +from fortex.spacy import SpacyProcessor + +from ftx.medical.clinical_ontology import Hyponym, Abbreviation, Phrase +from fortex.health.processors.scispacy_processor import ( + ScispaCyProcessor, +) + + +def main( + input_path: str, # Path to mimic3 data if use_mimic3_reader=True else path to notes directory + output_path: str, # Path to output directory + max_packs: int = -1, # Max number of notes to read from mimic3 dataset. Set to -1 to read all. + use_mimic3_reader: bool = True, # Read from mimic3 dataset or plain text +): + pl = Pipeline[DataPack]() + + if use_mimic3_reader is False: + pl.set_reader(PlainTextReader()) + else: + pl.set_reader(Mimic3DischargeNoteReader(), config={"max_num_notes": max_packs}) + + pl.add( + SpacyProcessor(), + {"processors": ["sentence"], "lang": "en_ner_bionlp13cg_md"}, + ) + pl.add( + ICDCodingProcessor(), + { + "entry_type": "ft.onto.base_ontology.Document", + "attribute_name": "classification", + "multi_class": True, + "model_name": "AkshatSurolia/ICD-10-Code-Prediction", # You can use other ICD predictors here. + "cuda_devices": -1, + }, + ) + pl.add( + PackIdJsonPackWriter(), + { + "output_dir": output_path, + "indent": 2, + "overwrite": True, + "drop_record": True, + "zip_pack": True, + }, + ) + + pl.initialize() + + packs = pl.process_dataset(input_path) + for pack in packs: + show_data(pack) + + +def show_data(pack: DataPack): + # The ICD processor predicts ICD code for each article. + # The result is stored as article.icd_code. + # The articles are packed into DataPack. + # Therefore, we first extract articles from DataPack and then get their ICD codes. + + for article in pack.get(MedicalArticle): + article_text = article.text + + # get the ICD code and its coding version + icd_code = article.icd_code + icd_version = article.icd_version + + print(colored("Article:", "red"), article_text, "\n") + print(colored(f"ICD-{icd_version} Code:", "cyan"), icd_code, "\n") + + input(colored("Press ENTER to continue...\n", "green")) + + +# Examples: +# +# Read from MIMIC3: +# python icd_coding.py /path/to/mimiciii/1.4/NOTEEVENTS.csv.gz /path_to_sample_output 1000 True +# +# Read from sample_data: +# python icd_coding.py sample_data/ /path_to_sample_output 1000 False +main(sys.argv[1], sys.argv[2], int(sys.argv[3]), sys.argv[4].lower() == "true") diff --git a/fortex/health/processors/test.py b/fortex/health/processors/test.py new file mode 100644 index 00000000..d9fd9f86 --- /dev/null +++ b/fortex/health/processors/test.py @@ -0,0 +1,18 @@ +import spacy +from timexy import Timexy + +nlp = spacy.load("en_core_web_sm") + +# Optionally add config if varying from default values +config = { + "kb_id_type": "timex3", # possible values: 'timex3'(default), 'timestamp' + "label": "timexy", # default: 'timexy' + "overwrite": False, # default: False +} +nlp.add_pipe("timexy", config=config, before="ner") + +doc = nlp( + "Today is the 10.10.2010. I was in Paris for six years. 2 pm 3 days ago" +) +for e in doc.ents: + print(f"{e.text}\t{e.label_}\t{e.kb_id_}") From ff8f48ba2c750e4169eec68bec9d4773aebca56a Mon Sep 17 00:00:00 2001 From: nikhilranjan7 Date: Mon, 7 Nov 2022 16:13:42 +0400 Subject: [PATCH 02/12] Add scispacy processor to example --- ...spacy.py => medical_text_understanding.py} | 66 ++++++++++--------- .../sample_data/notes.txt | 2 +- 2 files changed, 35 insertions(+), 33 deletions(-) rename examples/medical_text_understanding/{scispacy.py => medical_text_understanding.py} (56%) diff --git a/examples/medical_text_understanding/scispacy.py b/examples/medical_text_understanding/medical_text_understanding.py similarity index 56% rename from examples/medical_text_understanding/scispacy.py rename to examples/medical_text_understanding/medical_text_understanding.py index f503b60e..4e6a817e 100644 --- a/examples/medical_text_understanding/scispacy.py +++ b/examples/medical_text_understanding/medical_text_understanding.py @@ -9,57 +9,58 @@ from ft.onto.base_ontology import ( Token, ) -from fortex.spacy import SpacyProcessor from ftx.medical.clinical_ontology import Hyponym, Abbreviation, Phrase -from fortex.health.processors.scispacy_processor import ( +# from fortex.health.processors.scispacy_processor import ( +# ScispaCyProcessor, +# ) + +import sys +sys.path.append("/Users/nikhil.ranjan/Desktop/ForteHealth/fortex/health/processors") +from scispacy_processor import ( ScispaCyProcessor, ) - def main( input_path: str, # Path to mimic3 data if use_mimic3_reader=True else path to notes directory output_path: str, # Path to output directory - max_packs: int = -1, # Max number of notes to read from mimic3 dataset. Set to -1 to read all. - use_mimic3_reader: bool = True, # Read from mimic3 dataset or plain text ): pl = Pipeline[DataPack]() + pl.set_reader(PlainTextReader()) - if use_mimic3_reader is False: - pl.set_reader(PlainTextReader()) - else: - pl.set_reader(Mimic3DischargeNoteReader(), config={"max_num_notes": max_packs}) - - pl.add( - SpacyProcessor(), - {"processors": ["sentence"], "lang": "en_ner_bionlp13cg_md"}, - ) + # pl.add( + # SpacyProcessor(), + # {"processors": ["sentence"], "lang": "en_core_sci_sm"}, + # ) pl.add( - ICDCodingProcessor(), + ScispaCyProcessor(), { "entry_type": "ft.onto.base_ontology.Document", - "attribute_name": "classification", - "multi_class": True, - "model_name": "AkshatSurolia/ICD-10-Code-Prediction", # You can use other ICD predictors here. - "cuda_devices": -1, - }, - ) - pl.add( - PackIdJsonPackWriter(), - { - "output_dir": output_path, - "indent": 2, - "overwrite": True, - "drop_record": True, - "zip_pack": True, + "model_name": "en_core_sci_sm", + "pipe_name": "abbreviation_detector", + "prefer_gpu": True, + "require_gpu": False, + "gpu_id": 0, }, ) + # pl.add( + # PackIdJsonPackWriter(), + # { + # "output_dir": output_path, + # "indent": 2, + # "overwrite": True, + # "drop_record": True, + # "zip_pack": True, + # }, + # ) pl.initialize() packs = pl.process_dataset(input_path) + for pack in packs: - show_data(pack) + for idx, abv_item in enumerate(pack.get(Abbreviation)): + print(abv_item.long_form.text) def show_data(pack: DataPack): @@ -87,5 +88,6 @@ def show_data(pack: DataPack): # python icd_coding.py /path/to/mimiciii/1.4/NOTEEVENTS.csv.gz /path_to_sample_output 1000 True # # Read from sample_data: -# python icd_coding.py sample_data/ /path_to_sample_output 1000 False -main(sys.argv[1], sys.argv[2], int(sys.argv[3]), sys.argv[4].lower() == "true") +# python medical_text_understanding.py sample_data/ /path_to_sample_output 1000 False +# python medical_text_understanding.py sample_data/ . +main(sys.argv[1], sys.argv[2]) diff --git a/examples/medical_text_understanding/sample_data/notes.txt b/examples/medical_text_understanding/sample_data/notes.txt index 96ff5feb..6c7cd6df 100644 --- a/examples/medical_text_understanding/sample_data/notes.txt +++ b/examples/medical_text_understanding/sample_data/notes.txt @@ -1,2 +1,2 @@ Spinal and bulbar muscular atrophy (SBMA) is an inherited motor neuron disease caused by the expansion of a polyglutamine tract within the androgen receptor (AR). SBMA can be caused by this easily. -Keystone plant species such as fig trees are good for the soil. \ No newline at end of file +Keystone Alpha Deca Docile (ADD) plant species such as fig trees are good for the soil. MOM. \ No newline at end of file From a3f951d689daf1f77b06f76e64516a0972c24d5e Mon Sep 17 00:00:00 2001 From: nikhilranjan7 Date: Tue, 8 Nov 2022 12:21:25 +0400 Subject: [PATCH 03/12] add abbreviation detection --- .../medical_text_understanding.py | 49 ++----------------- .../sample_data/notes.txt | 3 +- 2 files changed, 5 insertions(+), 47 deletions(-) diff --git a/examples/medical_text_understanding/medical_text_understanding.py b/examples/medical_text_understanding/medical_text_understanding.py index 4e6a817e..36876f1e 100644 --- a/examples/medical_text_understanding/medical_text_understanding.py +++ b/examples/medical_text_understanding/medical_text_understanding.py @@ -22,16 +22,10 @@ ) def main( - input_path: str, # Path to mimic3 data if use_mimic3_reader=True else path to notes directory - output_path: str, # Path to output directory + input_path: str, ): pl = Pipeline[DataPack]() pl.set_reader(PlainTextReader()) - - # pl.add( - # SpacyProcessor(), - # {"processors": ["sentence"], "lang": "en_core_sci_sm"}, - # ) pl.add( ScispaCyProcessor(), { @@ -43,16 +37,6 @@ def main( "gpu_id": 0, }, ) - # pl.add( - # PackIdJsonPackWriter(), - # { - # "output_dir": output_path, - # "indent": 2, - # "overwrite": True, - # "drop_record": True, - # "zip_pack": True, - # }, - # ) pl.initialize() @@ -60,34 +44,9 @@ def main( for pack in packs: for idx, abv_item in enumerate(pack.get(Abbreviation)): - print(abv_item.long_form.text) - - -def show_data(pack: DataPack): - # The ICD processor predicts ICD code for each article. - # The result is stored as article.icd_code. - # The articles are packed into DataPack. - # Therefore, we first extract articles from DataPack and then get their ICD codes. - - for article in pack.get(MedicalArticle): - article_text = article.text - - # get the ICD code and its coding version - icd_code = article.icd_code - icd_version = article.icd_version - - print(colored("Article:", "red"), article_text, "\n") - print(colored(f"ICD-{icd_version} Code:", "cyan"), icd_code, "\n") - - input(colored("Press ENTER to continue...\n", "green")) - + print(colored("Long form text:", "red"), colored( abv_item.long_form.text, "blue"), "\n") # Examples: -# -# Read from MIMIC3: -# python icd_coding.py /path/to/mimiciii/1.4/NOTEEVENTS.csv.gz /path_to_sample_output 1000 True -# # Read from sample_data: -# python medical_text_understanding.py sample_data/ /path_to_sample_output 1000 False -# python medical_text_understanding.py sample_data/ . -main(sys.argv[1], sys.argv[2]) +# python medical_text_understanding.py sample_data/ +main(sys.argv[1]) diff --git a/examples/medical_text_understanding/sample_data/notes.txt b/examples/medical_text_understanding/sample_data/notes.txt index 6c7cd6df..abef4a83 100644 --- a/examples/medical_text_understanding/sample_data/notes.txt +++ b/examples/medical_text_understanding/sample_data/notes.txt @@ -1,2 +1 @@ -Spinal and bulbar muscular atrophy (SBMA) is an inherited motor neuron disease caused by the expansion of a polyglutamine tract within the androgen receptor (AR). SBMA can be caused by this easily. -Keystone Alpha Deca Docile (ADD) plant species such as fig trees are good for the soil. MOM. \ No newline at end of file +Spinal and bulbar muscular atrophy (SBMA) is an inherited motor neuron disease caused by the expansion of a polyglutamine tract within the androgen receptor (AR). SBMA can be caused by this easily. \ No newline at end of file From cf02ab5009b1ab8142f2b9295b4fc915f8fbf1eb Mon Sep 17 00:00:00 2001 From: nikhilranjan7 Date: Tue, 8 Nov 2022 16:22:41 +0400 Subject: [PATCH 04/12] Add hyponym example code --- .../medical_text_understanding.py | 61 +++++++++++++------ .../sample_data/notes.txt | 3 +- .../health/processors/scispacy_processor.py | 3 + 3 files changed, 49 insertions(+), 18 deletions(-) diff --git a/examples/medical_text_understanding/medical_text_understanding.py b/examples/medical_text_understanding/medical_text_understanding.py index 36876f1e..8abc9f2d 100644 --- a/examples/medical_text_understanding/medical_text_understanding.py +++ b/examples/medical_text_understanding/medical_text_understanding.py @@ -10,7 +10,8 @@ Token, ) -from ftx.medical.clinical_ontology import Hyponym, Abbreviation, Phrase +from ftx.medical.clinical_ontology import Hyponym, Abbreviation + # from fortex.health.processors.scispacy_processor import ( # ScispaCyProcessor, # ) @@ -23,30 +24,56 @@ def main( input_path: str, + process: str, ): pl = Pipeline[DataPack]() pl.set_reader(PlainTextReader()) - pl.add( - ScispaCyProcessor(), - { - "entry_type": "ft.onto.base_ontology.Document", - "model_name": "en_core_sci_sm", - "pipe_name": "abbreviation_detector", - "prefer_gpu": True, - "require_gpu": False, - "gpu_id": 0, - }, - ) + if process == "abb": + pl.add( + ScispaCyProcessor(), + { + "entry_type": "ft.onto.base_ontology.Document", + "model_name": "en_core_sci_sm", + "pipe_name": "abbreviation_detector", + "prefer_gpu": True, + "require_gpu": False, + "gpu_id": 0, + }, + ) + elif process == "hyp": + pl.add( + ScispaCyProcessor(), + { + "entry_type": "ft.onto.base_ontology.Document", + "model_name": "en_core_sci_sm", + "pipe_name": "hyponym_detector", + "prefer_gpu": True, + "require_gpu": False, + "gpu_id": 0, + }, + ) pl.initialize() packs = pl.process_dataset(input_path) for pack in packs: - for idx, abv_item in enumerate(pack.get(Abbreviation)): - print(colored("Long form text:", "red"), colored( abv_item.long_form.text, "blue"), "\n") + if process == "abb": + for idx, abv_item in enumerate(pack.get(Abbreviation)): + print(colored("Long form text:", "red"), colored( abv_item.long_form.text, "blue"), "\n") + elif process == "hyp": + for idx, detected in enumerate(pack.get(Hyponym)): + print("hyponym_link:", detected.hyponym_link) + print("hyponym_parent:", detected.parent) + print("hyponym_child:", detected.child) + # Examples: -# Read from sample_data: -# python medical_text_understanding.py sample_data/ -main(sys.argv[1]) +''' +Read from sample_data: +Abbreviation: + python medical_text_understanding.py sample_data/ abb +Hyponym detection: + python medical_text_understanding.py sample_data/ hyp +''' +main(sys.argv[1], sys.argv[2]) diff --git a/examples/medical_text_understanding/sample_data/notes.txt b/examples/medical_text_understanding/sample_data/notes.txt index abef4a83..96ff5feb 100644 --- a/examples/medical_text_understanding/sample_data/notes.txt +++ b/examples/medical_text_understanding/sample_data/notes.txt @@ -1 +1,2 @@ -Spinal and bulbar muscular atrophy (SBMA) is an inherited motor neuron disease caused by the expansion of a polyglutamine tract within the androgen receptor (AR). SBMA can be caused by this easily. \ No newline at end of file +Spinal and bulbar muscular atrophy (SBMA) is an inherited motor neuron disease caused by the expansion of a polyglutamine tract within the androgen receptor (AR). SBMA can be caused by this easily. +Keystone plant species such as fig trees are good for the soil. \ No newline at end of file diff --git a/fortex/health/processors/scispacy_processor.py b/fortex/health/processors/scispacy_processor.py index daa37073..34e6aa0b 100644 --- a/fortex/health/processors/scispacy_processor.py +++ b/fortex/health/processors/scispacy_processor.py @@ -110,11 +110,14 @@ def _process(self, input_pack: DataPack): ) = self.find_index( input_pack.text, item[2].start, item[2].end ) + print(general_concept.text) + print(item[2]) specific_concept = Phrase( pack=input_pack, begin=specific_concept_start, end=specific_concept_end, ) + #print(specific_concept.text) hlink = Hyponym( pack=input_pack, parent=general_concept, From ec04e1b3ee6a52eff0fe07b27b75ba878afc6590 Mon Sep 17 00:00:00 2001 From: nikhilranjan7 Date: Thu, 8 Dec 2022 15:41:41 +0400 Subject: [PATCH 05/12] Add abbreviation --- .../medical_text_understanding/medical_text_understanding.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/medical_text_understanding/medical_text_understanding.py b/examples/medical_text_understanding/medical_text_understanding.py index 8abc9f2d..594c834c 100644 --- a/examples/medical_text_understanding/medical_text_understanding.py +++ b/examples/medical_text_understanding/medical_text_understanding.py @@ -60,7 +60,8 @@ def main( for pack in packs: if process == "abb": for idx, abv_item in enumerate(pack.get(Abbreviation)): - print(colored("Long form text:", "red"), colored( abv_item.long_form.text, "blue"), "\n") + print(colored("Abbreviation:", "blue"), colored( abv_item.text, "green"), "\n") + print(colored("Long form text:", "blue"), colored( abv_item.long_form.text, "green"), "\n") elif process == "hyp": for idx, detected in enumerate(pack.get(Hyponym)): From c1577c7d413bb2a72f4b7f12d6567ebf2af741f1 Mon Sep 17 00:00:00 2001 From: nikhilranjan7 Date: Thu, 8 Dec 2022 15:46:43 +0400 Subject: [PATCH 06/12] fix old commits --- fortex/health/processors/scispacy_processor.py | 3 --- fortex/health/processors/test.py | 18 ------------------ 2 files changed, 21 deletions(-) delete mode 100644 fortex/health/processors/test.py diff --git a/fortex/health/processors/scispacy_processor.py b/fortex/health/processors/scispacy_processor.py index 34e6aa0b..daa37073 100644 --- a/fortex/health/processors/scispacy_processor.py +++ b/fortex/health/processors/scispacy_processor.py @@ -110,14 +110,11 @@ def _process(self, input_pack: DataPack): ) = self.find_index( input_pack.text, item[2].start, item[2].end ) - print(general_concept.text) - print(item[2]) specific_concept = Phrase( pack=input_pack, begin=specific_concept_start, end=specific_concept_end, ) - #print(specific_concept.text) hlink = Hyponym( pack=input_pack, parent=general_concept, diff --git a/fortex/health/processors/test.py b/fortex/health/processors/test.py deleted file mode 100644 index d9fd9f86..00000000 --- a/fortex/health/processors/test.py +++ /dev/null @@ -1,18 +0,0 @@ -import spacy -from timexy import Timexy - -nlp = spacy.load("en_core_web_sm") - -# Optionally add config if varying from default values -config = { - "kb_id_type": "timex3", # possible values: 'timex3'(default), 'timestamp' - "label": "timexy", # default: 'timexy' - "overwrite": False, # default: False -} -nlp.add_pipe("timexy", config=config, before="ner") - -doc = nlp( - "Today is the 10.10.2010. I was in Paris for six years. 2 pm 3 days ago" -) -for e in doc.ents: - print(f"{e.text}\t{e.label_}\t{e.kb_id_}") From 39edf56fb76996f99e58559d84b766eea787ff1e Mon Sep 17 00:00:00 2001 From: nikhilranjan7 Date: Thu, 8 Dec 2022 16:00:32 +0400 Subject: [PATCH 07/12] Add medical text understanding example --- .../medical_text_understanding.py | 24 +++++++------------ .../sample_data/notes.txt | 2 -- .../sample_data_abb/abb.txt | 2 ++ .../sample_data_hyp/hyp.txt | 1 + 4 files changed, 12 insertions(+), 17 deletions(-) delete mode 100644 examples/medical_text_understanding/sample_data/notes.txt create mode 100644 examples/medical_text_understanding/sample_data_abb/abb.txt create mode 100644 examples/medical_text_understanding/sample_data_hyp/hyp.txt diff --git a/examples/medical_text_understanding/medical_text_understanding.py b/examples/medical_text_understanding/medical_text_understanding.py index 594c834c..52ce2731 100644 --- a/examples/medical_text_understanding/medical_text_understanding.py +++ b/examples/medical_text_understanding/medical_text_understanding.py @@ -11,14 +11,7 @@ ) from ftx.medical.clinical_ontology import Hyponym, Abbreviation - -# from fortex.health.processors.scispacy_processor import ( -# ScispaCyProcessor, -# ) - -import sys -sys.path.append("/Users/nikhil.ranjan/Desktop/ForteHealth/fortex/health/processors") -from scispacy_processor import ( +from fortex.health.processors.scispacy_processor import ( ScispaCyProcessor, ) @@ -59,22 +52,23 @@ def main( for pack in packs: if process == "abb": + print(colored("Input Sentence:", "blue"), colored( pack.text, "red")) for idx, abv_item in enumerate(pack.get(Abbreviation)): - print(colored("Abbreviation:", "blue"), colored( abv_item.text, "green"), "\n") + print(colored("Abbreviation:", "blue"), colored( abv_item.text, "green")) print(colored("Long form text:", "blue"), colored( abv_item.long_form.text, "green"), "\n") elif process == "hyp": + print(colored("Input Sentence:", "blue"), colored( pack.text, "red")) for idx, detected in enumerate(pack.get(Hyponym)): - print("hyponym_link:", detected.hyponym_link) - print("hyponym_parent:", detected.parent) - print("hyponym_child:", detected.child) - + print(colored("hyponym_link:", "blue"), colored(detected.hyponym_link, "green")) + print(colored("hypernym:", "blue"), colored(detected.general, "green")) + print(colored("hyponym:", "blue"), colored(detected.specific, "green")) # Examples: ''' Read from sample_data: Abbreviation: - python medical_text_understanding.py sample_data/ abb + python medical_text_understanding.py sample_data_abb/ abb Hyponym detection: - python medical_text_understanding.py sample_data/ hyp + python medical_text_understanding.py sample_data_hyp/ hyp ''' main(sys.argv[1], sys.argv[2]) diff --git a/examples/medical_text_understanding/sample_data/notes.txt b/examples/medical_text_understanding/sample_data/notes.txt deleted file mode 100644 index 96ff5feb..00000000 --- a/examples/medical_text_understanding/sample_data/notes.txt +++ /dev/null @@ -1,2 +0,0 @@ -Spinal and bulbar muscular atrophy (SBMA) is an inherited motor neuron disease caused by the expansion of a polyglutamine tract within the androgen receptor (AR). SBMA can be caused by this easily. -Keystone plant species such as fig trees are good for the soil. \ No newline at end of file diff --git a/examples/medical_text_understanding/sample_data_abb/abb.txt b/examples/medical_text_understanding/sample_data_abb/abb.txt new file mode 100644 index 00000000..0f8600f5 --- /dev/null +++ b/examples/medical_text_understanding/sample_data_abb/abb.txt @@ -0,0 +1,2 @@ +Spinal and bulbar muscular atrophy (SBMA) is an inherited motor neuron disease caused by the expansion of a polyglutamine tract within the androgen +receptor (AR). SBMA can be caused by this easily. diff --git a/examples/medical_text_understanding/sample_data_hyp/hyp.txt b/examples/medical_text_understanding/sample_data_hyp/hyp.txt new file mode 100644 index 00000000..b707de89 --- /dev/null +++ b/examples/medical_text_understanding/sample_data_hyp/hyp.txt @@ -0,0 +1 @@ +Keystone plant species such as fig trees are good for the soil. From 4c0c4bd077b51260851082d4d2c901b4979b0b0a Mon Sep 17 00:00:00 2001 From: bhaskar-IITB Date: Mon, 19 Dec 2022 12:11:56 +0400 Subject: [PATCH 08/12] Update xray_image_reader.py --- fortex/health/readers/xray_image_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fortex/health/readers/xray_image_reader.py b/fortex/health/readers/xray_image_reader.py index 39934197..a3bf8796 100644 --- a/fortex/health/readers/xray_image_reader.py +++ b/fortex/health/readers/xray_image_reader.py @@ -64,7 +64,7 @@ def default_configs(cls): Here: - - file_ext (str): The file extension to find the target audio files + - file_ext (str): The file extension to find the target files under a specific directory path. Default value is ".jpeg". - read_kwargs (dict): A dictionary containing all the keyword From a35b58b77b0c8341f710cab2ab7f1caf1650df2f Mon Sep 17 00:00:00 2001 From: bhaskar-IITB Date: Mon, 19 Dec 2022 12:12:27 +0400 Subject: [PATCH 09/12] Update xray_image_reader.py --- fortex/health/readers/xray_image_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fortex/health/readers/xray_image_reader.py b/fortex/health/readers/xray_image_reader.py index a3bf8796..819150bd 100644 --- a/fortex/health/readers/xray_image_reader.py +++ b/fortex/health/readers/xray_image_reader.py @@ -26,7 +26,7 @@ class XrayImageReader(PackReader): - r""":class:`ImageReader` is designed to read image files from a given folder.""" + r""":class:`XrayImageReader` is designed to read image files from a given folder.""" def __init__(self): super().__init__() From c936d59121801da43a2cb3f6f2f48f4a03f1f828 Mon Sep 17 00:00:00 2001 From: bhaskar-IITB Date: Wed, 21 Dec 2022 14:51:19 +0400 Subject: [PATCH 10/12] Update main.yml remove python 3.6 downgrade numpy version add deps for x-ray processor in work flow --- .github/workflows/main.yml | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 20909fed..23d79079 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [ 3.6, 3.7, 3.8, 3.9 ] + python-version: [ 3.7, 3.8, 3.9 ] torch-version: [ 1.5.0, 1.6.0, 1.7.1, 1.8.1 ] tensorflow-version: [ 1.15.0, 2.2.0, 2.5.0 ] test-details: @@ -21,8 +21,8 @@ jobs: - { dep: forte-wrapper, testfile: tests/fortex/health/processors/negation_context_analysis_test.py } - { dep: scispacy, testfile: tests/fortex/health/processors/scispacy_processor_test.py } - { testfile: tests/fortex/health/readers/mimic3_note_reader_test.py } - - { testfile: tests/fortex/health/readers/xray_image_reader_test.py } - - { testfile: tests/fortex/health/processors/xray_processor_test.py } + - { dep: xray_image_processor,testfile: tests/fortex/health/readers/xray_image_reader_test.py } + - { dep: xray_image_processor,testfile: tests/fortex/health/processors/xray_processor_test.py } exclude: - python-version: 3.6 torch-version: 1.7.1 @@ -72,7 +72,6 @@ jobs: restore-keys: | ${{ runner.os }}-pip- ${{ runner.os }}- - - name: Install dependencies run: | sudo apt-get install -y libsndfile1-dev @@ -81,35 +80,28 @@ jobs: pip install --progress-bar off pylint==2.10.2 importlib-metadata==4.8.0 flake8==3.9.2 mypy==0.931 pytest==5.1.3 black==20.8b1 click==8.0.1 pip install --progress-bar off types-PyYAML==5.4.8 types-typed-ast==1.4.4 types-requests==2.25.6 types-dataclasses==0.1.7 pip install --progress-bar off coverage codecov - - name: Install deep learning frameworks run: | pip install --progress-bar off torch==${{ matrix.torch-version }} pip install --progress-bar off tensorflow==${{ matrix.tensorflow-version }} - - name: Format check with Black run: | black --line-length 80 --check fortex/ tests/ - - name: Lint with flake8 run: | # stop the build if there are Python syntax errors or undefined names flake8 fortex/ examples/ tests/ ftx/ --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide flake8 fortex/ examples/ ftx/ tests/ --ignore E203,W503 --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: Lint with pylint run: | pylint fortex/ tests/ - - name: Lint main code with mypy when torch version is not 1.5.0 and python is 3.9 run: | if [[ ${{ matrix.torch-version }} != "1.5.0" && ${{ matrix.python-version }} == "3.9" ]]; then mypy fortex/ tests/ ; fi - - name: Install requirements for tests run: | pip install --progress-bar off .[test] - - name: Install Forte-wrappers-spacy if: ${{ matrix.test-details.dep == 'forte-wrapper' || contains(matrix.test-details.dep, 'forte-wrapper') }} @@ -117,13 +109,18 @@ jobs: git clone https://github.com/asyml/forte-wrappers.git cd forte-wrappers pip install src/spacy - - name: Install scispacy if: ${{ matrix.test-details.dep == 'scispacy' || contains(matrix.test-details.dep, 'scispacy') }} run: | pip install --progress-bar off .[scispacy_processor] - + + - name: Install xray_image_processor + if: ${{ matrix.test-details.dep == 'xray_image_processor' || + contains(matrix.test-details.dep, 'xray_image_processor') }} + run: | + pip install --progress-bar off .[xray_image_processor] + - name: Test with pytest and run coverage run: | coverage run -m pytest ${{ matrix.test-details.testfile}} @@ -152,7 +149,6 @@ jobs: restore-keys: | ${{ runner.os }}-pip- ${{ runner.os }}- - - name: Install dependencies run: | python -m pip install --progress-bar off --upgrade pip @@ -160,7 +156,6 @@ jobs: pip install --progress-bar off -r requirements.txt pip install --progress-bar off -r docs/requirements.txt pip install --progress-bar off . - - name: Build Docs run: | cd docs From 7dc5af2ee680b2cb943756c5b387cef67d10c1f9 Mon Sep 17 00:00:00 2001 From: bhaskar-IITB Date: Wed, 21 Dec 2022 15:30:37 +0400 Subject: [PATCH 11/12] Update setup.py add xray processor deps and change numpy version --- setup.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 515d989b..b4e55063 100644 --- a/setup.py +++ b/setup.py @@ -28,15 +28,19 @@ "test": [ "ddt", "testfixtures", - "transformers==4.18.0", + "transformers==4.2.2", "protobuf==3.19.4", - "Pillow==8.4.0", + "numpy==1.21.6", 'forte @ git+https://github.com/asyml/forte', ], "scispacy_processor": [ "scispacy==0.5.0", "en-core-sci-sm @ https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.0/en_core_sci_sm-0.5.0.tar.gz" ], + "xray_image_processor": [ + "Pillow==8.4.0", + "transformers==4.18.0", + ], }, include_package_data=True, python_requires='>=3.6', From 1ddd3c2204a8561e94f2f624bb2a8b9c6027fb6d Mon Sep 17 00:00:00 2001 From: nikhilranjan7 Date: Tue, 27 Dec 2022 12:47:39 +0400 Subject: [PATCH 12/12] remove python 3.6 from exclude --- .github/workflows/main.yml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 23d79079..53433f7c 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -24,14 +24,6 @@ jobs: - { dep: xray_image_processor,testfile: tests/fortex/health/readers/xray_image_reader_test.py } - { dep: xray_image_processor,testfile: tests/fortex/health/processors/xray_processor_test.py } exclude: - - python-version: 3.6 - torch-version: 1.7.1 - - python-version: 3.6 - torch-version: 1.8.1 - - python-version: 3.6 - tensorflow-version: 2.2.0 - - python-version: 3.6 - tensorflow-version: 2.5.0 - python-version: 3.7 torch-version: 1.7.1 - python-version: 3.7