Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

65 Medical text understanding PR #66

Closed
wants to merge 12 commits into from
33 changes: 10 additions & 23 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,25 +13,17 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [ 3.6, 3.7, 3.8, 3.9 ]
python-version: [ 3.7, 3.8, 3.9 ]
torch-version: [ 1.5.0, 1.6.0, 1.7.1, 1.8.1 ]
tensorflow-version: [ 1.15.0, 2.2.0, 2.5.0 ]
test-details:
- { dep: forte-wrapper, testfile: tests/fortex/health/processors/icd_coding_processor_test.py }
- { dep: forte-wrapper, testfile: tests/fortex/health/processors/negation_context_analysis_test.py }
- { dep: scispacy, testfile: tests/fortex/health/processors/scispacy_processor_test.py }
- { testfile: tests/fortex/health/readers/mimic3_note_reader_test.py }
- { testfile: tests/fortex/health/readers/xray_image_reader_test.py }
- { testfile: tests/fortex/health/processors/xray_processor_test.py }
- { dep: xray_image_processor,testfile: tests/fortex/health/readers/xray_image_reader_test.py }
- { dep: xray_image_processor,testfile: tests/fortex/health/processors/xray_processor_test.py }
exclude:
- python-version: 3.6
torch-version: 1.7.1
- python-version: 3.6
torch-version: 1.8.1
- python-version: 3.6
tensorflow-version: 2.2.0
- python-version: 3.6
tensorflow-version: 2.5.0
- python-version: 3.7
torch-version: 1.7.1
- python-version: 3.7
Expand Down Expand Up @@ -72,7 +64,6 @@ jobs:
restore-keys: |
${{ runner.os }}-pip-
${{ runner.os }}-

- name: Install dependencies
run: |
sudo apt-get install -y libsndfile1-dev
Expand All @@ -81,49 +72,47 @@ jobs:
pip install --progress-bar off pylint==2.10.2 importlib-metadata==4.8.0 flake8==3.9.2 mypy==0.931 pytest==5.1.3 black==20.8b1 click==8.0.1
pip install --progress-bar off types-PyYAML==5.4.8 types-typed-ast==1.4.4 types-requests==2.25.6 types-dataclasses==0.1.7
pip install --progress-bar off coverage codecov

- name: Install deep learning frameworks
run: |
pip install --progress-bar off torch==${{ matrix.torch-version }}
pip install --progress-bar off tensorflow==${{ matrix.tensorflow-version }}

- name: Format check with Black
run: |
black --line-length 80 --check fortex/ tests/

- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 fortex/ examples/ tests/ ftx/ --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 fortex/ examples/ ftx/ tests/ --ignore E203,W503 --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics

- name: Lint with pylint
run: |
pylint fortex/ tests/

- name: Lint main code with mypy when torch version is not 1.5.0 and python is 3.9
run: |
if [[ ${{ matrix.torch-version }} != "1.5.0" && ${{ matrix.python-version }} == "3.9" ]]; then mypy fortex/ tests/ ; fi

- name: Install requirements for tests
run: |
pip install --progress-bar off .[test]

- name: Install Forte-wrappers-spacy
if: ${{ matrix.test-details.dep == 'forte-wrapper' ||
contains(matrix.test-details.dep, 'forte-wrapper') }}
run: |
git clone https://github.com/asyml/forte-wrappers.git
cd forte-wrappers
pip install src/spacy

- name: Install scispacy
if: ${{ matrix.test-details.dep == 'scispacy' ||
contains(matrix.test-details.dep, 'scispacy') }}
run: |
pip install --progress-bar off .[scispacy_processor]


- name: Install xray_image_processor
if: ${{ matrix.test-details.dep == 'xray_image_processor' ||
contains(matrix.test-details.dep, 'xray_image_processor') }}
run: |
pip install --progress-bar off .[xray_image_processor]

- name: Test with pytest and run coverage
run: |
coverage run -m pytest ${{ matrix.test-details.testfile}}
Expand Down Expand Up @@ -152,15 +141,13 @@ jobs:
restore-keys: |
${{ runner.os }}-pip-
${{ runner.os }}-

- name: Install dependencies
run: |
python -m pip install --progress-bar off --upgrade pip
sudo apt-get install pandoc
pip install --progress-bar off -r requirements.txt
pip install --progress-bar off -r docs/requirements.txt
pip install --progress-bar off .

- name: Build Docs
run: |
cd docs
Expand Down
74 changes: 74 additions & 0 deletions examples/medical_text_understanding/medical_text_understanding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import sys
from termcolor import colored

from forte.data.data_pack import DataPack
from forte.data.readers import PlainTextReader
from forte.pipeline import Pipeline
from forte.processors.writers import PackIdJsonPackWriter

from ft.onto.base_ontology import (
Token,
)

from ftx.medical.clinical_ontology import Hyponym, Abbreviation
from fortex.health.processors.scispacy_processor import (
ScispaCyProcessor,
)

# Maps each supported ``process`` mode to the scispaCy pipe it enables.
_PIPE_NAMES = {
    "abb": "abbreviation_detector",
    "hyp": "hyponym_detector",
}


def main(
    input_path: str,
    process: str,
):
    """Run a scispaCy-based pipeline over plain-text files and print results.

    Args:
        input_path: Location handed to ``Pipeline.process_dataset``; the
            pipeline reads it with ``PlainTextReader``.
        process: Which analysis to run. ``"abb"`` prints every
            ``Abbreviation`` entry with its long form; ``"hyp"`` prints
            every ``Hyponym`` entry with its link, hypernym and hyponym.

    Raises:
        ValueError: If ``process`` is not one of the supported modes.
    """
    # Fail fast on an unknown mode instead of silently running a pipeline
    # that has no processor and therefore prints nothing.
    if process not in _PIPE_NAMES:
        supported = ", ".join(sorted(_PIPE_NAMES))
        raise ValueError(
            f"Unknown process {process!r}; expected one of: {supported}"
        )

    pl = Pipeline[DataPack]()
    pl.set_reader(PlainTextReader())
    # Both modes share the same processor configuration; only the pipe
    # name differs.
    pl.add(
        ScispaCyProcessor(),
        {
            "entry_type": "ft.onto.base_ontology.Document",
            "model_name": "en_core_sci_sm",
            "pipe_name": _PIPE_NAMES[process],
            "prefer_gpu": True,
            "require_gpu": False,
            "gpu_id": 0,
        },
    )

    pl.initialize()

    for pack in pl.process_dataset(input_path):
        print(colored("Input Sentence:", "blue"), colored(pack.text, "red"))

        if process == "abb":
            for abv_item in pack.get(Abbreviation):
                print(
                    colored("Abbreviation:", "blue"),
                    colored(abv_item.text, "green"),
                )
                print(
                    colored("Long form text:", "blue"),
                    colored(abv_item.long_form.text, "green"),
                    "\n",
                )
        else:  # process == "hyp" (validated above)
            for detected in pack.get(Hyponym):
                print(
                    colored("hyponym_link:", "blue"),
                    colored(detected.hyponym_link, "green"),
                )
                print(
                    colored("hypernym:", "blue"),
                    colored(detected.general, "green"),
                )
                print(
                    colored("hyponym:", "blue"),
                    colored(detected.specific, "green"),
                )
# Examples (run from examples/medical_text_understanding/):
#
#   Abbreviation detection:
#     python medical_text_understanding.py sample_data_abb/ abb
#
#   Hyponym detection:
#     python medical_text_understanding.py sample_data_hyp/ hyp
if __name__ == "__main__":
    # Guarded so that importing this module does not start a pipeline run.
    if len(sys.argv) < 3:
        # Exit with a readable usage message rather than a bare IndexError.
        sys.exit(
            "Usage: python medical_text_understanding.py <input_path> <abb|hyp>"
        )
    main(sys.argv[1], sys.argv[2])
2 changes: 2 additions & 0 deletions examples/medical_text_understanding/sample_data_abb/abb.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Spinal and bulbar muscular atrophy (SBMA) is an inherited motor neuron disease caused by the expansion of a polyglutamine tract within the androgen
receptor (AR). SBMA can be caused by this easily.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Keystone plant species such as fig trees are good for the soil.
4 changes: 2 additions & 2 deletions fortex/health/readers/xray_image_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@


class XrayImageReader(PackReader):
r""":class:`ImageReader` is designed to read image files from a given folder."""
r""":class:`XrayImageReader` is designed to read image files from a given folder."""

def __init__(self):
super().__init__()
Expand Down Expand Up @@ -64,7 +64,7 @@ def default_configs(cls):

Here:

- file_ext (str): The file extension to find the target audio files
- file_ext (str): The file extension to find the target files
under a specific directory path. Default value is ".jpeg".

- read_kwargs (dict): A dictionary containing all the keyword
Expand Down
8 changes: 6 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,19 @@
"test": [
"ddt",
"testfixtures",
"transformers==4.18.0",
"transformers==4.2.2",
"protobuf==3.19.4",
"Pillow==8.4.0",
"numpy==1.21.6",
'forte @ git+https://github.com/asyml/forte',
],
"scispacy_processor": [
"scispacy==0.5.0",
"en-core-sci-sm @ https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.0/en_core_sci_sm-0.5.0.tar.gz"
],
"xray_image_processor": [
"Pillow==8.4.0",
"transformers==4.18.0",
],
},
include_package_data=True,
python_requires='>=3.6',
Expand Down