diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 20909fed..53433f7c 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [ 3.6, 3.7, 3.8, 3.9 ] + python-version: [ 3.7, 3.8, 3.9 ] torch-version: [ 1.5.0, 1.6.0, 1.7.1, 1.8.1 ] tensorflow-version: [ 1.15.0, 2.2.0, 2.5.0 ] test-details: @@ -21,17 +21,9 @@ jobs: - { dep: forte-wrapper, testfile: tests/fortex/health/processors/negation_context_analysis_test.py } - { dep: scispacy, testfile: tests/fortex/health/processors/scispacy_processor_test.py } - { testfile: tests/fortex/health/readers/mimic3_note_reader_test.py } - - { testfile: tests/fortex/health/readers/xray_image_reader_test.py } - - { testfile: tests/fortex/health/processors/xray_processor_test.py } + - { dep: xray_image_processor,testfile: tests/fortex/health/readers/xray_image_reader_test.py } + - { dep: xray_image_processor,testfile: tests/fortex/health/processors/xray_processor_test.py } exclude: - - python-version: 3.6 - torch-version: 1.7.1 - - python-version: 3.6 - torch-version: 1.8.1 - - python-version: 3.6 - tensorflow-version: 2.2.0 - - python-version: 3.6 - tensorflow-version: 2.5.0 - python-version: 3.7 torch-version: 1.7.1 - python-version: 3.7 @@ -72,7 +64,6 @@ jobs: restore-keys: | ${{ runner.os }}-pip- ${{ runner.os }}- - - name: Install dependencies run: | sudo apt-get install -y libsndfile1-dev @@ -81,35 +72,28 @@ jobs: pip install --progress-bar off pylint==2.10.2 importlib-metadata==4.8.0 flake8==3.9.2 mypy==0.931 pytest==5.1.3 black==20.8b1 click==8.0.1 pip install --progress-bar off types-PyYAML==5.4.8 types-typed-ast==1.4.4 types-requests==2.25.6 types-dataclasses==0.1.7 pip install --progress-bar off coverage codecov - - name: Install deep learning frameworks run: | pip install --progress-bar off torch==${{ matrix.torch-version }} pip install --progress-bar off tensorflow==${{ matrix.tensorflow-version }} - - name: Format check with Black run: | black --line-length 80 --check fortex/ tests/ - - name: Lint with flake8 run: | # stop the build if there are Python syntax errors or undefined names flake8 fortex/ examples/ tests/ ftx/ --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide flake8 fortex/ examples/ ftx/ tests/ --ignore E203,W503 --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: Lint with pylint run: | pylint fortex/ tests/ - - name: Lint main code with mypy when torch version is not 1.5.0 and python is 3.9 run: | if [[ ${{ matrix.torch-version }} != "1.5.0" && ${{ matrix.python-version }} == "3.9" ]]; then mypy fortex/ tests/ ; fi - - name: Install requirements for tests run: | pip install --progress-bar off .[test] - - name: Install Forte-wrappers-spacy if: ${{ matrix.test-details.dep == 'forte-wrapper' || contains(matrix.test-details.dep, 'forte-wrapper') }} @@ -117,13 +101,18 @@ jobs: git clone https://github.com/asyml/forte-wrappers.git cd forte-wrappers pip install src/spacy - - name: Install scispacy if: ${{ matrix.test-details.dep == 'scispacy' || contains(matrix.test-details.dep, 'scispacy') }} run: | pip install --progress-bar off .[scispacy_processor] - + + - name: Install xray_image_processor + if: ${{ matrix.test-details.dep == 'xray_image_processor' || + contains(matrix.test-details.dep, 'xray_image_processor') }} + run: | + pip install --progress-bar off .[xray_image_processor] + - name: Test with pytest and run coverage run: | coverage run -m pytest ${{ matrix.test-details.testfile}} @@ -152,7 +141,6 @@ jobs: restore-keys: | ${{ runner.os }}-pip- ${{ runner.os }}- - - name: Install dependencies run: | python -m pip install --progress-bar off --upgrade pip @@ -160,7 +148,6 @@ jobs: pip install --progress-bar off -r requirements.txt pip install --progress-bar off -r docs/requirements.txt pip install --progress-bar off . - - name: Build Docs run: | cd docs diff --git a/examples/medical_text_understanding/medical_text_understanding.py b/examples/medical_text_understanding/medical_text_understanding.py new file mode 100644 index 00000000..52ce2731 --- /dev/null +++ b/examples/medical_text_understanding/medical_text_understanding.py @@ -0,0 +1,74 @@ +import sys +from termcolor import colored + +from forte.data.data_pack import DataPack +from forte.data.readers import PlainTextReader +from forte.pipeline import Pipeline +from forte.processors.writers import PackIdJsonPackWriter + +from ft.onto.base_ontology import ( + Token, +) + +from ftx.medical.clinical_ontology import Hyponym, Abbreviation +from fortex.health.processors.scispacy_processor import ( + ScispaCyProcessor, +) + +def main( + input_path: str, + process: str, +): + pl = Pipeline[DataPack]() + pl.set_reader(PlainTextReader()) + if process == "abb": + pl.add( + ScispaCyProcessor(), + { + "entry_type": "ft.onto.base_ontology.Document", + "model_name": "en_core_sci_sm", + "pipe_name": "abbreviation_detector", + "prefer_gpu": True, + "require_gpu": False, + "gpu_id": 0, + }, + ) + elif process == "hyp": + pl.add( + ScispaCyProcessor(), + { + "entry_type": "ft.onto.base_ontology.Document", + "model_name": "en_core_sci_sm", + "pipe_name": "hyponym_detector", + "prefer_gpu": True, + "require_gpu": False, + "gpu_id": 0, + }, + ) + + pl.initialize() + + packs = pl.process_dataset(input_path) + + for pack in packs: + if process == "abb": + print(colored("Input Sentence:", "blue"), colored( pack.text, "red")) + for idx, abv_item in enumerate(pack.get(Abbreviation)): + print(colored("Abbreviation:", "blue"), colored( abv_item.text, "green")) + print(colored("Long form text:", "blue"), colored( abv_item.long_form.text, "green"), "\n") + + elif process == "hyp": + print(colored("Input Sentence:", "blue"), colored( pack.text, "red")) + for idx, detected in enumerate(pack.get(Hyponym)): + print(colored("hyponym_link:", "blue"), colored(detected.hyponym_link, "green")) + print(colored("hypernym:", "blue"), colored(detected.general, "green")) + print(colored("hyponym:", "blue"), colored(detected.specific, "green")) +# Examples: +''' +Read from sample_data: +Abbreviation: + python medical_text_understanding.py sample_data_abb/ abb +Hyponym detection: + python medical_text_understanding.py sample_data_hyp/ hyp +''' +main(sys.argv[1], sys.argv[2]) diff --git a/examples/medical_text_understanding/sample_data_abb/abb.txt b/examples/medical_text_understanding/sample_data_abb/abb.txt new file mode 100644 index 00000000..0f8600f5 --- /dev/null +++ b/examples/medical_text_understanding/sample_data_abb/abb.txt @@ -0,0 +1,2 @@ +Spinal and bulbar muscular atrophy (SBMA) is an inherited motor neuron disease caused by the expansion of a polyglutamine tract within the androgen +receptor (AR). SBMA can be caused by this easily. diff --git a/examples/medical_text_understanding/sample_data_hyp/hyp.txt b/examples/medical_text_understanding/sample_data_hyp/hyp.txt new file mode 100644 index 00000000..b707de89 --- /dev/null +++ b/examples/medical_text_understanding/sample_data_hyp/hyp.txt @@ -0,0 +1 @@ +Keystone plant species such as fig trees are good for the soil. diff --git a/fortex/health/readers/xray_image_reader.py b/fortex/health/readers/xray_image_reader.py index 39934197..819150bd 100644 --- a/fortex/health/readers/xray_image_reader.py +++ b/fortex/health/readers/xray_image_reader.py @@ -26,7 +26,7 @@ class XrayImageReader(PackReader): - r""":class:`ImageReader` is designed to read image files from a given folder.""" + r""":class:`XrayImageReader` is designed to read image files from a given folder.""" def __init__(self): super().__init__() @@ -64,7 +64,7 @@ def default_configs(cls): Here: - - file_ext (str): The file extension to find the target audio files + - file_ext (str): The file extension to find the target files under a specific directory path. Default value is ".jpeg". - read_kwargs (dict): A dictionary containing all the keyword diff --git a/setup.py b/setup.py index 515d989b..b4e55063 100644 --- a/setup.py +++ b/setup.py @@ -28,15 +28,19 @@ "test": [ "ddt", "testfixtures", - "transformers==4.18.0", + "transformers==4.2.2", "protobuf==3.19.4", - "Pillow==8.4.0", + "numpy==1.21.6", 'forte @ git+https://github.com/asyml/forte', ], "scispacy_processor": [ "scispacy==0.5.0", "en-core-sci-sm @ https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.0/en_core_sci_sm-0.5.0.tar.gz" ], + "xray_image_processor": [ + "Pillow==8.4.0", + "transformers==4.18.0", + ], }, include_package_data=True, python_requires='>=3.6',