ci: add full stack NLP regression test #864
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow display name.
name: CI

# Triggers:
#   - every pull request
#   - pushes to main, unless the push only touches docs or top-level markdown files
on:
  pull_request:
  push:
    branches: [main]
    paths-ignore:
      - "docs/**"
      - "*.md"
# Runs are grouped per workflow + branch (the PR head branch when there is one, else the pushed ref).
# A newer run in the same group cancels the older one, since finishing a superseded run is wasted work.
concurrency:
  group: "${{ github.workflow }}-${{ github.head_ref || github.ref_name }}"
  cancel-in-progress: true
jobs:
  # NOTE(review): the unittest job below is commented out -- presumably only while the
  # NLP regression job is being iterated on; confirm it is re-enabled before merging.
  # unittest:
  #   name: unit tests
  #   runs-on: ubuntu-22.04
  #   strategy:
  #     matrix:
  #       # while we are still private, don't go crazy with the Python versions as they eat up CI minutes
  #       python-version: ["3.10"]
  #
  #   steps:
  #     - uses: actions/checkout@v4
  #
  #     - name: Set up Python ${{ matrix.python-version }}
  #       uses: actions/setup-python@v4
  #       with:
  #         python-version: ${{ matrix.python-version }}
  #
  #     - name: Install dependencies
  #       run: |
  #         python -m pip install --upgrade pip
  #         pip install pytest
  #         pip install .[tests]
  #
  #     - name: Check out MS tool
  #       uses: actions/checkout@v4
  #       with:
  #         repository: microsoft/Tools-for-Health-Data-Anonymization
  #         path: mstool
  #
  #     - name: Build MS tool
  #       run: |
  #         sudo apt-get update
  #         sudo apt-get install dotnet6
  #         dotnet publish \
  #           --runtime=linux-x64 \
  #           --configuration=Release \
  #           -p:PublishSingleFile=true \
  #           --output=$HOME/.local/bin \
  #           mstool/FHIR/src/Microsoft.Health.Fhir.Anonymizer.R4.CommandLineTool
  #
  #     - name: Test with pytest
  #       run: |
  #         python -m pytest

  # Full-stack NLP regression test: starts the covid-symptom compose profile, builds the ETL
  # image, runs the covid_symptom__nlp_results task on tests/data/nlp-regression/input, and
  # diffs the produced ndjson against tests/data/nlp-regression/expected-output.
  nlp-regression:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Install Docker
        uses: docker/setup-buildx-action@v3

      - name: Install Docker images
        # The secret is handed over through the step environment instead of being spliced
        # into the script text, so the shell never parses its contents.
        env:
          UMLS_API_KEY: ${{ secrets.UMLS_API_KEY }}
        run: |
          docker compose --profile covid-symptom up -d --quiet-pull

      - name: Build ETL image
        uses: docker/build-push-action@v5
        with:
          # Export the result into the runner's local Docker image store so that the
          # `docker compose run` step below uses this freshly built image. With the buildx
          # builder set up above, a build with no output stays in the build cache only.
          load: true
          push: false  # this is the default, but just for clarity: we are only building here
          tags: smartonfhir/cumulus-etl:latest

      - name: Run NLP
        env:
          UMLS_API_KEY: ${{ secrets.UMLS_API_KEY }}  # just to quiet warnings about it
        run: |
          export DATADIR="$(realpath tests/data/nlp-regression)"

          # Run the NLP task
          docker compose run --rm \
            --volume "$DATADIR:/in" \
            cumulus-etl \
            /in/input \
            /in/run-output \
            /in/phi \
            --output-format=ndjson \
            --task covid_symptom__nlp_results

          # Compare results (diff exits non-zero on any difference, which fails the step)
          diff -upr "$DATADIR/expected-output/covid_symptom__nlp_results" \
            "$DATADIR/run-output/covid_symptom__nlp_results"

  # NOTE(review): the lint job below is commented out as well -- confirm it is restored
  # together with the unittest job.
  # lint:
  #   runs-on: ubuntu-22.04
  #   steps:
  #     - uses: actions/checkout@v4
  #
  #     - name: Install linters
  #       # black is synced with the .pre-commit-hooks version
  #       run: |
  #         python -m pip install --upgrade pip
  #         pip install bandit[toml] pycodestyle pylint black==23.11.0
  #
  #     - name: Run pycodestyle
  #       # E203: pycodestyle is a little too rigid about slices & whitespace
  #       #   See https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html#slices
  #       # W503: a default ignore that we are restoring
  #       run: |
  #         pycodestyle --max-line-length=120 --ignore=E203,W503 .
  #
  #     - name: Run pylint
  #       if: success() || failure()  # still run pylint if above checks fail
  #       run: |
  #         pylint cumulus_etl tests
  #
  #     - name: Run bandit
  #       if: success() || failure()  # still run bandit if above checks fail
  #       run: |
  #         bandit -c pyproject.toml -r .
  #
  #     - name: Run black
  #       if: success() || failure()  # still run black if above checks fail
  #       run: |
  #         black --check --verbose --line-length 120 .