Skip to content

feat: write out a bulk export log file when we do the export #900

feat: write out a bulk export log file when we do the export

feat: write out a bulk export log file when we do the export #900

Workflow file for this run

name: CI
on:
pull_request:
push:
branches:
- main
paths-ignore:
- 'docs/**'
- '*.md'
# The goal here is to cancel older workflows when a PR is updated (because it's pointless work)
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.ref_name }}
cancel-in-progress: true
jobs:
unittest:
name: unit tests
runs-on: ubuntu-22.04
strategy:
matrix:
# while we are still private, don't go crazy with the Python versions as they eat up CI minutes
python-version: ["3.10"]
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pytest
pip install .[tests]
- name: Check out MS tool
uses: actions/checkout@v4
with:
repository: microsoft/Tools-for-Health-Data-Anonymization
path: mstool
- name: Build MS tool
run: |
sudo apt-get update
sudo apt-get install dotnet6
dotnet publish \
--runtime=linux-x64 \
--configuration=Release \
-p:PublishSingleFile=true \
--output=$HOME/.local/bin \
mstool/FHIR/src/Microsoft.Health.Fhir.Anonymizer.R4.CommandLineTool
- name: Test with pytest
run: |
python -m pytest
nlp-regression:
runs-on: ubuntu-latest
env:
UMLS_API_KEY: ${{ secrets.UMLS_API_KEY }}
steps:
- uses: actions/checkout@v4
- name: Install Docker
uses: docker/setup-buildx-action@v3
- name: Build ETL image
uses: docker/build-push-action@v5
with:
load: true # just build, no push
tags: smartonfhir/cumulus-etl:latest
- name: Download NLP images
run: docker compose --profile covid-symptom up -d --quiet-pull
- name: Run NLP
run: |
export DATADIR=$(realpath tests/data/nlp-regression)
# Run the NLP task
docker compose run --rm \
--volume $DATADIR:/in \
cumulus-etl \
/in/input \
/in/run-output \
/in/phi \
--output-format=ndjson \
--task covid_symptom__nlp_results
# Compare results
export OUTDIR=$DATADIR/run-output/covid_symptom__nlp_results
sudo chown -R $(id -u) $OUTDIR
sed -i 's/"generated_on": "[^"]*", //g' $OUTDIR/*.ndjson
diff -upr $DATADIR/expected-output $OUTDIR
echo "All Good!"
lint:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
- name: Install linters
# black is synced with the .pre-commit-hooks version
run: |
python -m pip install --upgrade pip
pip install bandit[toml] pycodestyle pylint 'black >= 24, < 25'
- name: Run pycodestyle
# E203: pycodestyle is a little too rigid about slices & whitespace
# See https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html#slices
# W503: a default ignore that we are restoring
run: |
pycodestyle --max-line-length=120 --ignore=E203,W503 .
- name: Run pylint
if: success() || failure() # still run pylint if above checks fail
run: |
pylint cumulus_etl tests
- name: Run bandit
if: success() || failure() # still run bandit if above checks fail
run: |
bandit -c pyproject.toml -r .
- name: Run black
if: success() || failure() # still run black if above checks fails
run: |
black --check --verbose --line-length 120 .