optionally enable protein compression and logging #1541

Workflow file for this run

.github/workflows/short-read-mngs-viral-benchmarks.yml at 1ecfe52

	# Benchmarks the short-read-mngs workflow on viral samples and renders a
	# comparison notebook against the stored reference library.
	name: short-read-mngs viral benchmarks

	on: push

	env:
	  LC_ALL: C.UTF-8
	  LANG: C.UTF-8
	  DEBIAN_FRONTEND: noninteractive

	jobs:
	  cancel-previous:
	    runs-on: ubuntu-22.04
	    steps:
	      # NOTE(review): the original version pin was unreadable in the source
	      # this file was recovered from; 0.12.1 is an assumption - confirm
	      # against the repository history before merging.
	      - uses: styfle/cancel-workflow-action@0.12.1
	        with:
	          access_token: ${{ github.token }}

	  has-diff:
	    runs-on: ubuntu-22.04
	    outputs:
	      has_diff: ${{steps.check_for_diff.outputs.has_diff}}
	    steps:
	      - uses: actions/checkout@v4
	        with:
	          # two commits are needed so that HEAD^ exists for the diff below
	          fetch-depth: 2
	      - id: check_for_diff
	        # only run on wdl dirs with diff
	        # Emits has_diff=true when any path changed by the last commit lies
	        # in a directory whose name matches short-read-mngs. Written to
	        # $GITHUB_OUTPUT because the ::set-output command is deprecated.
	        run: |
	          if git diff --name-status HEAD^ | cut -f2- | xargs -r dirname | sort -u | grep -q short-read-mngs; then
	            echo "has_diff=true" >> "$GITHUB_OUTPUT"
	          else
	            echo "has_diff=false" >> "$GITHUB_OUTPUT"
	          fi

	  run-samples:
	    runs-on: ubuntu-22.04
	    needs: has-diff
	    # skip the expensive benchmark run unless has-diff reported a change
	    if: ${{needs['has-diff'].outputs.has_diff == 'true'}}
	    strategy:
	      matrix:
	        sample: [idseq_bench_3, idseq_bench_5]
	        settings: [default]
	    steps:
	      - uses: actions/checkout@v4
	      - name: docker build
	        # Tries to reuse the published image as a build cache; if the cached
	        # build fails, retries once without --cache-from.
	        run: |
	          IMAGE_NAME=czid-short-read-mngs-public
	          IMAGE_URI="ghcr.io/${GITHUB_REPOSITORY}/${IMAGE_NAME}"
	          CACHE_FROM=""; docker pull "$IMAGE_URI" && CACHE_FROM="--cache-from $IMAGE_URI"
	          ./scripts/docker-build.sh workflows/short-read-mngs --tag czid-short-read-mngs $CACHE_FROM \
	            || scripts/docker-build.sh workflows/short-read-mngs --tag czid-short-read-mngs
	      - name: run sample
	        run: |
	          pip install -r requirements-dev.txt

	          export PATH=$PATH:$HOME/.local/bin
	          export MINIWDL__DOWNLOAD_CACHE__GET=true
	          export MINIWDL__DOWNLOAD_CACHE__PUT=true
	          export MINIWDL__DOWNLOAD_CACHE__DIR="$(mktemp -d --tmpdir miniwdl_download_cache_XXXXXX)"
	          # configure miniwdl to auto-delete task working directories, to reduce chance of worker
	          # running out of space
	          export MINIWDL__FILE_IO__OUTPUT_HARDLINKS=true
	          export MINIWDL__FILE_IO__DELETE_WORK=success
	          # explicitly block EC2 IMDS endpoint to work around awscli issue:
	          # https://github.com/aws/aws-cli/issues/5234#issuecomment-635459464
	          # https://github.com/aws/aws-cli/issues/5262
	          sudo ip route add blackhole 169.254.169.254

	          workflows/short-read-mngs/auto_benchmark/run_local.py --dir testrun/ --docker-image-id czid-short-read-mngs \
	            --settings ${{ matrix.settings }} ${{ matrix.sample }}
	      - name: harvest output statistics
	        run: |
	          export PATH=$PATH:$HOME/.local/bin
	          taxadb download -o taxadb --type taxa
	          taxadb create -i taxadb --dbname taxadb.sqlite || true # exits nonzero w/o accessions
	          workflows/short-read-mngs/auto_benchmark/harvest.py ${{ matrix.sample }}=testrun/${{ matrix.sample }} \
	            --taxadb taxadb.sqlite > ${{ matrix.sample }}.${{ matrix.settings }}_viral.json
	      # upload/download-artifact must stay on the same major version; the
	      # notebook job below downloads what this step uploads.
	      - uses: actions/upload-artifact@v4
	        with:
	          name: ${{ matrix.sample }}.${{ matrix.settings }}_viral.json
	          path: ${{ matrix.sample }}.${{ matrix.settings }}_viral.json

	  notebook:
	    runs-on: ubuntu-22.04
	    needs: run-samples
	    strategy:
	      matrix:
	        settings: [default]
	    steps:
	      - uses: actions/checkout@v4
	      - name: fetch idseq_bench_3
	        uses: actions/download-artifact@v4
	        with:
	          name: idseq_bench_3.${{ matrix.settings }}_viral.json
	      - name: fetch idseq_bench_5
	        uses: actions/download-artifact@v4
	        with:
	          name: idseq_bench_5.${{ matrix.settings }}_viral.json
	      - name: run notebook
	        run: |
	          TAG=$(git describe --long --tags --always --dirty)
	          cp *_viral.json workflows/short-read-mngs/auto_benchmark # notebook wd
	          chmod -R 777 workflows/short-read-mngs/auto_benchmark
	          # execute notebook
	          docker run -v $(pwd)/workflows:/home/jovyan/work --workdir /home/jovyan/work \
	            --env "RUN_NAME=${{ matrix.settings }}_viral_$TAG" \
	            --env "HARVEST_DATA=$(ls -1 *_viral.json | tr '\n' ':')" \
	            --env REF_LIB=/home/jovyan/work/short-read-mngs/auto_benchmark/ref_libs/${{ matrix.settings }}_viral \
	            --env CHOWN_HOME=yes \
	            jupyter/scipy-notebook:2022-03-14 jupyter nbconvert --to notebook --execute --inplace \
	            short-read-mngs/auto_benchmark/short-read-mngs-benchmarks.ipynb
	          cp workflows/short-read-mngs/auto_benchmark/short-read-mngs-benchmarks.ipynb short-read-mngs-benchmarks.${{ matrix.settings }}_viral.ipynb
	          # make HTML with code
	          docker run -v $(pwd)/workflows:/home/jovyan/work --workdir /home/jovyan/work \
	            jupyter/scipy-notebook:2022-03-14 jupyter nbconvert --to html \
	            short-read-mngs/auto_benchmark/short-read-mngs-benchmarks.ipynb
	          mv workflows/short-read-mngs/auto_benchmark/short-read-mngs-benchmarks.html short-read-mngs-benchmarks.${{ matrix.settings }}_viral.with_code.html
	          # make HTML without code
	          docker run -v $(pwd)/workflows:/home/jovyan/work --workdir /home/jovyan/work \
	            jupyter/scipy-notebook:2022-03-14 jupyter nbconvert --to html --no-input \
	            short-read-mngs/auto_benchmark/short-read-mngs-benchmarks.ipynb
	          mv workflows/short-read-mngs/auto_benchmark/short-read-mngs-benchmarks.html short-read-mngs-benchmarks.${{ matrix.settings }}_viral.html
	          mkdir ref_libs
	          cp -r workflows/short-read-mngs/auto_benchmark/ref_libs/${{ matrix.settings }}_viral ref_libs/
	      - name: test harvest2.py
	        run: workflows/short-read-mngs/auto_benchmark/harvest2.py *_viral.json --version "$(git describe --long --tags --always --dirty)" --index-version 2021-01-22 --wall 999
	      - uses: actions/upload-artifact@v4
	        with:
	          name: short-read-mngs-benchmarks.${{ matrix.settings }}_viral.html
	          path: short-read-mngs-benchmarks.${{ matrix.settings }}_viral.html
	      - uses: actions/upload-artifact@v4
	        with:
	          name: short-read-mngs-benchmarks.${{ matrix.settings }}_viral.ipynb
	          path: |
	            short-read-mngs-benchmarks.${{ matrix.settings }}_viral.*
	            *.${{ matrix.settings }}_viral.json
	            ref_libs/
	      - name: annotate deviations
	        # Surfaces a workflow warning when the deviation marker file exists
	        # in the auto_benchmark directory after the notebook run.
	        run: |
	          if [[ -f workflows/short-read-mngs/auto_benchmark/.short-read-mngs-benchmarks-deviation ]]; then
	            echo "::warning ::auto_benchmark detected >1% deviation from reference values; review the notebook and update reference library if needed."
	          else
	            echo "auto_benchmark OK (all metrics within 1% of reference values)"
	          fi

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

optionally enable protein compression and logging #1541

Workflow file

optionally enable protein compression and logging #1541

Jobs

Run details

Workflow file for this run