# CI — workflow file for run #52

name: CI

# Controls when the workflow will run
on:
  schedule:
    # * is a special character in YAML so you have to quote this string.
    # Five-field cron expression: minute 0, hour 0, every day.
    - cron: '0 0 * * *'
  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  # This workflow contains a single job called "build"
  build:
    # The type of runner that the job will run on
    runs-on: ubuntu-latest
    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      - name: Checkout repo
        uses: actions/checkout@v3

      # NOTE(review): presumably the source of MATRIX_PATH, COLUMN_NAME,
      # PROJECT_ACCESSION, DATASET_EXTRA_ARGS and TSV_FIELDS read below;
      # nothing else in this workflow defines them. Confirm against the
      # repository's dotenv file.
      - name: Environment Variables from Dotenv
        uses: c-py/action-dotenv-to-setenv@v4

      - name: Install required NCBI packages
        run: |
          # Retrieve the NCBI command-line tools and make them executable.
          # --fail stops an HTTP error page from being saved as the "binary".
          curl --fail -O 'https://ftp.ncbi.nlm.nih.gov/pub/datasets/command-line/v2/linux-amd64/dataformat'
          curl --fail -O 'https://ftp.ncbi.nlm.nih.gov/pub/datasets/command-line/v2/linux-amd64/datasets'
          chmod +x datasets dataformat

      # Queries NCBI for the project's assemblies, keeps the rows whose first
      # column is not yet in the matrix file, and appends each such row (plus
      # an ENA FASTA download URL) when that URL does not answer 404.
      # Exports `new_rows` (number of rows actually appended) for later steps.
      - name: Run query and get new rows
        run: |
          existing_assemblies="${{ env.MATRIX_PATH }}"
          new_assemblies="output.tsv"
          ncbi_assemblies="ncbi.tsv"
          column_name="${{ env.COLUMN_NAME }}"
          # Upper bound on rows appended per run
          max_new_rows=200
          tab=$'\t'

          # Get TSV from dataset ('>' so a stale scratch file is never appended to)
          ./datasets summary genome accession ${{ env.PROJECT_ACCESSION }} ${{ env.DATASET_EXTRA_ARGS }} | ./dataformat tsv genome --fields ${{ env.TSV_FIELDS }} > "$ncbi_assemblies"

          # Returns 0 unless the URL is unreachable or answers 404.
          # The numeric status code is compared instead of the status line,
          # because the status line differs by protocol ("HTTP/1.1 404" vs
          # "HTTP/2 404"). A curl failure (timeout, DNS, ...) counts as "does
          # not exist", so the row is retried on a later run and no unverified
          # URL is recorded.
          url_exists() {
            local url="$1"
            local status
            echo "$url"
            status=$(curl -s -o /dev/null -m 60 --head -w '%{http_code}' "$url") || return 1
            [[ "$status" != "404" ]]
          }

          if [ ! -s "$existing_assemblies" ]; then
            # First run (matrix missing or empty): seed it with the NCBI header
            # plus the extra column. 'sed -i' cannot do this: it is a no-op on
            # an empty file and fails on a missing one.
            if [ -s "$ncbi_assemblies" ]; then
              mkdir -p "$(dirname "$existing_assemblies")"
              printf '%s\t%s\n' "$(head -n 1 "$ncbi_assemblies")" "$column_name" > "$existing_assemblies"
            fi
            # Every data row (everything after the header) is a candidate
            tail -n +2 "$ncbi_assemblies" > "$new_assemblies"
          else
            # Keep only rows whose first column is not already in the matrix
            awk -F'\t' 'NR==FNR{a[$1];next} !($1 in a)' "$existing_assemblies" "$ncbi_assemblies" > "$new_assemblies"
          fi

          added_rows=0
          # Process the candidate rows and append the verified ones to the matrix.
          # Whole lines are read (IFS=) so empty leading/trailing TSV fields
          # survive and the appended URL always lands in the last column.
          while IFS= read -r line || [[ -n "$line" ]]; do
            if [[ "$added_rows" -ge "$max_new_rows" ]]; then
              break
            fi
            col1="${line%%"$tab"*}"
            # Skip blank lines and a header row that slipped through
            if [[ -z "$col1" || "$col1" == "Assembly Accession" ]]; then
              continue
            fi
            url="https://www.ebi.ac.uk/ena/browser/api/fasta/$col1?download=true"
            if url_exists "$url"; then
              url_to_add="$url&gzip=true"
              echo "$url_to_add"
              # printf instead of 'echo -e': backslashes in the data stay literal
              printf '%s\t%s\n' "$line" "$url_to_add" >> "$existing_assemblies"
              # Not '((added_rows++))': that returns status 1 when the counter
              # is 0, which aborts the script under the default 'bash -e' shell.
              added_rows=$((added_rows + 1))
            fi
          done < "$new_assemblies"

          # Sort the data rows in place on tab-separated column 3 onward,
          # keeping the header as the first line. A bare 'sort FILE' only
          # prints to the log and leaves the file untouched.
          if [ -s "$existing_assemblies" ]; then
            sorted_assemblies="$(mktemp)"
            {
              head -n 1 "$existing_assemblies"
              tail -n +2 "$existing_assemblies" | sort -t "$tab" -k3
            } > "$sorted_assemblies"
            # Write back through the original path so its permissions are kept
            cat "$sorted_assemblies" > "$existing_assemblies"
            rm -f "$sorted_assemblies"
          fi

          # Export the number of rows actually appended (not the number of
          # candidates) so the commit message is accurate
          echo "new_rows=$added_rows" >> "$GITHUB_ENV"
          rm -f "$new_assemblies" "$ncbi_assemblies"

      # NOTE(review): '@master' is a mutable ref; consider pinning this action
      # to a release tag or commit SHA.
      - name: Commit & Push changes
        uses: actions-js/push@master
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          message: "Added ${{ env.new_rows }} new row(s)"