name: nf-core CI
# This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors
on:
  push:
    branches:
      - dev
  pull_request:
  release:
    types: [published]
# Uncomment if we need an edge release of Nextflow again
# env: NXF_EDGE: 1
jobs:
  test:
    name: Run workflow tests
    # Only run on push if this is the dev branch of the canonical nf-core/eager repository (i.e. merged PRs), not a fork
    if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/eager') }}
    runs-on: ubuntu-latest
    env:
      NXF_VER: ${{ matrix.nxf_ver }}
      NXF_ANSI_LOG: false
    strategy:
      matrix:
        # Nextflow versions: check pipeline minimum and current latest
        nxf_ver: ["20.07.1", "22.10.6"]
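        # Each nxf_ver entry spawns an independent job, with NXF_VER (exported in
        # env above) pinning the Nextflow release the launcher downloads. A rough
        # local equivalent of one matrix leg (a sketch, assuming a working local
        # Docker installation) would be:
        #   NXF_VER=20.07.1 nextflow run nf-core/eager -profile test,docker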
    steps:
      - name: Check out pipeline code
        uses: actions/checkout@v2
      - name: Check if Dockerfile or Conda environment changed
        uses: technote-space/get-diff-action@v4
        with:
          FILES: |
            Dockerfile
            environment.yml
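      # get-diff-action exports env.MATCHED_FILES when the triggering diff touches
      # any of the files listed above (this is the pattern the nf-core template
      # relies on); the two steps below use it to choose between rebuilding the
      # container and pulling the published one.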
      - name: Build new docker image
        if: env.MATCHED_FILES
        run: docker build --no-cache . -t nfcore/eager:2.5.2
      - name: Pull docker image
        if: ${{ !env.MATCHED_FILES }}
        run: |
          docker pull nfcore/eager:dev
          docker tag nfcore/eager:dev nfcore/eager:2.5.2
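      # Either way the image ends up tagged nfcore/eager:2.5.2, presumably the tag
      # the pipeline's docker profile resolves, so the test runs below can find a
      # container without pulling anything else.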
      - name: Install Nextflow
        env:
          CAPSULE_LOG: none
        run: |
          wget -qO- get.nextflow.io | bash
          sudo mv nextflow /usr/local/bin/
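      # A hypothetical sanity-check step one could slot in here (not part of the
      # original workflow):
      #   - name: Confirm Nextflow version
      #     run: nextflow -version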
      - name: HELPTEXT Run with the help flag
        run: |
          nextflow run ${GITHUB_WORKSPACE} --help
      - name: Get test data for cases where we don't use TSV input
        run: |
          git clone --single-branch --branch eager https://github.com/nf-core/test-datasets.git data
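      # The clone lands in ./data, which is where the test_direct runs below point
      # their --input globs (e.g. data/testdata/Mammoth/fastq/...).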
      - name: DELAY to try to address some odd behaviour with what appears to be a conflict between parallel htslib jobs leading to CI hangs
        run: |
          if [[ $NXF_VER = '' ]]; then sleep 1200; fi
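      # The guard only fires when NXF_VER is empty, i.e. a matrix leg tracking the
      # latest Nextflow release; with the currently pinned matrix ("20.07.1",
      # "22.10.6") the sleep is effectively dormant.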
      - name: BASIC Run the basic pipeline with directly supplied single-end FASTQ
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test_direct,docker --input 'data/testdata/Mammoth/fastq/*_R1_*.fq.gz' --single_end
      - name: BASIC Run the basic pipeline with directly supplied paired-end FASTQ
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test_direct,docker --input 'data/testdata/Mammoth/fastq/*_{R1,R2}_*tengrand.fq.gz'
      - name: BASIC Run the basic pipeline with supplied --input BAM
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test_direct,docker --input 'data/testdata/Mammoth/bam/*_R1_*.bam' --bam --single_end
      - name: BASIC Run the basic pipeline with the test profile (PE/SE, bwa aln)
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --save_reference
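      # --save_reference publishes the bwa index built in this run into results/,
      # which is what lets the next step point --bwa_index at
      # results/reference_genome/bwa_index/BWAIndex/ instead of rebuilding it.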
      - name: REFERENCE Basic workflow, with supplied indices
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --bwa_index 'results/reference_genome/bwa_index/BWAIndex/' --fasta_index 'https://github.com/nf-core/test-datasets/raw/eager/reference/Mammoth/Mammoth_MT_Krause.fasta.fai'
      - name: REFERENCE Run the basic pipeline with FASTA reference with `fna` extension
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_fna,docker
      - name: REFERENCE Test with zipped reference input
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --fasta 'https://github.com/nf-core/test-datasets/raw/eager/reference/Mammoth/Mammoth_MT_Krause.fasta.gz'
      - name: FASTP Test fastp complexity filtering
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --complexity_filter_poly_g
      - name: ADAPTERREMOVAL Test skip paired end collapsing
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --skip_collapse
      - name: ADAPTERREMOVAL Test paired end collapsing but no trimming
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_pretrim,docker --skip_trim
      - name: ADAPTERREMOVAL Run the basic pipeline with paired end data without AdapterRemoval
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --skip_adapterremoval
      - name: ADAPTERREMOVAL Run the basic pipeline with the preserve5p end option
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --preserve5p
      - name: ADAPTERREMOVAL Run the basic pipeline with the merged-reads-only option
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --mergedonly
      - name: ADAPTERREMOVAL Run the basic pipeline with the preserve5p end and merged-reads-only options
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --preserve5p --mergedonly
      - name: ADAPTER LIST Run the basic pipeline using an adapter list
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --clip_adapters_list 'https://github.com/nf-core/test-datasets/raw/eager/databases/adapters/adapter-list.txt'
      - name: ADAPTER LIST Run the basic pipeline using an adapter list, skipping adapter removal
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --clip_adapters_list 'https://github.com/nf-core/test-datasets/raw/eager/databases/adapters/adapter-list.txt' --skip_adapterremoval
      - name: POST_AR_FASTQ_TRIMMING Run the basic pipeline with post-AdapterRemoval FASTQ trimming
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --run_post_ar_trimming
      - name: POST_AR_FASTQ_TRIMMING Run the basic pipeline with post-AdapterRemoval FASTQ trimming, but skip AdapterRemoval
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --run_post_ar_trimming --skip_adapterremoval
      - name: MAPPER_CIRCULARMAPPER Test running with CircularMapper
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --mapper 'circularmapper' --circulartarget 'NC_007596.2'
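      # NC_007596.2 is, as far as the test reference goes, the accession of the
      # woolly mammoth mitochondrial genome, i.e. the circular contig that
      # CircularMapper elongates and realigns against.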
      - name: MAPPER_BWAMEM Test running with BWA-MEM
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --mapper 'bwamem' --skip_collapse
      - name: MAPPER_BT2 Test running with Bowtie2
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --mapper 'bowtie2' --bt2_alignmode 'local' --bt2_sensitivity 'sensitive' --bt2n 1 --bt2l 16 --bt2_trim5 1 --bt2_trim3 1
      - name: HOST_REMOVAL_FASTQ Run the basic pipeline with output of unmapped reads as FASTQ
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_complex,docker --hostremoval_input_fastq
      - name: BAM_FILTERING Run the basic mapping pipeline with mapping quality filtering and unmapped export
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --run_bam_filtering --bam_mapping_quality_threshold 37 --bam_unmapped_type 'fastq'
      - name: BAM_FILTERING Run the basic mapping pipeline with post-mapping length filtering
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --clip_readlength 0 --run_bam_filtering --bam_filter_minreadlength 50
      - name: PRESEQ Run the basic mapping pipeline with a different preseq mode
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --preseq_mode 'lc_extrap' --preseq_maxextrap 10000 --preseq_bootstrap 10
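      # preseq's lc_extrap mode extrapolates library complexity beyond the
      # observed sequencing depth (the default c_curve mode only interpolates);
      # the low --preseq_maxextrap and --preseq_bootstrap values here are
      # presumably just to keep CI fast on the tiny test dataset.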
      - name: DEDUPLICATION Test with DeDup
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --dedupper 'dedup' --dedup_all_merged
      - name: BEDTOOLS Test bedtools feature annotation
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --run_bedtools_coverage --anno_file 'https://github.com/nf-core/test-datasets/raw/eager/reference/Mammoth/Mammoth_MT_Krause.gff3'
      - name: MAPDAMAGE2 Test mapDamage2 damage calculation
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --damage_calculation_tool 'mapdamage'
      - name: GENOTYPING_HC Test running GATK HaplotypeCaller
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_fna,docker --run_genotyping --genotyping_tool 'hc' --gatk_hc_out_mode 'EMIT_ALL_ACTIVE_SITES' --gatk_hc_emitrefconf 'BP_RESOLUTION'
      - name: GENOTYPING_FB Test running FreeBayes
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --run_genotyping --genotyping_tool 'freebayes'
      - name: GENOTYPING_PC Test running pileupCaller
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_humanbam,docker --run_genotyping --genotyping_tool 'pileupcaller'
      - name: GENOTYPING_ANGSD Test running ANGSD genotype likelihood calculation
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_humanbam,docker --run_genotyping --genotyping_tool 'angsd'
      - name: GENOTYPING_BCFTOOLS Test running FreeBayes with bcftools stats turned on
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --run_genotyping --genotyping_tool 'freebayes' --run_bcftools_stats
      - name: SKIPPING Test that all skip flags work, i.e. BAM input skipping straight to genotyping
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_bam,docker --skip_fastqc --skip_adapterremoval --skip_deduplication --skip_qualimap --skip_preseq --skip_damage_calculation --run_genotyping --genotyping_tool 'freebayes'
      - name: TRIMBAM Test that bamUtil trimBam works alone
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --run_trim_bam
      - name: PMDTOOLS Test that PMDtools works alone
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --run_pmdtools
      - name: GENOTYPING_UG AND MULTIVCFANALYZER Test running GATK UnifiedGenotyper and MultiVCFAnalyzer, with additional VCFs
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --run_genotyping --genotyping_tool 'ug' --gatk_ug_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer --additional_vcf_files 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/testdata/Mammoth/vcf/JK2772_CATCAGTGAGTAGA_L008_R1_001.fastq.gz.tengrand.fq.combined.fq.mapped_rmdup.bam.unifiedgenotyper.vcf.gz' --write_allele_frequencies
      - name: COMPLEX LANE/LIBRARY MERGING Test running lane and library merging prior to GATK UnifiedGenotyper and running MultiVCFAnalyzer
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_complex,docker --run_genotyping --genotyping_tool 'ug' --gatk_ug_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer
      - name: GENOTYPING_UG ON TRIMMED BAM Test running GATK UnifiedGenotyper on trimmed BAMs
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --run_genotyping --run_trim_bam --genotyping_source 'trimmed' --genotyping_tool 'ug' --gatk_ug_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP'
      - name: BAM_INPUT Run the basic pipeline with the BAM input profile, skipping AdapterRemoval as there is no convertBam
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_bam,docker --skip_adapterremoval
      - name: BAM_INPUT Run the basic pipeline with the BAM input profile, converting to FASTQ for AdapterRemoval testing and downstream
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_bam,docker --run_convertinputbam
      - name: METAGENOMIC Download the MALT database
        run: |
          mkdir -p databases/malt
          readlink -f databases/malt/
          for i in index0.idx ref.db ref.idx ref.inf table0.db table0.idx taxonomy.idx taxonomy.map taxonomy.tre; do wget https://github.com/nf-core/test-datasets/raw/eager/databases/malt/"$i" -P databases/malt/; done
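      # On GitHub-hosted runners GITHUB_WORKSPACE resolves to
      # /home/runner/work/<repo>/<repo>, which is why the MALT steps below can
      # pass the database as the absolute path
      # /home/runner/work/eager/eager/databases/malt/ (the readlink above just
      # echoes that resolved path into the log for debugging).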
      - name: METAGENOMIC Run the basic pipeline but with unmapped reads going into MALT
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --run_bam_filtering --bam_unmapped_type 'fastq' --run_metagenomic_screening --metagenomic_tool 'malt' --database "/home/runner/work/eager/eager/databases/malt/" --malt_sam_output
      - name: METAGENOMIC Run the basic pipeline but with low-complexity-filtered reads going into MALT
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --run_bam_filtering --bam_unmapped_type 'fastq' --run_metagenomic_screening --metagenomic_tool 'malt' --database "/home/runner/work/eager/eager/databases/malt/" --metagenomic_complexity_filter
      - name: MALTEXTRACT Download resource files
        run: |
          mkdir -p databases/maltextract
          for i in ncbi.tre ncbi.map; do wget https://github.com/rhuebler/HOPS/raw/0.33/Resources/"$i" -P databases/maltextract/; done
      - name: MALTEXTRACT Basic run with MALT plus MaltExtract
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --run_bam_filtering --bam_unmapped_type 'fastq' --run_metagenomic_screening --metagenomic_tool 'malt' --database "/home/runner/work/eager/eager/databases/malt" --run_maltextract --maltextract_ncbifiles "/home/runner/work/eager/eager/databases/maltextract/" --maltextract_taxon_list 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/testdata/Mammoth/maltextract/MaltExtract_list.txt'
      - name: METAGENOMIC Run the basic pipeline but with unmapped reads going into Kraken
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_kraken,docker --run_bam_filtering --bam_unmapped_type 'fastq'
      - name: SNPCAPTURE Run the basic pipeline with the BAM input profile, generating statistics with a SNP capture BED
        run: |
          wget https://github.com/nf-core/test-datasets/raw/eager/reference/Human/1240K.pos.list_hs37d5.0based.bed.gz && gunzip 1240K.pos.list_hs37d5.0based.bed.gz
          nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_humanbam,docker --skip_fastqc --skip_adapterremoval --skip_deduplication --snpcapture_bed 1240K.pos.list_hs37d5.0based.bed
      - name: SEXDETERMINATION Run the basic pipeline with the BAM input profile, but don't convert the BAM and skip everything but sex determination
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_humanbam,docker --skip_fastqc --skip_adapterremoval --skip_deduplication --skip_qualimap --run_sexdeterrmine
      - name: NUCLEAR CONTAMINATION Run the basic pipeline with the BAM input profile, but don't convert the BAM and skip everything but nuclear contamination estimation
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_humanbam,docker --skip_fastqc --skip_adapterremoval --skip_deduplication --skip_qualimap --run_nuclear_contamination
      - name: MTNUCRATIO Run the basic pipeline with the BAM input profile, but don't convert the BAM and skip everything but mtnucratio
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_humanbam,docker --skip_fastqc --skip_adapterremoval --skip_deduplication --skip_qualimap --skip_preseq --skip_damage_calculation --run_mtnucratio
      - name: RESCALING Run the basic pipeline but with mapDamage rescaling of BAM files. Note this will be slow
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --run_mapdamage_rescaling --run_genotyping --genotyping_tool hc --genotyping_source 'rescaled'