diff --git a/CHANGELOG.md b/CHANGELOG.md index 0f0676a3..005ef5ae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,16 +20,18 @@ Thank you to everyone else that has contributed by reporting bugs, enhancements - [[PR #387](https://github.com/nf-core/viralrecon/pull/387)] - Software closes gracefully when encountering an error - [[PR #395](https://github.com/nf-core/viralrecon/pull/395)] - Remove minia from default assemblers because it is unreliable - [[PR #393](https://github.com/nf-core/viralrecon/pull/393)] - Changed primer set to params +- [[PR #401](https://github.com/nf-core/viralrecon/pull/401)] - Added option to add a custom annotation ### Parameters -| Old parameter | New parameter | -| ------------------- | ------------- | -| `--skip_freyja` | | -| `--freyja_repeats` | | -| `--freyja_db_name` | | -| `--freyja_barcodes` | | -| `--freyja_lineages` | | +| Old parameter | New parameter | +| ------------------- | -------------------- | +| `--skip_freyja` | | +| `--freyja_repeats` | | +| `--freyja_db_name` | | +| `--freyja_barcodes` | | +| `--freyja_lineages` | | +| | `--additional_annot` | > **NB:** Parameter has been **updated** if both old and new parameter information is present. > **NB:** Parameter has been **added** if just the new parameter information is present. 
def one_effect_per_line(table):
    """Expand comma-separated snpEff annotations into one row per effect.

    Each input row carries parallel, comma-separated lists in its GENE,
    EFFECT, HGVS_C and HGVS_P cells (columns 4-7 by position). For every
    input row:

    * if at least one effect is neither "upstream" nor "downstream", one
      output row is produced for each such effect;
    * otherwise (all effects are upstream/downstream) exactly one output row
      is produced, using the first annotation.

    Args:
        table: DataFrame whose first eight columns are, in order, CHROM, POS,
            REF, ALT, GENE, EFFECT, HGVS_C and HGVS_P.

    Returns:
        A new DataFrame with those eight columns and one annotation per row.
        An empty input yields an empty frame that still has the eight
        columns, so callers can index them safely.
    """
    columns = ["CHROM", "POS", "REF", "ALT", "GENE", "EFFECT", "HGVS_C", "HGVS_P"]
    rows = []
    for record in table.itertuples(index=False, name=None):
        chrom, pos, ref, alt = record[:4]
        # Stringify before splitting (as the previous implementation did) so
        # a missing/non-string cell does not raise AttributeError.
        genes, effects, hgvs_cs, hgvs_ps = (str(cell).split(",") for cell in record[4:8])
        # The four lists are expected to be parallel; zip pairs them by position.
        annotations = list(zip(genes, effects, hgvs_cs, hgvs_ps))

        kept = [
            annotation
            for annotation in annotations
            if "upstream" not in annotation[1] and "downstream" not in annotation[1]
        ]
        if not kept:
            # Only upstream/downstream effects: report the first one, once.
            kept = annotations[:1]

        for gene, effect, hgvs_c, hgvs_p in kept:
            rows.append(
                {
                    "CHROM": chrom,
                    "POS": pos,
                    "REF": ref,
                    "ALT": alt,
                    "GENE": gene,
                    "EFFECT": effect,
                    "HGVS_C": hgvs_c,
                    "HGVS_P": hgvs_p,
                }
            )

    # Build the frame once instead of concatenating inside the loop.
    return pd.DataFrame(rows, columns=columns)
diff --git a/modules/local/snpeff_build.nf b/modules/local/snpeff_build.nf index e1ab367f..b9c875ba 100644 --- a/modules/local/snpeff_build.nf +++ b/modules/local/snpeff_build.nf @@ -20,7 +20,14 @@ process SNPEFF_BUILD { task.ext.when == null || task.ext.when script: + def args = task.ext.args ?: '' def basename = fasta.baseName + def extension = gff.getExtension() + if (extension == "gtf") { + format = "gtf22" + } else { + format = "gff3" + } def avail_mem = 4 if (!task.memory) { @@ -36,7 +43,7 @@ process SNPEFF_BUILD { cd ../../ mkdir -p snpeff_db/${basename}/ cd snpeff_db/${basename}/ - ln -s ../../$gff genes.gff + ln -s ../../$gff genes.$extension cd ../../ echo "${basename}.genome : ${basename}" > snpeff.config @@ -46,7 +53,8 @@ process SNPEFF_BUILD { build \\ -config snpeff.config \\ -dataDir ./snpeff_db \\ - -gff3 \\ + -${format} \\ + $args \\ -v \\ ${basename} diff --git a/nextflow.config b/nextflow.config index 03e9c48b..3f244421 100644 --- a/nextflow.config +++ b/nextflow.config @@ -22,6 +22,7 @@ params { primer_left_suffix = '_LEFT' primer_right_suffix = '_RIGHT' save_reference = false + additional_annot = null // Nanopore options fastq_dir = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 083172b5..c4500c72 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -78,6 +78,14 @@ "description": "Full path to GFF annotation file.", "fa_icon": "fas fa-file-invoice" }, + "additional_annot": { + "type": "string", + "format": "file-path", + "mimetype": "text/plain", + "pattern": "^\\S+(\\.gff|\\.gtf)(\\.gz)?$", + "description": "Full path to additional annotation file in GTF or GFF format.", + "fa_icon": "fas fa-file-invoice" + }, "bowtie2_index": { "type": "string", "format": "path", diff --git a/subworkflows/local/additional_annot.nf b/subworkflows/local/additional_annot.nf new file mode 100644 index 00000000..cde476e2 --- /dev/null +++ b/subworkflows/local/additional_annot.nf @@ -0,0 +1,79 @@ +// +// Run snpEff, bgzip, tabix, 
stats and SnpSift commands +// + +include { SNPEFF_BUILD } from '../../modules/local/snpeff_build' +include { SNPEFF_ANN } from '../../modules/local/snpeff_ann' +include { SNPSIFT_EXTRACTFIELDS } from '../../modules/local/snpsift_extractfields' +include { VCF_BGZIP_TABIX_STATS } from './vcf_bgzip_tabix_stats' +include { BCFTOOLS_QUERY } from '../../modules/nf-core/bcftools/query/main' +include { MAKE_VARIANTS_LONG_TABLE as MAKE_VARIANTS_LONG_TABLE_ADDITIONAL } from '../../modules/local/make_variants_long_table' + + +workflow ADDITIONAL_ANNOT { + take: + vcf // channel: [ val(meta), [ vcf ] ] + tbi // channel: [ val(meta), [ tbi ] ] + fasta // path : genome.fasta + annot // path : additional_annot + pangolin // channel: [ val(meta), [ csv ] ] + + main: + + ch_versions = Channel.empty() + + // + // Make snpEff database + // + ch_snpeff_db = Channel.empty() + ch_snpeff_config = Channel.empty() + + SNPEFF_BUILD ( + fasta, + annot + ) + ch_snpeff_db = SNPEFF_BUILD.out.db + ch_snpeff_config = SNPEFF_BUILD.out.config + ch_versions = ch_versions.mix(SNPEFF_BUILD.out.versions) + + SNPEFF_ANN ( + vcf, + ch_snpeff_db, + ch_snpeff_config, + fasta + ) + ch_versions = ch_versions.mix(SNPEFF_ANN.out.versions.first()) + + VCF_BGZIP_TABIX_STATS ( + SNPEFF_ANN.out.vcf, + [], + [], + [] + ) + ch_versions = ch_versions.mix(VCF_BGZIP_TABIX_STATS.out.versions) + + SNPSIFT_EXTRACTFIELDS ( + VCF_BGZIP_TABIX_STATS.out.vcf + ) + ch_versions = ch_versions.mix(SNPSIFT_EXTRACTFIELDS.out.versions.first()) + + BCFTOOLS_QUERY ( + vcf.join(tbi, by: [0]), + [], + [], + [] + ) + ch_versions = ch_versions.mix(BCFTOOLS_QUERY.out.versions.first()) + + MAKE_VARIANTS_LONG_TABLE_ADDITIONAL ( + BCFTOOLS_QUERY.out.txt.collect{it[1]}, + SNPSIFT_EXTRACTFIELDS.out.txt.collect{it[1]}.ifEmpty([]), + pangolin.collect{it[1]}.ifEmpty([]) + ) + ch_versions = ch_versions.mix(MAKE_VARIANTS_LONG_TABLE_ADDITIONAL.out.versions) + + emit: + long_table = MAKE_VARIANTS_LONG_TABLE_ADDITIONAL.out.csv // channel: [ val(meta), 
// Check that every path parameter the user supplied points to an existing file
def checkPathParamList = [
    params.input, params.fasta, params.gff, params.bowtie2_index,
    params.kraken2_db, params.primer_bed, params.primer_fasta,
    params.blast_db, params.spades_hmm, params.multiqc_config,
    params.freyja_barcodes, params.freyja_lineages, params.additional_annot
]
for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } }

// Resolve inputs to file handles. The samplesheet is mandatory; the optional
// inputs fall back to an empty list so the same variable is always defined.
if (params.input)            { ch_input          = file(params.input)            } else { exit 1, 'Input samplesheet file not specified!' }
if (params.spades_hmm)       { ch_spades_hmm     = file(params.spades_hmm)       } else { ch_spades_hmm = [] }
// The fallback must assign ch_additional_gtf (not a new `additional_annot` variable),
// matching the ch_spades_hmm line above.
if (params.additional_annot) { ch_additional_gtf = file(params.additional_annot) } else { ch_additional_gtf = [] }

// Normalise the comma-separated assembler list: trimmed, lower-case entries
def assemblers = params.assemblers ? params.assemblers.split(',').collect{ it.trim().toLowerCase() } : []