diff --git a/GATK/4.2.1.0/GenotypeGvcfs.nf b/GATK/4.2.1.0/GenotypeGvcfs.nf index 5dfeba7e..7174d6ec 100644 --- a/GATK/4.2.1.0/GenotypeGvcfs.nf +++ b/GATK/4.2.1.0/GenotypeGvcfs.nf @@ -11,8 +11,8 @@ process GenotypeGVCFs { output: tuple( analysis_id, - path("${analysis_id}_${interval_file.baseName}${ext_vcf}"), - path("${analysis_id}_${interval_file.baseName}${ext_vcf}${ext_vcf_index}"), + path("${analysis_id}_${interval_file.simpleName}${ext_vcf}"), + path("${analysis_id}_${interval_file.simpleName}${ext_vcf}${ext_vcf_index}"), emit:vcf_file ) @@ -24,14 +24,14 @@ process GenotypeGVCFs { gatk --java-options "-Xmx${task.memory.toGiga()-4}G" GenotypeGVCFs \ --reference ${params.genome} \ --variant $input_files \ - --output ${analysis_id}_${interval_file.baseName}${ext_vcf} \ + --output ${analysis_id}_${interval_file.simpleName}${ext_vcf} \ --intervals ${interval_file} \ ${params.optional} """ } process GenotypeGVCF { - tag {"GATK GenotypeGVCF ${sample_id} - ${interval_file.baseName}"} + tag {"GATK GenotypeGVCF ${sample_id} - ${interval_file.simpleName}"} label 'GATK_4_2_1_0' label 'GATK_4_2_1_0_GenotypeGVCF' container = 'broadinstitute/gatk:4.2.1.0' @@ -43,8 +43,8 @@ process GenotypeGVCF { output: tuple( val(sample_id), - path("${sample_id}_${interval_file.baseName}${ext_vcf}"), - path("${sample_id}_${interval_file.baseName}${ext_vcf}${ext_vcf_index}"), + path("${sample_id}_${interval_file.simpleName}${ext_vcf}"), + path("${sample_id}_${interval_file.simpleName}${ext_vcf}${ext_vcf_index}"), emit: vcf_file ) @@ -56,7 +56,7 @@ process GenotypeGVCF { gatk --java-options "-Xmx${task.memory.toGiga()-4}G" GenotypeGVCFs \ --reference ${params.genome} \ --variant $input_files \ - --output ${sample_id}_${interval_file.baseName}${ext_vcf} \ + --output ${sample_id}_${interval_file.simpleName}${ext_vcf} \ --intervals ${interval_file} \ ${params.optional} """ diff --git a/GATK/4.2.1.0/HaplotypeCaller.nf b/GATK/4.2.1.0/HaplotypeCaller.nf index 0cad1bec..638ce33a 100644 --- a/GATK/4.2.1.0/HaplotypeCaller.nf +++ b/GATK/4.2.1.0/HaplotypeCaller.nf @@ -11,24 +11,21 @@ process HaplotypeCaller { output: tuple( val(analysis_id), - path("${analysis_id}.${interval_file.baseName}${ext_vcf}"), - path("${analysis_id}.${interval_file.baseName}${ext_vcf}${ext_vcf_index}"), + path("${analysis_id}.${interval_file.simpleName}${ext_vcf}"), + path("${analysis_id}.${interval_file.simpleName}${ext_vcf}${ext_vcf_index}"), emit: vcf_file ) script: def input_files = bam_files.collect{"$it"}.join(" --input ") - ext_vcf = ".vcf" - ext_vcf_index = ".idx" - if( params.compress ) - ext_vcf = ".vcf.gz" - ext_vcf_index = ".tbi" + ext_vcf = params.compress ? ".vcf.gz" : ".vcf" + ext_vcf_index = params.compress ? ".tbi" : ".idx" """ gatk --java-options "-Xmx${task.memory.toGiga()-4}G" HaplotypeCaller \ --reference ${params.genome} \ --input ${input_files} \ --intervals ${interval_file} \ - --output ${analysis_id}.${interval_file.baseName}${ext_vcf} \ + --output ${analysis_id}.${interval_file.simpleName}${ext_vcf} \ ${params.optional} """ } @@ -47,24 +44,21 @@ process HaplotypeCallerGVCF { output: tuple( val(sample_id), - path("${sample_id}_${interval_file.baseName}${ext_gvcf}"), - path("${sample_id}_${interval_file.baseName}${ext_gvcf}${ext_gvcf_index}"), + path("${sample_id}_${interval_file.simpleName}${ext_gvcf}"), + path("${sample_id}_${interval_file.simpleName}${ext_gvcf}${ext_gvcf_index}"), path(interval_file), emit: vcf_file ) script: - ext_gvcf = ".g.vcf" - ext_gvcf_index = ".idx" - if( params.compress ) - ext_gvcf = ".g.vcf.gz" - ext_gvcf_index = ".tbi" + ext_gvcf = params.compress ? ".g.vcf.gz" : ".g.vcf" + ext_gvcf_index = params.compress ? ".tbi" : ".idx" """ gatk --java-options "-Xmx${task.memory.toGiga()-4}G" HaplotypeCaller \ --reference ${params.genome} \ --input ${bam_file} \ --intervals ${interval_file} \ - --output ${sample_id}_${interval_file.baseName}${ext_gvcf} \ + --output ${sample_id}_${interval_file.simpleName}${ext_gvcf} \ --emit-ref-confidence ${params.emit_ref_confidence} \ ${params.optional} """ diff --git a/GATK/4.2.1.0/MergeVcfs.nf b/GATK/4.2.1.0/MergeVcfs.nf index acb683c7..426dbc9a 100644 --- a/GATK/4.2.1.0/MergeVcfs.nf +++ b/GATK/4.2.1.0/MergeVcfs.nf @@ -13,11 +13,8 @@ process MergeVcfs { script: def input_files = vcf_files.collect{"$it"}.join(" --INPUT ") - ext_vcf = ".vcf" - ext_vcf_index = ".idx" - if( params.compress ) - ext_vcf = ".vcf.gz" - ext_vcf_index = ".tbi" + ext_vcf = params.compress ? ".vcf.gz" : ".vcf" + ext_vcf_index = params.compress ? ".tbi" : ".idx" """ gatk --java-options "-Xmx${task.memory.toGiga()-4}G" MergeVcfs --INPUT ${input_files} --OUTPUT ${output_name}${ext_vcf} """ @@ -39,11 +36,8 @@ process MergeGvcfs { script: def input_files = vcf_files.collect{"$it"}.join(" --INPUT ") - ext_gvcf = ".g.vcf" - ext_gvcf_index = ".idx" - if( params.compress ) - ext_gvcf = ".g.vcf.gz" - ext_gvcf_index = ".tbi" + ext_gvcf = params.compress ? ".g.vcf.gz" : ".g.vcf" + ext_gvcf_index = params.compress ? ".tbi" : ".idx" """ gatk --java-options "-Xmx${task.memory.toGiga()-4}G" MergeVcfs --INPUT ${input_files} --OUTPUT ${output_name}${ext_gvcf} """ diff --git a/GATK/4.2.1.0/SelectVariants.nf b/GATK/4.2.1.0/SelectVariants.nf index 20036edb..f7ae7121 100644 --- a/GATK/4.2.1.0/SelectVariants.nf +++ b/GATK/4.2.1.0/SelectVariants.nf @@ -11,8 +11,8 @@ process SelectVariantsSample { output: tuple( sample_id, - path("${sample_id}_${vcf_file.baseName}${ext_vcf}"), - path("${sample_id}_${vcf_file.baseName}${ext_vcf}${ext_vcf_index}"), + path("${sample_id}_${vcf_file.simpleName}${ext_vcf}"), + path("${sample_id}_${vcf_file.simpleName}${ext_vcf}${ext_vcf_index}"), emit: vcf_file ) @@ -23,7 +23,7 @@ process SelectVariantsSample { gatk --java-options "-Xmx${task.memory.toGiga()-4}G" SelectVariants \ --reference ${params.genome} \ --variant ${vcf_file} \ - --output ${sample_id}_${vcf_file.baseName}${ext_vcf} \ + --output ${sample_id}_${vcf_file.simpleName}${ext_vcf} \ --sample-name ${sample_id} \ ${params.optional} """ diff --git a/GATK/4.2.1.0/VariantFiltration.nf b/GATK/4.2.1.0/VariantFiltration.nf index 90e75772..aa511584 100644 --- a/GATK/4.2.1.0/VariantFiltration.nf +++ b/GATK/4.2.1.0/VariantFiltration.nf @@ -9,18 +9,40 @@ process VariantFiltrationSnpIndel { tuple(analysis_id, path(vcf_file), path(vcf_idx_file)) output: - tuple(analysis_id, path("${vcf_file.baseName}.filter${ext_vcf}"), path("${vcf_file.baseName}.filter${ext_vcf}${ext_vcf_index}"), emit: vcf_file) + tuple(analysis_id, path("${vcf_file.simpleName}.filter${ext_vcf}"), path("${vcf_file.simpleName}.filter${ext_vcf}${ext_vcf_index}"), emit: vcf_file) script: ext_vcf = params.compress || vcf_file.getExtension() == ".gz" ? ".vcf.gz" : ".vcf" ext_vcf_index = params.compress || vcf_file.getExtension() == ".gz" ? ".tbi" : ".idx" """ - gatk --java-options "-Xmx${task.memory.toGiga()-4}G" SelectVariants --reference ${params.genome} --variant $vcf_file --output ${vcf_file.baseName}.snp${ext_vcf} --select-type-to-exclude INDEL - gatk --java-options "-Xmx${task.memory.toGiga()-4}G" SelectVariants --reference ${params.genome} --variant $vcf_file --output ${vcf_file.baseName}.indel${ext_vcf} --select-type-to-include INDEL + gatk --java-options "-Xmx${task.memory.toGiga()-4}G" SelectVariants \ + --reference ${params.genome} \ + --variant $vcf_file \ + --output ${vcf_file.simpleName}.snp${ext_vcf} \ + --select-type-to-exclude INDEL - gatk --java-options "-Xmx${task.memory.toGiga()-4}G" VariantFiltration --reference ${params.genome} --variant ${vcf_file.baseName}.snp${ext_vcf} --output ${vcf_file.baseName}.snp_filter${ext_vcf} ${params.snp_filter} ${params.snp_cluster} - gatk --java-options "-Xmx${task.memory.toGiga()-4}G" VariantFiltration --reference ${params.genome} --variant ${vcf_file.baseName}.indel${ext_vcf} --output ${vcf_file.baseName}.indel_filter${ext_vcf} ${params.indel_filter} + gatk --java-options "-Xmx${task.memory.toGiga()-4}G" SelectVariants \ + --reference ${params.genome} \ + --variant $vcf_file \ + --output ${vcf_file.simpleName}.indel${ext_vcf} \ + --select-type-to-include INDEL - gatk --java-options "-Xmx${task.memory.toGiga()-4}G" MergeVcfs --INPUT ${vcf_file.baseName}.snp_filter${ext_vcf} --INPUT ${vcf_file.baseName}.indel_filter${ext_vcf} --OUTPUT ${vcf_file.baseName}.filter${ext_vcf} + gatk --java-options "-Xmx${task.memory.toGiga()-4}G" VariantFiltration \ + --reference ${params.genome} \ + --variant ${vcf_file.simpleName}.snp${ext_vcf} \ + --output ${vcf_file.simpleName}.snp_filter${ext_vcf} \ + ${params.snp_filter} \ + ${params.snp_cluster} + + gatk --java-options "-Xmx${task.memory.toGiga()-4}G" VariantFiltration \ + --reference ${params.genome} \ + --variant ${vcf_file.simpleName}.indel${ext_vcf} \ + --output ${vcf_file.simpleName}.indel_filter${ext_vcf} \ + ${params.indel_filter} + + gatk --java-options "-Xmx${task.memory.toGiga()-4}G" MergeVcfs \ + --INPUT ${vcf_file.simpleName}.snp_filter${ext_vcf} \ + --INPUT ${vcf_file.simpleName}.indel_filter${ext_vcf} \ + --OUTPUT ${vcf_file.simpleName}.filter${ext_vcf} """ }