Merge branch 'single-read-raw-clean' into single-read-profile
simonleandergrimm committed Dec 11, 2024
2 parents 8a09b08 + 6ad3ce2 commit 11f2145
Showing 6 changed files with 94 additions and 74 deletions.
3 changes: 0 additions & 3 deletions .github/workflows/end-to-end.yml
@@ -57,9 +57,6 @@ jobs:
- name: Run run workflow
run: nf-test test --tag run --verbose

- name: Run run_dev_se workflow
run: nf-test test --tag run_dev_se --verbose

test-validation:
runs-on: ubuntu-latest
timeout-minutes: 5
44 changes: 24 additions & 20 deletions modules/local/truncateConcat/main.nf
@@ -1,34 +1,38 @@
// Truncate concatenated read files for trial run
process TRUNCATE_CONCAT {
process TRUNCATE_CONCAT_PAIRED {
label "single"
label "BBTools"
input:
tuple val(sample), path(reads)
val n_reads
val single_end
output:

tuple val(sample), path({
single_end ?
"${sample}_trunc.fastq.gz" :
"${sample}_trunc_{1,2}.fastq.gz"
}), emit: reads
tuple val(sample), path("${sample}_trunc_{1,2}.fastq.gz"), emit: reads
shell:
'''
echo "Number of output reads: !{n_reads}"
n_lines=$(expr !{n_reads} \\* 4)
echo "Number of output lines: ${n_lines}"
if [ $(echo "!{reads}" | wc -w) -eq 2 ]; then
echo "Processing paired-end reads"
o1=!{sample}_trunc_1.fastq.gz
o2=!{sample}_trunc_2.fastq.gz
zcat !{reads[0]} | head -n ${n_lines} | gzip -c > ${o1}
zcat !{reads[1]} | head -n ${n_lines} | gzip -c > ${o2}
else
echo "Processing single-end reads"
o=!{sample}_trunc.fastq.gz
zcat !{reads[0]} | head -n ${n_lines} | gzip -c > ${o}
fi
o1=!{sample}_trunc_1.fastq.gz
o2=!{sample}_trunc_2.fastq.gz
zcat !{reads[0]} | head -n ${n_lines} | gzip -c > ${o1}
zcat !{reads[1]} | head -n ${n_lines} | gzip -c > ${o2}
'''
}

process TRUNCATE_CONCAT_SINGLE {
label "single"
label "BBTools"
input:
tuple val(sample), path(reads)
val n_reads
output:
tuple val(sample), path("${sample}_trunc.fastq.gz"), emit: reads
shell:
'''
echo "Number of output reads: !{n_reads}"
n_lines=$(expr !{n_reads} \\* 4)
echo "Number of output lines: ${n_lines}"
o=!{sample}_trunc.fastq.gz
zcat !{reads[0]} | head -n ${n_lines} | gzip -c > ${o}
'''
}
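
With the runtime single-end branch removed, each process now takes just a (sample, reads) tuple and a read count. A minimal invocation sketch, assuming illustrative sample names, file names, a placeholder include path, and a read count of 1000 (none of which come from this commit):

include { TRUNCATE_CONCAT_SINGLE } from "./modules/local/truncateConcat"   // path is a placeholder
include { TRUNCATE_CONCAT_PAIRED } from "./modules/local/truncateConcat"

workflow TRUNC_DEMO {
    main:
    // Single-end: one FASTQ per sample; emits <sample>_trunc.fastq.gz with the first 1000 reads
    se_ch = Channel.of(tuple("sampleA", [file("sampleA.fastq.gz")]))
    TRUNCATE_CONCAT_SINGLE(se_ch, 1000)

    // Paired-end: two FASTQs per sample; emits <sample>_trunc_{1,2}.fastq.gz, 1000 reads each
    pe_ch = Channel.of(tuple("sampleB", [file("sampleB_1.fastq.gz"), file("sampleB_2.fastq.gz")]))
    TRUNCATE_CONCAT_PAIRED(pe_ch, 1000)
}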
45 changes: 45 additions & 0 deletions subworkflows/local/loadSampleSheet/main.nf
@@ -0,0 +1,45 @@
/***********
| WORKFLOW |
***********/

workflow LOAD_SAMPLESHET {
take:
sample_sheet
main:
if (params.single_end) {
if (params.grouping) {
samplesheet = Channel
.fromPath(params.sample_sheet)
.splitCsv(header: true)
.map { row -> tuple(row.sample, file(row.fastq), row.group) }
samplesheet_ch = samplesheet.map { sample, read, group -> tuple(sample, [read]) }
group_ch = samplesheet.map { sample, read, group -> tuple(sample, group) }
} else {
samplesheet = Channel
.fromPath(params.sample_sheet)
.splitCsv(header: true)
.map { row -> tuple(row.sample, file(row.fastq)) }
samplesheet_ch = samplesheet.map { sample, read -> tuple(sample, [read]) }
group_ch = Channel.empty()
}
} else {
if (params.grouping) {
samplesheet = Channel
.fromPath(params.sample_sheet)
.splitCsv(header: true)
.map { row -> tuple(row.sample, file(row.fastq_1), file(row.fastq_2), row.group) }
samplesheet_ch = samplesheet.map { sample, read1, read2, group -> tuple(sample, [read1, read2]) }
group_ch = samplesheet.map { sample, read1, read2, group -> tuple(sample, group) }
} else {
samplesheet = Channel
.fromPath(params.sample_sheet)
.splitCsv(header: true)
.map { row -> tuple(row.sample, file(row.fastq_1), file(row.fastq_2)) }
samplesheet_ch = samplesheet.map { sample, read1, read2 -> tuple(sample, [read1, read2]) }
group_ch = Channel.empty()
}
}
emit:
samplesheet = samplesheet_ch
group = group_ch
}
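
For reference, the row fields accessed above (row.sample, row.fastq or row.fastq_1/row.fastq_2, and optionally row.group) imply samplesheet headers like the following; the file names and group label are invented for illustration.

Paired-end with grouping enabled:

sample,fastq_1,fastq_2,group
sampleA,sampleA_R1.fastq.gz,sampleA_R2.fastq.gz,groupX

Single-end with grouping disabled:

sample,fastq
sampleB,sampleB.fastq.gz

In every branch the samplesheet output emits (sample, [reads]) tuples, while the group output emits (sample, group) tuples, or an empty channel when params.grouping is false.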
8 changes: 6 additions & 2 deletions subworkflows/local/raw/main.nf
@@ -7,7 +7,11 @@
***************************/

include { QC } from "../../../subworkflows/local/qc"
include { TRUNCATE_CONCAT } from "../../../modules/local/truncateConcat"
if (params.single_end) {
include { TRUNCATE_CONCAT_SINGLE as TRUNCATE_CONCAT } from "../../../modules/local/truncateConcat"
} else {
include { TRUNCATE_CONCAT_PAIRED as TRUNCATE_CONCAT } from "../../../modules/local/truncateConcat"
}

/***********
| WORKFLOW |
@@ -23,7 +27,7 @@ workflow RAW {
single_end
main:
if ( n_reads_trunc > 0 ) {
out_ch = TRUNCATE_CONCAT(samplesheet_ch, n_reads_trunc, single_end)
out_ch = TRUNCATE_CONCAT(samplesheet_ch, n_reads_trunc)
} else {
out_ch = samplesheet_ch
}
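
Selecting the module at include time, rather than branching inside the workflow body, keeps the TRUNCATE_CONCAT call site above identical in both modes. The same pattern in isolation, with hypothetical process and path names:

// Hypothetical modules EXAMPLE_SINGLE and EXAMPLE_PAIRED, aliased to one name
if (params.single_end) {
    include { EXAMPLE_SINGLE as EXAMPLE } from "./modules/example"
} else {
    include { EXAMPLE_PAIRED as EXAMPLE } from "./modules/example"
}

workflow DEMO {
    main:
    EXAMPLE(Channel.of(tuple("s1", [file("s1.fastq.gz")])), 100)   // same call either way
}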
22 changes: 6 additions & 16 deletions workflows/run.nf
@@ -16,6 +16,7 @@ include { BLAST_VIRAL } from "../subworkflows/local/blastViral"
include { PROFILE } from "../subworkflows/local/profile"
include { PROCESS_OUTPUT } from "../subworkflows/local/processOutput"
include { EXTRACT_RAW_READS_FROM_PROCESSED } from "../modules/local/extractRawReadsFromProcessed"
include { LOAD_SAMPLESHET } from "../subworkflows/local/loadSampleSheet"
nextflow.preview.output = true

/*****************
@@ -40,22 +41,11 @@ workflow RUN {
}
}

// Prepare samplesheet
if ( params.grouping ) {
samplesheet = Channel
.fromPath(params.sample_sheet)
.splitCsv(header: true)
.map { row -> tuple(row.sample, file(row.fastq_1), file(row.fastq_2), row.group) }
samplesheet_ch = samplesheet.map { sample, read1, read2, group -> tuple(sample, [read1, read2]) }
group_ch = samplesheet.map { sample, read1, read2, group -> tuple(sample, group) }
} else {
samplesheet = Channel
.fromPath(params.sample_sheet)
.splitCsv(header: true)
.map { row -> tuple(row.sample, file(row.fastq_1), file(row.fastq_2)) }
samplesheet_ch = samplesheet.map { sample, read1, read2 -> tuple(sample, [read1, read2]) }
group_ch = Channel.empty()
}
// Load samplesheet
LOAD_SAMPLESHET(params.sample_sheet)
samplesheet_ch = LOAD_SAMPLESHET.out.samplesheet
group_ch = LOAD_SAMPLESHET.out.group

// Preprocessing
RAW(samplesheet_ch, params.n_reads_trunc, "2", "4 GB", "raw_concat", params.single_end)
CLEAN(RAW.out.reads, params.adapters, "2", "4 GB", "cleaned", params.single_end)
46 changes: 13 additions & 33 deletions workflows/run_dev_se.nf
@@ -13,6 +13,7 @@ include { RAW } from "../subworkflows/local/raw"
include { CLEAN } from "../subworkflows/local/clean"
include { PROCESS_OUTPUT } from "../subworkflows/local/processOutput"
include { PROFILE } from "../subworkflows/local/profile"
include { LOAD_SAMPLESHET } from "../subworkflows/local/loadSampleSheet"
nextflow.preview.output = true

/*****************
@@ -25,43 +26,22 @@ workflow RUN_DEV_SE {
start_time = new Date()
start_time_str = start_time.format("YYYY-MM-dd HH:mm:ss z (Z)")

// Prepare samplesheet
if (params.single_end) {
if (params.grouping) {
samplesheet = Channel
.fromPath(params.sample_sheet)
.splitCsv(header: true)
.map { row -> tuple(row.sample, file(row.fastq), row.group) }
samplesheet_ch = samplesheet.map { sample, read, group -> tuple(sample, [read]) }
group_ch = samplesheet.map { sample, read, group -> tuple(sample, group) }
} else {
samplesheet = Channel
.fromPath(params.sample_sheet)
.splitCsv(header: true)
.map { row -> tuple(row.sample, file(row.fastq)) }
samplesheet_ch = samplesheet.map { sample, read -> tuple(sample, [read]) }
group_ch = Channel.empty()
}
} else {
if (params.grouping) {
samplesheet = Channel
.fromPath(params.sample_sheet)
.splitCsv(header: true)
.map { row -> tuple(row.sample, file(row.fastq_1), file(row.fastq_2), row.group) }
samplesheet_ch = samplesheet.map { sample, read1, read2, group -> tuple(sample, [read1, read2]) }
group_ch = samplesheet.map { sample, read1, read2, group -> tuple(sample, group) }
} else {
samplesheet = Channel
.fromPath(params.sample_sheet)
.splitCsv(header: true)
.map { row -> tuple(row.sample, file(row.fastq_1), file(row.fastq_2)) }
samplesheet_ch = samplesheet.map { sample, read1, read2 -> tuple(sample, [read1, read2]) }
group_ch = Channel.empty()
}
// Check if grouping column exists in samplesheet
check_grouping = new File(params.sample_sheet).text.readLines()[0].contains('group') ? true : false
if (params.grouping != check_grouping) {
if (params.grouping && !check_grouping) {
throw new Exception("Grouping enabled in config file, but group column absent from samplesheet.")
} else if (!params.grouping && check_grouping) {
throw new Exception("Grouping is not enabled in config file, but group column is present in the samplesheet.")
}
// Prepare Kraken DB
kraken_db_path = "${params.ref_dir}/results/kraken_db"
}

// Load samplesheet
LOAD_SAMPLESHET(params.sample_sheet)
samplesheet_ch = LOAD_SAMPLESHET.out.samplesheet
group_ch = LOAD_SAMPLESHET.out.group

// Preprocessing
RAW(samplesheet_ch, params.n_reads_trunc, "2", "4 GB", "raw_concat", params.single_end)
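
The grouping consistency check added above makes config/samplesheet mismatches fail fast rather than surfacing later as missing columns. A hypothetical illustration of the failure mode (config value and header invented for this example):

// nextflow.config sets:   params.grouping = true
// samplesheet header is:  sample,fastq_1,fastq_2   (no group column)
// -> the workflow throws: "Grouping enabled in config file, but group column absent from samplesheet."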
