Updates #217

Merged: 26 commits, Feb 1, 2024
Changes from 14 commits

Commits (26)
1847de9  Update version (chris-cheshire, Nov 7, 2023)
ae11418  Run multiqc when running some only keywords (chris-cheshire, Nov 7, 2023)
054e040  Improve igg ctrl matching with target samples (chris-cheshire, Nov 8, 2023)
ad99e9f  Update version (chris-cheshire, Nov 8, 2023)
bb59ea5  Add option to change sample grouping in igv output (chris-cheshire, Nov 11, 2023)
1cea405  Add ability to dump scale factor values out of the pipeline (chris-cheshire, Nov 11, 2023)
55dac76  Update scale factor dump (chris-cheshire, Nov 11, 2023)
1575fd0  Update logging (chris-cheshire, Nov 11, 2023)
81baaf1  Auto scale igv groups together (chris-cheshire, Nov 11, 2023)
448b192  Group samples on group in igv (chris-cheshire, Nov 11, 2023)
f0a800a  Update metadata (chris-cheshire, Nov 28, 2023)
d4fa90d  Update igg control mapping (chris-cheshire, Jan 31, 2024)
6970c85  black updating (chris-cheshire, Jan 31, 2024)
a6bdc2c  Update for tech reps (chris-cheshire, Jan 31, 2024)
19ebcc5  Merge remote-tracking branch 'nf-core/nf-core-template-merge-2.12' in… (chris-cheshire, Feb 1, 2024)
cbf9cf8  Update schema (chris-cheshire, Feb 1, 2024)
7e779ef  Prettier formating (chris-cheshire, Feb 1, 2024)
0d50fff  Update template (chris-cheshire, Feb 1, 2024)
f518d1e  Update schema (chris-cheshire, Feb 1, 2024)
80a3e69  Update local versions (chris-cheshire, Feb 1, 2024)
4238a79  Update schema (chris-cheshire, Feb 1, 2024)
3bdd809  Update versions (chris-cheshire, Feb 1, 2024)
da78f72  Update changelog (chris-cheshire, Feb 1, 2024)
957b7ab  Merge branch 'template-merge' into dev (chris-cheshire, Feb 1, 2024)
037a62b  Whitespace (chris-cheshire, Feb 1, 2024)
4407282  whitespace (chris-cheshire, Feb 1, 2024)
assets/multiqc_config.yml (4 changes: 2 additions & 2 deletions)

@@ -1,8 +1,8 @@
 report_comment: >
-    This report has been generated by the <a href="https://github.com/nf-core/cutandrun/releases/tag/3.2.1" target="_blank">nf-core/cutandrun</a>
+    This report has been generated by the <a href="https://github.com/nf-core/cutandrun/releases/tag/3.2.2" target="_blank">nf-core/cutandrun</a>
     analysis pipeline. For information about how to interpret these results, please see the
-    <a href="https://nf-co.re/cutandrun/3.2.1/docs/output" target="_blank">documentation</a>.
+    <a href="https://nf-co.re/cutandrun/3.2.2/docs/output" target="_blank">documentation</a>.

 export_plots: true
bin/check_samplesheet.py (31 changes: 30 additions & 1 deletion)

@@ -244,6 +244,34 @@ def check_samplesheet(file_in, file_out, use_control):
             "WARNING: Parameter --use_control was set to false, but an control group was found in " + str(file_in) + "."
         )

+    # Calculate the exact control/replicate id combo
+    if use_control == "true":
+        for group, reps in sorted(sample_run_dict.items()):
+            # Calculate the ctrl group
+            ctrl_group = None
+            is_ctrl = False
+            for replicate, info in sorted(reps.items()):
+                ctrl_group = info[0][2]
+                if ctrl_group == "":
+                    is_ctrl = True
+                    break
+
+            # Continue if ctrl
+            if is_ctrl:
+                continue
+
+            # Get num reps
+            num_reps = len(reps)
+            num_ctrl_reps = len(sample_run_dict[ctrl_group])
+
+            # Assign actual ctrl rep id
+            for rep, info in sorted(reps.items()):
+                if num_reps == num_ctrl_reps:
+                    ctrl_group_new = ctrl_group + "_" + str(rep)
+                else:
+                    ctrl_group_new = ctrl_group + "_1"
+                info[0][2] = ctrl_group_new
+
     ## Write validated samplesheet with appropriate columns
     if len(sample_run_dict) > 0:
         out_dir = os.path.dirname(file_out)

@@ -267,7 +295,8 @@ def check_samplesheet(file_in, file_out, use_control):
                 check_group = sample_run_dict[sample][replicate][0][2]
                 for tech_rep in sample_run_dict[sample][replicate]:
                     if tech_rep[2] != check_group:
-                        print_error("Control group must match within technical replicates", tech_rep[2])
+                        tech_rep[2] = check_group
+                        # print_error("Control group must match within technical replicates", tech_rep[2])

                 ## Write to file
                 for idx, sample_info in enumerate(sample_run_dict[sample][replicate]):
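For illustration only, a minimal standalone sketch of the replicate-matching rule added above (the helper name and example replicate sets are made up, not part of the pipeline): when a target group has the same number of replicates as its IgG control group, each target replicate is paired with the control replicate of the same number; otherwise every target replicate falls back to control replicate 1.

# Sketch only: mirrors the control/replicate id rule from check_samplesheet.py above.
# resolve_control_ids() and the example inputs are hypothetical, for illustration.
def resolve_control_ids(target_reps, control_reps, control_group="igg"):
    """Return {target_replicate: resolved_control_id}."""
    mapping = {}
    for rep in sorted(target_reps):
        if len(target_reps) == len(control_reps):
            mapping[rep] = control_group + "_" + str(rep)  # 1:1 replicate pairing
        else:
            mapping[rep] = control_group + "_1"            # everything maps to control rep 1
    return mapping


if __name__ == "__main__":
    # Target with 2 reps vs control with 2 reps -> paired per replicate
    print(resolve_control_ids({1, 2}, {1, 2}))      # {1: 'igg_1', 2: 'igg_2'}
    # Target with 3 reps vs control with 1 rep -> all map to control rep 1
    print(resolve_control_ids({1, 2, 3}, {1}))      # {1: 'igg_1', 2: 'igg_1', 3: 'igg_1'}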
bin/igv_files_to_session.py (13 changes: 11 additions & 2 deletions)

@@ -99,6 +99,15 @@ def igv_files_to_session(XMLOut, ListFile, Genome, GtfBed, PathPrefix=""):
             break
     fout.close()

+    ## Construct groups
+    groups = {}
+    group_num = 1
+    for ifile, colour in fileList:
+        group = os.path.basename(ifile).split("_R")[0]
+        if group not in groups:
+            groups[group] = group_num
+            group_num = group_num + 1
+
     ## ADD RESOURCES SECTION
     XMLStr = '<?xml version="1.0" encoding="UTF-8" standalone="no"?>\n'
     XMLStr += '<Session genome="%s" hasGeneTrack="true" hasSequenceTrack="true" locus="All" version="8">\n' % (Genome)

@@ -159,8 +168,8 @@ def igv_files_to_session(XMLOut, ListFile, Genome, GtfBed, PathPrefix=""):
                 )
             elif extension in [".bw", ".bigwig", ".tdf", ".bedGraph", ".bedgraph"]:
                 XMLStr += (
-                    '\t\t<Track altColor="0,0,178" autoScale="true" clazz="org.broad.igv.track.DataSourceTrack" color="%s" '
-                    % (colour)
+                    '\t\t<Track altColor="0,0,178" autoScale="true" autoscaleGroup="%s" clazz="org.broad.igv.track.DataSourceTrack" color="%s" '
+                    % (groups[os.path.basename(ifile).split("_R")[0]], colour)
                 )
                 XMLStr += 'displayMode="COLLAPSED" featureVisibilityWindow="-1" fontSize="12" height="100" '
                 XMLStr += (
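As a rough illustration of the grouping introduced above: files are keyed on the filename prefix before "_R", so replicate tracks from the same sample group share one autoscaleGroup id in the IGV session. The filenames below are made-up examples.

import os

# Sketch of the group-numbering logic added to igv_files_to_session.py above;
# the file names are hypothetical.
files = ["h3k27me3_R1.bigWig", "h3k27me3_R2.bigWig", "h3k4me3_R1.bigWig"]

groups = {}
group_num = 1
for ifile in files:
    group = os.path.basename(ifile).split("_R")[0]  # prefix before "_R" is the group key
    if group not in groups:
        groups[group] = group_num
        group_num += 1

print(groups)  # {'h3k27me3': 1, 'h3k4me3': 2}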
conf/flowswitch.config (4 changes: 2 additions & 2 deletions)

@@ -88,7 +88,7 @@ if(params.only_preqc) {
     params.run_remove_linear_dups = false
     params.run_peak_calling = false
     params.run_reporting = false
-    params.run_multiqc = false
+    params.run_multiqc = true
 }

 if(params.only_alignment) {

@@ -99,7 +99,7 @@ if(params.only_alignment) {
     params.run_remove_linear_dups = false
     params.run_peak_calling = false
    params.run_reporting = false
-    params.run_multiqc = false
+    params.run_multiqc = true
 }

 if(params.only_filtering) {
modules/local/python/igv_session.nf (14 changes: 11 additions & 3 deletions)

@@ -15,6 +15,7 @@ process IGV_SESSION {
     path beds
     path secondary_beds
     path bigwig
+    val sort_by_groups

     output:
     path('*.{txt,xml,bed,bigWig,fa,fai,fna,gtf,gff,narrowPeak,broadPeak,gz,tbi,bedGraph}', includeInputs:true)

@@ -27,10 +28,17 @@ process IGV_SESSION {
     output = ''
     colours = [:]
     colour_pos = 0
+    file_list = []

-    file_list = beds.collect{it.toString()}.sort()
-    file_list += secondary_beds.collect{it.toString()}.sort()
-    file_list += bigwig.collect{it.toString()}.sort()
+    if(sort_by_groups) {
+        file_list = beds.collect{it.toString()}.sort()
+        file_list += secondary_beds.collect{it.toString()}.sort()
+        file_list += bigwig.collect{it.toString()}.sort()
+    }
+    else {
+        file_list = (bigwig + secondary_beds + beds).collect{ it.toString() }.sort()
+    }
     for(file in file_list){
         file_split = file.split('_R')
         group = file_split[0]
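A minimal Python sketch of the two orderings above, using made-up file names (not part of the module): with sort_by_groups the session lists each file type as its own alphabetical block (primary peaks, then secondary peaks, then bigWigs); otherwise all files are pooled into one alphabetical list, which interleaves the types so a sample's tracks sit next to each other.

# Sketch only: re-expresses the ordering switch from igv_session.nf in Python.
beds = ["h3k27me3_R1.peaks.bed", "h3k4me3_R1.peaks.bed"]
secondary_beds = ["h3k27me3_R1.seacr.bed", "h3k4me3_R1.seacr.bed"]
bigwig = ["h3k27me3_R1.bigWig", "h3k4me3_R1.bigWig"]

def build_file_list(sort_by_groups):
    if sort_by_groups:
        # Per-type blocks, each sorted alphabetically
        return sorted(beds) + sorted(secondary_beds) + sorted(bigwig)
    # One pooled alphabetical list, interleaving the file types per sample
    return sorted(bigwig + secondary_beds + beds)

print(build_file_list(True))
print(build_file_list(False))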
nextflow.config (4 changes: 3 additions & 1 deletion)

@@ -95,7 +95,9 @@ params {
     skip_peak_qc = false
     skip_preseq = false
     skip_multiqc = false
+    dump_scale_factors = false
     igv_show_gene_names = true
+    igv_sort_by_groups = true
     min_frip_overlap = 0.20
     min_peak_overlap = 0.20

@@ -339,7 +341,7 @@ manifest {
     description = """Analysis pipeline for CUT&RUN and CUT&TAG experiments that includes sequencing QC, spike-in normalisation, IgG control normalisation, peak calling and downstream peak analysis."""
     mainScript = 'main.nf'
     nextflowVersion = '!>=23.04.0'
-    version = '3.2.1'
+    version = '3.2.2'
     doi = 'https://doi.org/10.5281/zenodo.5653535'
 }
subworkflows/local/prepare_genome.nf (4 changes: 2 additions & 2 deletions)

@@ -37,7 +37,7 @@ workflow PREPARE_GENOME {
         ch_fasta = GUNZIP_FASTA ( [ [id:"target_fasta"], params.fasta ] ).gunzip
         ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions)
     } else {
-        ch_fasta = Channel.from( file(params.fasta) ).map { row -> [[id:"spikein_fasta"], row] }
+        ch_fasta = Channel.from( file(params.fasta) ).map { row -> [[id:"fasta"], row] }
     }

     /*

@@ -116,7 +116,7 @@ workflow PREPARE_GENOME {
     /*
     * Index genome fasta file
     */
-    ch_fasta_index = SAMTOOLS_FAIDX ( ch_fasta, [[id:"spikein_fasta"], []] ).fai
+    ch_fasta_index = SAMTOOLS_FAIDX ( ch_fasta, [[id:"fasta"], []] ).fai
     ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions)

     /*
subworkflows/local/prepare_peakcalling.nf (34 changes: 34 additions & 0 deletions)

@@ -68,6 +68,23 @@ workflow PREPARE_PEAKCALLING {
         ch_bedgraph = BEDTOOLS_GENOMECOV.out.genomecov
         //EXAMPLE CHANNEL STRUCT: [META], BEDGRAPH]
         //BEDTOOLS_GENOMECOV.out.genomecov | view

+        /*
+        * CHANNEL: Dump scale factor values
+        */
+        if(params.dump_scale_factors) {
+            ch_scale_factor = ch_bam_scale_factor
+                .map { [it[0].id, it[0].group, it[2]] }
+                .toSortedList( { a, b -> a[0] <=> b[0] } )
+                .map { list ->
+                    new File('scale-factors.csv').withWriter('UTF-8') { writer ->
+                        list.each { item ->
+                            str = item[0] + "," + item[1] + "," + item[2]
+                            writer.write(str + "\n")
+                        }
+                    }
+                }
+        }
     } else {
         /*
         * CHANNEL: Combine bam and bai files on id

@@ -130,6 +147,23 @@ workflow PREPARE_PEAKCALLING {
         ch_bedgraph = DEEPTOOLS_BAMCOVERAGE.out.bedgraph
         // EXAMPLE CHANNEL STRUCT: [[META], BAM, BAI]
         //ch_bedgraph | view

+        /*
+        * CHANNEL: Dump scale factor values
+        */
+        if(params.dump_scale_factors) {
+            ch_scale_factor = ch_bam_bai_scale_factor
+                .map { [it[0].id, it[0].group, it[3]] }
+                .toSortedList( { a, b -> a[0] <=> b[0] } )
+                .map { list ->
+                    new File('scale-factors.csv').withWriter('UTF-8') { writer ->
+                        list.each { item ->
+                            str = item[0] + "," + item[1] + "," + item[2]
+                            writer.write(str + "\n")
+                        }
+                    }
+                }
+        }
     }

     /*
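A sketch of what the dump above produces, assuming example (id, group, scale factor) tuples (the sample ids, groups and factors are made up): entries are sorted by sample id and written as a three-column CSV named scale-factors.csv.

import csv

# Sketch only: approximates the scale-factor dump added above with hypothetical data.
records = [
    ("h3k4me3_R2", "h3k4me3", 1.37),
    ("h3k4me3_R1", "h3k4me3", 0.92),
    ("igg_R1", "igg", 1.00),
]

with open("scale-factors.csv", "w", newline="", encoding="utf-8") as handle:
    writer = csv.writer(handle)
    # Sort by sample id, then write id,group,factor rows
    for sample_id, group, factor in sorted(records, key=lambda rec: rec[0]):
        writer.writerow([sample_id, group, factor])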
workflows/cutandrun.nf (56 changes: 15 additions & 41 deletions)

@@ -483,31 +483,17 @@ workflow CUTANDRUN {
         /*
         * MODULE: Call peaks using SEACR with IgG control
         */
         if('seacr' in callers) {
-            /*
-            * CHANNEL: Subset control groups
-            */
-            ch_bedgraph_target.map{
-                row -> [row[0].control_group, row]
-            }
-            .set { ch_bg_target_ctrlgrp }
-            //ch_bg_target_ctrlgrp | view
-
-            ch_bedgraph_control.map{
-                row -> [row[0].control_group, row]
-            }
-            .set { ch_bg_control_ctrlgrp }
-            //ch_bg_control_ctrlgrp | view
-
             /*
             * CHANNEL: Create target/control pairings
             */
             // Create pairs of controls (IgG) with target samples if they are supplied
-            ch_bg_control_ctrlgrp.cross(ch_bg_target_ctrlgrp).map {
-                row -> [row[1][1][0], row[1][1][1], row[0][1][1]]
+            ch_bedgraph_control.map{ row -> [row[0].control_group + "_" + row[0].replicate, row] }
+            .cross( ch_bedgraph_target.map{ row -> [row[0].control_group, row] } )
+            .map {
+                row ->
+                [ row[1][1][0], row[1][1][1], row[0][1][1] ]
             }
-            .set{ ch_bedgraph_paired }
+            .set { ch_bedgraph_paired }
             // EXAMPLE CHANNEL STRUCT: [[META], TARGET_BEDGRAPH, CONTROL_BEDGRAPH]
             //ch_bedgraph_paired | view

             SEACR_CALLPEAK_IGG (
                 ch_bedgraph_paired,

@@ -520,31 +506,18 @@ workflow CUTANDRUN {
         }

         if('macs2' in callers) {
-            /*
-            * CHANNEL: Split control groups
-            */
-            ch_bam_target.map{
-                row -> [row[0].control_group, row]
-            }
-            .set { ch_bam_target_ctrlgrp }
-            //ch_bam_target_ctrlgrp | view
-
-            ch_bam_control.map{
-                row -> [row[0].control_group, row]
-            }
-            .set { ch_bam_control_ctrlgrp }
-            // ch_bam_control_ctrlgrp | view
-
             /*
             * CHANNEL: Create target/control pairings
             */
             // Create pairs of controls (IgG) with target samples if they are supplied
-            ch_bam_control_ctrlgrp.cross(ch_bam_target_ctrlgrp).map{
-                row -> [row[1][1][0], row[1][1][1], row[0][1][1]]
+            ch_bam_control.map{ row -> [row[0].control_group + "_" + row[0].replicate, row] }
+            .cross( ch_bam_target.map{ row -> [row[0].control_group, row] } )
+            .map {
+                row ->
+                [ row[1][1][0], row[1][1][1], row[0][1][1] ]
             }
-            .set{ ch_bam_paired }
+            .set { ch_bam_paired }
             // EXAMPLE CHANNEL STRUCT: [[META], TARGET_BAM, CONTROL_BAM]
-            // ch_bam_paired | view
+            //ch_bam_paired | view

             MACS2_CALLPEAK_IGG (
                 ch_bam_paired,

@@ -725,7 +698,8 @@ workflow CUTANDRUN {
             //PREPARE_GENOME.out.gtf.collect(),
             ch_peaks_primary.collect{it[1]}.filter{ it -> it.size() > 1}.ifEmpty([]),
             ch_peaks_secondary.collect{it[1]}.filter{ it -> it.size() > 1}.ifEmpty([]),
-            ch_bigwig.collect{it[1]}.ifEmpty([])
+            ch_bigwig.collect{it[1]}.ifEmpty([]),
+            params.igv_sort_by_groups
         )
         //ch_software_versions = ch_software_versions.mix(IGV_SESSION.out.versions)
     }
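A rough Python sketch of the revised pairing above, with made-up metadata (not pipeline code): each control entry is keyed on its control_group plus its own replicate number, each target entry on its control_group, which after the samplesheet change already carries the resolved replicate suffix; matching keys yield [target_meta, target_file, control_file] tuples. This assumes a control sample's control_group resolves to its own group name (e.g. "igg").

# Sketch only: re-expresses the cross()-based pairing in Python with hypothetical metadata.
controls = [
    ({"id": "igg_R1", "control_group": "igg", "replicate": 1}, "igg_R1.bedgraph"),
    ({"id": "igg_R2", "control_group": "igg", "replicate": 2}, "igg_R2.bedgraph"),
]
targets = [
    ({"id": "h3k4me3_R1", "control_group": "igg_1"}, "h3k4me3_R1.bedgraph"),
    ({"id": "h3k4me3_R2", "control_group": "igg_2"}, "h3k4me3_R2.bedgraph"),
]

# Key controls on control_group + "_" + replicate, targets on control_group,
# then join on the key (roughly what Nextflow's cross() does here).
control_by_key = {
    meta["control_group"] + "_" + str(meta["replicate"]): (meta, path)
    for meta, path in controls
}

paired = []
for meta, path in targets:
    ctrl = control_by_key.get(meta["control_group"])
    if ctrl is not None:
        paired.append([meta, path, ctrl[1]])  # [TARGET_META, TARGET_FILE, CONTROL_FILE]

for row in paired:
    print(row)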