Skip to content

Commit

Permalink
Merge pull request #6 from nf-core/motif-files
Browse files Browse the repository at this point in the history
Simplify handling of motif files
  • Loading branch information
nictru authored May 30, 2024
2 parents 9c629d9 + 2656f79 commit 31b5c43
Show file tree
Hide file tree
Showing 22 changed files with 474 additions and 466 deletions.
272 changes: 38 additions & 234 deletions conf/igenomes.config

Large diffs are not rendered by default.

37 changes: 25 additions & 12 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,11 @@ include { PREPARE_GENOME } from './subworkflows/local/prepare_genome'
params.fasta = getGenomeAttribute('fasta')
params.gtf = getGenomeAttribute('gtf')
params.blacklist = getGenomeAttribute('blacklist')
params.pwms = getGenomeAttribute('pwms')
params.taxon_id = getGenomeAttribute('taxon_id')

if (!params.motifs && !params.taxon_id) {
error "Please provide either a motifs file or a taxon ID"
}

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand All @@ -55,11 +59,12 @@ workflow NFCORE_TFACTIVITY {

ch_versions = Channel.empty()

ch_fasta = Channel.value(file(params.fasta))
ch_gtf = Channel.value(file(params.gtf))
ch_blacklist = Channel.value(file(params.blacklist))
ch_pwms = Channel.value(file(params.pwms))
ch_counts = Channel.value(file(params.counts))
ch_fasta = Channel.value(file(params.fasta, checkIfExists: true))
ch_gtf = Channel.value(file(params.gtf, checkIfExists: true))
ch_blacklist = params.blacklist ? Channel.value(file(params.blacklist, checkIfExists: true)) : Channel.value([])
ch_motifs = params.motifs ? Channel.value(file(params.motifs, checkIfExists: true)) : Channel.empty()
ch_counts = Channel.value(file(params.counts, checkIfExists: true))
ch_taxon_id = (!params.motifs && params.taxon_id) ? Channel.value(params.taxon_id) : Channel.empty()

//
// SUBWORKFLOW: Prepare genome
Expand All @@ -81,34 +86,42 @@ workflow NFCORE_TFACTIVITY {
PREPARE_GENOME.out.fasta,
PREPARE_GENOME.out.gtf,
ch_blacklist,
ch_pwms,
ch_motifs,
ch_taxon_id,
PREPARE_GENOME.out.gene_lengths,
PREPARE_GENOME.out.gene_map,
ch_counts,
ch_extra_counts,
Channel.value(file(params.counts_design, checkIfExists: true))
.map{ design -> [[id: "design"], design]},
samplesheet_bam,
PREPARE_GENOME.out.chrom_sizes,

// ChromHMM
samplesheet_bam,
params.chromhmm_states,
params.chromhmm_threshold,
params.chromhmm_marks.split(','),

// Peaks
params.window_size,
params.decay,
params.merge_samples,
params.affinity_aggregation,

// Counts
ch_counts,
ch_extra_counts,
Channel.value(file(params.counts_design, checkIfExists: true))
.map{ design -> [[id: "design"], design]},
params.min_count,
params.min_tpm,
params.expression_aggregation,
params.min_count_tf,
params.min_tpm_tf,

// Dynamite
params.dynamite_ofolds,
params.dynamite_ifolds,
params.dynamite_alpha,
params.dynamite_randomize,

// Ranking
params.alpha,

ch_versions
Expand Down
17 changes: 10 additions & 7 deletions modules/local/fimo/filter_motifs/main.nf
Original file line number Diff line number Diff line change
@@ -1,23 +1,26 @@
process FILTER_MOTIFS {

conda 'conda-forge::python==3.9.5'
conda "conda-forge::pandas==1.5.2"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/python:3.9--1':
'biocontainers/python:3.9--1' }"
'https://depot.galaxyproject.org/singularity/pandas:1.5.2':
'biocontainers/pandas:1.5.2' }"

input:
tuple val(meta), path(tfs_jaspar_ids)
path jaspar_motifs
tuple val(meta2), path(meme_motifs)

output:
tuple val(meta), path("sign_motifs/*.meme"), emit: motifs
path "versions.yml", emit: versions
tuple val(meta), path("motifs/*.meme"), emit: motifs
path "versions.yml", emit: versions

script:
template "filter_motifs.py"

stub:
"""
touch motifs.meme
mkdir motifs
touch motifs/MA0778.1.meme
touch motifs/MA0938.3.meme
touch motifs/MA1272.1.meme
"""
}
71 changes: 57 additions & 14 deletions modules/local/fimo/filter_motifs/templates/filter_motifs.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,41 @@
#!/usr/bin/env python3

from os import mkdir
from os.path import exists
from shutil import copy
import pandas as pd
import platform
from collections import defaultdict


def parse_meme_file(path_meme_file):
    """Split a multi-motif MEME file into one standalone MEME text per motif.

    Every line before the first ``MOTIF`` line is treated as the shared file
    header and is prepended to each motif, so every returned text carries the
    header followed by that motif's own lines.

    NOTE: this script is rendered as a Nextflow template before it runs. The
    doubled backslash in the newline literals below is presumably collapsed to
    a regular newline escape by that rendering step, so it must stay doubled
    here -- confirm against the Nextflow template documentation.

    Args:
        path_meme_file: Path to the MEME motif file.

    Returns:
        A tuple ``(meme_to_matrix, symbol_to_meme)``:
        ``meme_to_matrix`` maps a motif identifier to its standalone MEME
        text; ``symbol_to_meme`` is a ``defaultdict(set)`` mapping a motif's
        alternate name to the set of identifiers carrying that name.

    Raises:
        ValueError: If a ``MOTIF`` line carries no identifier.
    """
    with open(path_meme_file, "r") as f:
        meme_file = f.read()

    header = []
    meme_to_matrix = {}
    symbol_to_meme = defaultdict(set)
    current_motif = []
    current_motif_meme = ""

    for line in meme_file.split('\\n'):
        if line.startswith("MOTIF"):
            # A non-empty buffer means a previous motif is complete: store it.
            if current_motif:
                meme_to_matrix[current_motif_meme] = '\\n'.join(header + current_motif)

            fields = line.split()
            if len(fields) < 2:
                raise ValueError(f"Malformed MOTIF line without identifier: {line!r}")
            current_motif_meme = fields[1]
            # The alternate name is optional in the MEME format; index such
            # motifs under their identifier instead of failing to unpack.
            current_motif_symbol = fields[2] if len(fields) > 2 else current_motif_meme
            symbol_to_meme[current_motif_symbol].add(current_motif_meme)

            # The MOTIF line itself is the first line of the new motif.
            current_motif = [line]
        elif current_motif:
            current_motif.append(line)
        else:
            # No MOTIF line seen yet: still inside the shared header.
            header.append(line)

    # Store the last motif, which has no following MOTIF line to trigger it.
    if current_motif:
        meme_to_matrix[current_motif_meme] = '\\n'.join(header + current_motif)

    return meme_to_matrix, symbol_to_meme


def format_yaml_like(data: dict, indent: int = 0) -> str:
Expand All @@ -26,28 +58,39 @@ def format_yaml_like(data: dict, indent: int = 0) -> str:
return yaml_str


tfs_jaspar_ids = "${tfs_jaspar_ids}"
jaspar_motifs = "${jaspar_motifs}"
tfs_ranking_file = '${tfs_jaspar_ids}'
path_meme_file = '${meme_motifs}'


# Read differentially expressed (DE) transcription factors (TF)
with open(tfs_jaspar_ids, "r") as f:
tfs_jaspar_ids = f.read().split('\\n')
# Parse tfs_ranking
tfs_ranking = pd.read_csv(tfs_ranking_file, sep='\\t', index_col=0).index.tolist()

# Create directory for significant motif files
mkdir("sign_motifs")
# Parse meme file
meme_to_matrix, symbol_to_meme = parse_meme_file(path_meme_file)

# Iterate over TFs and store meme files for DE TFs
for jaspar_id in tfs_jaspar_ids:
if exists(f"jaspar_motifs/{jaspar_id}.meme"):
copy(f"jaspar_motifs/{jaspar_id}.meme", f"sign_motifs/{jaspar_id}.meme")
mkdir('motifs')
for symbol in tfs_ranking:
if symbol not in symbol_to_meme:
# Check if symbol without version is in dictionary
base_symbol = symbol.split('.')[0]
if base_symbol not in symbol_to_meme:
print(f'Symbol {symbol} not found')
continue
# Remove version from symbol
symbol = base_symbol
for meme_id in symbol_to_meme[symbol]:
with open(f'motifs/{meme_id}.meme', 'w') as f:
f.write(meme_to_matrix[meme_id])


# Create version file
versions = {
"${task.process}" : {
"python": platform.python_version()
"python": platform.python_version(),
"pandas": pd.__version__,
}
}

# Write version file
with open("versions.yml", "w") as f:
f.write(format_yaml_like(versions))
34 changes: 0 additions & 34 deletions modules/local/fimo/jaspar_download/main.nf

This file was deleted.

24 changes: 0 additions & 24 deletions modules/local/fimo/jaspar_mapping/main.nf

This file was deleted.

63 changes: 0 additions & 63 deletions modules/local/fimo/jaspar_mapping/templates/jaspar_mapping.py

This file was deleted.

21 changes: 21 additions & 0 deletions modules/local/motifs/convert_motifs/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Converts a motif file from `in_type` to `out_type` inside a
// bioconductor-universalmotif environment. The conversion itself is
// implemented in the `convert.R` template (not shown here).
process CONVERT_MOTIFS {
    tag "$meta.id"
    label "process_single"

    // Conda match specs take the form "channel::package=version"; the channel
    // separator is a double colon and the version is pinned with a single "=".
    conda "bioconda::bioconductor-universalmotif=1.20.0"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/bioconductor-universalmotif:1.20.0--r43hf17093f_0':
        'biocontainers/bioconductor-universalmotif:1.20.0--r43hf17093f_0' }"

    input:
    // meta: sample/meta map (its `id` names the output), in_file: motif file
    // to convert, in_type: format of in_file.
    tuple val(meta), path(in_file), val(in_type)
    // Target format; also used as the output file extension.
    val(out_type)

    output:
    tuple val(meta), path("${out_file}"), emit: converted
    path "versions.yml"                 , emit: versions

    script:
    // Assigned without `def` so the name is also visible to the output block.
    out_file = "${meta.id}.converted.${out_type}"
    template "convert.R"
}
Loading

0 comments on commit 31b5c43

Please sign in to comment.