diff --git a/conf/igenomes.config b/conf/igenomes.config index e93d58b..757f4bc 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -24,7 +24,6 @@ params { macs_gsize = "2.7e9" blacklist = "${projectDir}/assets/blacklists/GRCh37-blacklist.bed" pwms = "${projectDir}/assets/PWMs/Jaspar_Hocomoco_Kellis_human_PSEMs.txt" - rose_ucsc = "https://raw.githubusercontent.com/stjude/ROSE/master/annotation/hg19_refseq.ucsc" } 'GRCh38' { fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" @@ -38,7 +37,6 @@ params { macs_gsize = "2.7e9" blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" pwms = "${projectDir}/assets/PWMs/Jaspar_Hocomoco_Kellis_human_PSEMs.txt" - rose_ucsc = "https://raw.githubusercontent.com/stjude/ROSE/master/annotation/hg38_refseq.ucsc" } 'CHM13' { fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa" @@ -61,7 +59,6 @@ params { macs_gsize = "1.87e9" blacklist = "${projectDir}/assets/blacklists/GRCm38-blacklist.bed" pwms = "${projectDir}/assets/PWMs/Jaspar_Hocomoco_Kellis_mouse_PSEMs.txt" - rose_ucsc = "https://raw.githubusercontent.com/stjude/ROSE/master/annotation/mm10_refseq.ucsc" } 'TAIR10' { fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" @@ -297,7 +294,6 @@ params { macs_gsize = "2.7e9" blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" pwms = "${projectDir}/assets/PWMs/Jaspar_Hocomoco_Kellis_human_PSEMs.txt" - rose_ucsc = "https://raw.githubusercontent.com/stjude/ROSE/master/annotation/hg38_refseq.ucsc" } 'hg19' { fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" @@ -312,7 +308,6 @@ params { macs_gsize = "2.7e9" blacklist = "${projectDir}/assets/blacklists/hg19-blacklist.bed" pwms = "${projectDir}/assets/PWMs/Jaspar_Hocomoco_Kellis_human_PSEMs.txt" - rose_ucsc = 
"https://raw.githubusercontent.com/stjude/ROSE/master/annotation/hg19_refseq.ucsc" } 'mm10' { fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" @@ -327,7 +322,6 @@ params { macs_gsize = "1.87e9" blacklist = "${projectDir}/assets/blacklists/mm10-blacklist.bed" pwms = "${projectDir}/assets/PWMs/Jaspar_Hocomoco_Kellis_mouse_PSEMs.txt" - rose_ucsc = "https://raw.githubusercontent.com/stjude/ROSE/master/annotation/mm10_refseq.ucsc" } 'bosTau8' { fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" diff --git a/conf/modules.config b/conf/modules.config index 7839d96..d8bfef0 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -23,7 +23,7 @@ process { } withName: CLEAN_BED { - ext.args = {"'{print \$1 \"\\t\" \$2 \"\\t\" \$3}'"} + ext.args = {"'{print \$1 \"\\t\" \$2 \"\\t\" \$3 \"\\t\" \$4 \"\\t\" \$5 \"\\t\" \$6}'"} ext.prefix = {"${meta.id}.clean"} ext.suffix = "bed" } @@ -62,22 +62,8 @@ process { ext.prefix = {"${meta.id}_control"} } - withName: BED_TO_GFF { - ext.args = {"'BEGIN{FS=\"\\t\";OFS=\"\\t\"} { print \$1, \"bed2gff\", \"region\", \$2+1, \$3, \".\", \".\", \".\", \".\"}'"} - ext.prefix = {"$meta.id"} - ext.suffix = "gff" - } - - withName: REFORMAT_GFF { - ext.args = {"'BEGIN{FS=\"\\t\";OFS=\"\\t\"} {if(!match(\$1, /^chr/)) \$1=\"chr\"\$1; \$2=\"seq_\"NR; print \$1, \$2, \"\", \$4, \$5, \"\", \$7, \"\", \$2}'"} - ext.prefix = {"${meta.id}_reformatted"} - ext.suffix = "gff" - } - - withName: ROSE_OUTPUT_TO_BED { - ext.args = {"'BEGIN{FS=\"\\t\";OFS=\"\\t\"} {print \$1, \$4-1, \$5}'"} - ext.prefix = {"$meta.id"} - ext.suffix = "bed" + withName: UCSC_GTFTOGENEPRED { + ext.args = "-genePredExt" } withName: ".*DYNAMITE:FILTER" { diff --git a/main.nf b/main.nf index a456e21..26edcd4 100644 --- a/main.nf +++ b/main.nf @@ -34,7 +34,6 @@ params.fasta = getGenomeAttribute('fasta') params.gtf = getGenomeAttribute('gtf') params.blacklist = getGenomeAttribute('blacklist') 
params.pwms = getGenomeAttribute('pwms') -params.rose_ucsc = getGenomeAttribute('rose_ucsc') /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -94,7 +93,6 @@ workflow NFCORE_TFACTIVITY { params.chromhmm_states, params.chromhmm_threshold, params.chromhmm_marks.split(','), - params.rose_ucsc, params.window_size, params.decay, params.merge_samples, diff --git a/modules.json b/modules.json index 5905f5d..b148061 100644 --- a/modules.json +++ b/modules.json @@ -59,6 +59,11 @@ "branch": "master", "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", "installed_by": ["modules"] + }, + "ucsc/gtftogenepred": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] } } }, diff --git a/modules/local/chromhmm/get_results/templates/get_results.py b/modules/local/chromhmm/get_results/templates/get_results.py index f0a58d9..28fcf20 100755 --- a/modules/local/chromhmm/get_results/templates/get_results.py +++ b/modules/local/chromhmm/get_results/templates/get_results.py @@ -10,20 +10,23 @@ emissions = pd.read_csv("$emissions", sep = "\\t")[["State (Emission order)"] + marks].rename(columns={"State (Emission order)": "State"}) -# Read input bed file and remove unecessary columns +# Read input bed file bed = pd.read_csv("$bed", sep="\\t", skiprows=1, names=["chr", "start", "end", "state", "score", "strand", "start_1", "end_1", "rgb"] - ).drop(columns=["strand", "score", "start_1", "end_1", "rgb"]) + ) # Keep state if any of the marks is enriched > threshold for this state -states = emissions[np.any([emissions[mark] >= $threshold for mark in marks], axis=0)]["State"].tolist() +states = emissions[np.any([emissions[mark] >= float("$threshold") for mark in marks], axis=0)]["State"].tolist() # Subset bed file for selected states -out_bed = bed[np.isin(bed["state"], states)].drop(columns=["state"]) +bed = bed[np.isin(bed["state"], states)].drop(columns=["state"]) +bed["name"] = bed["chr"] + 
":" + bed["start"].astype(str) + "-" + bed["end"].astype(str) + +bed = bed[["chr", "start", "end", "name", "score", "strand"]] # Write output -out_bed.to_csv("$output_file", index=False, sep="\\t", header=False) +bed.to_csv("$output_file", index=False, sep="\\t", header=False) diff --git a/modules/local/rose/main.nf b/modules/local/rose/main.nf index 6c1c9ae..e9c99f3 100644 --- a/modules/local/rose/main.nf +++ b/modules/local/rose/main.nf @@ -8,11 +8,12 @@ process ROSE { 'biocontainers/mulled-v2-2076f4a3fb468a04063c9e6b7747a630abb457f6:fccb0c41a243c639e11dd1be7b74f563e624fcca-0' }" input: - tuple val(meta), path(gff) - path ucsc_file + tuple val(meta), path(bed) + tuple val(meta2), path(genepred) output: - tuple val(meta), path("${gff.baseName}_STITCHED.gff") + tuple val(meta), path("${meta.id}.rose.bed"), emit: stitched + path("versions.yml") , emit: versions script: stitch = 12500 @@ -21,6 +22,6 @@ process ROSE { stub: """ - touch "${gff.baseName}_STITCHED.gff" + touch "${meta.id}.rose.bed" """ } diff --git a/modules/local/rose/templates/rose.py b/modules/local/rose/templates/rose.py index 677d660..407f825 100755 --- a/modules/local/rose/templates/rose.py +++ b/modules/local/rose/templates/rose.py @@ -22,18 +22,13 @@ def format_yaml_like(data: dict, indent: int = 0) -> str: yaml_str += f"{spaces}{key}: {value}\\n" return yaml_str -def region_stitching(input_gff, stitch_window, tss_window, annot_file, remove_tss=True): +def region_stitching(bound_collection, stitch_window, tss_window, start_dict): print('Performing region stitching...') - # first have to turn bound region file into a locus collection - # need to make sure this names correctly... 
each region should have a unique name - bound_collection = gff_to_locus_collection(input_gff) + remove_tss = tss_window != 0 # filter out all bound regions that overlap the TSS of an ACTIVE GENE if remove_tss: - # first make a locus collection of TSS - start_dict = make_start_dict(annot_file) - # now makeTSS loci for active genes remove_ticker = 0 # this loop makes a locus centered around +/- tss_window of transcribed genes @@ -171,91 +166,79 @@ def format_folder(folder_name, create=False): # ================================================================== -def make_start_dict(annot_file, gene_list=[]): - """ - makes a dictionary keyed by refseq ID that contains information about - chrom/start/stop/strand/common name - """ +def make_start_dict(annot_file): + transcripts = [] + + genepred_table, genepred_dict = import_genepred(annot_file) + if len(transcripts) == 0: + transcripts = list(genepred_dict.keys()) + start_dict = {} + for transcript in transcripts: + if transcript not in genepred_dict: + continue + start_dict[transcript] = {} + start_dict[transcript]['sense'] = genepred_table[genepred_dict[transcript][0]][2] + start_dict[transcript]['chr'] = genepred_table[genepred_dict[transcript][0]][1] + start_dict[transcript]['start'] = get_tsss([transcript], genepred_table, genepred_dict) + if start_dict[transcript]['sense'] == '+': + start_dict[transcript]['end'] = [int(genepred_table[genepred_dict[transcript][0]][4])] + else: + start_dict[transcript]['end'] = [int(genepred_table[genepred_dict[transcript][0]][3])] + start_dict[transcript]['name'] = genepred_table[genepred_dict[transcript][0]][11] - if type(gene_list) == str: - gene_list = parse_table(gene_list, '\\t') - gene_list = [line[0] for line in gene_list] - - if annot_file.upper().count('REFSEQ') == 1: - refseq_table, refseq_dict = import_refseq(annot_file) - if len(gene_list) == 0: - gene_list = list(refseq_dict.keys()) - start_dict = {} - for gene in gene_list: - if gene not in refseq_dict: - continue - 
start_dict[gene] = {} - start_dict[gene]['sense'] = refseq_table[refseq_dict[gene][0]][3] - start_dict[gene]['chr'] = refseq_table[refseq_dict[gene][0]][2] - start_dict[gene]['start'] = get_tsss([gene], refseq_table, refseq_dict) - if start_dict[gene]['sense'] == '+': - start_dict[gene]['end'] = [int(refseq_table[refseq_dict[gene][0]][5])] - else: - start_dict[gene]['end'] = [int(refseq_table[refseq_dict[gene][0]][4])] - start_dict[gene]['name'] = refseq_table[refseq_dict[gene][0]][12] return start_dict # generic function to get the TSS of any gene -def get_tsss(gene_list, refseq_table, refseq_dict): +def get_tsss(gene_list, genepred_table, genepred_dict): if len(gene_list) == 0: - refseq = refseq_table + genepred = genepred_table else: - refseq = refseq_from_key(gene_list, refseq_dict, refseq_table) + genepred = genepred_from_key(gene_list, genepred_dict, genepred_table) tss = [] - for line in refseq: - if line[3] == '+': + for line in genepred: + if line[2] == '+': + tss.append(line[3]) + if line[2] == '-': tss.append(line[4]) - if line[3] == '-': - tss.append(line[5]) tss = list(map(int, tss)) return tss # 12/29/08 -# refseq_from_key(refseqKeyList,refseq_dict,refseq_table) -# function that grabs refseq lines from refseq IDs -def refseq_from_key(refseq_key_list, refseq_dict, refseq_table): - type_refseq = [] - for name in refseq_key_list: - if name in refseq_dict: - type_refseq.append(refseq_table[refseq_dict[name][0]]) - return type_refseq - - -# 10/13/08 -# import_refseq -# takes in a refseq table and makes a refseq table and a refseq dictionary for keying the table - -def import_refseq(refseq_file, return_multiples=False): - """ - opens up a refseq file downloaded by UCSC - """ - refseq_table = parse_table(refseq_file, '\\t') - refseq_dict = {} - ticker = 1 - for line in refseq_table[1:]: - if line[1] in refseq_dict: - refseq_dict[line[1]].append(ticker) +# genepred_from_key(genepredKeyList,genepred_dict,genepred_table) +# function that grabs genepred lines 
from genepred IDs +def genepred_from_key(genepred_key_list, genepred_dict, genepred_table): + type_genepred = [] + for name in genepred_key_list: + if name in genepred_dict: + type_genepred.append(genepred_table[genepred_dict[name][0]]) + return type_genepred + + + +def import_genepred(genepred_file, return_multiples=False): + genepred_table = parse_table(genepred_file, '\\t') + genepred_dict = {} + ticker = 0 + for line in genepred_table: + transcript = line[0] + if transcript in genepred_dict: + genepred_dict[transcript].append(ticker) else: - refseq_dict[line[1]] = [ticker] + genepred_dict[transcript] = [ticker] ticker = ticker + 1 multiples = [] - for i in refseq_dict: - if len(refseq_dict[i]) > 1: + for i in genepred_dict: + if len(genepred_dict[i]) > 1: multiples.append(i) if return_multiples: - return refseq_table, refseq_dict, multiples + return genepred_table, genepred_dict, multiples else: - return refseq_table, refseq_dict + return genepred_table, genepred_dict # ================================================================== @@ -275,7 +258,7 @@ class Locus: # sense = '+' or '-' (or '.' for an ambidextrous locus) # start,end = ints of the start and end coords of the locus # end coord is the coord of the last nucleotide. 
- def __init__(self, chr, start, end, sense, id=''): + def __init__(self, chr, start, end, sense, id='', score=0): coords = [int(start), int(end)] coords.sort() # this method for assigning chromosome should help avoid storage of @@ -287,6 +270,7 @@ def __init__(self, chr, start, end, sense, id=''): self._start = int(coords[0]) self._end = int(coords[1]) self._id = id + self._score = score def id(self): return self._id @@ -316,6 +300,9 @@ def coords(self): def sense(self): return self._sense + def score(self): + return self._score + # returns boolean; True if two loci share any coordinates in common def overlaps(self, other_locus): if self.chr() != other_locus.chr(): @@ -556,37 +543,25 @@ def stitch_collection(self, stitch_window=1, sense='both'): # ========================LOCUS FUNCTIONS=========================== # ================================================================== # 06/11/09 -# turns a locusCollection into a gff +# turns a locusCollection into a bed # does not write to disk though -def locus_collection_to_gff(locus_collection): +def locus_collection_to_bed(locus_collection): loci_list = locus_collection.get_loci() - gff = [] + bed = [] for locus in loci_list: - new_line = [locus.chr(), locus.id(), '', locus.coords()[0], locus.coords()[1], '', locus.sense(), '', - locus.id()] - gff.append(new_line) - return gff + new_line = [locus.chr(), locus.coords()[0], locus.coords()[1], locus.id(), locus.score(), locus.sense()] + bed.append(new_line) + return bed -def gff_to_locus_collection(gff, window=500): +def bed_to_locus_collection(bed, window=500): """ - opens up a gff file and turns it into a LocusCollection instance + opens up a bed file and turns it into a LocusCollection instance """ - loci_list = [] - if type(gff) == str: - gff = parse_table(gff, '\\t') + loci_list = [Locus(line[0], line[1], line[2], line[5], line[3], line[4]) + for line in parse_table(bed, '\\t')] - for line in gff: - # USE line[2] as the locus id.
If that is empty use line[8] - if len(line[2]) > 0: - name = line[2] - elif len(line[8]) > 0: - name = line[8] - else: - name = f'{line[0]}:{line[6]}:{line[3]}-{line[4]}' - - loci_list.append(Locus(line[0], line[3], line[4], line[6], name)) return LocusCollection(loci_list, window) @@ -628,34 +603,12 @@ def idfun(x): return x result.append(item) return result -input_gff_file = "$gff" -stitched_gff_file = "${gff.baseName}_STITCHED.gff" -annot_file = "$ucsc_file" - -stitch_window = int("$stitch") -tss_window = int("$tss_dist") - -if tss_window != 0: - remove_tss = True -else: - remove_tss = False - -# GETTING THE BOUND REGION FILE USED TO DEFINE ENHANCERS -print(f'Using {input_gff_file} as the input gff') -input_name = input_gff_file.split('/')[-1].split('.')[0] - -print(f'Using {annot_file} as the genome') -print('Making start dict') -start_dict = make_start_dict(annot_file) - -print('Stitching regions together') -stitched_collection = region_stitching(input_gff_file, stitch_window, tss_window, annot_file, remove_tss) - -print('Making GFF from stitched collection') -stitched_gff = locus_collection_to_gff(stitched_collection) -print(f'Writing stitched GFF to disk as {stitched_gff_file}') -unparse_table(stitched_gff, stitched_gff_file, '\\t') +start_dict = make_start_dict("$genepred") +locus_collection = bed_to_locus_collection("$bed") +stitched_collection = region_stitching(locus_collection, int("$stitch"), int("$tss_dist"), start_dict) +stitched = locus_collection_to_bed(stitched_collection) +unparse_table(stitched, "${meta.id}.rose.bed", '\\t') # Create version file versions = { diff --git a/modules/nf-core/ucsc/gtftogenepred/environment.yml b/modules/nf-core/ucsc/gtftogenepred/environment.yml new file mode 100644 index 0000000..5216fc8 --- /dev/null +++ b/modules/nf-core/ucsc/gtftogenepred/environment.yml @@ -0,0 +1,7 @@ +name: ucsc_gtftogenepred +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::ucsc-gtftogenepred=447 diff --git 
a/modules/nf-core/ucsc/gtftogenepred/main.nf b/modules/nf-core/ucsc/gtftogenepred/main.nf new file mode 100644 index 0000000..88aace2 --- /dev/null +++ b/modules/nf-core/ucsc/gtftogenepred/main.nf @@ -0,0 +1,54 @@ +process UCSC_GTFTOGENEPRED { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ucsc-gtftogenepred:447--h954228d_0': + 'biocontainers/ucsc-gtftogenepred:447--h954228d_0' }" + + input: + tuple val(meta), path(gtf) + + output: + tuple val(meta), path("*.genepred"), emit: genepred + tuple val(meta), path("*.refflat") , emit: refflat , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def gen_refflat = args.contains("-genePredExt") && args.contains("-geneNameAsName2") ? "true" : "false" + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '447' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
+ """ + gtfToGenePred \\ + $args \\ + $gtf \\ + ${prefix}.genepred + + if [ "${gen_refflat}" == "true" ] ; then + awk 'BEGIN { OFS="\\t"} {print \$12, \$1, \$2, \$3, \$4, \$5, \$6, \$7, \$8, \$9, \$10}' ${prefix}.genepred > ${prefix}.refflat + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ucsc: $VERSION + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '447' + """ + touch ${prefix}.genepred + touch ${prefix}.refflat + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ucsc: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/ucsc/gtftogenepred/meta.yml b/modules/nf-core/ucsc/gtftogenepred/meta.yml new file mode 100644 index 0000000..02122e4 --- /dev/null +++ b/modules/nf-core/ucsc/gtftogenepred/meta.yml @@ -0,0 +1,47 @@ +name: ucsc_gtftogenepred +description: Convert a GTF annotation file to UCSC genePred (and optionally refFlat) format +keywords: + - gtf + - genepred + - refflat + - ucsc + - gtftogenepred +tools: + - ucsc: + description: Convert GTF files to GenePred format + homepage: http://hgdownload.cse.ucsc.edu/admin/exe/ + licence: ["varies; see http://genome.ucsc.edu/license"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - genepred: + type: file + description: genepred file + pattern: "*.{genepred}" + - refflat: + type: file + description: refflat file + pattern: "*.{refflat}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@BarryDigby" + - "@anoronh4" +maintainers: + - "@BarryDigby" + - "@anoronh4" diff --git a/nextflow_schema.json b/nextflow_schema.json index 859b17a..6227f15 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -267,16 +267,6 @@ "help_text": "This parameter is *mandatory* if `--genome` is not specified.", "fa_icon": "far fa-file-code" }, - "rose_ucsc": { - "type": "string", - "format": "file-path", - "exists": true, - "mimetype": "text/plain", - "pattern": "^\\S+\\.ucsc$", - "description": "Path to ROSE UCSC file.", - "help_text": "This parameter is *mandatory* if `--genome` is not specified and input_bam is defined.", - "fa_icon": "far fa-file-code" - }, "igenomes_ignore": { "type": "boolean", "description": "Do not load the iGenomes reference config.", diff --git a/subworkflows/local/peaks.nf b/subworkflows/local/peaks.nf index e1d9a06..e016c82 100644 --- a/subworkflows/local/peaks.nf +++ b/subworkflows/local/peaks.nf @@ -34,7 +34,6 @@ workflow PEAKS { chromhmm_states chromhmm_threshold chromhmm_marks - rose_ucsc main: @@ -64,7 +63,7 @@ workflow PEAKS { } CHROMHMM(ch_samplesheet_bam, chrom_sizes, chromhmm_states, chromhmm_threshold, chromhmm_marks) - ROSE(CHROMHMM.out.enhancers, rose_ucsc) + ROSE(CHROMHMM.out.enhancers, gtf) ch_versions = ch_versions.mix(CHROMHMM.out.versions) ch_versions = ch_versions.mix(ROSE.out.versions) diff --git a/subworkflows/local/rose.nf b/subworkflows/local/rose.nf index 1349412..b7af511 100644 --- a/subworkflows/local/rose.nf +++ b/subworkflows/local/rose.nf @@ -1,29 +1,23 @@ -include { GAWK as BED_TO_GFF } from "../../modules/nf-core/gawk" -include { GAWK as REFORMAT_GFF } from "../../modules/nf-core/gawk" include { 
ROSE as RUN_ROSE } from "../../modules/local/rose" -include { GAWK as ROSE_OUTPUT_TO_BED } from "../../modules/nf-core/gawk" +include { UCSC_GTFTOGENEPRED } from "../../modules/nf-core/ucsc/gtftogenepred" workflow ROSE { take: ch_bed - ucsc_file + ch_gtf main: ch_versions = Channel.empty() - BED_TO_GFF(ch_bed, []) - REFORMAT_GFF(BED_TO_GFF.out.output, []) + UCSC_GTFTOGENEPRED(ch_gtf) + RUN_ROSE(ch_bed, UCSC_GTFTOGENEPRED.out.genepred) - RUN_ROSE(REFORMAT_GFF.out.output, ucsc_file) - ROSE_OUTPUT_TO_BED(RUN_ROSE.out, []) - - ch_versions = ch_versions.mix(BED_TO_GFF.out.versions) - ch_versions = ch_versions.mix(REFORMAT_GFF.out.versions) - ch_versions = ch_versions.mix(ROSE_OUTPUT_TO_BED.out.versions) + ch_versions = ch_versions.mix(RUN_ROSE.out.versions) + ch_versions = ch_versions.mix(UCSC_GTFTOGENEPRED.out.versions) emit: - enhancers = ROSE_OUTPUT_TO_BED.out.output + enhancers = RUN_ROSE.out.stitched versions = ch_versions } diff --git a/workflows/tfactivity.nf b/workflows/tfactivity.nf index 6be2373..8329e8f 100644 --- a/workflows/tfactivity.nf +++ b/workflows/tfactivity.nf @@ -42,7 +42,6 @@ workflow TFACTIVITY { chromhmm_states chromhmm_threshold chromhmm_marks - rose_ucsc window_size decay @@ -106,8 +105,7 @@ workflow TFACTIVITY { chrom_sizes, chromhmm_states, chromhmm_threshold, - chromhmm_marks, - rose_ucsc + chromhmm_marks ) DYNAMITE(