diff --git a/conf/igenomes.config b/conf/igenomes.config index 757f4bc..3048b21 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -13,434 +13,238 @@ params { genomes { 'GRCh37' { fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BismarkIndex/" gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/README.txt" mito_name = "MT" - macs_gsize = "2.7e9" + taxon_id = 9606 blacklist = "${projectDir}/assets/blacklists/GRCh37-blacklist.bed" - pwms = "${projectDir}/assets/PWMs/Jaspar_Hocomoco_Kellis_human_PSEMs.txt" } 'GRCh38' { fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BismarkIndex/" gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.bed" mito_name = "chrM" - macs_gsize = "2.7e9" + taxon_id = 9606 blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" - pwms = "${projectDir}/assets/PWMs/Jaspar_Hocomoco_Kellis_human_PSEMs.txt" } 'CHM13' { fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAIndex/" - bwamem2 = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAmem2Index/" gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/CHM13/Annotation/Genes/genes.gtf" - gff = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/914/755/GCF_009914755.1_T2T-CHM13v2.0/GCF_009914755.1_T2T-CHM13v2.0_genomic.gff.gz" mito_name = "chrM" + taxon_id = 9606 } 'GRCm38' { fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BismarkIndex/" gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/README.txt" mito_name = "MT" - macs_gsize = "1.87e9" + taxon_id = 10090 blacklist = "${projectDir}/assets/blacklists/GRCm38-blacklist.bed" - pwms = "${projectDir}/assets/PWMs/Jaspar_Hocomoco_Kellis_mouse_PSEMs.txt" - } - 'TAIR10' { - fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/README.txt" mito_name = "Mt" } 'EB2' { fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BismarkIndex/" gtf = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/README.txt" + taxon_id = 224308 } 'UMD3.1' { fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BismarkIndex/" gtf = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/README.txt" mito_name = "MT" + taxon_id = 9913 } 'WBcel235' { fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BismarkIndex/" gtf = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" mito_name = "MtDNA" macs_gsize = "9e7" + taxon_id = 6239 } 'CanFam3.1' { fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BismarkIndex/" gtf = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/README.txt" mito_name = "MT" + taxon_id = 9615 } 'GRCz10' { fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BismarkIndex/" gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" mito_name = "MT" + taxon_id = 7955 } 'BDGP6' { fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BismarkIndex/" gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" mito_name = "M" macs_gsize = "1.2e8" + taxon_id = 7227 } 'EquCab2' { fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BismarkIndex/" gtf = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/README.txt" mito_name = "MT" + taxon_id = 9796 } 'EB1' { fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BismarkIndex/" gtf = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/README.txt" + taxon_id = 83333 } 'Galgal4' { fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BismarkIndex/" gtf = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" mito_name = "MT" + taxon_id = 9031 } 'Gm01' { fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BismarkIndex/" gtf = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/README.txt" + taxon_id = 3847 } 'Mmul_1' { fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BismarkIndex/" gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/README.txt" mito_name = "MT" + taxon_id = 9544 } 'IRGSP-1.0' { fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BismarkIndex/" gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" mito_name = "Mt" + taxon_id = 39947 } 'CHIMP2.1.4' { fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BismarkIndex/" gtf = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/README.txt" mito_name = "MT" + taxon_id = 9598 } 'Rnor_5.0' { fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BismarkIndex/" gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.bed" mito_name = "MT" + taxon_id = 10116 } 'Rnor_6.0' { fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BismarkIndex/" gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" mito_name = "MT" + taxon_id = 10116 } 'R64-1-1' { fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BismarkIndex/" gtf = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" mito_name = "MT" - macs_gsize = "1.2e7" + taxon_id = 559292 } 'EF2' { fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BismarkIndex/" gtf = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/README.txt" mito_name = "MT" - macs_gsize = "1.21e7" + taxon_id = 4896 } 'Sbi1' { fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BismarkIndex/" gtf = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/README.txt" + taxon_id = 4558 } 'Sscrofa10.2' { fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BismarkIndex/" gtf = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/README.txt" mito_name = "MT" + taxon_id = 9823 } 'AGPv3' { fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BismarkIndex/" gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" mito_name = "Mt" + taxon_id = 4577 } 'hg38' { fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BismarkIndex/" gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" mito_name = "chrM" - macs_gsize = "2.7e9" + taxon_id = 9606 blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" - pwms = "${projectDir}/assets/PWMs/Jaspar_Hocomoco_Kellis_human_PSEMs.txt" } 'hg19' { fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BismarkIndex/" gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/README.txt" mito_name = "chrM" - macs_gsize = "2.7e9" + taxon_id = 9606 blacklist = "${projectDir}/assets/blacklists/hg19-blacklist.bed" - pwms = "${projectDir}/assets/PWMs/Jaspar_Hocomoco_Kellis_human_PSEMs.txt" } 'mm10' { fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BismarkIndex/" gtf = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/README.txt" mito_name = "chrM" - macs_gsize = "1.87e9" + taxon_id = 10090 blacklist = "${projectDir}/assets/blacklists/mm10-blacklist.bed" - pwms = "${projectDir}/assets/PWMs/Jaspar_Hocomoco_Kellis_mouse_PSEMs.txt" } 'bosTau8' { fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BismarkIndex/" gtf = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.bed" mito_name = "chrM" + taxon_id = 9913 } 'ce10' { fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BismarkIndex/" gtf = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/README.txt" mito_name = "chrM" macs_gsize = "9e7" + taxon_id = 6239 } 'canFam3' { fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BismarkIndex/" gtf = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/README.txt" mito_name = "chrM" + taxon_id = 9615 } 'danRer10' { fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BismarkIndex/" gtf = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.bed" mito_name = "chrM" macs_gsize = "1.37e9" + taxon_id = 7955 } 'dm6' { fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BismarkIndex/" gtf = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.bed" mito_name = "chrM" - macs_gsize = "1.2e8" + taxon_id = 7227 } 'equCab2' { fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BismarkIndex/" gtf = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/README.txt" mito_name = "chrM" + taxon_id = 9796 } 'galGal4' { fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BismarkIndex/" gtf = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/README.txt" mito_name = "chrM" + taxon_id = 9031 } 'panTro4' { fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BismarkIndex/" gtf = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/README.txt" mito_name = "chrM" + taxon_id = 9598 } 'rn6' { fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BismarkIndex/" gtf = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.bed" mito_name = "chrM" + taxon_id = 10116 } 'sacCer3' { fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BismarkIndex/" - readme = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Annotation/README.txt" mito_name = "chrM" macs_gsize = "1.2e7" + taxon_id = 559292 } 'susScr3' { fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BismarkIndex/" gtf = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/README.txt" mito_name = "chrM" + taxon_id = 9823 } } } diff --git a/main.nf b/main.nf index 26edcd4..4ba5a3e 100644 --- a/main.nf +++ b/main.nf @@ -33,7 +33,11 @@ include { PREPARE_GENOME } from './subworkflows/local/prepare_genome' params.fasta = getGenomeAttribute('fasta') params.gtf = getGenomeAttribute('gtf') params.blacklist = getGenomeAttribute('blacklist') -params.pwms = getGenomeAttribute('pwms') +params.taxon_id = getGenomeAttribute('taxon_id') + +if (!params.motifs && !params.taxon_id) { + error "Please provide either a motifs file or a taxon ID" +} /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -55,11 +59,12 @@ workflow NFCORE_TFACTIVITY { ch_versions = Channel.empty() - ch_fasta = Channel.value(file(params.fasta)) - ch_gtf = Channel.value(file(params.gtf)) - ch_blacklist = Channel.value(file(params.blacklist)) - ch_pwms = Channel.value(file(params.pwms)) - ch_counts = Channel.value(file(params.counts)) + ch_fasta = Channel.value(file(params.fasta, checkIfExists: true)) + ch_gtf = Channel.value(file(params.gtf, checkIfExists: true)) + ch_blacklist = params.blacklist ? Channel.value(file(params.blacklist, checkIfExists: true)) : Channel.value([]) + ch_motifs = params.motifs ? Channel.value(file(params.motifs, checkIfExists: true)) : Channel.empty() + ch_counts = Channel.value(file(params.counts, checkIfExists: true)) + ch_taxon_id = (!params.motifs && params.taxon_id) ? Channel.value(params.taxon_id) : Channel.empty() // // SUBWORKFLOW: Prepare genome @@ -81,34 +86,42 @@ workflow NFCORE_TFACTIVITY { PREPARE_GENOME.out.fasta, PREPARE_GENOME.out.gtf, ch_blacklist, - ch_pwms, + ch_motifs, + ch_taxon_id, PREPARE_GENOME.out.gene_lengths, PREPARE_GENOME.out.gene_map, - ch_counts, - ch_extra_counts, - Channel.value(file(params.counts_design, checkIfExists: true)) - .map{ design -> [[id: "design"], design]}, - samplesheet_bam, PREPARE_GENOME.out.chrom_sizes, + + // ChromHMM + samplesheet_bam, params.chromhmm_states, params.chromhmm_threshold, params.chromhmm_marks.split(','), + + // Peaks params.window_size, params.decay, params.merge_samples, params.affinity_aggregation, + // Counts + ch_counts, + ch_extra_counts, + Channel.value(file(params.counts_design, checkIfExists: true)) + .map{ design -> [[id: "design"], design]}, params.min_count, params.min_tpm, params.expression_aggregation, params.min_count_tf, params.min_tpm_tf, + // Dynamite params.dynamite_ofolds, params.dynamite_ifolds, params.dynamite_alpha, params.dynamite_randomize, + // Ranking params.alpha, ch_versions diff --git a/modules/local/fimo/filter_motifs/main.nf b/modules/local/fimo/filter_motifs/main.nf index 05c5f13..43ac8b2 100644 --- a/modules/local/fimo/filter_motifs/main.nf +++ b/modules/local/fimo/filter_motifs/main.nf @@ -1,23 +1,26 @@ process FILTER_MOTIFS { - conda 'conda-forge::python==3.9.5' + conda "conda-forge::pandas==1.5.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/python:3.9--1': - 'biocontainers/python:3.9--1' }" + 'https://depot.galaxyproject.org/singularity/pandas:1.5.2': + 'biocontainers/pandas:1.5.2' }" input: tuple val(meta), path(tfs_jaspar_ids) - path jaspar_motifs + tuple val(meta2), path(meme_motifs) output: - tuple val(meta), path("sign_motifs/*.meme"), emit: motifs - path "versions.yml", emit: versions + tuple val(meta), path("motifs/*.meme"), emit: motifs + path "versions.yml", emit: versions script: template "filter_motifs.py" stub: """ - touch motifs.meme + mkdir motifs + touch motifs/MA0778.1.meme + touch motifs/MA0938.3.meme + touch motifs/MA1272.1.meme """ } diff --git a/modules/local/fimo/filter_motifs/templates/filter_motifs.py b/modules/local/fimo/filter_motifs/templates/filter_motifs.py index 8068a0d..29f096c 100644 --- a/modules/local/fimo/filter_motifs/templates/filter_motifs.py +++ b/modules/local/fimo/filter_motifs/templates/filter_motifs.py @@ -1,9 +1,41 @@ #!/usr/bin/env python3 from os import mkdir -from os.path import exists -from shutil import copy +import pandas as pd import platform +from collections import defaultdict + + +def parse_meme_file(path_meme_file): + with open(path_meme_file, "r") as f: + meme_file = f.read() + + lines = meme_file.split('\\n') + header = [] + meme_to_matrix = {} + symbol_to_meme = defaultdict(set) + current_motif = [] + current_motif_meme = "" + is_header = True + + for line in lines: + if line.startswith("MOTIF"): + # List not empty -> not first motif + if current_motif: + meme_to_matrix[current_motif_meme] = '\\n'.join(header + current_motif) + current_motif = [] + current_motif_meme, current_motif_symbol = line.split()[1:3] + symbol_to_meme[current_motif_symbol].add(current_motif_meme) + is_header = False + if is_header: + header.append(line) + else: + current_motif.append(line) + + if current_motif: + meme_to_matrix[current_motif_meme] = '\\n'.join(header + current_motif) + + return meme_to_matrix, symbol_to_meme def format_yaml_like(data: dict, indent: int = 0) -> str: @@ -26,28 +58,39 @@ def format_yaml_like(data: dict, indent: int = 0) -> str: return yaml_str -tfs_jaspar_ids = "${tfs_jaspar_ids}" -jaspar_motifs = "${jaspar_motifs}" +tfs_ranking_file = '${tfs_jaspar_ids}' +path_meme_file = '${meme_motifs}' + -# Read differentially expressed (DE) transcription factors (TF) -with open(tfs_jaspar_ids, "r") as f: - tfs_jaspar_ids = f.read().split('\\n') +# Parse tfs_ranking +tfs_ranking = pd.read_csv(tfs_ranking_file, sep='\\t', index_col=0).index.tolist() -# Create directory for significant motif files -mkdir("sign_motifs") +# Parse meme file +meme_to_matrix, symbol_to_meme = parse_meme_file(path_meme_file) -# Iterate over TFs and store meme files for DE TFs -for jaspar_id in tfs_jaspar_ids: - if exists(f"jaspar_motifs/{jaspar_id}.meme"): - copy(f"jaspar_motifs/{jaspar_id}.meme", f"sign_motifs/{jaspar_id}.meme") +mkdir('motifs') +for symbol in tfs_ranking: + if symbol not in symbol_to_meme: + # Check if symbol without version is in dictionary + base_symbol = symbol.split('.')[0] + if base_symbol not in symbol_to_meme: + print(f'Symbol {symbol} not found') + continue + # Remove version from symbol + symbol = base_symbol + for meme_id in symbol_to_meme[symbol]: + with open(f'motifs/{meme_id}.meme', 'w') as f: + f.write(meme_to_matrix[meme_id]) # Create version file versions = { "${task.process}" : { - "python": platform.python_version() + "python": platform.python_version(), + "pandas": pd.__version__, } } +# Write version file with open("versions.yml", "w") as f: f.write(format_yaml_like(versions)) diff --git a/modules/local/fimo/jaspar_download/main.nf b/modules/local/fimo/jaspar_download/main.nf deleted file mode 100644 index 2607ca2..0000000 --- a/modules/local/fimo/jaspar_download/main.nf +++ /dev/null @@ -1,34 +0,0 @@ -process JASPAR_DOWNLOAD { - label 'process_single' - - conda "conda-forge::curl==7.80.0" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/curl:7.80.0': - 'biocontainers/curl:7.80.0' }" - - output: - path "jaspar_motifs", emit: motifs - path "versions.yml", emit: versions - - script: - """ - curl -o jaspar.zip https://jaspar.elixir.no/download/data/2024/CORE/JASPAR2024_CORE_redundant_pfms_meme.zip - unzip jaspar.zip -d jaspar_motifs - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - curl: \$( curl --version | awk 'NR==1{print \$2}' ) - unzip: \$( unzip -v 2>&1 | grep -o 'v[0-9]\\+\\.[0-9]\\+\\.[0-9]\\+' | sed 's/v//' ) - END_VERSIONS - """ - - stub: - """ - mkdir jaspar_motifs - touch jaspar_motifs/MA0001.1.meme - touch jaspar_motifs/MA0001.2.meme - touch jaspar_motifs/MA0001.3.meme - touch jaspar_motifs/MA0002.1.meme - touch jaspar_motifs/MA0002.2.meme - """ -} diff --git a/modules/local/fimo/jaspar_mapping/main.nf b/modules/local/fimo/jaspar_mapping/main.nf deleted file mode 100644 index 4df7d3d..0000000 --- a/modules/local/fimo/jaspar_mapping/main.nf +++ /dev/null @@ -1,24 +0,0 @@ -process JASPAR_MAPPING { - label 'process_single' - - conda "conda-forge::pandas==1.5.2" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/pandas:1.5.2': - 'biocontainers/pandas:1.5.2' }" - - input: - tuple val(meta), path(tf_ranking) - path pwm - - output: - tuple val(meta), path("tfs_jaspar_ids.txt"), emit: jaspar_ids - path "versions.yml", emit: versions - - script: - template "jaspar_mapping.py" - - stub: - """ - touch tfs_jaspar_ids.txt - """ -} diff --git a/modules/local/fimo/jaspar_mapping/templates/jaspar_mapping.py b/modules/local/fimo/jaspar_mapping/templates/jaspar_mapping.py deleted file mode 100644 index eb13897..0000000 --- a/modules/local/fimo/jaspar_mapping/templates/jaspar_mapping.py +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env python3 - -from collections import defaultdict -import platform -import pandas as pd - -def format_yaml_like(data: dict, indent: int = 0) -> str: - """Formats a dictionary to a YAML-like string. - - Args: - data (dict): The dictionary to format. - indent (int): The current indentation level. - - Returns: - str: A string formatted as YAML. - """ - yaml_str = "" - for key, value in data.items(): - spaces = " " * indent - if isinstance(value, dict): - yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}" - else: - yaml_str += f"{spaces}{key}: {value}\\n" - return yaml_str - - -path_tf_ranking = "${tf_ranking}" -path_pwm = "${pwm}" - -# Read differentially expressed TFs -tf_ranking = pd.read_csv(path_tf_ranking, sep='\\t', index_col=0).index.tolist() - -# Get mapping file -with open(path_pwm, 'r') as f: - file = f.read() -mapping = [tuple(line[1:].split("\\t")[:2]) for line in file.split('\\n') if line.startswith('>')] - -# Create mapping dict from mapping files -symbol_to_id = defaultdict(set) -for jaspar_id, symbol in mapping: - symbol_to_id[symbol].add(jaspar_id) - -# Cast defaultdict to dict -symbol_to_id = dict(symbol_to_id) - -# Create file with sorted TF meme IDs -tfs = sorted([jaspar_id for tf in tf_ranking if tf in symbol_to_id for jaspar_id in symbol_to_id[tf]]) - -with open('tfs_jaspar_ids.txt', 'w') as f: - for tf in tfs: - f.write(f'{tf}\\n') - - -# Create version file -versions = { - "${task.process}" : { - "python": platform.python_version(), - "pandas": pd.__version__ - } -} - -with open("versions.yml", "w") as f: - f.write(format_yaml_like(versions)) diff --git a/modules/local/motifs/convert_motifs/main.nf b/modules/local/motifs/convert_motifs/main.nf new file mode 100644 index 0000000..5b47a79 --- /dev/null +++ b/modules/local/motifs/convert_motifs/main.nf @@ -0,0 +1,21 @@ +process CONVERT_MOTIFS { + tag "$meta.id" + label "process_single" + + conda "bioconda:bioconductor-universalmotif==1.20.0--r43hf17093f_0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bioconductor-universalmotif:1.20.0--r43hf17093f_0': + 'biocontainers/bioconductor-universalmotif:1.20.0--r43hf17093f_0' }" + + input: + tuple val(meta), path(in_file), val(in_type) + val(out_type) + + output: + tuple val(meta), path("${out_file}"), emit: converted + path "versions.yml" , emit: versions + + script: + out_file = "${meta.id}.converted.${out_type}" + template "convert.R" +} diff --git a/modules/local/motifs/convert_motifs/templates/convert.R b/modules/local/motifs/convert_motifs/templates/convert.R new file mode 100644 index 0000000..40e4ab1 --- /dev/null +++ b/modules/local/motifs/convert_motifs/templates/convert.R @@ -0,0 +1,45 @@ +#!/usr/bin/env Rscript + +library(universalmotif) + +in_file <- "$in_file" + +in_type <- "$in_type" +allowed_in_types <- c("cisbp", "homer", "jaspar", "meme", "transfac", "uniprobe", "universal") + +if (!(in_type %in% allowed_in_types)) { + stop("Input type '", in_type, "' not supported. Supported types are: ", paste(allowed_in_types, collapse=", ")) +} + +out_type <- "$out_type" +allowed_out_types <- c("homer", "jaspar", "meme", "transfac", "universal") + +if (!(out_type %in% allowed_out_types)) { + stop("Output type '", out_type, "' not supported. Supported types are: ", paste(allowed_out_types, collapse=", ")) +} + +u.motif <- switch(in_type, + cisbp = read_cisbp, + homer = read_homer, + jaspar = read_jaspar, + meme = read_meme, + transfac = read_transfac, + uniprobe = read_uniprobe, + universal = readRDS +)(in_file) + +switch(out_type, + homer = write_homer, + jaspar = write_jaspar, + meme = write_meme, + transfac = write_transfac, + universal = saveRDS +)(u.motif, "$out_file") + +writeLines( + c( + '"${task.process}":', + paste(' r-base:', strsplit(version[['version.string']], ' ')[[1]][3]), + paste(' bioconductor-universalmotif:', packageVersion("universalmotif")) + ), +'versions.yml') diff --git a/modules/local/motifs/fetch_jaspar/main.nf b/modules/local/motifs/fetch_jaspar/main.nf new file mode 100644 index 0000000..eee64d5 --- /dev/null +++ b/modules/local/motifs/fetch_jaspar/main.nf @@ -0,0 +1,19 @@ +process FETCH_JASPAR { + tag "$taxon_id" + label "process_single" + + conda "bioconda::pyjaspar==3.0.0--pyhdfd78af_0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/pyjaspar:3.0.0--pyhdfd78af_0': + 'biocontainers/pyjaspar:3.0.0--pyhdfd78af_0' }" + + input: + val(taxon_id) + + output: + path("motifs.jaspar"), emit: motifs + path "versions.yml" , emit: versions + + script: + template "fetch_jaspar.py" +} diff --git a/modules/local/peaks/filter_pwms/templates/filter_pwms.py b/modules/local/motifs/fetch_jaspar/templates/fetch_jaspar.py old mode 100755 new mode 100644 similarity index 56% rename from modules/local/peaks/filter_pwms/templates/filter_pwms.py rename to modules/local/motifs/fetch_jaspar/templates/fetch_jaspar.py index cb789ff..29c2578 --- a/modules/local/peaks/filter_pwms/templates/filter_pwms.py +++ b/modules/local/motifs/fetch_jaspar/templates/fetch_jaspar.py @@ -1,5 +1,7 @@ #!/usr/bin/env python3 +import pyjaspar +from pyjaspar import jaspardb import platform def format_yaml_like(data: dict, indent: int = 0) -> str: @@ -21,28 +23,22 @@ def format_yaml_like(data: dict, indent: int = 0) -> str: yaml_str += f"{spaces}{key}: {value}\\n" return yaml_str -with open("$genes", "r") as f: - legal_genes = set([gene.rstrip("\\n") for gene in f.readlines()]) +jdb = jaspardb(release='JASPAR2024') -legal = True +motifs = jdb.fetch_motifs(species=int("$taxon_id")) -with open("$pwms", "r") as f_input, open("pwms.txt", "w") as f_output: - for line in f_input: - if legal and not line.startswith(">"): - f_output.write(line) - else: - splitted = line.split("\\t") - group = splitted[1] - genes = group.split("::") - legal = any(gene in legal_genes for gene in genes) - - if legal: - f_output.write(line) +with open("motifs.jaspar", "w+") as f: + for motif in motifs: + f.write(f">{motif.matrix_id} {motif.name.upper()}\\n") + for base in ["A", "C", "G", "T"]: + f.write(f"{base} [ {' '.join([str(int(x)) for x in motif.counts[base]])} ]\\n") + f.write("\\n") # Create version file versions = { "${task.process}" : { - "python": platform.python_version() + "python": platform.python_version(), + "pyjaspar": pyjaspar.__version__ } } diff --git a/modules/local/motifs/filter_motifs/main.nf b/modules/local/motifs/filter_motifs/main.nf new file mode 100644 index 0000000..2b8e0a1 --- /dev/null +++ b/modules/local/motifs/filter_motifs/main.nf @@ -0,0 +1,21 @@ +process FILTER_MOTIFS { + tag "$meta.id" + label "process_single" + + conda "bioconda:bioconductor-universalmotif==1.20.0--r43hf17093f_0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bioconductor-universalmotif:1.20.0--r43hf17093f_0': + 'biocontainers/bioconductor-universalmotif:1.20.0--r43hf17093f_0' }" + + input: + tuple val(meta), path(in_file) + tuple val(meta2), path(tfs) + + output: + tuple val(meta), path("${out_file}"), emit: filtered + path "versions.yml" , emit: versions + + script: + out_file = "${meta.id}.filtered.RDS" + template "filter_motifs.R" +} diff --git a/modules/local/motifs/filter_motifs/templates/filter_motifs.R b/modules/local/motifs/filter_motifs/templates/filter_motifs.R new file mode 100644 index 0000000..b9892ac --- /dev/null +++ b/modules/local/motifs/filter_motifs/templates/filter_motifs.R @@ -0,0 +1,18 @@ +#!/usr/bin/env Rscript + +library(universalmotif) + +u.motif <- readRDS("$in_file") +tfs <- readLines("$tfs") + +u.motif <- filter_motifs(u.motif, altname = tfs) + +saveRDS(u.motif, "$out_file") + +writeLines( + c( + '"${task.process}":', + paste(' r-base:', strsplit(version[['version.string']], ' ')[[1]][3]), + paste(' bioconductor-universalmotif:', packageVersion("universalmotif")) + ), +'versions.yml') diff --git a/modules/local/motifs/transfac_to_psem/main.nf b/modules/local/motifs/transfac_to_psem/main.nf new file mode 100644 index 0000000..6e16fb1 --- /dev/null +++ b/modules/local/motifs/transfac_to_psem/main.nf @@ -0,0 +1,19 @@ +process TRANSFAC_TO_PSEM { + tag "$meta.id" + label "process_single" + + conda "conda-forge::pandas==1.5.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/pandas:1.5.2': + 'biocontainers/pandas:1.5.2' }" + + input: + tuple val(meta), path(transfac) + + output: + tuple val(meta), path("*.psem"), emit: psem + path "versions.yml" , emit: versions + + script: + template "convert.py" +} diff --git a/modules/local/motifs/transfac_to_psem/templates/convert.py b/modules/local/motifs/transfac_to_psem/templates/convert.py new file mode 100644 index 0000000..95beb2a --- /dev/null +++ b/modules/local/motifs/transfac_to_psem/templates/convert.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python3 + +import numpy as np +import platform + +def format_yaml_like(data: dict, indent: int = 0) -> str: + """Formats a dictionary to a YAML-like string. + + Args: + data (dict): The dictionary to format. + indent (int): The current indentation level. + + Returns: + str: A string formatted as YAML. + """ + yaml_str = "" + for key, value in data.items(): + spaces = " " * indent + if isinstance(value, dict): + yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}" + else: + yaml_str += f"{spaces}{key}: {value}\\n" + return yaml_str + +transfac_path = "$transfac" +pseudocount = 1 +A = 0 +C = 1 +G = 2 +T = 3 +base_order = [A, C, G, T] + +lamda = 0.7 +gc_content = 0.43 +at_content = 1 - gc_content + +slope = 0.584 +intercept = -5.66 + +def write_pwm(f, matrix, name, ma_id): + matrix = matrix + pseudocount + matrix = matrix / matrix.sum(axis=1, keepdims=True) + + maxGC = np.maximum(matrix[:, G], matrix[:, C]) + maxAT = np.maximum(matrix[:, A], matrix[:, T]) + + pwm = np.zeros_like(matrix) + + for i, active, active_content, other, other_content in zip([A, C, G, T], + [maxAT, maxGC, maxGC, maxAT], + [at_content, gc_content, gc_content, at_content], + [maxGC, maxAT, maxAT, maxGC], + [gc_content, at_content, at_content, gc_content]): + pwm[:, i] = np.where(active{ma_id}\\t{name}\\tlnR0: {(round(lnR0, decimals))}\\n") + for row in pwm: + f.write("\\t".join([f"{round(x, decimals)}" for x in row]) + "\\n") + + +with open(transfac_path, 'r') as f_in, open("${meta.id}.psem", "w") as f_out: + cur_id, cur_name, cur_matrix = None, None, [] + for line in f_in: + splitted = line.strip().split() + prefix = splitted[0] + + if prefix in ["//"]: continue + elif prefix == "P0": + if splitted[A+1] != "A" or splitted[C+1] != "C" or splitted[G+1] != "G" or splitted[T+1] != "T": + raise ValueError("Invalid transfac file") + elif prefix == "ID": + cur_id = splitted[1] + elif prefix == "NA": + cur_name = splitted[1] + elif prefix.isnumeric(): + cur_matrix.append([int(splitted[i+1]) for i in base_order]) + elif prefix == "XX": + if not cur_id or not cur_name or not cur_matrix: + raise ValueError("Invalid transfac file") + matrix = np.array(cur_matrix) + write_pwm(f_out, matrix, cur_name, cur_id) + cur_id, cur_name, cur_matrix = None, None, [] + +# Create version file +versions = { + "${task.process}" : { + "python": platform.python_version(), + "numpy": np.__version__ + } +} + +with open("versions.yml", "w") as f: + f.write(format_yaml_like(versions)) diff --git a/modules/local/peaks/filter_pwms/main.nf b/modules/local/peaks/filter_pwms/main.nf deleted file mode 100644 index 61cb68a..0000000 --- a/modules/local/peaks/filter_pwms/main.nf +++ /dev/null @@ -1,21 +0,0 @@ -process FILTER_PWMS { - tag "$meta.id" - label "process_single" - - conda "conda-forge::mulled-v2-2076f4a3fb468a04063c9e6b7747a630abb457f6==fccb0c41a243c639e11dd1be7b74f563e624fcca-0" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-2076f4a3fb468a04063c9e6b7747a630abb457f6:fccb0c41a243c639e11dd1be7b74f563e624fcca-0': - 'biocontainers/mulled-v2-2076f4a3fb468a04063c9e6b7747a630abb457f6:fccb0c41a243c639e11dd1be7b74f563e624fcca-0' }" - - input: - tuple val(meta), path(genes) - path(pwms) - - output: - tuple val(meta), path("pwms.txt") , emit: pwms - - path "versions.yml" , emit: versions - - script: - template "filter_pwms.py" -} diff --git a/nextflow.config b/nextflow.config index 174604d..914d359 100644 --- a/nextflow.config +++ b/nextflow.config @@ -40,6 +40,7 @@ params { // References genome = null + motifs = null igenomes_base = 's3://ngi-igenomes/igenomes/' igenomes_ignore = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 988475d..c1e74fb 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -257,16 +257,22 @@ "help_text": "This parameter is *mandatory* if `--genome` is not specified.", "fa_icon": "far fa-file-code" }, - "pwms": { + "motifs": { "type": "string", "format": "file-path", "exists": true, "mimetype": "text/plain", - "pattern": "^\\S+\\.txt(\\.gz)?$", - "description": "Path to PWMs file.", - "help_text": "This parameter is *mandatory* if `--genome` is not specified.", + "pattern": "^\\S+\\.(cisbp|homer|jaspar|meme|transfac|uniprobe)?$", + "description": "Path to transcription factor motifs file.", + "help_text": "This parameter is *mandatory* if `--genome` is not specified. Alternatively, you can use the `--taxon_id` parameter to fetch the motifs from the JASPAR database.", "fa_icon": "far fa-file-code" }, + "taxon_id": { + "type": "integer", + "description": "NCBI Taxonomy ID.", + "fa_icon": "fas fa-dna", + "help_text": "This parameter is *mandatory* if `--genome` and `--motifs` are not specified. Use this parameter to fetch the motifs from the JASPAR database." + }, "igenomes_ignore": { "type": "boolean", "description": "Do not load the iGenomes reference config.", diff --git a/subworkflows/local/fimo.nf b/subworkflows/local/fimo.nf index f81441e..781e44c 100644 --- a/subworkflows/local/fimo.nf +++ b/subworkflows/local/fimo.nf @@ -1,12 +1,10 @@ -include { JASPAR_MAPPING } from "../../modules/local/fimo/jaspar_mapping/main" -include { JASPAR_DOWNLOAD } from "../../modules/local/fimo/jaspar_download/main" -include { FILTER_MOTIFS } from "../../modules/local/fimo/filter_motifs/main" -include { CAT_CAT as CONCAT_BEDS } from "../../modules/nf-core/cat/cat/main" -include { BEDTOOLS_SORT as SORT_REGIONS } from "../../modules/nf-core/bedtools/sort/main" -include { BEDTOOLS_MERGE as MERGE_REGIONS } from "../../modules/nf-core/bedtools/merge/main" -include { BEDTOOLS_GETFASTA as EXTRACT_SEQUENCE } from "../../modules/nf-core/bedtools/getfasta/main" -include { RUN_FIMO } from "../../modules/local/fimo/run_fimo/main" -include { COMBINE_RESULTS } from "../../modules/local/fimo/combine_results/main" +include { FILTER_MOTIFS } from "../../modules/local/fimo/filter_motifs" +include { CAT_CAT as CONCAT_BEDS } from "../../modules/nf-core/cat/cat" +include { BEDTOOLS_SORT as SORT_REGIONS } from "../../modules/nf-core/bedtools/sort" +include { BEDTOOLS_MERGE as MERGE_REGIONS } from "../../modules/nf-core/bedtools/merge" +include { BEDTOOLS_GETFASTA as EXTRACT_SEQUENCE } from "../../modules/nf-core/bedtools/getfasta" +include { RUN_FIMO } from "../../modules/local/fimo/run_fimo" +include { COMBINE_RESULTS } from "../../modules/local/fimo/combine_results" workflow FIMO { @@ -14,25 +12,17 @@ workflow FIMO { fasta tf_ranking enhancer_regions - pwm + motifs_meme main: ch_versions = Channel.empty() - JASPAR_MAPPING(tf_ranking, pwm) - - JASPAR_DOWNLOAD() - - FILTER_MOTIFS(JASPAR_MAPPING.out.jaspar_ids, JASPAR_DOWNLOAD.out.motifs) + FILTER_MOTIFS(tf_ranking, motifs_meme) ch_cat_input = enhancer_regions - .map{ - meta, file -> file - } + .map{meta, file -> file} .collect() - .map{ - item -> [[id: "enhancer_regions"], item] - } + .map{files -> [[id: "enhancer_regions"], files]} CONCAT_BEDS(ch_cat_input) @@ -58,8 +48,6 @@ workflow FIMO { COMBINE_RESULTS(ch_combine_results) ch_versions = ch_versions.mix( - JASPAR_MAPPING.out.versions, - JASPAR_DOWNLOAD.out.versions, FILTER_MOTIFS.out.versions, CONCAT_BEDS.out.versions, SORT_REGIONS.out.versions, diff --git a/subworkflows/local/motifs.nf b/subworkflows/local/motifs.nf new file mode 100644 index 0000000..8ab5c23 --- /dev/null +++ b/subworkflows/local/motifs.nf @@ -0,0 +1,46 @@ +include { FETCH_JASPAR } from '../../modules/local/motifs/fetch_jaspar' +include { CONVERT_MOTIFS as CONVERT_TO_UNIVERSAL} from '../../modules/local/motifs/convert_motifs' +include { FILTER_MOTIFS } from '../../modules/local/motifs/filter_motifs' +include { CONVERT_MOTIFS as CONVERT_TO_MEME } from '../../modules/local/motifs/convert_motifs' +include { CONVERT_MOTIFS as CONVERT_TO_TRANSFAC } from '../../modules/local/motifs/convert_motifs' +include { TRANSFAC_TO_PSEM } from '../../modules/local/motifs/transfac_to_psem' + +workflow MOTIFS { + take: + ch_input_motifs + ch_tfs + ch_taxon_id + + main: + ch_versions = Channel.empty() + + FETCH_JASPAR(ch_taxon_id) + + // ch_taxon_id and ch_input_motifs are mutually exclusive + ch_motifs = FETCH_JASPAR.out.motifs.mix(ch_input_motifs).first() + + CONVERT_TO_UNIVERSAL(ch_motifs + .map { motifs -> [[id: 'motifs'], motifs, motifs.extension] }, + "universal") + + ch_filtered = FILTER_MOTIFS(CONVERT_TO_UNIVERSAL.out.converted, ch_tfs) + .filtered.map{meta, motifs -> [meta, motifs, "universal"]} + + ch_versions = ch_versions.mix(FETCH_JASPAR.out.versions) + ch_versions = ch_versions.mix(CONVERT_TO_UNIVERSAL.out.versions) + ch_versions = ch_versions.mix(FILTER_MOTIFS.out.versions) + + CONVERT_TO_MEME(ch_filtered, "meme") + CONVERT_TO_TRANSFAC(ch_filtered, "transfac") + TRANSFAC_TO_PSEM(CONVERT_TO_TRANSFAC.out.converted) + + ch_versions = ch_versions.mix(CONVERT_TO_MEME.out.versions) + ch_versions = ch_versions.mix(CONVERT_TO_TRANSFAC.out.versions) + ch_versions = ch_versions.mix(TRANSFAC_TO_PSEM.out.versions) + + emit: + meme = CONVERT_TO_MEME.out.converted + psem = TRANSFAC_TO_PSEM.out.psem + + versions = ch_versions +} diff --git a/subworkflows/local/peaks.nf b/subworkflows/local/peaks.nf index e016c82..a78297d 100644 --- a/subworkflows/local/peaks.nf +++ b/subworkflows/local/peaks.nf @@ -1,7 +1,6 @@ // Modules include { GAWK as CLEAN_BED } from '../../modules/nf-core/gawk/main' include { BEDTOOLS_SORT as SORT_PEAKS } from '../../modules/nf-core/bedtools/sort/main' -include { FILTER_PWMS } from '../../modules/local/peaks/filter_pwms' include { STARE } from '../../modules/local/peaks/stare' include { AGGREGATE_SYNONYMS } from '../../modules/local/peaks/aggregate_synonyms/main' include { COMBINE_TABLES as AFFINITY_MEAN } from '../../modules/local/combine_tables/main' @@ -26,7 +25,6 @@ workflow PEAKS { decay merge_samples contrasts - tfs gene_map agg_method ch_samplesheet_bam @@ -74,14 +72,12 @@ workflow PEAKS { condition: meta.condition, assay: meta.assay], peaks]} - FILTER_PWMS(tfs, pwms) - STARE( ch_peaks, fasta, gtf, blacklist, - FILTER_PWMS.out.pwms.collect(), + pwms.collect(), window_size, decay ) @@ -132,7 +128,6 @@ workflow PEAKS { AFFINITY_SUM(ch_contrast_affinities, "sum") ch_versions = ch_versions.mix( - FILTER_PWMS.out.versions, STARE.out.versions, AGGREGATE_SYNONYMS.out.versions, AFFINITY_RATIO.out.versions, diff --git a/workflows/tfactivity.nf b/workflows/tfactivity.nf index 4f4693c..e48fd64 100644 --- a/workflows/tfactivity.nf +++ b/workflows/tfactivity.nf @@ -12,6 +12,7 @@ include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_tfac include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' include { COUNTS } from '../subworkflows/local/counts' +include { MOTIFS } from '../subworkflows/local/motifs' include { PEAKS } from '../subworkflows/local/peaks' include { DYNAMITE } from '../subworkflows/local/dynamite' include { RANKING } from '../subworkflows/local/ranking' @@ -28,27 +29,33 @@ workflow TFACTIVITY { take: ch_samplesheet // channel: samplesheet read in from --input + + // Genome fasta gtf blacklist - pwms + ch_motifs + ch_taxon_id gene_lengths gene_map - counts - extra_counts - counts_design - ch_samplesheet_bam chrom_sizes + + // ChromHMM + ch_samplesheet_bam chromhmm_states chromhmm_threshold chromhmm_marks + // Peaks window_size decay merge_samples affinity_agg_method // Counts + counts + extra_counts + counts_design min_count min_tpm expression_agg_method @@ -88,17 +95,22 @@ workflow TFACTIVITY { min_tpm_tf ) + MOTIFS( + ch_motifs, + COUNTS.out.tfs, + ch_taxon_id + ) + PEAKS( ch_samplesheet, fasta, gtf, blacklist, - pwms, + MOTIFS.out.psem, window_size, decay, merge_samples, ch_contrasts, - COUNTS.out.tfs, gene_map, affinity_agg_method, ch_samplesheet_bam, @@ -128,7 +140,7 @@ workflow TFACTIVITY { fasta, RANKING.out.tf_total_ranking, PEAKS.out.enhancers, - pwms + MOTIFS.out.meme, ) REPORT( @@ -139,6 +151,7 @@ workflow TFACTIVITY { ch_versions = ch_versions.mix( COUNTS.out.versions, + MOTIFS.out.versions, PEAKS.out.versions, DYNAMITE.out.versions, RANKING.out.versions,