diff --git a/modules/local/xref_fetch/main.nf b/modules/local/xref_fetch/main.nf index 3cb7a72..9f28e5b 100644 --- a/modules/local/xref_fetch/main.nf +++ b/modules/local/xref_fetch/main.nf @@ -41,7 +41,7 @@ process FILTER_AND_SPLIT { process MAP_XREFS { - label process_single + label "process_single" container "dessimozlab/omabuild:nf-latest" input: @@ -68,4 +68,4 @@ process MAP_XREFS { --seq-idx-db $seq_idx_db \\ --xref-source-db $src_xref_db """ -} \ No newline at end of file +} diff --git a/subworkflows/local/hdf5import/main.nf b/subworkflows/local/hdf5import/main.nf index f17e5a7..3c0a2a4 100644 --- a/subworkflows/local/hdf5import/main.nf +++ b/subworkflows/local/hdf5import/main.nf @@ -2,7 +2,7 @@ // Modules include { ADD_GENOMES; BUILD_SEQINDEX; BUILD_HOG_H5; ADD_PAIRWISE_ORTHOLOGS; ADD_DOMAINS; COMBINE_H5_FILES } from "./../../../modules/local/hdf5import" -include { PREPARE_XREFS, MAP_XREFS_WF } from "./../xrefs" +include { PREPARE_XREFS; MAP_XREFS_WF } from "./../xrefs" workflow IMPORT_HDF5 { @@ -43,7 +43,7 @@ workflow IMPORT_HDF5 { PREPARE_XREFS(gs_tsv, genomes_folder, uniprot_swissprot, uniprot_trembl) MAP_XREFS_WF(PREPARE_XREFS.out.xref, gs_tsv, - genomes_folder + genomes_folder, COMBINE_H5_FILES.out.db_h5, BUILD_SEQINDEX.out.seqidx_h5, ADD_GENOMES.out.source_xref_h5) diff --git a/subworkflows/local/xrefs/main.nf b/subworkflows/local/xrefs/main.nf index d39b167..f47eb65 100644 --- a/subworkflows/local/xrefs/main.nf +++ b/subworkflows/local/xrefs/main.nf @@ -13,18 +13,19 @@ workflow PREPARE_XREFS { // Transform swissprot and trembl channels into tuples def swissprot_channel = uniprot_swissprot.map { path -> tuple(path, 'swiss', 'swissprot') } def trembl_channel = uniprot_trembl.map { path -> tuple(path, 'swiss', 'trembl') } - def refseq_channel = FETCH_REFSEQ().out.refseq_proteins.map{ path -> tuple(path, 'genbank', 'refseq') } + FETCH_REFSEQ() + def refseq_channel = FETCH_REFSEQ.out.refseq_proteins.map{ path -> tuple(path, 'genbank', 'refseq') } // Concatenate the three channels def xref_channel = swissprot_channel.concat(trembl_channel, refseq_channel) xref_channel.view() - def taxonomy_sqlite = genome_folder / "taxonomy.sqlite" + def taxonomy_sqlite = genome_folder / "taxonomy.sqlite" def tax_traverse_pkl = genome_folder / "taxonomy.sqlite.traverse.pkl" - FILTER_AND_SPLIT(up_channel, gs_tsv, taxonomy_sqlite, tax_traverse_pkl) + FILTER_AND_SPLIT(xref_channel, gs_tsv, taxonomy_sqlite, tax_traverse_pkl) // debug output - FILTER_AND_SPLIT.out.split_xref.view() + FILTER_AND_SPLIT.out.split_xref.view() emit: xref = FILTER_AND_SPLIT.out.split_xref @@ -35,11 +36,11 @@ workflow PREPARE_XREFS { workflow MAP_XREFS_WF { take: - xref, - gs_tsv, + xref + gs_tsv genome_folder - db, - seq_idx_db, + db + seq_idx_db source_xref_db main: @@ -50,3 +51,13 @@ workflow MAP_XREFS_WF { emit: xref_db = MAP_XREFS.out.xref_h5 } + +workflow { + def gs_tsv = Channel.fromPath("/cluster/scratch/adriaal/nf-oma-work/c9/4450a31daf64694adf62909bace554/gs.tsv") + def genomes_folder = file("/cluster/scratch/adriaal/OMA/genomes1/") + def uniprot_swissprot = Channel.fromPath(params.xref_uniprot_swissprot) + def uniprot_trembl = Channel.fromPath(params.xref_uniprot_trembl) + + PREPARE_XREFS(gs_tsv, genomes_folder, uniprot_swissprot, uniprot_trembl) + //MAP_XREFS_WF(PREPARE_XREFS.out.xref, gs_tsv, genome_folder) +}