diff --git a/modules/local/xref_fetch/main.nf b/modules/local/xref_fetch/main.nf index 4f3f4be..1683878 100644 --- a/modules/local/xref_fetch/main.nf +++ b/modules/local/xref_fetch/main.nf @@ -32,7 +32,7 @@ process FILTER_AND_SPLIT { """ oma-build -vv filter-xref \\ --xref $xref \\ - --out-prefix ./xref-${source}- \\ + --out-prefix ./xref-${source} \\ --format $format \\ --gs-tsv $gs_tsv \\ --tax-sqlite $tax_sqlite @@ -65,7 +65,7 @@ process MAP_XREFS { --source $source \\ --gs-tsv $gs_tsv \\ --tax-sqlite $tax_sqlite \\ - --out xref.pkl \\ + --out xref-${source}.pkl \\ --db $db \\ --seq-idx-db $seq_idx_db \\ --xref-source-db $src_xref_db @@ -98,18 +98,18 @@ process COLLECT_XREFS { process COMBINE_ALL_XREFS { label "process_single" container "dessimozlab/omabuild:nf-latest" - tag "Collecting xrefs for $source" + tag "Combining all xrefs into single hdf5 db" input: path xref_dbs output: - tuple "XRef-db.h5", emit: xref_db_h5 + path("XRef-db.h5"), emit: xref_db_h5 script: """ oma-build -vv combine-xrefs \\ - --out "XRef-db.h5" + --out XRef-db.h5 \\ --xrefs $xref_dbs \\ """ -} \ No newline at end of file +} diff --git a/nextflow.config b/nextflow.config index 00b4f90..f5caf82 100644 --- a/nextflow.config +++ b/nextflow.config @@ -15,7 +15,7 @@ params { // domains directory known_domains = null cath_names_path = "http://download.cathdb.info/cath/releases/latest-release/cath-classification-data/cath-names.txt" - pfam_names_path = "ftp://ftp.ebi.ac.uk/pub/databases/Pfam/current_release/Pfam-A.clans.tsv.gz" + pfam_names_path = "https://ftp.ebi.ac.uk/pub/databases/Pfam/current_release/Pfam-A.clans.tsv.gz" // xref paths xref_uniprot_swissprot = "https://ftp.ebi.ac.uk/pub/databases/uniprot/knowledgebase/uniprot_sprot.dat.gz" diff --git a/subworkflows/local/hdf5import/main.nf b/subworkflows/local/hdf5import/main.nf index fa93640..2e0119e 100644 --- a/subworkflows/local/hdf5import/main.nf +++ b/subworkflows/local/hdf5import/main.nf @@ -28,9 +28,10 @@ workflow IMPORT_HDF5 { pw_h5 = null } if (params.known_domains != null) { - domains = Channel.fromPath(params.known_domains).collect() + domains = Channel.fromPath("${params.known_domains}/*") cath_names = Channel.fromPath(params.cath_names_path) pfam_names = Channel.fromPath(params.pfam_names_path) + ADD_DOMAINS(ADD_GENOMES.out.db_h5, domains, cath_names, pfam_names) domains_h5 = ADD_DOMAINS.out.domains_h5 } else { diff --git a/subworkflows/local/xrefs/main.nf b/subworkflows/local/xrefs/main.nf index 32c9d38..8d7f5ef 100644 --- a/subworkflows/local/xrefs/main.nf +++ b/subworkflows/local/xrefs/main.nf @@ -63,7 +63,7 @@ workflow MAP_XREFS_WF { MAP_XREFS(map_xref_params, taxonomy_sqlite, tax_traverse_pkl) grouped_by_source = MAP_XREFS.out.matched_xrefs .groupTuple() - .map { source, map_resList, format, xrefList -> [source, map_resList, format, xrefList.flatten()] + .map { source, map_resList, format, xrefList -> [source, map_resList, format[0], xrefList.flatten()] } COLLECT_XREFS(grouped_by_source) xref_dbs_list = COLLECT_XREFS.out.xref_by_source_h5 .map{ source, db -> db}