From 9c3137cea171136c32e8c1d66807a3214513e70b Mon Sep 17 00:00:00 2001 From: John Chilton Date: Tue, 1 Oct 2024 10:28:05 -0400 Subject: [PATCH 1/3] Fix commas that shouldn't be in biotools_mappings.tsv --- .../ontologies/biotools_mappings.tsv | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/lib/galaxy/tool_util/ontologies/biotools_mappings.tsv b/lib/galaxy/tool_util/ontologies/biotools_mappings.tsv index bb23dd9fc7cb..e3a8abedf495 100644 --- a/lib/galaxy/tool_util/ontologies/biotools_mappings.tsv +++ b/lib/galaxy/tool_util/ontologies/biotools_mappings.tsv @@ -965,23 +965,23 @@ stacks2_tsv2bam stacks stacks2_sstacks stacks BayeScan bayescan stacks2_cstacks stacks -NSPDK_candidateClust graphclust, GraphClust2 -structure_to_gspan graphclust, GraphClust2 -preproc graphclust, GraphClust3 -preMloc graphclust, GraphClust4 -nspdk_sparse graphclust, GraphClust5 -locarna_best_subtree graphclust, GraphClust6 -gspan graphclust, GraphClust7 -glob_report graphclust, GraphClust8 -cmFinder graphclust, GraphClust9 -graphclust_align_cluster graphclust, GraphClust10 -graphclust_aggregate_alignments graphclust, GraphClust11 -graphclust_glob_report_no_align graphclust, GraphClust12 -motifFinderPlot graphclust, GraphClust13 +NSPDK_candidateClust graphclust +structure_to_gspan graphclust +preproc graphclust +preMloc graphclust +nspdk_sparse graphclust +locarna_best_subtree graphclust +gspan graphclust +glob_report graphclust +cmFinder graphclust +graphclust_align_cluster graphclust +graphclust_aggregate_alignments graphclust +graphclust_glob_report_no_align graphclust +motifFinderPlot graphclust infernal_cmbuild infernal infernal_cmsearch infernal infernal_cmstat infernal -customize_metaphlan_database metaphlan, seqtk +customize_metaphlan_database metaphlan merge_metaphlan_tables metaphlan metabat2_jgi_summarize_bam_contig_depths MetaBAT_2 est_abundance bracken @@ -1898,10 +1898,10 @@ gatk_count_covariates gatk spring_model spring spring_minz spring spring_cross spring -pubmed_by_queries pubmed, simtext -text_to_wordmatrix pubmed, simtext -abstracts_by_pmids pubmed, simtext -pmids_to_pubtator_matrix pubmed, simtext +pubmed_by_queries pubmed +text_to_wordmatrix pubmed +abstracts_by_pmids pubmed +pmids_to_pubtator_matrix pubmed spring_model_all spring spring_map spring spring_mcc spring From beb4842b8d8ddaa25995425c3dbeb50acfde41de Mon Sep 17 00:00:00 2001 From: John Chilton Date: Wed, 2 Oct 2024 00:10:44 -0400 Subject: [PATCH 2/3] Allow multiple xrefs in biotools_mappings.tsv. --- .../ontologies/biotools_mappings.tsv | 2 +- .../tool_util/ontologies/ontology_data.py | 23 +++++++------- test/unit/tool_util/test_ontologies.py | 30 +++++++++++++++++++ 3 files changed, 42 insertions(+), 13 deletions(-) diff --git a/lib/galaxy/tool_util/ontologies/biotools_mappings.tsv b/lib/galaxy/tool_util/ontologies/biotools_mappings.tsv index e3a8abedf495..9cbb1f041360 100644 --- a/lib/galaxy/tool_util/ontologies/biotools_mappings.tsv +++ b/lib/galaxy/tool_util/ontologies/biotools_mappings.tsv @@ -44,6 +44,7 @@ gffcompare gffcompare sample_seqs biopython seq_filter_by_id biopython rxlr_motifs signalp +rxlr_motifs hmmer2 crossmap_bam crossmap samtools_bam_to_cram samtools crossmap_region crossmap @@ -260,7 +261,6 @@ fraggenescan fraggenescan repeatmodeler repeatmodeler promoter2 promoter Psortb psortb -rxlr_motifs signalp hmmer2 signalp3 signalp tmhmm2 tmhmm wolf_psort wolf_psort diff --git a/lib/galaxy/tool_util/ontologies/ontology_data.py b/lib/galaxy/tool_util/ontologies/ontology_data.py index 67822130e428..63a4d573b4d7 100644 --- a/lib/galaxy/tool_util/ontologies/ontology_data.py +++ b/lib/galaxy/tool_util/ontologies/ontology_data.py @@ -1,3 +1,4 @@ +from collections import defaultdict from typing import ( cast, Dict, @@ -34,13 +35,11 @@ def _read_ontology_data_text(filename: str) -> str: EDAM_TOPIC_MAPPING_FILENAME = "edam_topic_mappings.tsv" BIOTOOLS_MAPPING_CONTENT = _read_ontology_data_text(BIOTOOLS_MAPPING_FILENAME) -BIOTOOLS_MAPPING: Dict[str, str] = dict( - [ - cast(Tuple[str, str], tuple(x.split("\t"))) - for x in BIOTOOLS_MAPPING_CONTENT.splitlines() - if not x.startswith("#") - ] -) +BIOTOOLS_MAPPING: Dict[str, List[str]] = defaultdict(list) +for line in BIOTOOLS_MAPPING_CONTENT.splitlines(): + if not line.startswith("#"): + tool_id, xref = line.split("\t") + BIOTOOLS_MAPPING[tool_id].append(xref) EDAM_OPERATION_MAPPING_CONTENT = _read_ontology_data_text(EDAM_OPERATION_MAPPING_FILENAME) EDAM_OPERATION_MAPPING: Dict[str, List[str]] = _multi_dict_mapping(EDAM_OPERATION_MAPPING_CONTENT) @@ -61,11 +60,11 @@ def biotools_reference(xrefs): return None -def legacy_biotools_external_reference(all_ids: List[str]) -> Optional[str]: +def legacy_biotools_external_reference(all_ids: List[str]) -> List[str]: for tool_id in all_ids: if tool_id in BIOTOOLS_MAPPING: return BIOTOOLS_MAPPING[tool_id] - return None + return [] def expand_ontology_data( @@ -74,9 +73,9 @@ def expand_ontology_data( xrefs = tool_source.parse_xrefs() has_biotools_reference = any(x["reftype"] == "bio.tools" for x in xrefs) if not has_biotools_reference: - legacy_biotools_ref = legacy_biotools_external_reference(all_ids) - if legacy_biotools_ref is not None: - xrefs.append({"value": legacy_biotools_ref, "reftype": "bio.tools"}) + for legacy_biotools_ref in legacy_biotools_external_reference(all_ids): + if legacy_biotools_ref is not None: + xrefs.append({"value": legacy_biotools_ref, "reftype": "bio.tools"}) edam_operations = tool_source.parse_edam_operations() edam_topics = tool_source.parse_edam_topics() diff --git a/test/unit/tool_util/test_ontologies.py b/test/unit/tool_util/test_ontologies.py index 3991d358c3db..5c1faca4771f 100644 --- a/test/unit/tool_util/test_ontologies.py +++ b/test/unit/tool_util/test_ontologies.py @@ -46,6 +46,22 @@ type: integer """ +TOOL_YAML_NO_EXPLICIT_XREFS = """ +name: "Bowtie Mapper" +class: GalaxyTool +id: sort1 +version: 1.0.2 +description: "The Bowtie Mapper" +command: "bowtie --map-the-stuff" +outputs: + out1: + format: bam + from_work_dir: out1.bam +inputs: + - name: input1 + type: integer +""" + def test_parse_edam_empty(): test_source = get_test_tool_source(source_file_name="testtool.yml", source_contents=TOOL_YAML_1) @@ -66,3 +82,17 @@ def test_parse_edam_mapping_operations_legacy(): ontology_data = expand_ontology_data(test_source, ["sort1"], None) assert ontology_data.edam_operations == ["operation_3802"] assert ontology_data.edam_topics == [] + + +def test_parse_biotools_default_mapping(): + test_source = get_test_tool_source(source_file_name="testtool.yml", source_contents=TOOL_YAML_NO_EXPLICIT_XREFS) + ontology_data = expand_ontology_data(test_source, ["cheetah_problem_unbound_var_input"], None) + assert ontology_data.xrefs[0]["reftype"] == "bio.tools" + assert ontology_data.xrefs[0]["value"] == "bwa" + + test_source = get_test_tool_source(source_file_name="testtool.yml", source_contents=TOOL_YAML_NO_EXPLICIT_XREFS) + ontology_data = expand_ontology_data(test_source, ["rxlr_motifs"], None) + assert len(ontology_data.xrefs) == 2 + values = [x["value"] for x in ontology_data.xrefs] + assert "signalp" in values + assert "hmmer2" in values From 24152ac979777c8f14ea4a49113b92cfd7fb60ae Mon Sep 17 00:00:00 2001 From: John Chilton Date: Wed, 2 Oct 2024 09:07:47 -0400 Subject: [PATCH 3/3] Fix another biotools entry. --- lib/galaxy/tool_util/ontologies/biotools_mappings.tsv | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/galaxy/tool_util/ontologies/biotools_mappings.tsv b/lib/galaxy/tool_util/ontologies/biotools_mappings.tsv index 9cbb1f041360..edb75127595d 100644 --- a/lib/galaxy/tool_util/ontologies/biotools_mappings.tsv +++ b/lib/galaxy/tool_util/ontologies/biotools_mappings.tsv @@ -133,7 +133,6 @@ samtools_fixmate samtools samtool_filter2 samtools ngsutils_bam_filter ngsutils samtools_rmdup samtools -sam_merge2 Broken link in tool shed bamhash bamhash bedtools_annotatebed bedtools bedtools_bed12tobed6 bedtools