Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix commas that shouldn't be in biotools_mappings.tsv #18919

Merged
merged 3 commits into from
Oct 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 19 additions & 20 deletions lib/galaxy/tool_util/ontologies/biotools_mappings.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ gffcompare gffcompare
sample_seqs biopython
seq_filter_by_id biopython
rxlr_motifs signalp
rxlr_motifs hmmer2
crossmap_bam crossmap
samtools_bam_to_cram samtools
crossmap_region crossmap
Expand Down Expand Up @@ -132,7 +133,6 @@ samtools_fixmate samtools
samtool_filter2 samtools
ngsutils_bam_filter ngsutils
samtools_rmdup samtools
sam_merge2 Broken link in tool shed
bamhash bamhash
bedtools_annotatebed bedtools
bedtools_bed12tobed6 bedtools
Expand Down Expand Up @@ -260,7 +260,6 @@ fraggenescan fraggenescan
repeatmodeler repeatmodeler
promoter2 promoter
Psortb psortb
rxlr_motifs signalp hmmer2
signalp3 signalp
tmhmm2 tmhmm
wolf_psort wolf_psort
Expand Down Expand Up @@ -965,23 +964,23 @@ stacks2_tsv2bam stacks
stacks2_sstacks stacks
BayeScan bayescan
stacks2_cstacks stacks
NSPDK_candidateClust graphclust, GraphClust2
structure_to_gspan graphclust, GraphClust2
preproc graphclust, GraphClust3
preMloc graphclust, GraphClust4
nspdk_sparse graphclust, GraphClust5
locarna_best_subtree graphclust, GraphClust6
gspan graphclust, GraphClust7
glob_report graphclust, GraphClust8
cmFinder graphclust, GraphClust9
graphclust_align_cluster graphclust, GraphClust10
graphclust_aggregate_alignments graphclust, GraphClust11
graphclust_glob_report_no_align graphclust, GraphClust12
motifFinderPlot graphclust, GraphClust13
NSPDK_candidateClust graphclust
structure_to_gspan graphclust
preproc graphclust
preMloc graphclust
nspdk_sparse graphclust
locarna_best_subtree graphclust
gspan graphclust
glob_report graphclust
cmFinder graphclust
graphclust_align_cluster graphclust
graphclust_aggregate_alignments graphclust
graphclust_glob_report_no_align graphclust
motifFinderPlot graphclust
infernal_cmbuild infernal
infernal_cmsearch infernal
infernal_cmstat infernal
customize_metaphlan_database metaphlan, seqtk
customize_metaphlan_database metaphlan
merge_metaphlan_tables metaphlan
metabat2_jgi_summarize_bam_contig_depths MetaBAT_2
est_abundance bracken
Expand Down Expand Up @@ -1898,10 +1897,10 @@ gatk_count_covariates gatk
spring_model spring
spring_minz spring
spring_cross spring
pubmed_by_queries pubmed, simtext
text_to_wordmatrix pubmed, simtext
abstracts_by_pmids pubmed, simtext
pmids_to_pubtator_matrix pubmed, simtext
pubmed_by_queries pubmed
text_to_wordmatrix pubmed
abstracts_by_pmids pubmed
pmids_to_pubtator_matrix pubmed
spring_model_all spring
spring_map spring
spring_mcc spring
Expand Down
23 changes: 11 additions & 12 deletions lib/galaxy/tool_util/ontologies/ontology_data.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from collections import defaultdict
from typing import (
cast,
Dict,
Expand Down Expand Up @@ -34,13 +35,11 @@ def _read_ontology_data_text(filename: str) -> str:
EDAM_TOPIC_MAPPING_FILENAME = "edam_topic_mappings.tsv"

BIOTOOLS_MAPPING_CONTENT = _read_ontology_data_text(BIOTOOLS_MAPPING_FILENAME)
BIOTOOLS_MAPPING: Dict[str, str] = dict(
[
cast(Tuple[str, str], tuple(x.split("\t")))
for x in BIOTOOLS_MAPPING_CONTENT.splitlines()
if not x.startswith("#")
]
)
# Map each tool id to every bio.tools identifier listed for it in the mapping
# TSV. The same tool id may appear on several rows (one identifier per row),
# so values are accumulated into a list instead of being overwritten.
BIOTOOLS_MAPPING: Dict[str, List[str]] = defaultdict(list)
for line in BIOTOOLS_MAPPING_CONTENT.splitlines():
    # Blank lines and "#" comment lines carry no mapping.
    if not line.strip() or line.startswith("#"):
        continue
    try:
        tool_id, xref = line.split("\t")
    except ValueError as exc:
        # Keep failing loudly on a malformed row, but name the row so the data
        # file can be fixed without bisecting it by hand.
        raise ValueError(
            f"Malformed row in {BIOTOOLS_MAPPING_FILENAME}: expected 'tool_id<TAB>xref', got {line!r}"
        ) from exc
    BIOTOOLS_MAPPING[tool_id].append(xref)
EDAM_OPERATION_MAPPING_CONTENT = _read_ontology_data_text(EDAM_OPERATION_MAPPING_FILENAME)
EDAM_OPERATION_MAPPING: Dict[str, List[str]] = _multi_dict_mapping(EDAM_OPERATION_MAPPING_CONTENT)

Expand All @@ -61,11 +60,11 @@ def biotools_reference(xrefs):
return None


def legacy_biotools_external_reference(all_ids: List[str]) -> Optional[str]:
def legacy_biotools_external_reference(all_ids: List[str]) -> List[str]:
for tool_id in all_ids:
if tool_id in BIOTOOLS_MAPPING:
return BIOTOOLS_MAPPING[tool_id]
return None
return []


def expand_ontology_data(
Expand All @@ -74,9 +73,9 @@ def expand_ontology_data(
xrefs = tool_source.parse_xrefs()
has_biotools_reference = any(x["reftype"] == "bio.tools" for x in xrefs)
if not has_biotools_reference:
legacy_biotools_ref = legacy_biotools_external_reference(all_ids)
if legacy_biotools_ref is not None:
xrefs.append({"value": legacy_biotools_ref, "reftype": "bio.tools"})
for legacy_biotools_ref in legacy_biotools_external_reference(all_ids):
if legacy_biotools_ref is not None:
xrefs.append({"value": legacy_biotools_ref, "reftype": "bio.tools"})

edam_operations = tool_source.parse_edam_operations()
edam_topics = tool_source.parse_edam_topics()
Expand Down
30 changes: 30 additions & 0 deletions test/unit/tool_util/test_ontologies.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,22 @@
type: integer
"""

# Tool definition that contains no xrefs entry of its own. It is passed to
# expand_ontology_data by test_parse_biotools_default_mapping, which checks the
# bio.tools references that come back for a given list of tool ids.
# NOTE(review): the nested YAML keys below (outputs/out1, inputs) appear without
# indentation in this view - confirm the literal against the checked-in file.
TOOL_YAML_NO_EXPLICIT_XREFS = """
name: "Bowtie Mapper"
class: GalaxyTool
id: sort1
version: 1.0.2
description: "The Bowtie Mapper"
command: "bowtie --map-the-stuff"
outputs:
out1:
format: bam
from_work_dir: out1.bam
inputs:
- name: input1
type: integer
"""


def test_parse_edam_empty():
test_source = get_test_tool_source(source_file_name="testtool.yml", source_contents=TOOL_YAML_1)
Expand All @@ -66,3 +82,17 @@ def test_parse_edam_mapping_operations_legacy():
ontology_data = expand_ontology_data(test_source, ["sort1"], None)
assert ontology_data.edam_operations == ["operation_3802"]
assert ontology_data.edam_topics == []


def test_parse_biotools_default_mapping():
    """Check the bio.tools xrefs produced for a tool that declares none itself.

    Two lookups are exercised: a tool id expected to yield ``bwa`` as its
    first xref, and ``rxlr_motifs``, which is expected to yield exactly two
    xrefs (``signalp`` and ``hmmer2``).
    """
    # First lookup: only the first xref is inspected.
    single_source = get_test_tool_source(source_file_name="testtool.yml", source_contents=TOOL_YAML_NO_EXPLICIT_XREFS)
    single_result = expand_ontology_data(single_source, ["cheetah_problem_unbound_var_input"], None)
    first_xref = single_result.xrefs[0]
    assert first_xref["reftype"] == "bio.tools"
    assert first_xref["value"] == "bwa"

    # Second lookup: both mapped identifiers must be present, in any order.
    multi_source = get_test_tool_source(source_file_name="testtool.yml", source_contents=TOOL_YAML_NO_EXPLICIT_XREFS)
    multi_result = expand_ontology_data(multi_source, ["rxlr_motifs"], None)
    assert len(multi_result.xrefs) == 2
    mapped_values = [xref["value"] for xref in multi_result.xrefs]
    assert "signalp" in mapped_values
    assert "hmmer2" in mapped_values
Loading