Skip to content

Commit

Permalink
Merge pull request #18919 from jmchilton/biotools
Browse files — browse the repository at this point in the history
Fix commas that shouldn't be in biotools_mappings.tsv
  • Loading branch information
bgruening authored Oct 4, 2024
2 parents 2e9ca39 + 24152ac commit d18bc68
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 32 deletions.
39 changes: 19 additions & 20 deletions lib/galaxy/tool_util/ontologies/biotools_mappings.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ gffcompare gffcompare
sample_seqs biopython
seq_filter_by_id biopython
rxlr_motifs signalp
rxlr_motifs hmmer2
crossmap_bam crossmap
samtools_bam_to_cram samtools
crossmap_region crossmap
Expand Down Expand Up @@ -132,7 +133,6 @@ samtools_fixmate samtools
samtool_filter2 samtools
ngsutils_bam_filter ngsutils
samtools_rmdup samtools
sam_merge2 Broken link in tool shed
bamhash bamhash
bedtools_annotatebed bedtools
bedtools_bed12tobed6 bedtools
Expand Down Expand Up @@ -260,7 +260,6 @@ fraggenescan fraggenescan
repeatmodeler repeatmodeler
promoter2 promoter
Psortb psortb
rxlr_motifs signalp hmmer2
signalp3 signalp
tmhmm2 tmhmm
wolf_psort wolf_psort
Expand Down Expand Up @@ -965,23 +964,23 @@ stacks2_tsv2bam stacks
stacks2_sstacks stacks
BayeScan bayescan
stacks2_cstacks stacks
NSPDK_candidateClust graphclust, GraphClust2
structure_to_gspan graphclust, GraphClust2
preproc graphclust, GraphClust3
preMloc graphclust, GraphClust4
nspdk_sparse graphclust, GraphClust5
locarna_best_subtree graphclust, GraphClust6
gspan graphclust, GraphClust7
glob_report graphclust, GraphClust8
cmFinder graphclust, GraphClust9
graphclust_align_cluster graphclust, GraphClust10
graphclust_aggregate_alignments graphclust, GraphClust11
graphclust_glob_report_no_align graphclust, GraphClust12
motifFinderPlot graphclust, GraphClust13
NSPDK_candidateClust graphclust
structure_to_gspan graphclust
preproc graphclust
preMloc graphclust
nspdk_sparse graphclust
locarna_best_subtree graphclust
gspan graphclust
glob_report graphclust
cmFinder graphclust
graphclust_align_cluster graphclust
graphclust_aggregate_alignments graphclust
graphclust_glob_report_no_align graphclust
motifFinderPlot graphclust
infernal_cmbuild infernal
infernal_cmsearch infernal
infernal_cmstat infernal
customize_metaphlan_database metaphlan, seqtk
customize_metaphlan_database metaphlan
merge_metaphlan_tables metaphlan
metabat2_jgi_summarize_bam_contig_depths MetaBAT_2
est_abundance bracken
Expand Down Expand Up @@ -1898,10 +1897,10 @@ gatk_count_covariates gatk
spring_model spring
spring_minz spring
spring_cross spring
pubmed_by_queries pubmed, simtext
text_to_wordmatrix pubmed, simtext
abstracts_by_pmids pubmed, simtext
pmids_to_pubtator_matrix pubmed, simtext
pubmed_by_queries pubmed
text_to_wordmatrix pubmed
abstracts_by_pmids pubmed
pmids_to_pubtator_matrix pubmed
spring_model_all spring
spring_map spring
spring_mcc spring
Expand Down
23 changes: 11 additions & 12 deletions lib/galaxy/tool_util/ontologies/ontology_data.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from collections import defaultdict
from typing import (
cast,
Dict,
Expand Down Expand Up @@ -34,13 +35,11 @@ def _read_ontology_data_text(filename: str) -> str:
EDAM_TOPIC_MAPPING_FILENAME = "edam_topic_mappings.tsv"

BIOTOOLS_MAPPING_CONTENT = _read_ontology_data_text(BIOTOOLS_MAPPING_FILENAME)
BIOTOOLS_MAPPING: Dict[str, str] = dict(
[
cast(Tuple[str, str], tuple(x.split("\t")))
for x in BIOTOOLS_MAPPING_CONTENT.splitlines()
if not x.startswith("#")
]
)
# Map each tool id to every bio.tools reference listed for it. The same tool
# id may appear on several rows of the mapping file; each row contributes one
# entry to that tool's list, in file order.
BIOTOOLS_MAPPING: Dict[str, List[str]] = defaultdict(list)
for line in BIOTOOLS_MAPPING_CONTENT.splitlines():
    # Rows beginning with "#" are treated as comments and ignored.
    if line.startswith("#"):
        continue
    # Every remaining row must hold exactly two tab-separated columns;
    # anything else raises ValueError on unpacking.
    tool_id, xref = line.split("\t")
    BIOTOOLS_MAPPING[tool_id].append(xref)
EDAM_OPERATION_MAPPING_CONTENT = _read_ontology_data_text(EDAM_OPERATION_MAPPING_FILENAME)
EDAM_OPERATION_MAPPING: Dict[str, List[str]] = _multi_dict_mapping(EDAM_OPERATION_MAPPING_CONTENT)

Expand All @@ -61,11 +60,11 @@ def biotools_reference(xrefs):
return None


def legacy_biotools_external_reference(all_ids: List[str]) -> Optional[str]:
def legacy_biotools_external_reference(all_ids: List[str]) -> List[str]:
for tool_id in all_ids:
if tool_id in BIOTOOLS_MAPPING:
return BIOTOOLS_MAPPING[tool_id]
return None
return []


def expand_ontology_data(
Expand All @@ -74,9 +73,9 @@ def expand_ontology_data(
xrefs = tool_source.parse_xrefs()
has_biotools_reference = any(x["reftype"] == "bio.tools" for x in xrefs)
if not has_biotools_reference:
legacy_biotools_ref = legacy_biotools_external_reference(all_ids)
if legacy_biotools_ref is not None:
xrefs.append({"value": legacy_biotools_ref, "reftype": "bio.tools"})
for legacy_biotools_ref in legacy_biotools_external_reference(all_ids):
if legacy_biotools_ref is not None:
xrefs.append({"value": legacy_biotools_ref, "reftype": "bio.tools"})

edam_operations = tool_source.parse_edam_operations()
edam_topics = tool_source.parse_edam_topics()
Expand Down
30 changes: 30 additions & 0 deletions test/unit/tool_util/test_ontologies.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,22 @@
type: integer
"""

# YAML tool definition that declares no xrefs section; the bio.tools default
# mapping test below feeds it to expand_ontology_data.
TOOL_YAML_NO_EXPLICIT_XREFS = """
name: "Bowtie Mapper"
class: GalaxyTool
id: sort1
version: 1.0.2
description: "The Bowtie Mapper"
command: "bowtie --map-the-stuff"
outputs:
out1:
format: bam
from_work_dir: out1.bam
inputs:
- name: input1
type: integer
"""


def test_parse_edam_empty():
test_source = get_test_tool_source(source_file_name="testtool.yml", source_contents=TOOL_YAML_1)
Expand All @@ -66,3 +82,17 @@ def test_parse_edam_mapping_operations_legacy():
ontology_data = expand_ontology_data(test_source, ["sort1"], None)
assert ontology_data.edam_operations == ["operation_3802"]
assert ontology_data.edam_topics == []


def test_parse_biotools_default_mapping():
    """Check bio.tools xrefs produced for a tool source with no explicit xrefs.

    Two scenarios are exercised: a tool id expected to yield a single
    ``bwa`` reference, and ``rxlr_motifs``, which is expected to yield two
    references (``signalp`` and ``hmmer2``).
    """
    # Scenario 1: the first xref is the bio.tools reference "bwa".
    single_source = get_test_tool_source(
        source_file_name="testtool.yml",
        source_contents=TOOL_YAML_NO_EXPLICIT_XREFS,
    )
    single_data = expand_ontology_data(single_source, ["cheetah_problem_unbound_var_input"], None)
    assert single_data.xrefs[0]["reftype"] == "bio.tools"
    assert single_data.xrefs[0]["value"] == "bwa"

    # Scenario 2: a tool id with two mapping rows yields both references.
    multi_source = get_test_tool_source(
        source_file_name="testtool.yml",
        source_contents=TOOL_YAML_NO_EXPLICIT_XREFS,
    )
    multi_data = expand_ontology_data(multi_source, ["rxlr_motifs"], None)
    assert len(multi_data.xrefs) == 2
    found_values = [xref["value"] for xref in multi_data.xrefs]
    assert "signalp" in found_values
    assert "hmmer2" in found_values

0 comments on commit d18bc68

Please sign in to comment.