From ebfcedd4113f213aefdfd27916645657b2adb39a Mon Sep 17 00:00:00 2001
From: Matthias Bernt
Date: Mon, 29 Jul 2024 11:47:22 +0200
Subject: [PATCH] fix linter errors and restore lint_urls for lint_tool_dependencies_urls

---
 planemo/linters/biocontainer_registered.py |  3 +-
 planemo/linters/conda_requirements.py      |  5 +--
 planemo/shed_lint.py                       | 42 ++++++++++++++++++++--
 3 files changed, 45 insertions(+), 5 deletions(-)

diff --git a/planemo/linters/biocontainer_registered.py b/planemo/linters/biocontainer_registered.py
index 01ac56349..a96381d88 100644
--- a/planemo/linters/biocontainer_registered.py
+++ b/planemo/linters/biocontainer_registered.py
@@ -1,6 +1,7 @@
 """Ensure best-practice biocontainer registered for this tool."""
 
 from typing import (
+    List,
     Optional,
     TYPE_CHECKING,
 )
@@ -42,7 +43,7 @@ def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"):
         name = mulled_container_name("biocontainers", targets)
         if not name:
             requirements_node = xml_node_from_toolsource(tool_source, "requirements")
-            lint_ctx.warn(MESSAGE_WARN_NO_CONTAINER, linter=cls.name(), node=requirements)
+            lint_ctx.warn(MESSAGE_WARN_NO_CONTAINER, linter=cls.name(), node=requirements_node)
 
 
 def mulled_container_name(namespace: str, targets: List[CondaTarget]) -> Optional[str]:
diff --git a/planemo/linters/conda_requirements.py b/planemo/linters/conda_requirements.py
index 3f13d7bd8..5836f7234 100644
--- a/planemo/linters/conda_requirements.py
+++ b/planemo/linters/conda_requirements.py
@@ -15,6 +15,7 @@
 )
 
 if TYPE_CHECKING:
+    from galaxy.tool_util.deps.conda_util import CondaTarget
     from galaxy.tool_util.lint import LintContext
     from galaxy.tool_util.parser.interface import ToolSource
 
@@ -32,7 +33,7 @@ def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"):
             if best_hit and exact:
                 message = f"Requirement [{conda_target_str}] matches target in best practice Conda channel [{best_hit.get('channel')}]."
                 requirements_node = xml_node_from_toolsource(tool_source, "requirements")
-                lint_ctx.info(message, linter=cls.name(), node=requirements_nodes)
+                lint_ctx.info(message, linter=cls.name(), node=requirements_node)
 
 
 class CondaRequirementInexact(Linter):
@@ -63,7 +64,7 @@ def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"):
                 lint_ctx.warn(message, linter=cls.name(), node=requirements_node)
 
 
-def _requirements_conda_targets(tool_source: "ToolSource") -> Generator[CondaTarget]:
+def _requirements_conda_targets(tool_source: "ToolSource") -> Generator["CondaTarget"]:
     requirements, *_ = tool_source.parse_requirements_and_containers()
     for requirement in requirements:
         conda_target = requirement_to_conda_targets(requirement)
diff --git a/planemo/shed_lint.py b/planemo/shed_lint.py
index 62d2ecdcc..b68d981f2 100644
--- a/planemo/shed_lint.py
+++ b/planemo/shed_lint.py
@@ -3,7 +3,9 @@
 import os
 import xml.etree.ElementTree as ET
 from typing import TYPE_CHECKING
+from urllib.request import urlopen
 
+import requests
 import yaml
 from galaxy.tool_util.lint import lint_tool_source_with
 from galaxy.tool_util.linters.help import rst_invalid
@@ -17,6 +19,7 @@
 )
 from planemo.shed import (
     CURRENT_CATEGORIES,
+    find_urls_for_xml,
     REPO_TYPE_SUITE,
     REPO_TYPE_TOOL_DEP,
     REPO_TYPE_UNRESTRICTED,
@@ -188,8 +191,43 @@ def lint_readme(realized_repository, lint_ctx):
 
 
 def lint_tool_dependencies_urls(realized_repository, lint_ctx):
-    
-    
+
+    def lint_urls(root, lint_ctx):
+        """Find the URLs referenced in ``root`` and report each as OK (info) or failing (error).
+
+        Note: this function was previously used for both tools (URLs in help) and tool dependency
+        files; the former has been rewritten, so the function has been moved here.
+        """
+        urls, _ = find_urls_for_xml(root)
+        for url in urls:
+            is_valid = True
+            if url.startswith("http://") or url.startswith("https://"):
+                headers = None
+                r = None
+                try:
+                    r = requests.get(url, headers=headers, stream=True)
+                    r.raise_for_status()
+                    next(r.iter_content(1000))
+                except Exception as e:
+                    if r is not None and r.status_code == 429:
+                        # too many requests: rate limiting, not evidence of a broken URL
+                        pass
+                    elif r is not None and r.status_code in [403, 503] and "cloudflare" in r.text:
+                        # CloudFlare protection block: the URL may be fine for real clients
+                        pass
+                    else:
+                        is_valid = False
+                        lint_ctx.error(f"Error '{e}' accessing {url}")
+            else:
+                try:
+                    with urlopen(url) as handle:
+                        handle.read(100)
+                except Exception as e:
+                    is_valid = False
+                    lint_ctx.error(f"Error '{e}' accessing {url}")
+            if is_valid:
+                lint_ctx.info("URL OK %s" % url)
+
     path = realized_repository.real_path
     tool_dependencies = os.path.join(path, "tool_dependencies.xml")
     if not os.path.exists(tool_dependencies):