From 332c57a7abdd4bf73846107ce5e33fb219660e28 Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Mon, 29 Jul 2024 11:47:22 +0200 Subject: [PATCH] fix linter errors and restore lint_urls for lint_tool_dependencies_urls --- planemo/linters/biocontainer_registered.py | 3 +- planemo/linters/conda_requirements.py | 5 ++- planemo/shed_lint.py | 51 +++++++++++++++++++++- 3 files changed, 54 insertions(+), 5 deletions(-) diff --git a/planemo/linters/biocontainer_registered.py b/planemo/linters/biocontainer_registered.py index 01ac56349..a96381d88 100644 --- a/planemo/linters/biocontainer_registered.py +++ b/planemo/linters/biocontainer_registered.py @@ -1,6 +1,7 @@ """Ensure best-practice biocontainer registered for this tool.""" from typing import ( + List, Optional, TYPE_CHECKING, ) @@ -42,7 +43,7 @@ def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): name = mulled_container_name("biocontainers", targets) if not name: requirements_node = xml_node_from_toolsource(tool_source, "requirements") - lint_ctx.warn(MESSAGE_WARN_NO_CONTAINER, linter=cls.name(), node=requirements) + lint_ctx.warn(MESSAGE_WARN_NO_CONTAINER, linter=cls.name(), node=requirements_node) def mulled_container_name(namespace: str, targets: List[CondaTarget]) -> Optional[str]: diff --git a/planemo/linters/conda_requirements.py b/planemo/linters/conda_requirements.py index 3f13d7bd8..5836f7234 100644 --- a/planemo/linters/conda_requirements.py +++ b/planemo/linters/conda_requirements.py @@ -15,6 +15,7 @@ ) if TYPE_CHECKING: + from galaxy.tool_util.deps.conda_util import CondaTarget from galaxy.tool_util.lint import LintContext from galaxy.tool_util.parser.interface import ToolSource @@ -32,7 +33,7 @@ def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): if best_hit and exact: message = f"Requirement [{conda_target_str}] matches target in best practice Conda channel [{best_hit.get('channel')}]." 
requirements_node = xml_node_from_toolsource(tool_source, "requirements") - lint_ctx.info(message, linter=cls.name(), node=requirements_nodes) + lint_ctx.info(message, linter=cls.name(), node=requirements_node) class CondaRequirementInexact(Linter): @@ -63,7 +64,7 @@ def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): lint_ctx.warn(message, linter=cls.name(), node=requirements_node) -def _requirements_conda_targets(tool_source: "ToolSource") -> Generator[CondaTarget]: +def _requirements_conda_targets(tool_source: "ToolSource") -> Generator["CondaTarget"]: requirements, *_ = tool_source.parse_requirements_and_containers() for requirement in requirements: conda_target = requirement_to_conda_targets(requirement) diff --git a/planemo/shed_lint.py b/planemo/shed_lint.py index 62d2ecdcc..2e63401f3 100644 --- a/planemo/shed_lint.py +++ b/planemo/shed_lint.py @@ -188,8 +188,55 @@ def lint_readme(realized_repository, lint_ctx): def lint_tool_dependencies_urls(realized_repository, lint_ctx): - - + + def lint_urls(root, lint_ctx): + """Find referenced URLs and verify they are valid. 
+ + Note: this function was previously used for tools (URLs in help) and tool dependency files; + the former has been rewritten, and therefore the function has been moved here. + """ + urls, docs = find_urls_for_xml(root) + + # This is from Google Chrome on macOS, current at time of writing: + BROWSER_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36" + + def validate_url(url, lint_ctx, user_agent=None): + is_valid = True + if url.startswith("http://") or url.startswith("https://"): + if user_agent: + headers = {"User-Agent": user_agent, "Accept": "*/*"} + else: + headers = None + r = None + try: + r = requests.get(url, headers=headers, stream=True) + r.raise_for_status() + next(r.iter_content(1000)) + except Exception as e: + if r is not None and r.status_code == 429: + # too many requests + pass + if r is not None and r.status_code in [403, 503] and "cloudflare" in r.text: + # CloudFlare protection block + pass + else: + is_valid = False + lint_ctx.error(f"Error '{e}' accessing {url}") + else: + try: + with urlopen(url) as handle: + handle.read(100) + except Exception as e: + is_valid = False + lint_ctx.error(f"Error '{e}' accessing {url}") + if is_valid: + lint_ctx.info("URL OK %s" % url) + + for url in urls: + validate_url(url, lint_ctx) + for url in docs: + validate_url(url, lint_ctx, BROWSER_USER_AGENT) + path = realized_repository.real_path tool_dependencies = os.path.join(path, "tool_dependencies.xml") if not os.path.exists(tool_dependencies):