diff --git a/src/downloader/download.py b/src/downloader/download.py index f5174bd..da898f7 100644 --- a/src/downloader/download.py +++ b/src/downloader/download.py @@ -134,18 +134,23 @@ def download_action_or_reusable_workflow(uses_string: str, repo: str) -> None: is not None ): return - - url = get_repository_reusable_workflow(absolute_path) + workflow_path, tag = ( + absolute_path.split("@") + if uses_string_obj.ref + else (absolute_path, None) + ) + url = get_repository_reusable_workflow(workflow_path, tag) elif uses_string_obj.type == UsesStringType.ACTION: # If already scanned action if sets_db.exists_in_set(Config.action_download_history_set, absolute_path): return - # TODO: Make pretier - if uses_string_obj.ref is None: - url = get_repository_composite_action(absolute_path, None) - else: - url = get_repository_composite_action(*absolute_path.split("@")) + action_path, tag = ( + absolute_path.split("@") + if uses_string_obj.ref + else (absolute_path, None) + ) + url = get_repository_composite_action(action_path, tag) else: # Can happen with docker references. return diff --git a/src/downloader/gh_api.py b/src/downloader/gh_api.py index c73da79..8d29a52 100644 --- a/src/downloader/gh_api.py +++ b/src/downloader/gh_api.py @@ -1,7 +1,7 @@ import os import urllib from requests import get -from typing import Dict, Any, Optional, Iterator, Optional +from typing import Dict, Any, List, Optional, Iterator from http import HTTPStatus from src.config.config import Config import src.logger.log as log @@ -34,6 +34,7 @@ REPOSITORY_QUERY_MIN = "stars:>={min_stars}" REPOSITORY_QUERY_MIN_MAX = "stars:{min_stars}..{max_stars}" +ACTION_SUFFIXES = ["action.yml", "action.yaml"] headers = { "Accept": "application/vnd.github+json", "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 Edg/107.0.1418.42", @@ -193,41 +194,58 @@ def get_repository_workflows(repo: str) -> Dict[str, str]: return workflows -def get_repository_composite_action(path: str, tag: Optional[bool]) -> str: +def get_ref_of_tag(repo: str, tag: str) -> Optional[str]: """ - Retrieves the downloadable URL for a specific composite action located at the given path. + This function is used to get the ref of a tag in a repository. + If the tag is not found, it will return the provided tag as is since it might be a branch name that should be used as tag + """ + r_tags = get(TAGS_URL.format(repo_path=repo), headers=headers) + if r_tags.status_code != 200: + log.error( + f"Coudln't found tags for repository {repo}. status code: {r_tags.status_code}. Response: {r_tags.text}" + ) + return + + shas = [d["commit"]["sha"] for d in r_tags.json() if d["name"] == tag] + + # Sometimes the tag is not found, but the provided reference is actually a branch name. + return shas[0] if len(shas) > 0 else tag + + +def get_download_url( + path: str, + file_suffixes: Optional[List[str]] = None, + tag: Optional[str] = None, +) -> Optional[str]: + """ + Retrieves the downloadable URL for a GitHub resource located at the given path. Parameters: - - path (str): The repository path containing the action, in the format "owner/repo/relative_path_to_action". - - tag (Optional[bool]): The version tag of the action. If None, the latest version is used. + - path (str): The repository path containing the resource, formatted as "owner/repo/relative_path_to_resource". + - tag (Optional[str]): The version tag of the resource. If None, the latest version is used. + - file_suffixes (List[str]): List of possible file suffixes that the resource could have (e.g., ["action.yml", "action.yaml"]). Returns: - - str: The downloadable URL for the action. Returns None if the action is not found, or if a network error occurs. + - Optional[str]: The downloadable URL for the resource. Returns None if the resource is not found or if a network error occurs. Raises: - - Logs an error message if the request to GitHub API fails or if tags are not found in the repository. + - Logs an error message if the request to the GitHub API fails. """ splitted_path = path.split("/") repo, relative_path = "/".join(splitted_path[:2]), "/".join(splitted_path[2:]) headers["Authorization"] = f"Token {Config.github_token}" - if tag is not None: - r_tags = get(TAGS_URL.format(repo_path=repo), headers=headers) - if r_tags.status_code != 200: - log.error( - f"Coudln't found tags for repository {repo}. status code: {r_tags.status_code}. Response: {r_tags.text}" - ) - return - - shas = [d["commit"]["sha"] for d in r_tags.json() if d["name"] == tag] + # Get ref of commit if tag is provided + ref = get_ref_of_tag(repo, tag) if tag else None - # Sometimes the tag is not found, but the provided reference is actually a branch name. - ref = shas[0] if len(shas) > 0 else tag - - for suffix in ["action.yml", "action.yaml"]: - file_path = os.path.join(relative_path, suffix) + files_to_try = ( + [os.path.join(relative_path, fs) for fs in file_suffixes] + if file_suffixes + else [relative_path] + ) + for file_path in files_to_try: # If we have a tag, we need to use the contents by ref API to get the correct version of the action. # Otherwise, we can use the normal contents API action_download_url = ( @@ -238,7 +256,6 @@ def get_repository_composite_action(path: str, tag: Optional[bool]) -> str: r = get(action_download_url, headers=headers) if r.status_code == 404: - # can be both yml and yaml continue if r.status_code != 200: @@ -248,25 +265,15 @@ def get_repository_composite_action(path: str, tag: Optional[bool]) -> str: return r.json()["download_url"] -def get_repository_reusable_workflow(path: str) -> str: - """Returns downlodable URL for a reusable workflows in the specific path. - - Raises exception if network error occured. +def get_repository_composite_action(path: str, tag: Optional[str]) -> str: """ - path_splitted = path.split("/") - repo = "/".join(path_splitted[:2]) - relative_path = "/".join(path_splitted[2:]) - - headers["Authorization"] = f"Token {Config.github_token}" + Retrieves the downloadable URL for a specific composite action located at the given path. + """ + return get_download_url(path, tag=tag, file_suffixes=ACTION_SUFFIXES) - r = get( - CONTENTS_URL.format(repo_path=repo, file_path=relative_path), - headers=headers, - ) - if r.status_code == 404: - return - if r.status_code != 200: - log.error(f"status code: {r.status_code}. Response: {r.text}") - return - return r.json()["download_url"] +def get_repository_reusable_workflow(path: str, tag: str) -> str: + """ + Retrieves the downloadable URL for a specific reusable workflow located at the given path. + """ + return get_download_url(path, tag=tag, file_suffixes=[]) diff --git a/src/indexer/index.py b/src/indexer/index.py index 24ee022..4b58241 100644 --- a/src/indexer/index.py +++ b/src/indexer/index.py @@ -132,6 +132,7 @@ def index_workflow_file(workflow: str) -> None: return obj["path"] = workflow + obj["tag"] = workflow.split("@")[-1] if "@" in workflow else None obj["url"] = url Config.graph.push_object(Workflow.from_dict(obj)) diff --git a/src/workflow_components/workflow.py b/src/workflow_components/workflow.py index 7ce94a4..1b4b9ca 100644 --- a/src/workflow_components/workflow.py +++ b/src/workflow_components/workflow.py @@ -47,6 +47,7 @@ class Step(GraphObject): ref = Property() with_prop = Property("with") url = Property() + tag = Property() action = RelatedTo("src.workflow_components.composite_action.CompositeAction") reusable_workflow = RelatedTo("Workflow") @@ -61,6 +62,9 @@ def __init__(self, _id: str, name: Optional[str], path: str): def from_dict(obj_dict) -> "Step": s = Step(_id=obj_dict["_id"], name=obj_dict.get("name"), path=obj_dict["path"]) s.url = obj_dict["url"] + + if "tag" in obj_dict: + s.tag = obj_dict["tag"] if "run" in obj_dict: s.run = obj_dict["run"] @@ -102,6 +106,7 @@ class Job(GraphObject): ref = Property() url = Property() with_prop = Property("with") + tag = Property() steps = RelatedTo(Step) reusable_workflow = RelatedTo("Workflow") @@ -114,6 +119,11 @@ def __init__(self, _id: str, name: str, path: str): @staticmethod def from_dict(obj_dict) -> "Job": j = Job(_id=obj_dict["_id"], name=obj_dict["name"], path=obj_dict["path"]) + + # Optional fields + if "tag" in obj_dict: + j.tag = obj_dict["tag"] + if "uses" in obj_dict: j.uses = obj_dict["uses"] # Uses string is quite complex, and may reference to several types of nodes. @@ -140,6 +150,7 @@ def from_dict(obj_dict) -> "Job": step["_id"] = md5(f"{j._id}_{i}".encode()).hexdigest() step["path"] = j.path step["url"] = j.url + step["tag"] = j.tag j.steps.add(Step.from_dict(step)) return j @@ -154,6 +165,7 @@ class Workflow(GraphObject): trigger = Property() permissions = Property() url = Property() + tag = Property() jobs = RelatedTo(Job) triggered_by = RelatedFrom("Workflow") @@ -203,6 +215,9 @@ def from_dict(obj_dict: Dict[str, Any]) -> "Workflow": w.trigger = trigger w.url = obj_dict["url"] + if "tag" in obj_dict: + w.tag = obj_dict["tag"] + if "permissions" in obj_dict: w.permissions = convert_dict_to_list(obj_dict["permissions"]) @@ -214,6 +229,7 @@ def from_dict(obj_dict: Dict[str, Any]) -> "Workflow": job["path"] = w.path job["name"] = job_name job["url"] = w.url + job["tag"] = w.tag w.jobs.add(Job.from_dict(job)) return w