From 00a6fdbd87d65948e979ddeb25c6c28fc17ebe5b Mon Sep 17 00:00:00 2001
From: Arthur Deierlein
Date: Tue, 15 Aug 2023 13:41:01 +0200
Subject: [PATCH] feat(api): use git for repos

---
 api/outdated/outdated/models.py               |  61 +++++++-
 api/outdated/outdated/views.py                |   5 +-
 .../{outdated/synchroniser.py => parser.py}   |  94 ++----------
 api/outdated/settings.py                      |   7 +-
 api/outdated/tracking.py                      | 145 +++++++++++++++++++
 5 files changed, 220 insertions(+), 92 deletions(-)
 rename api/outdated/{outdated/synchroniser.py => parser.py} (57%)
 create mode 100644 api/outdated/tracking.py

diff --git a/api/outdated/outdated/models.py b/api/outdated/outdated/models.py
index 32cd5b2d..e81fe5ad 100644
--- a/api/outdated/outdated/models.py
+++ b/api/outdated/outdated/models.py
@@ -1,10 +1,13 @@
 from datetime import date, timedelta
+from os.path import basename, dirname

 from django.db import models
 from django.db.models.functions import Lower
+from django.dispatch import receiver

 from outdated.models import UniqueBooleanField, UUIDModel

+from ..tracking import Tracker
 from ..user.models import User

 STATUS_OPTIONS = {
@@ -108,7 +111,7 @@ class Meta:
                 fields=["repo"],
                 condition=models.Q(name__iexact=Lower("repo")),
                 name="unique_repo",
-            )
+            ),
         ]

     @property
@@ -116,6 +119,34 @@ def status(self) -> str:
         first = self.versioned_dependencies.first()
         return first.release_version.status if first else STATUS_OPTIONS["undefined"]

+    @property
+    def clone_url(self):
+        """Return the URL the repository is cloned from."""
+        # TODO: add logic for e.g. github access tokens
+        return self.repo
+
+    @property
+    def repo_domain(self):
+        """Return the part of the repo URL between ``//`` and the namespace."""
+        return dirname(dirname(self.repo)).split("//")[1]
+
+    @property
+    def repo_namespace(self):
+        """Return the second to last path segment of the repo URL."""
+        return basename(dirname(self.repo))
+
+    @property
+    def repo_name(self):
+        """Return the last path segment of the repo URL without ``.git``."""
+        # removesuffix only strips a trailing ".git"; replace() would also
+        # mangle names that merely contain it (e.g. "my.github.io").
+        return basename(self.repo).removesuffix(".git")
+
+    @property
+    def clone_path(self):
+        """Return the lower-cased ``domain/namespace/name`` path of the clone."""
+        return f"{self.repo_domain}/{self.repo_namespace}/{self.repo_name}".lower()
+
     def __str__(self):
         return self.name

@@ -129,3 +160,31 @@ class Maintainer(UUIDModel):

     class Meta:
         unique_together = ("user", "project")
+
+
+@receiver(models.signals.post_save, sender=Project)
+def project_created(instance, created, **kwargs):
+    """Clone and sync the repository of a newly created project."""
+    if not created:
+        return
+    Tracker(instance).setup()
+
+
+@receiver(models.signals.pre_delete, sender=Project)
+def project_deleted(instance, **kwargs):
+    """Remove the local clone before the project is deleted."""
+    Tracker(instance).delete()
+
+
+@receiver(models.signals.pre_save, sender=Project)
+def project_repo_updated(instance: Project, **kwargs):
+    """Replace the local clone when a saved project's clone path changes."""
+    try:
+        saved_instance = Project.objects.get(id=instance.pk)
+    except Project.DoesNotExist:
+        return
+    if saved_instance.clone_path == instance.clone_path:
+        return
+    Tracker(saved_instance).delete()
+    instance.versioned_dependencies.clear()
+    Tracker(instance).setup()
diff --git a/api/outdated/outdated/views.py b/api/outdated/outdated/views.py
index 574d01b4..ca6ff620 100644
--- a/api/outdated/outdated/views.py
+++ b/api/outdated/outdated/views.py
@@ -3,8 +3,9 @@
 from rest_framework.response import Response
 from rest_framework.viewsets import ModelViewSet

+from outdated.tracking import Tracker
+
 from . import models, serializers
-from .synchroniser import Synchroniser


 class ProjectViewSet(ModelViewSet):
@@ -14,7 +15,7 @@ class ProjectViewSet(ModelViewSet):
     @action(detail=True, methods=["post"])
     def sync(self, request, pk=None):
         try:
-            Synchroniser(self.get_object()).sync()
+            Tracker(self.get_object()).sync()
         except Exception as e:
             return Response(
                 {"detail": f"Failed to sync project: {e}"},
diff --git a/api/outdated/outdated/synchroniser.py b/api/outdated/parser.py
similarity index 57%
rename from api/outdated/outdated/synchroniser.py
rename to api/outdated/parser.py
index 74850e35..45d3ed25 100644
--- a/api/outdated/outdated/synchroniser.py
+++ b/api/outdated/parser.py
@@ -1,5 +1,4 @@
 from asyncio import gather, run, sleep
-from datetime import datetime
 from os.path import basename
 from re import findall
 from tomllib import loads
@@ -10,15 +9,7 @@
 from django.conf import settings
 from semver import Version as SemVer

-from . import models
-
-# from yaml import safe_load
-
-
-NPM_FILES = ["yarn.lock", "pnpm-lock.yaml"]
-PYPI_FILES = ["poetry.lock"]
-
-LOCK_FILES = [*NPM_FILES, *PYPI_FILES]
+from .outdated import models


 def get_version(version: str) -> str:
@@ -26,79 +17,6 @@ def get_version(version: str) -> str:
     return ".".join([*versions, *["0" for _ in range(3 - len(versions))]])


-class Synchroniser:
-    def __init__(self, project):
-        self.project = project
-        self.owner, self.name = findall(r"\/([^\/]+)\/([^\/]+)$", self.project.repo)[0]
-
-    async def _get_dependencies(self):
-        """Get the dependencies from the lockfiles."""
-        q = f"""
-            {{
-                repository(owner: "{self.owner}", name: "{self.name}") {{
-                    dependencyGraphManifests {{
-                        nodes {{
-                            blobPath
-                        }}
-                    }}
-                }}
-            }}
-        """
-
-        async with ClientSession() as session:
-            async with session.post(
-                "https://api.github.com/graphql",
-                headers={
-                    "Authorization": f"Bearer {settings.GITHUB_API_TOKEN}",
-                    "Accept": "application/vnd.github.hawkgirl-preview+json",
-                },
-                json={"query": q},
-            ) as response:
-                json = await response.json()
-                if json.get("message") == "Bad credentials":
-                    raise ValueError("API Token is not set")  # pragma: no cover
-                elif json.get("errors") and json["errors"][0]["message"] == "timedout":
-                    return await self._get_dependencies()  # pragma: no cover
-                elif json.get("errors"):
-                    raise ValueError(json)  # pragma: no cover
-                headers = response.headers
-                if headers.get("X-RateLimit-Remaining") == "0":  # pragma: no cover
-                    t = (
-                        int(headers.get("X-RateLimit-Reset"))
-                        - int(datetime.utcnow().timestamp())
-                    ) / 1000
-                    print(f"Rate limit exceeded. Sleeping for {t} seconds.")
-                    await sleep(t)
-                    return await self._get_dependencies()
-
-            lockfiles = []
-            lockfile_tasks = []
-            for lockfile in json["data"]["repository"]["dependencyGraphManifests"][
-                "nodes"
-            ]:
-                if basename(lockfile["blobPath"]) in LOCK_FILES:
-                    url = f"https://raw.githubusercontent.com/{lockfile['blobPath'].replace(f'blob/', f'')}"
-                    lockfile_tasks.append(session.get(url))
-            for lockfile_task in await gather(*lockfile_tasks):
-                lockfiles.append(
-                    {
-                        "name": basename(str(lockfile_task.url)),
-                        "data": await lockfile_task.text(),
-                    }
-                )
-
-        return await LockFileParser(lockfiles).parse()
-
-    def sync(self):
-        """Sync the project with the remote project."""
-        run(self.a_sync())
-
-    async def a_sync(self):
-        """Sync the project with the remote project."""
-        dependencies = await self._get_dependencies()
-        await sync_to_async(self.project.versioned_dependencies.set)(dependencies)
-
-
 class LockFileParser:
     """Parse a lockfile and return a list of dependencies."""

@@ -107,7 +25,7 @@ def __init__(self, lockfiles: list[dict]):

     def _get_provider(self, name: str):
         """Get the provider of the lockfile."""
-        if name in NPM_FILES:
+        if name in settings.NPM_FILES:
             return "NPM"
         return "PIP"

@@ -164,11 +82,11 @@ async def _get_release_date(self, version):
                 await sleep(1)
                 return await self._get_release_date(version)

-    async def parse(self):
+    async def _parse(self):
         """Parse the lockfile and return a dictionary of dependencies."""
         tasks = []
         for lockfile in self.lockfiles:
-            name = lockfile["name"]
+            name = basename(lockfile["name"])
             data = lockfile["data"]
             provider = self._get_provider(name)

@@ -201,3 +119,7 @@ async def parse(self):
                 )

         return await gather(*tasks)
+
+    def parse(self):
+        """Run the async parser to completion and return its result."""
+        return run(self._parse())
diff --git a/api/outdated/settings.py b/api/outdated/settings.py
index 06582c48..557b3e93 100644
--- a/api/outdated/settings.py
+++ b/api/outdated/settings.py
@@ -166,9 +166,6 @@ def default(default_dev=env.NOTSET, default_prod=env.NOTSET):
 JSON_API_FORMAT_TYPES = "dasherize"
 JSON_API_PLURALIZE_TYPES = True

-# Github API
-GITHUB_API_TOKEN = env.str("GITHUB_API_TOKEN")
-
 # Syncproject settings
 TRACKED_DEPENDENCIES = env.list(
     "TRACKED_DEPENDENCIES",
@@ -181,3 +178,7 @@ def default(default_dev=env.NOTSET, default_prod=env.NOTSET):
         "ember-cli",
     ],
 )
+NPM_FILES = ["yarn.lock", "pnpm-lock.yaml"]
+PYPI_FILES = ["poetry.lock"]
+
+SUPPORTED_LOCK_FILES = [*NPM_FILES, *PYPI_FILES]
diff --git a/api/outdated/tracking.py b/api/outdated/tracking.py
new file mode 100644
index 00000000..43880236
--- /dev/null
+++ b/api/outdated/tracking.py
@@ -0,0 +1,145 @@
+from __future__ import annotations
+
+from os import path, walk
+from shlex import split
+from shutil import rmtree
+from subprocess import run
+from typing import TYPE_CHECKING
+
+from django.conf import settings
+
+from outdated.parser import LockFileParser
+
+if TYPE_CHECKING:
+    from .outdated.models import Project
+
+
+class RepoDoesNotExist(FileNotFoundError):
+    """Raise when repository is not locally saved."""
+
+
+class Tracker:
+    """Manage the local git clone of a project and sync its dependencies."""
+
+    def __init__(self, project: Project):
+        self.project = project
+        self.local_path = f"/projects/{self.project.clone_path}"
+
+    def _run(self, command, fail_without_local_copy=False):
+        """Run ``command`` in the repository directory and return the result.
+
+        ``command`` is a string or a list of arguments. It is never handed to a
+        shell, so values such as the clone URL cannot inject extra commands.
+        Raises RepoDoesNotExist if a local copy is required but missing.
+        """
+        if not self.has_local_copy and fail_without_local_copy:
+            raise RepoDoesNotExist(
+                f"Can't run {command} without local copy of {self.project.repo}"
+            )
+        args = split(command) if isinstance(command, str) else list(command)
+        return run(args, cwd=self.repository_path, capture_output=True)
+
+    def clone(self, force=False):
+        """Create a shallow, sparse clone that only tracks the lock files.
+
+        Does nothing if a local copy exists, unless ``force`` is set; then the
+        existing copy is removed and cloned again.
+        """
+        if self.has_local_copy and not force:
+            return
+        if force:
+            self.delete()
+        self._run(
+            [
+                "git",
+                "clone",
+                "-n",
+                "--depth",
+                "1",
+                "--filter=tree:0",
+                "--",  # keep the URL from being parsed as an option
+                self.project.clone_url,
+                self.project.clone_path,
+            ]
+        )
+        self._run(
+            [
+                "git",
+                "sparse-checkout",
+                "set",
+                "--no-cone",
+                *settings.SUPPORTED_LOCK_FILES,
+            ]
+        )
+
+    def checkout(self):
+        """Run ``git checkout`` in the local copy and return the result."""
+        return self._run("git checkout", True)
+
+    def _get_lockfile(self, root, file):
+        """Read one file and return its repo-relative name and its content."""
+        file_path = path.join(root, file)
+        rel_file_path = path.relpath(file_path, self.local_path)
+        with open(file_path, encoding="utf-8") as file_content:
+            return {"name": rel_file_path, "data": file_content.read()}
+
+    @property
+    def lockfiles(self):
+        """Return every file of the working tree as a name/data dict.
+
+        Raises RepoDoesNotExist if the repository is not saved locally.
+        """
+        if not self.has_local_copy:
+            raise RepoDoesNotExist(
+                f"Unable to retrieve lockfiles for {self.project.repo} because it is not saved locally."
+            )
+
+        lockfile_list = []
+        for root, dirs, files in walk(self.local_path):
+            # Prune git's metadata directory by name. A substring test on the
+            # path would also skip the whole clone for hosts like "my.gitlab.com".
+            if ".git" in dirs:
+                dirs.remove(".git")
+
+            lockfile_list.extend(self._get_lockfile(root, file) for file in files)
+
+        return lockfile_list
+
+    @property
+    def has_local_copy(self):
+        """Return whether the clone directory exists."""
+        return path.exists(self.local_path)
+
+    @property
+    def repository_path(self):
+        """Return the clone directory, or ``/projects/`` if it does not exist."""
+        return self.local_path if self.has_local_copy else "/projects/"
+
+    @property
+    def has_changes(self):
+        """Fetch, then return whether ``git diff --quiet @{u}..`` exits non-zero."""
+        self._run("git fetch", True)
+        result = self._run("git diff --quiet @{u}..")
+        return bool(result.returncode)
+
+    def sync(self, only_on_change=False):
+        """Parse the local lock files and store the project's dependencies.
+
+        With ``only_on_change`` the sync is skipped when nothing changed.
+        """
+        # Test the flag first so the fetch behind has_changes only runs
+        # when its result is actually needed.
+        if only_on_change and not self.has_changes:
+            return
+        dependencies = LockFileParser(self.lockfiles).parse()
+        self.project.versioned_dependencies.set(dependencies)
+
+    def setup(self):
+        """Clone the repository, check out the lock files and sync."""
+        self.clone()
+        self.checkout()
+        self.sync()
+
+    def delete(self):
+        """Remove the local clone; a missing clone is not an error."""
+        rmtree(self.local_path, ignore_errors=True)