From 57a8b075dc1cc119d7721ada3ce08422717c79a5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Krassowski?= <5832902+krassowski@users.noreply.github.com>
Date: Mon, 10 Jun 2024 08:41:43 +0100
Subject: [PATCH] Implement support for private repositories (#10)

using token which is not stored in `.git`
---
Reviewer notes (this section is ignored by `git am`):
* Line breaks and indentation of this patch had been collapsed and were
  reconstructed; every hunk's old/new line count and the diffstat below
  were used as a cross-check. Indentation of context lines could not be
  verified against the base files, so `git apply --ignore-whitespace`
  may be needed.
* The named groups of the `has_updates` regex had been stripped down to
  `(?P\d+)`, which `re` rejects; they are restored to `(?P<ahead>...)` and
  `(?P<behind>...)` (the code reads `data["behind"]`).
* `git_credentials` now cleans up with `os.environ.pop(key, None)` instead
  of `del os.environ[key]`: `os.environ` is process-global and the context
  manager is entered from several places (clone thread, `update()`,
  `get_exhibit_data`), so a variable may already be gone when `finally`
  runs and `del` would raise `KeyError`. Hunk sizes are unchanged, but the
  blob id on the `index` line of git_utils.py no longer matches.
* Not changed, worth a follow-up: `progress.queue.put(None)` in
  `clone_task` is not in a `finally`, so a failed clone leaves the
  consumer loop blocked on `queue.get(True)`; a pre-existing `GIT_ASKPASS`
  value is overwritten and then removed rather than restored.

 README.md                         | 11 +++++-
 jupyterlab_gallery/git_askpass.py | 11 ++++++
 jupyterlab_gallery/git_utils.py   | 63 ++++++++++++++++++++++++++++++
 jupyterlab_gallery/gitpuller.py   | 45 +++++++++++++++++----
 jupyterlab_gallery/handlers.py    |  2 +
 jupyterlab_gallery/manager.py     | 65 ++++++++++++-------------------
 6 files changed, 146 insertions(+), 51 deletions(-)
 create mode 100755 jupyterlab_gallery/git_askpass.py
 create mode 100644 jupyterlab_gallery/git_utils.py

diff --git a/README.md b/README.md
index 6330be9..1ed9836 100644
--- a/README.md
+++ b/README.md
@@ -32,15 +32,22 @@ c.GalleryManager.destination = "examples"
 c.GalleryManager.exhibits = [
     {
         "git": "https://github.com/jupyterlab/jupyterlab.git",
-        "repository": "https://github.com/jupyterlab/jupyterlab/",
+        "homepage": "https://github.com/jupyterlab/jupyterlab/",
         "title": "JupyterLab",
         "description": "JupyterLab is a highly extensible, feature-rich notebook authoring application and editing environment.",
         "icon": "https://raw.githubusercontent.com/jupyterlab/jupyterlab/main/packages/ui-components/style/icons/jupyter/jupyter.svg"
+    },
+    {
+        "git": "https://github.com/my_org/private-tutorial.git",
+        "account": "name-of-the-account-or-app-owning-the-token",
+        "token": "access-token-for-example-starting-with-github_pat_",
+        "title": "My private tutorial",
+        "description": "A tutorial which is not public.",
     }
 ]
 ```
 
-Using the Python file enables including the PAT access token in the `git` stanza (note: while the `git` value is never exposed to the user, the `repository` is and should not contain the secret if you do not want it to be shared with the users).
+Using the Python file enables injecting the personal access token (PAT) into the `token` stanza if you prefer to store it in an environment variable rather than in the configuration file (recommended).
 
 ## Requirements
 
diff --git a/jupyterlab_gallery/git_askpass.py b/jupyterlab_gallery/git_askpass.py
new file mode 100755
index 0000000..71c3f43
--- /dev/null
+++ b/jupyterlab_gallery/git_askpass.py
@@ -0,0 +1,11 @@
+#!/usr/bin/env python
+
+import os
+import sys
+
+what = sys.argv[1].lower()
+
+if "username" in what:
+    print(os.environ["GIT_PULLER_ACCOUNT"])
+if "password" in what:
+    print(os.environ["GIT_PULLER_TOKEN"])
diff --git a/jupyterlab_gallery/git_utils.py b/jupyterlab_gallery/git_utils.py
new file mode 100644
index 0000000..faa2a32
--- /dev/null
+++ b/jupyterlab_gallery/git_utils.py
@@ -0,0 +1,63 @@
+from contextlib import contextmanager
+from pathlib import Path
+from subprocess import run
+from typing import Optional
+import re
+import os
+
+
+def extract_repository_owner(git_url: str) -> str:
+    fragments = git_url.strip("/").split("/")
+    return fragments[-2] if len(fragments) >= 2 else ""
+
+
+def extract_repository_name(git_url: str) -> str:
+    fragment = git_url.split("/")[-1]
+    if fragment.endswith(".git"):
+        return fragment[:-4]
+    return fragment
+
+
+def has_updates(repo_path: Path) -> bool:
+    try:
+        run(
+            "git fetch origin $(git branch --show-current) --quiet",
+            cwd=repo_path,
+            shell=True,
+        )
+        result = run(
+            "git status -b --porcelain -u n --ignored n",
+            cwd=repo_path,
+            capture_output=True,
+            shell=True,
+        )
+    except FileNotFoundError:
+        return False
+    data = re.match(
+        r"^## (.*?)( \[(ahead (?P<ahead>\d+))?(, )?(behind (?P<behind>\d+))?\])?$",
+        result.stdout.decode("utf-8"),
+    )
+    if not data:
+        return False
+    return data["behind"] is not None
+
+
+@contextmanager
+def git_credentials(token: Optional[str], account: Optional[str]):
+    if token and account:
+        try:
+            path = Path(__file__).parent
+            os.environ["GIT_ASKPASS"] = str(path / "git_askpass.py")
+            os.environ["GIT_PULLER_ACCOUNT"] = account
+            os.environ["GIT_PULLER_TOKEN"] = token
+            # do not prompt user if askpass fails as this would
+            # dead lock execution!
+            os.environ["GIT_TERMINAL_PROMPT"] = "0"
+            yield
+        finally:
+            os.environ.pop("GIT_PULLER_ACCOUNT", None)
+            os.environ.pop("GIT_PULLER_TOKEN", None)
+            os.environ.pop("GIT_TERMINAL_PROMPT", None)
+            os.environ.pop("GIT_ASKPASS", None)
+    else:
+        yield
diff --git a/jupyterlab_gallery/gitpuller.py b/jupyterlab_gallery/gitpuller.py
index 908770e..3899cad 100644
--- a/jupyterlab_gallery/gitpuller.py
+++ b/jupyterlab_gallery/gitpuller.py
@@ -16,13 +16,15 @@
 from queue import Queue, Empty
 from collections import defaultdict
 from numbers import Number
-
+from typing import Optional
 
 import git
 from jupyter_server.base.handlers import JupyterHandler
 from nbgitpuller.pull import GitPuller
 from tornado.iostream import StreamClosedError
 
+from .git_utils import git_credentials
+
 
 class CloneProgress(git.RemoteProgress):
     def __init__(self):
@@ -59,19 +61,32 @@ def update(self, op_code: int, cur_count, max_count=None, message=""):
 
 
 class ProgressGitPuller(GitPuller):
+    def __init__(
+        self, git_url, repo_dir, token: Optional[str], account: Optional[str], **kwargs
+    ):
+        self._token = token
+        self._account = account
+        # it will attempt to resolve default branch which requires credentials too
+        with git_credentials(token=self._token, account=self._account):
+            super().__init__(git_url, repo_dir, **kwargs)
+
     def initialize_repo(self):
         logging.info("Repo {} doesn't exist. Cloning...".format(self.repo_dir))
         progress = CloneProgress()
 
         def clone_task():
-            git.Repo.clone_from(
-                self.git_url, self.repo_dir, branch=self.branch_name, progress=progress
-            )
-            progress.queue.put(None)
+            with git_credentials(token=self._token, account=self._account):
+                git.Repo.clone_from(
+                    self.git_url,
+                    self.repo_dir,
+                    branch=self.branch_name,
+                    progress=progress,
+                )
+                progress.queue.put(None)
 
         threading.Thread(target=clone_task).start()
-
-        timeout = 60
+        # TODO: add configurable timeout
+        # timeout = 60
 
         while True:
             item = progress.queue.get(True)  # , timeout)
@@ -81,6 +96,10 @@ def clone_task():
 
         logging.info("Repo {} initialized".format(self.repo_dir))
 
+    def update(self):
+        with git_credentials(token=self._token, account=self._account):
+            yield from super().update()
+
 
 class SyncHandlerBase(JupyterHandler):
     def __init__(self, *args, **kwargs):
@@ -107,7 +126,14 @@ def get_login_url(self):
     def git_lock(self):
         return self.settings["git_lock"]
 
-    async def _pull(self, repo: str, targetpath: str, exhibit_id: int):
+    async def _pull(
+        self,
+        repo: str,
+        targetpath: str,
+        exhibit_id: int,
+        token: Optional[str],
+        account: Optional[str],
+    ):
         q = self.settings["pull_status_queues"][exhibit_id]
         try:
             q.put_nowait({"phase": "waiting", "message": "Waiting for a git lock"})
@@ -147,6 +173,9 @@ async def _pull(self, repo: str, targetpath: str, exhibit_id: int):
                 branch=branch,
                 depth=depth,
                 parent=self.settings["nbapp"],
+                # our additions
+                token=token,
+                account=account,
             )
 
             def pull():
diff --git a/jupyterlab_gallery/handlers.py b/jupyterlab_gallery/handlers.py
index 0cd57c6..9985457 100644
--- a/jupyterlab_gallery/handlers.py
+++ b/jupyterlab_gallery/handlers.py
@@ -66,6 +66,8 @@ async def post(self):
         return await super()._pull(
             repo=exhibit["git"],
             exhibit_id=exhibit_id,
+            account=exhibit.get("account"),
+            token=exhibit.get("token"),
             # branch
             # depth
             targetpath=str(self.gallery_manager.get_local_path(exhibit)),
diff --git a/jupyterlab_gallery/manager.py b/jupyterlab_gallery/manager.py
index fc7b809..b8730c0 100644
--- a/jupyterlab_gallery/manager.py
+++ b/jupyterlab_gallery/manager.py
@@ -3,39 +3,13 @@
 
 from traitlets.config.configurable import LoggingConfigurable
 from traitlets import Dict, List, Unicode
-from subprocess import run
-import re
 
-
-def extract_repository_owner(git_url: str) -> str:
-    fragments = git_url.strip("/").split("/")
-    return fragments[-2] if len(fragments) >= 2 else ""
-
-
-def extract_repository_name(git_url: str) -> str:
-    fragment = git_url.split("/")[-1]
-    if fragment.endswith(".git"):
-        return fragment[:-4]
-    return fragment
-
-
-def has_updates(repo_path: Path) -> bool:
-    try:
-        result = run(
-            "git status -b --porcelain -u n --ignored n",
-            cwd=repo_path,
-            capture_output=True,
-            shell=True,
-        )
-    except FileNotFoundError:
-        return False
-    data = re.match(
-        r"^## (.*?)( \[(ahead (?P<ahead>\d+))?(, )?(behind (?P<behind>\d+))?\])?$",
-        result.stdout.decode("utf-8"),
-    )
-    if not data:
-        return False
-    return data["behind"] is not None
+from .git_utils import (
+    extract_repository_owner,
+    extract_repository_name,
+    git_credentials,
+    has_updates,
+)
 
 
 class GalleryManager(LoggingConfigurable):
@@ -47,12 +21,16 @@ class GalleryManager(LoggingConfigurable):
     exhibits = List(
         Dict(
             per_key_traits={
-                "git": Unicode(
-                    help="Git URL used for cloning (can include branch, PAT) - not show to the user"
-                ),
-                "repository": Unicode(help="User-facing URL of the repository"),
+                "git": Unicode(help="Git URL used for cloning"),
+                "homepage": Unicode(help="User-facing URL to open if any"),
                 "title": Unicode(help="Name of the exhibit"),
                 "description": Unicode(help="Short description"),
+                "token": Unicode(
+                    help="Personal access token - required if the repository is private"
+                ),
+                "account": Unicode(
+                    help="Username or name of application - required if the repository is private"
+                ),
                 # TODO: validate path exists
                 "icon": Unicode(help="Path to an svg or png, or base64 encoded string"),
                 # other ideas: `path_in_repository`, `documentation_url`
@@ -63,13 +41,13 @@ class GalleryManager(LoggingConfigurable):
         default_value=[
             {
                 "git": "https://github.com/nebari-dev/nebari.git",
-                "repository": "https://github.com/nebari-dev/nebari/",
+                "homepage": "https://github.com/nebari-dev/nebari/",
                 "title": "Nebari",
                 "description": "🪴 Nebari - your open source data science platform",
             },
             {
                 "git": "https://github.com/nebari-dev/nebari-docker-images.git",
-                "repository": "https://github.com/nebari-dev/nebari-docker-images/",
+                "homepage": "https://github.com/nebari-dev/nebari-docker-images/",
                 "title": "Nebari docker images",
                 "description": "Nebari Docker images",
             },
@@ -97,9 +75,10 @@ def get_exhibit_data(self, exhibit):
         data = {}
 
         if "icon" not in exhibit:
-            if exhibit["repository"].startswith("https://github.com/"):
+            homepage = exhibit.get("homepage")
+            if homepage and homepage.startswith("https://github.com/"):
                 repository_name = extract_repository_name(exhibit["git"])
-                repository_owner = extract_repository_owner(exhibit["repository"])
+                repository_owner = extract_repository_owner(homepage)
                 data["icon"] = (
                     f"https://opengraph.githubassets.com/1/{repository_owner}/{repository_name}"
                 )
@@ -115,6 +94,10 @@ def get_exhibit_data(self, exhibit):
                 data["lastUpdated"] = datetime.fromtimestamp(
                     fetch_head.stat().st_mtime
                 ).isoformat()
-            data["updatesAvailable"] = has_updates(local_path)
+            with git_credentials(
+                account=exhibit.get("account"), token=exhibit.get("token")
+            ):
+                # TODO: this is blocking initial load; can we make it async?
+                data["updatesAvailable"] = has_updates(local_path)
 
         return data