Skip to content

Commit

Permalink
Implement support for private repositories (#10)
Browse files Browse the repository at this point in the history
using token which is not stored in `.git`
  • Loading branch information
krassowski authored Jun 10, 2024
1 parent 2324e31 commit 57a8b07
Show file tree
Hide file tree
Showing 6 changed files with 146 additions and 51 deletions.
11 changes: 9 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,22 @@ c.GalleryManager.destination = "examples"
c.GalleryManager.exhibits = [
{
"git": "https://github.com/jupyterlab/jupyterlab.git",
"repository": "https://github.com/jupyterlab/jupyterlab/",
"homepage": "https://github.com/jupyterlab/jupyterlab/",
"title": "JupyterLab",
"description": "JupyterLab is a highly extensible, feature-rich notebook authoring application and editing environment.",
"icon": "https://raw.githubusercontent.com/jupyterlab/jupyterlab/main/packages/ui-components/style/icons/jupyter/jupyter.svg"
},
{
"git": "https://github.com/my_org/private-tutorial.git",
"account": "name-of-the-account-or-app-owning-the-token",
"token": "access-token-for-example-starting-with-github_pat_",
"title": "My private tutorial",
"description": "A tutorial which is not public.",
}
]
```

Using the Python file enables including the personal access token (PAT) in the `git` stanza (note: the `git` value is never exposed to the user, but the `repository` value is, so it should not contain the secret unless you want to share it with the users).
Using the Python file enables injecting the personal access token (PAT) into the `token` stanza programmatically, for example by reading it from an environment variable instead of storing it in the configuration file (recommended).

## Requirements

Expand Down
11 changes: 11 additions & 0 deletions jupyterlab_gallery/git_askpass.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/usr/bin/env python

import os
import sys

# Git invokes the GIT_ASKPASS program with the prompt text as its first
# argument and reads the answer from standard output.
what = sys.argv[1].lower()

# Keyword found in the prompt -> environment variable holding the answer.
answers = {
    "username": "GIT_PULLER_ACCOUNT",
    "password": "GIT_PULLER_TOKEN",
}

# Answer exactly one question per invocation.  The prompt may embed the remote
# URL, which can itself contain the other keyword (e.g. a host or user named
# "...username..."), so the keyword that appears first in the prompt decides
# what is being asked.  Printing both values would make the first printed line
# (the account) stand in for the password.
matches = sorted(
    (what.find(keyword), variable)
    for keyword, variable in answers.items()
    if keyword in what
)
if matches:
    print(os.environ[matches[0][1]])
63 changes: 63 additions & 0 deletions jupyterlab_gallery/git_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
from contextlib import contextmanager
from pathlib import Path
from subprocess import run
from typing import Optional
import re
import os


def extract_repository_owner(git_url: str) -> str:
    """Return the second-to-last ``/``-separated segment of *git_url*.

    Leading and trailing slashes are ignored.  An empty string is returned
    when the URL has fewer than two segments.
    """
    segments = git_url.strip("/").split("/")
    if len(segments) < 2:
        return ""
    return segments[-2]


def extract_repository_name(git_url: str) -> str:
    """Return the repository name encoded in the last segment of *git_url*.

    Trailing slashes are ignored, so ``.../owner/repo/`` yields ``repo``
    rather than an empty string, and a trailing ``.git`` suffix is removed.
    """
    fragment = git_url.rstrip("/").split("/")[-1]
    if fragment.endswith(".git"):
        return fragment[:-4]
    return fragment


def has_updates(repo_path: Path) -> bool:
    """Report whether the current branch in *repo_path* is behind its upstream.

    The current branch is first fetched from ``origin``; the branch header
    of ``git status --porcelain -b`` is then inspected for a ``behind N``
    marker.

    Args:
        repo_path: Directory in which the git commands are executed.

    Returns:
        ``True`` if the status header reports the branch as behind,
        ``False`` otherwise - including when the commands cannot be started
        (``FileNotFoundError``), produce no output, or print a header that
        does not have the expected shape.
    """
    try:
        # Best effort: a failed fetch (e.g. no network) is not treated as an
        # error, the status below then reflects the last known remote state.
        run(
            "git fetch origin $(git branch --show-current) --quiet",
            cwd=repo_path,
            shell=True,
        )
        # The mode of `-u`/`--ignored` must be attached to the option
        # (`-uno`, `--ignored=no`); a detached `n` is parsed by git as a
        # pathspec instead of a mode.
        result = run(
            "git status -b --porcelain -uno --ignored=no",
            cwd=repo_path,
            capture_output=True,
            shell=True,
        )
    except FileNotFoundError:
        return False
    lines = result.stdout.decode("utf-8", errors="replace").splitlines()
    if not lines:
        return False
    # Only the first line is the branch header; entries for changed files may
    # follow it and must not take part in the match.
    data = re.match(
        r"^## (.*?)( \[(ahead (?P<ahead>\d+))?(, )?(behind (?P<behind>\d+))?\])?$",
        lines[0],
    )
    if not data:
        return False
    return data["behind"] is not None


@contextmanager
def git_credentials(token: Optional[str], account: Optional[str]):
    """Temporarily expose git credentials through environment variables.

    While the context is active, ``GIT_ASKPASS`` points at the
    ``git_askpass.py`` script located next to this module, and the account
    and token are published as ``GIT_PULLER_ACCOUNT`` / ``GIT_PULLER_TOKEN``.
    On exit every touched variable is returned to the value it had before
    (or removed if it was unset), so nested use and pre-existing user
    settings are preserved.

    If either *token* or *account* is missing or empty, the environment is
    left untouched.

    Note: ``os.environ`` is process-wide, so the variables are visible to
    all threads for the duration of the context.
    """
    if not (token and account):
        yield
        return

    overrides = {
        "GIT_ASKPASS": str(Path(__file__).parent / "git_askpass.py"),
        "GIT_PULLER_ACCOUNT": account,
        "GIT_PULLER_TOKEN": token,
        # do not prompt user if askpass fails as this would
        # deadlock execution!
        "GIT_TERMINAL_PROMPT": "0",
    }
    # Remember what was there before so that it can be put back afterwards.
    previous = {name: os.environ.get(name) for name in overrides}
    try:
        os.environ.update(overrides)
        yield
    finally:
        for name, value in previous.items():
            if value is None:
                # `pop` with a default never raises, even if the variable was
                # already removed (e.g. by an overlapping context).
                os.environ.pop(name, None)
            else:
                os.environ[name] = value
45 changes: 37 additions & 8 deletions jupyterlab_gallery/gitpuller.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,15 @@
from queue import Queue, Empty
from collections import defaultdict
from numbers import Number

from typing import Optional

import git
from jupyter_server.base.handlers import JupyterHandler
from nbgitpuller.pull import GitPuller
from tornado.iostream import StreamClosedError

from .git_utils import git_credentials


class CloneProgress(git.RemoteProgress):
def __init__(self):
Expand Down Expand Up @@ -59,19 +61,32 @@ def update(self, op_code: int, cur_count, max_count=None, message=""):


class ProgressGitPuller(GitPuller):
def __init__(
    self,
    git_url,
    repo_dir,
    token: Optional[str] = None,
    account: Optional[str] = None,
    **kwargs,
):
    """Store the credentials and initialise the underlying puller.

    Args:
        git_url: URL of the repository, forwarded to the parent class.
        repo_dir: Local directory of the checkout, forwarded to the parent
            class.
        token: Access token for private repositories; ``None`` (the
            default) when no authentication is needed.
        account: Account or application owning the token; ``None`` (the
            default) when no authentication is needed.
        **kwargs: Forwarded unchanged to the parent constructor.
    """
    self._token = token
    self._account = account
    # it will attempt to resolve default branch which requires credentials too
    with git_credentials(token=self._token, account=self._account):
        super().__init__(git_url, repo_dir, **kwargs)

def initialize_repo(self):
logging.info("Repo {} doesn't exist. Cloning...".format(self.repo_dir))
progress = CloneProgress()

def clone_task():
git.Repo.clone_from(
self.git_url, self.repo_dir, branch=self.branch_name, progress=progress
)
progress.queue.put(None)
with git_credentials(token=self._token, account=self._account):
git.Repo.clone_from(
self.git_url,
self.repo_dir,
branch=self.branch_name,
progress=progress,
)
progress.queue.put(None)

threading.Thread(target=clone_task).start()

timeout = 60
# TODO: add configurable timeout
# timeout = 60

while True:
item = progress.queue.get(True) # , timeout)
Expand All @@ -81,6 +96,10 @@ def clone_task():

logging.info("Repo {} initialized".format(self.repo_dir))

def update(self):
    """Yield everything the parent's ``update()`` yields, with credentials set.

    The credentials context stays open for as long as this generator is
    being consumed and is closed once the parent generator is exhausted.
    """
    credentials = git_credentials(token=self._token, account=self._account)
    with credentials:
        parent_updates = super().update()
        yield from parent_updates


class SyncHandlerBase(JupyterHandler):
def __init__(self, *args, **kwargs):
Expand All @@ -107,7 +126,14 @@ def get_login_url(self):
def git_lock(self):
return self.settings["git_lock"]

async def _pull(self, repo: str, targetpath: str, exhibit_id: int):
async def _pull(
self,
repo: str,
targetpath: str,
exhibit_id: int,
token: Optional[str],
account: Optional[str],
):
q = self.settings["pull_status_queues"][exhibit_id]
try:
q.put_nowait({"phase": "waiting", "message": "Waiting for a git lock"})
Expand Down Expand Up @@ -147,6 +173,9 @@ async def _pull(self, repo: str, targetpath: str, exhibit_id: int):
branch=branch,
depth=depth,
parent=self.settings["nbapp"],
# our additions
token=token,
account=account,
)

def pull():
Expand Down
2 changes: 2 additions & 0 deletions jupyterlab_gallery/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ async def post(self):
return await super()._pull(
repo=exhibit["git"],
exhibit_id=exhibit_id,
account=exhibit.get("account"),
token=exhibit.get("token"),
# branch
# depth
targetpath=str(self.gallery_manager.get_local_path(exhibit)),
Expand Down
65 changes: 24 additions & 41 deletions jupyterlab_gallery/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,39 +3,13 @@

from traitlets.config.configurable import LoggingConfigurable
from traitlets import Dict, List, Unicode
from subprocess import run
import re


def extract_repository_owner(git_url: str) -> str:
    """Return the second-to-last ``/``-separated segment of *git_url*.

    Leading and trailing slashes are ignored.  An empty string is returned
    when the URL has fewer than two segments.
    """
    segments = git_url.strip("/").split("/")
    if len(segments) < 2:
        return ""
    return segments[-2]


def extract_repository_name(git_url: str) -> str:
    """Return the repository name encoded in the last segment of *git_url*.

    Trailing slashes are ignored, so ``.../owner/repo/`` yields ``repo``
    rather than an empty string, and a trailing ``.git`` suffix is removed.
    """
    fragment = git_url.rstrip("/").split("/")[-1]
    if fragment.endswith(".git"):
        return fragment[:-4]
    return fragment


def has_updates(repo_path: Path) -> bool:
    """Report whether the current branch in *repo_path* is behind its upstream.

    The branch header of ``git status --porcelain -b`` is inspected for a
    ``behind N`` marker; no fetch is performed here, so the answer reflects
    the remote state as of the last fetch.

    Args:
        repo_path: Directory in which the git command is executed.

    Returns:
        ``True`` if the status header reports the branch as behind,
        ``False`` otherwise - including when the command cannot be started
        (``FileNotFoundError``), produces no output, or prints a header that
        does not have the expected shape.
    """
    try:
        # The mode of `-u`/`--ignored` must be attached to the option
        # (`-uno`, `--ignored=no`); a detached `n` is parsed by git as a
        # pathspec instead of a mode.
        result = run(
            "git status -b --porcelain -uno --ignored=no",
            cwd=repo_path,
            capture_output=True,
            shell=True,
        )
    except FileNotFoundError:
        return False
    lines = result.stdout.decode("utf-8", errors="replace").splitlines()
    if not lines:
        return False
    # Only the first line is the branch header; entries for changed files may
    # follow it and must not take part in the match.
    data = re.match(
        r"^## (.*?)( \[(ahead (?P<ahead>\d+))?(, )?(behind (?P<behind>\d+))?\])?$",
        lines[0],
    )
    if not data:
        return False
    return data["behind"] is not None
from .git_utils import (
extract_repository_owner,
extract_repository_name,
git_credentials,
has_updates,
)


class GalleryManager(LoggingConfigurable):
Expand All @@ -47,12 +21,16 @@ class GalleryManager(LoggingConfigurable):
exhibits = List(
Dict(
per_key_traits={
"git": Unicode(
help="Git URL used for cloning (can include branch, PAT) - not show to the user"
),
"repository": Unicode(help="User-facing URL of the repository"),
"git": Unicode(help="Git URL used for cloning"),
"homepage": Unicode(help="User-facing URL to open if any"),
"title": Unicode(help="Name of the exhibit"),
"description": Unicode(help="Short description"),
"token": Unicode(
help="Personal access token - required if the repository is private"
),
"account": Unicode(
help="Username or name of application - required if the repository is private"
),
# TODO: validate path exists
"icon": Unicode(help="Path to an svg or png, or base64 encoded string"),
# other ideas: `path_in_repository`, `documentation_url`
Expand All @@ -63,13 +41,13 @@ class GalleryManager(LoggingConfigurable):
default_value=[
{
"git": "https://github.com/nebari-dev/nebari.git",
"repository": "https://github.com/nebari-dev/nebari/",
"homepage": "https://github.com/nebari-dev/nebari/",
"title": "Nebari",
"description": "🪴 Nebari - your open source data science platform",
},
{
"git": "https://github.com/nebari-dev/nebari-docker-images.git",
"repository": "https://github.com/nebari-dev/nebari-docker-images/",
"homepage": "https://github.com/nebari-dev/nebari-docker-images/",
"title": "Nebari docker images",
"description": "Nebari Docker images",
},
Expand Down Expand Up @@ -97,9 +75,10 @@ def get_exhibit_data(self, exhibit):
data = {}

if "icon" not in exhibit:
if exhibit["repository"].startswith("https://github.com/"):
homepage = exhibit.get("homepage")
if homepage and homepage.startswith("https://github.com/"):
repository_name = extract_repository_name(exhibit["git"])
repository_owner = extract_repository_owner(exhibit["repository"])
repository_owner = extract_repository_owner(homepage)
data["icon"] = (
f"https://opengraph.githubassets.com/1/{repository_owner}/{repository_name}"
)
Expand All @@ -115,6 +94,10 @@ def get_exhibit_data(self, exhibit):
data["lastUpdated"] = datetime.fromtimestamp(
fetch_head.stat().st_mtime
).isoformat()
data["updatesAvailable"] = has_updates(local_path)
with git_credentials(
account=exhibit.get("account"), token=exhibit.get("token")
):
# TODO: this is blocking initial load; can we make it async?
data["updatesAvailable"] = has_updates(local_path)

return data

0 comments on commit 57a8b07

Please sign in to comment.