Skip to content

Commit

Permalink
Add progress bar and fetch button (#9)
Browse files Browse the repository at this point in the history
* Implement progress bar and refresh button

* Bump version

* Fix for initial state

* Do not fail if stream was closed

* Lint
  • Loading branch information
krassowski authored May 28, 2024
1 parent 4d009c2 commit 2324e31
Show file tree
Hide file tree
Showing 8 changed files with 262 additions and 50 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ c.GalleryManager.exhibits = [
"git": "https://github.com/jupyterlab/jupyterlab.git",
"repository": "https://github.com/jupyterlab/jupyterlab/",
"title": "JupyterLab",
"description": "JupyterLab",
"description": "JupyterLab is a highly extensible, feature-rich notebook authoring application and editing environment.",
"icon": "https://raw.githubusercontent.com/jupyterlab/jupyterlab/main/packages/ui-components/style/icons/jupyter/jupyter.svg"
}
]
Expand Down
81 changes: 75 additions & 6 deletions jupyterlab_gallery/gitpuller.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,79 @@
# - reconnecting to the event stream when refreshing the browser
# - handling multiple waiting pulls
from tornado import gen, web, locks
import logging
import traceback

import threading
import json
import os
from queue import Queue, Empty
from collections import defaultdict
from numbers import Number


import git
from jupyter_server.base.handlers import JupyterHandler
from nbgitpuller.pull import GitPuller
from tornado.iostream import StreamClosedError


class CloneProgress(git.RemoteProgress):
    """Turn GitPython progress callbacks into overall-progress dicts on a queue.

    Each reported update is mapped onto a single 0..1 scale by giving every
    git stage (counting, compressing, receiving, resolving) a fixed slice of
    the bar. Consumers read ``{"progress": float, "message": str}`` dicts
    from ``self.queue``.
    """

    # (operation flag, overall fraction reached once that stage completes).
    # Checked in this order; the first matching flag wins.
    _STAGE_ENDS = (
        (git.RemoteProgress.COUNTING, 0.05),
        (git.RemoteProgress.COMPRESSING, 0.10),
        (git.RemoteProgress.RECEIVING, 0.90),
        (git.RemoteProgress.RESOLVING, 1),
    )

    def __init__(self):
        self.queue = Queue()
        # Upper bound of the slice currently being filled.
        self.max_stage = 0.01
        # Lower bound of the slice currently being filled.
        self.prev_stage = 0
        super().__init__()

    def update(self, op_code: int, cur_count, max_count=None, message=""):
        """Record one progress report (overrides ``git.RemoteProgress.update``).

        Args:
            op_code: Bit mask combining a stage flag with BEGIN/END markers.
            cur_count: Items processed so far in the current stage.
            max_count: Total items in the current stage, if known.
            message: Free-form text accompanying the report.
        """
        if op_code & git.RemoteProgress.BEGIN:
            # A recognised stage is starting: the previous upper bound becomes
            # the new lower bound so progress keeps moving forward.
            new_stage = next(
                (end for flag, end in self._STAGE_ENDS if op_code & flag), None
            )
            if new_stage:
                self.prev_stage = self.max_stage
                self.max_stage = new_stage

        # Reports without numeric counts cannot be placed on the scale.
        if not (isinstance(cur_count, Number) and isinstance(max_count, Number)):
            return

        # A zero total would otherwise raise ZeroDivisionError; keep the bar
        # at the start of the current slice in that case.
        fraction = cur_count / max_count if max_count else 0.0
        self.queue.put(
            {
                "progress": self.prev_stage
                + fraction * (self.max_stage - self.prev_stage),
                "message": message,
            }
        )


class ProgressGitPuller(GitPuller):
    """GitPuller whose initial clone streams progress updates."""

    def initialize_repo(self):
        """Clone the repository in a worker thread, yielding progress dicts.

        Yields:
            The dicts that ``CloneProgress`` places on its queue, in order.

        Raises:
            Exception: whatever the clone raised in the worker thread,
                re-raised here once the queue has been drained.
        """
        logging.info("Repo {} doesn't exist. Cloning...".format(self.repo_dir))
        progress = CloneProgress()
        failure = []

        def clone_task():
            try:
                git.Repo.clone_from(
                    self.git_url,
                    self.repo_dir,
                    branch=self.branch_name,
                    progress=progress,
                )
            except Exception as error:
                # Hand the error to the consumer instead of losing it in the
                # worker thread.
                failure.append(error)
            finally:
                # Always send the end-of-stream sentinel; without it the
                # consumer below would block forever after a failed clone.
                progress.queue.put(None)

        threading.Thread(target=clone_task).start()

        while True:
            item = progress.queue.get(True)
            if item is None:
                break
            yield item

        if failure:
            raise failure[0]

        logging.info("Repo {} initialized".format(self.repo_dir))


class SyncHandlerBase(JupyterHandler):
Expand Down Expand Up @@ -78,7 +141,7 @@ async def _pull(self, repo: str, targetpath: str, exhibit_id: int):
)
repo_dir = os.path.join(repo_parent_dir, targetpath or repo.split("/")[-1])

gp = GitPuller(
gp = ProgressGitPuller(
repo,
repo_dir,
branch=branch,
Expand All @@ -104,10 +167,6 @@ def pull():

async def emit(self, data: dict):
serialized_data = json.dumps(data)
if "output" in data:
self.log.info(data["output"])
else:
self.log.info(data)
self.write("data: {}\n\n".format(serialized_data))
await self.flush()

Expand Down Expand Up @@ -137,6 +196,12 @@ async def _stream(self):
if progress is None:
msg = {"phase": "finished", "exhibit_id": exhibit_id}
del self.settings["pull_status_queues"][exhibit_id]
elif isinstance(progress, dict):
msg = {
"output": progress,
"phase": "progress",
"exhibit_id": exhibit_id,
}
elif isinstance(progress, Exception):
msg = {
"phase": "error",
Expand All @@ -159,7 +224,11 @@ async def _stream(self):
}

self.last_message[exhibit_id] = msg
await self.emit(msg)
try:
await self.emit(msg)
except StreamClosedError:
self.log.warn("git puller stream got closed")
pass

if empty_queues == len(queues_view):
await gen.sleep(0.5)
51 changes: 32 additions & 19 deletions jupyterlab_gallery/manager.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from datetime import datetime
from pathlib import Path

from traitlets.config.configurable import LoggingConfigurable
from traitlets import Dict, List, Unicode
from subprocess import run
import re


def extract_repository_owner(git_url: str) -> str:
Expand All @@ -16,6 +19,25 @@ def extract_repository_name(git_url: str) -> str:
return fragment


def has_updates(repo_path: Path) -> bool:
    """Return True when ``git status`` reports the branch is behind its upstream.

    Runs ``git status -b --porcelain`` in ``repo_path`` and inspects the
    ``## branch...upstream [ahead N, behind M]`` header line.

    Args:
        repo_path: Directory of the local checkout.

    Returns:
        True if the header contains a ``behind`` count. False otherwise,
        including when git cannot be launched, the directory is missing, or
        the output has no recognisable header.
    """
    try:
        result = run(
            # Same tokens the previous shell string produced, passed as an
            # argv list so no shell is involved.
            # NOTE(review): git documents the optional modes of -u/--ignored
            # in attached form (e.g. -uno); passed separately, "n" may be
            # read as a pathspec - confirm the intent.
            ["git", "status", "-b", "--porcelain", "-u", "n", "--ignored", "n"],
            cwd=repo_path,
            capture_output=True,
        )
    except OSError:
        # Missing git executable or unusable working directory.
        return False

    lines = result.stdout.decode("utf-8", errors="replace").splitlines()
    if not lines:
        return False

    # Only the first line carries the branch header; matching it alone keeps
    # the check working when file entries follow it.
    data = re.match(
        r"^## (.*?)( \[(ahead (?P<ahead>\d+))?(, )?(behind (?P<behind>\d+))?\])?$",
        lines[0],
    )
    if not data:
        return False
    return data["behind"] is not None


class GalleryManager(LoggingConfigurable):
root_dir = Unicode(
config=False,
Expand Down Expand Up @@ -85,23 +107,14 @@ def get_exhibit_data(self, exhibit):
local_path = self.get_local_path(exhibit)

data["localPath"] = str(local_path)
data["revision"] = "2a2f2ee779ac21b70339da6551c2f6b0b00f6efe"
# timestamp from .git/FETCH_HEAD of the cloned repo
data["lastUpdated"] = "2024-05-01"
data["currentTag"] = "v3.2.4"
# the UI can show that there are X updates available; it could also show
# a summary of the commits available, or tags available; possibly the name
# of the most recent tag and would be sufficient over sending the list of commits,
# which can be long and delay the initialization.
data["updatesAvailable"] = False
data["isCloned"] = local_path.exists()
data["newestTag"] = "v3.2.5"
data["updates"] = [
{
"revision": "02f04c339f880540064d2223176830afdd02f5fa",
"title": "commit description",
"description": "long commit description",
"date": "date in format returned by git",
}
]
exists = local_path.exists()
data["isCloned"] = exists
if exists:
fetch_head = local_path / ".git" / "FETCH_HEAD"
if fetch_head.exists():
data["lastUpdated"] = datetime.fromtimestamp(
fetch_head.stat().st_mtime
).isoformat()
data["updatesAvailable"] = has_updates(local_path)

return data
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "jupyterlab-gallery",
"version": "0.1.2",
"version": "0.1.3",
"description": "A JupyterLab gallery extension for presenting and downloading examples from remote repositories",
"keywords": [
"jupyter",
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ classifiers = [
]
dependencies = [
"jupyter_server>=2.0.1,<3",
"nbgitpuller>=1.2.1"
"nbgitpuller>=1.2.1",
"GitPython>=3.1.43"
]
dynamic = ["version", "description", "authors", "urls", "keywords"]

Expand Down
Loading

0 comments on commit 2324e31

Please sign in to comment.