Skip to content

Commit

Permalink
Merge commit 'a318b9e' into release-depositar
Browse files Browse the repository at this point in the history
  • Loading branch information
u10313335 committed Sep 26, 2024
2 parents 4951e8b + a318b9e commit 504873d
Show file tree
Hide file tree
Showing 13 changed files with 171 additions and 67 deletions.
6 changes: 4 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
repos:
# Autoformat: Python code, syntax patterns are modernized
- repo: https://github.com/asottile/pyupgrade
rev: v3.15.2
rev: v3.16.0
hooks:
- id: pyupgrade
args:
Expand Down Expand Up @@ -68,10 +68,12 @@ repos:
- id: check-case-conflict
- id: check-executables-have-shebangs
- id: requirements-txt-fixer
# exclude ci/refreeze generated requirements.txt
exclude: ^.*images\/.*\/requirements\.txt$

# Lint: Python code
- repo: https://github.com/PyCQA/flake8
rev: "7.0.0"
rev: "7.1.0"
hooks:
- id: flake8

Expand Down
2 changes: 1 addition & 1 deletion binderhub/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,7 +558,7 @@ def _default_build_namespace(self):
return os.environ.get("BUILD_NAMESPACE", "default")

build_image = Unicode(
"quay.io/jupyterhub/repo2docker:2023.06.0",
"quay.io/jupyterhub/repo2docker:2024.07.0",
help="""
DEPRECATED: Use c.KubernetesBuildExecutor.build_image
Expand Down
2 changes: 1 addition & 1 deletion binderhub/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ def _default_namespace(self):
return os.getenv("BUILD_NAMESPACE", "default")

build_image = Unicode(
"quay.io/jupyterhub/repo2docker:2023.06.0",
"quay.io/jupyterhub/repo2docker:2024.07.0",
help="Docker image containing repo2docker that is used to spawn the build pods.",
config=True,
)
Expand Down
2 changes: 1 addition & 1 deletion binderhub/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ async def get(self, provider_prefix, _unescaped_spec):
await self.emit(
{
"phase": "failed",
"message": f"Sorry, {spec} has been temporarily disabled from launching. Please contact admins for more info!",
"message": f"Sorry, {spec} is not allowed to launch. Please contact admins for more info!",
}
)
return
Expand Down
64 changes: 48 additions & 16 deletions binderhub/repoproviders.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import time
import urllib.parse
from datetime import datetime, timedelta, timezone
from urllib.parse import parse_qs, urlparse
from urllib.parse import parse_qs, urlencode, urlparse

import escapism
from prometheus_client import Gauge
Expand Down Expand Up @@ -67,11 +67,29 @@ class RepoProvider(LoggingConfigurable):
"""
)

# Opt-in allow list: when non-empty, only specs matching one of these
# regexes may build (and banned_specs still takes precedence).
allowed_specs = List(
help="""
List of specs to allow building.
Should be a list of regexes (not regex objects) that match specs which
should be allowed.
A spec is allowed if:
1. it matches allowed_specs and does not match banned_specs or
2. allowed_specs is unspecified and the spec does not match banned_specs.
""",
config=True,
)

# Deny list: any spec matching one of these regexes is refused,
# regardless of allowed_specs.
banned_specs = List(
help="""
List of specs to blacklist building.
Should be a list of regexes (not regex objects) that match specs which should be blacklisted
A spec is allowed if:
1. it matches allowed_specs and does not match banned_specs or
2. allowed_specs is unspecified and the spec does not match banned_specs.
""",
config=True,
)
Expand Down Expand Up @@ -112,13 +130,22 @@ class RepoProvider(LoggingConfigurable):

def is_banned(self):
    """
    Return true if the given spec has been banned or explicitly
    not allowed.

    A spec is banned if it matches any entry of ``banned_specs``, or if
    ``allowed_specs`` is non-empty and the spec matches none of its
    entries. With an empty/unset ``allowed_specs``, only ``banned_specs``
    applies.
    """
    for banned in self.banned_specs:
        # Ignore case, because most git providers do not
        # count DS-100/textbook as different from ds-100/textbook
        if re.match(banned, self.spec, re.IGNORECASE):
            return True
    # `and len(...)` was redundant: a non-empty list is already truthy.
    if self.allowed_specs:
        # allowed_specs is non-empty: the spec is banned unless it
        # matches at least one allowed pattern.
        return not any(
            re.match(allowed, self.spec, re.IGNORECASE)
            for allowed in self.allowed_specs
        )
    # allowed_specs unspecified or empty and spec does not match
    # banned_specs: not banned.
    return False

def has_higher_quota(self):
Expand Down Expand Up @@ -471,30 +498,35 @@ def __init__(self, *args, **kwargs):
async def get_resolved_ref(self):
parsed_repo = urlparse(self.repo)

url_parts_1 = parsed_repo.path.split("/history/")
url_parts_2 = url_parts_1[0].split("/")
if url_parts_2[-2] == "dataset":
self.dataset_id = url_parts_2[-1]
else:
if "/dataset/" not in parsed_repo.path:
# Not actually a dataset
return None

api_url_path = "/api/3/action/"
# CKAN may be under a URL prefix, and we should accommodate that
url_prefix, dataset_url = parsed_repo.path.split("/dataset/")

dataset_url_parts = dataset_url.split("/")
self.dataset_id = dataset_url_parts[0]

api = parsed_repo._replace(
path="/".join(url_parts_2[:-2]) + api_url_path, query=""
path=f"{url_prefix}/api/3/action/", query=""
).geturl()

# handle the activites
# Activity ID may be present either as a query parameter, activity_id
# or as part of the URL, under `/history/<activity-id>`. If `/history/`
# is present, that takes precedence over `activity_id`
activity_id = None
if parse_qs(parsed_repo.query).get("activity_id") is not None:
if "history" in dataset_url_parts:
activity_id = dataset_url_parts[dataset_url_parts.index("history") + 1]
elif parse_qs(parsed_repo.query).get("activity_id") is not None:
activity_id = parse_qs(parsed_repo.query).get("activity_id")[0]
if len(url_parts_1) == 2:
activity_id = url_parts_1[-1]

if activity_id:
fetch_url = (
f"{api}activity_data_show?" f"id={activity_id}&object_type=package"
fetch_url = f"{api}activity_data_show?" + urlencode(
{"id": activity_id, "object_type": "package"}
)
else:
fetch_url = f"{api}package_show?id={self.dataset_id}"
fetch_url = f"{api}package_show?" + urlencode({"id": self.dataset_id})

client = AsyncHTTPClient()
try:
Expand Down
42 changes: 42 additions & 0 deletions binderhub/tests/test_repoproviders.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,12 +220,31 @@ async def test_dataverse(
"https://demo.ckan.org/dataset/sample-dataset-1",
"ckan-sample-dataset-1",
],
[
"https://demo.datashades.com/dataset/chart-test?activity_id=061888e9-e3c2-4769-b097-9c195a841e2f",
"https://demo.datashades.com/dataset/chart-test?activity_id=061888e9-e3c2-4769-b097-9c195a841e2f",
"chart-test.v1717501747",
"https://demo.datashades.com/dataset/chart-test?activity_id=061888e9-e3c2-4769-b097-9c195a841e2f",
"ckan-chart-test",
],
[
"https://demo.datashades.com/dataset/chart-test/history/061888e9-e3c2-4769-b097-9c195a841e2f",
"https://demo.datashades.com/dataset/chart-test/history/061888e9-e3c2-4769-b097-9c195a841e2f",
"chart-test.v1717501747",
"https://demo.datashades.com/dataset/chart-test/history/061888e9-e3c2-4769-b097-9c195a841e2f",
"ckan-chart-test",
],
["https://demo.ckan.org/group/roger", None, None, None, None],
["https://demo.ckan.org/dataset/nosuchdataset", None, None, None, None],
],
)
async def test_ckan(spec, resolved_spec, resolved_ref, resolved_ref_url, build_slug):
provider = CKANProvider(spec=spec)

ref = await provider.get_resolved_ref()
if not resolved_ref:
# We are done here if we don't expect to resolve
return
assert resolved_ref in ref

slug = provider.get_build_slug()
Expand Down Expand Up @@ -293,6 +312,29 @@ def test_banned():
assert provider.is_banned()


def test_allowed():
    """A spec matching allowed_specs must not be reported as banned."""
    spec = "jupyterhub/zero-to-jupyterhub-k8s/v0.4"
    provider = GitHubRepoProvider(spec=spec, allowed_specs=["^jupyterhub.*"])
    assert not provider.is_banned()


def test_not_allowed():
    """A spec matching none of allowed_specs must be reported as banned."""
    spec = "jupyterhub/zero-to-jupyterhub-k8s/v0.4"
    provider = GitHubRepoProvider(spec=spec, allowed_specs=["^yuvipanda.*"])
    assert provider.is_banned()


def test_allowed_but_banned():
    """banned_specs takes precedence over a matching allowed_specs entry."""
    spec = "jupyterhub/zero-to-jupyterhub-k8s/v0.4"
    provider = GitHubRepoProvider(
        spec=spec,
        allowed_specs=["^jupyterhub.*"],
        banned_specs=[".*zero-to-.*"],
    )
    assert provider.is_banned()


def test_higher_quota():
provider = GitHubRepoProvider(
spec="jupyterhub/zero-to-jupyterhub-k8s/v0.4", high_quota_specs=["^yuvipanda.*"]
Expand Down
2 changes: 1 addition & 1 deletion ci/refreeze
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@ docker run --rm \
--workdir=/io \
--user=root \
python:3.11-bullseye \
sh -c 'pip install pip-tools==6.* && pip-compile --upgrade helm-chart/images/binderhub/requirements.in'
sh -c 'pip install pip-tools==7.* && pip-compile --allow-unsafe --strip-extras --upgrade helm-chart/images/binderhub/requirements.in'
28 changes: 25 additions & 3 deletions docs/source/api.rst
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
BinderHub API Documentation
===========================

Endpoint
--------
Endpoints
---------

`/build`
~~~~~~~~

There's one API endpoint, which is:
This is the main API endpoint, which is:

::

Expand Down Expand Up @@ -40,6 +43,25 @@ When the request is received, the following happens:
4. If the build succeeds, we contact the JupyterHub API and start
launching the server.

`/health`
~~~~~~~~~

This reports the health of BinderHub and the services it needs to run.

`/metrics`
~~~~~~~~~~

This reports the metrics for `Prometheus <https://prometheus.io/>`_.

`/versions`
~~~~~~~~~~~

This reports the version of BinderHub and the services it needs to run.

`/_config`
~~~~~~~~~~

This reports which :doc:`repository providers </reference/repoproviders>` are enabled.

Events
------
Expand Down
12 changes: 6 additions & 6 deletions docs/source/authentication.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,15 @@ you need to add the following into ``config.yaml``:
jupyterhub:
cull:
# don't cull authenticated users
users: False
# don't cull authenticated users (reverts binderhub chart's default)
users: false
hub:
redirectToServer: false
config:
BinderSpawner:
auth_enabled: true
# specify the desired authenticator
JupyterHub:
redirect_to_server: false
# specify the desired authenticator
authenticator_class: <desired-authenticator>
# use config of your authenticator here
# use the docs at https://zero-to-jupyterhub.readthedocs.io/en/stable/authentication.html
Expand All @@ -39,10 +39,10 @@ you need to add the following into ``config.yaml``:
user:
scopes:
- self
- "access:services"
- "access:services!service=binder"
singleuser:
# to make notebook servers aware of hub
# make notebook servers aware of hub (reverts binderhub chart's default to z2jh chart's default)
cmd: jupyterhub-singleuser
If the configuration above was entered correctly, once you upgrade your
Expand Down
12 changes: 8 additions & 4 deletions helm-chart/binderhub/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,12 @@ jupyterhub:
- binder
scopes:
- servers
# we don't need admin:users if auth is not enabled!
- "admin:users"
# admin:users is required in order to create a jupyterhub user for an
# anonymous binderhub web-server visitor in non-authenticated
# deployments, and read:users is required for authenticated
# deployments to check the state of a jupyterhub user's running
# servers before trying to launch.
- admin:users
extraConfig:
0-binderspawnermixin: |
"""
Expand Down Expand Up @@ -276,7 +280,7 @@ dind:
daemonset:
image:
name: docker.io/library/docker
tag: "26.1.1-dind" # ref: https://hub.docker.com/_/docker/tags
tag: "27.1.1-dind" # ref: https://hub.docker.com/_/docker/tags
pullPolicy: ""
pullSecrets: []
# Additional command line arguments to pass to dockerd
Expand All @@ -296,7 +300,7 @@ pink:
daemonset:
image:
name: quay.io/podman/stable
tag: "v5.0.2" # ref: https://quay.io/repository/podman/stable
tag: "v5.1.2" # ref: https://quay.io/repository/podman/stable
pullPolicy: ""
pullSecrets: []
lifecycle: {}
Expand Down
Loading

0 comments on commit 504873d

Please sign in to comment.