Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Count repos across federation members in quota #1461

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ jobs:
steps:
- uses: actions/checkout@v2

- name: install requirements
run: pip install ruamel.yaml

- name: check embedded chart code
run: ./ci/check_embedded_chart_code.py

Expand Down
90 changes: 90 additions & 0 deletions binderhub/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import json
import logging
import os
import random
import re
import secrets
from binascii import a2b_hex
Expand Down Expand Up @@ -612,6 +613,41 @@ def _default_build_token_secret(self):
"""
)

federation_id = Unicode(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it feels a bit weird that we are now encoding the notion of a "federation" in BinderHub itself - do we imagine any other usecase than mybinder.org where a user would want this? Or are we OK with designing BinderHub features that are just for mybinder.org?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the main difference between the two - this PR requires BinderHubs to 'pull' quotas and know about its peers, whereas #1460 uses a 'push' model, which works if we tell all the instances to push their quota consumption to the same place.

I can also adjust #1460 to change less - keep the concurrent-session quota counting instead of also switching to rate limiting. But then frankly, we are starting to reimplement a KV store, and should probably deploy etcd/memcache/redis to handle that.

config=True,
help="""
My id in the federation.

Used to exclude myself when checking federation members,
so all federation members can share the same federation config.
""",
)
federation_members = Dict(
key_trait=Unicode(),
value_trait=Unicode(),
config=True,
help="""
Dict of "federation-id": "url" for federation members.

Used for federation-wide application of quotas.
""",
)
federation_check_seconds = Integer(
180,
config=True,
help="""
Interval (in seconds) on which to check the health of other federtion members.
""",
)
federation_status = Dict(
key_trait=Unicode(),
value_trait=Dict(),
help="""status dict of federation members

Includes current counts of repos
"""
)

ban_networks = Dict(
config=True,
help="""
Expand Down Expand Up @@ -812,6 +848,7 @@ def initialize(self, *args, **kwargs):
"auth_enabled": self.auth_enabled,
"event_log": self.event_log,
"normalized_origin": self.normalized_origin,
"federation_status": self.federation_status,
}
)
if self.auth_enabled:
Expand Down Expand Up @@ -874,6 +911,56 @@ def stop(self):
self.http_server.stop()
self.build_pool.shutdown()

def watch_federation(self):
"""Watch federation members for their health, repo counts"""
if not self.federation_members:
# nothing to do
return
if not self.federation_id:
self.log.warning(
"BinderHub.federation_id not set, I don't know who I am in the federation!"
)

for federation_id, url in self.federation_members.items():
if federation_id != self.federation_id:
health_url = url_path_join(url, "health")
self.log.info(
f"Watching federation member {federation_id} at {health_url}"
)
asyncio.ensure_future(
self.watch_federation_member(federation_id, health_url)
)

async def watch_federation_member(self, federation_id, health_url):
"""Long-running coroutine for checking health of one federation member"""
while True:
repo_counts = {}
try:
self.log.debug(
f"Checking federation member {federation_id} at {health_url}"
)
response = await AsyncHTTPClient().fetch(health_url)
status = json.loads(response.body)
for check in status["checks"]:
if check.get("service") == "Pod quota":
print(check)
total = check["total_pods"]
quota = check["quota"]
self.log.debug(f"Federation member {federation_id} running {total}/{quota} pods")
repo_counts = check.get("repo_counts", {})
break
else:
self.log.warning(f"No Pod quota check for {federation_id}: {status}")
except Exception as e:
self.log.error(f"Error checking federation member {federation_id}: {e}")

self.federation_status[federation_id] = {
"repo_counts": repo_counts,
}
# add some jitter, +/- 10%
t = self.federation_check_seconds * (0.9 + 0.2 * random.random())
await asyncio.sleep(t)

async def watch_build_pods(self):
"""Watch build pods

Expand Down Expand Up @@ -905,6 +992,9 @@ def start(self, run_loop=True):
self.http_server.listen(self.port)
if self.builder_required:
asyncio.ensure_future(self.watch_build_pods())
if self.federation_members:
self.watch_federation()

if run_loop:
tornado.ioloop.IOLoop.current().start()

Expand Down
15 changes: 10 additions & 5 deletions binderhub/binderspawner_mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
Helpers for creating BinderSpawners

FIXME:
This file is defined in helm-chart/binderhub/values.yaml and is copied to
binderhub/binderspawner_mixin.py by setup.py so that it can be used by other JupyterHub
container spawners.
This file is defined in binderhub/binderspawner_mixin.py
and is copied to helm-chart/binderhub/values.yaml
by ci/check_embedded_chart_code.py

The BinderHub repo is just used as the distribution mechanism for this spawner, BinderHub
itself doesn't require this code.
The BinderHub repo is just used as the distribution mechanism for this spawner,
BinderHub itself doesn't require this code.

Longer term options include:
- Move BinderSpawnerMixin to a separate Python package and include it in the Z2JH Hub
Expand Down Expand Up @@ -87,6 +87,11 @@ def get_args(self):
return args

def start(self):
annotations = self.extra_annotations
if "repo_url" in self.user_options:
annotations["binder.hub.jupyter.org/repo_url"]: self.user_options["repo_url"]
if "binder_ref_url" in self.user_options:
annotations["binder.hub.jupyter.org/ref_url"]: self.user_options["binder_ref_url"]
if not self.auth_enabled:
if 'token' not in self.user_options:
raise web.HTTPError(400, "token required")
Expand Down
12 changes: 5 additions & 7 deletions binderhub/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -551,13 +551,11 @@ async def launch(self, provider):
return

for pod in pods:
for container in pod["spec"]["containers"]:
# is the container running the same image as us?
# if so, count one for the current repo.
image = container["image"].rsplit(":", 1)[0]
if image == image_no_tag:
matching_pods += 1
break
if pod["metadata"]["annotations"].get("binder.hub.jupyter.org/repo_url") == self.repo_url:
matching_pods += 1
for federation_id, fed_info in self.settings["federation_status"].items():
repo_counts = fed_info.get("repo_counts", {})
matching_pods += repo_counts.get(self.repo_url, 0)

if repo_quota and matching_pods >= repo_quota:
LAUNCH_COUNT.labels(
Expand Down
18 changes: 18 additions & 0 deletions binderhub/health.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,10 +165,28 @@ async def check_pod_quota(self):

quota = self.settings["pod_quota"]
total_pods = n_user_pods + n_build_pods
repo_counts = {}
for pod in user_pods:
# old way
repo_url = pod["metadata"]["annotations"].get(
"binder.hub.jupyter.org/repo_url", None
)
# old way, get it from container env
if not repo_url:
for container in pod["spec"]["containers"]:
for env in container["env"]:
if env["name"] == "BINDER_REPO_URL":
repo_url = env.get("value")
break
if repo_url:
repo_counts.setdefault(repo_url, 0)
repo_counts[repo_url] += 1

usage = {
"total_pods": total_pods,
"build_pods": n_build_pods,
"user_pods": n_user_pods,
"repo_counts": repo_counts,
"quota": quota,
"ok": total_pods <= quota if quota is not None else True,
}
Expand Down
31 changes: 21 additions & 10 deletions ci/check_embedded_chart_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,11 @@
import difflib
import os
import sys
import yaml
from ruamel.yaml import YAML

yaml = YAML(typ="rt")
yaml.preserve_quotes = True
yaml.indent(mapping=2, sequence=4, offset=2)

parser = argparse.ArgumentParser(description='Check embedded chart code')
parser.add_argument('--update', action='store_true', help='Update binderhub code from values.yaml')
Expand All @@ -21,21 +25,28 @@
binderspawner_mixin_py = os.path.join(root, 'binderhub', 'binderspawner_mixin.py')
values_yaml = os.path.join(root, 'helm-chart', 'binderhub', 'values.yaml')

with open(values_yaml) as f:
values = yaml.safe_load(f)
values_code = values['jupyterhub']['hub']['extraConfig']['0-binderspawnermixin'].splitlines()
with open(binderspawner_mixin_py, 'r') as f:
py_code = f.read()


if args.update:
with open(binderspawner_mixin_py, 'w') as f:
f.write(values['jupyterhub']['hub']['extraConfig']['0-binderspawnermixin'])
with open(values_yaml) as f:
values = yaml.load(f)
values_code = values['jupyterhub']['hub']['extraConfig']['0-binderspawnermixin']
if values_code != py_code:
print(f"Generating {values_yaml} from {binderspawner_mixin_py}")
values['jupyterhub']['hub']['extraConfig']['0-binderspawnermixin'] = py_code
with open(values_yaml, "w") as f:
yaml.dump(values, f)
else:
with open(binderspawner_mixin_py, 'r') as f:
py_code = f.read().splitlines()
with open(values_yaml) as f:
values = yaml.load(f)
values_code = values['jupyterhub']['hub']['extraConfig']['0-binderspawnermixin']

difflines = list(difflib.context_diff(values_code, py_code))
difflines = list(difflib.context_diff(values_code.splitlines(), py_code.splitlines()))
if difflines:
print('\n'.join(difflines))
print('\n')
print('Values code is not in sync with binderhub/binderspawner_mixin.py')
print('Run `python {} --update` to update binderhub/binderspawner_mixin.py from values.yaml'.format(sys.argv[0]))
print('Run `python {} --update` to update values.yaml from binderhub/binderspawner_mixin.py'.format(sys.argv[0]))
sys.exit(1)
4 changes: 0 additions & 4 deletions dev-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,6 @@ pycurl
pytest
pytest-asyncio
pytest-cov
# pyyaml is used by the scripts in tools/ and could absolutely be replaced by
# using ruamel.yaml. ruamel.yaml is more powerful than pyyaml, and that is
# relevant when writing YAML files back after having loaded them from disk.
pyyaml
jupyter-repo2docker>=2021.08.0
requests
ruamel.yaml>=0.15
6 changes: 4 additions & 2 deletions helm-chart/binderhub/files/binderhub_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
import os
from functools import lru_cache
from urllib.parse import urlparse
import yaml
from ruamel.yaml import YAML

yaml = YAML(typ="safe")


def _merge_dictionaries(a, b):
Expand Down Expand Up @@ -34,7 +36,7 @@ def _load_values():
if os.path.exists(path):
print(f"Loading {path}")
with open(path) as f:
values = yaml.safe_load(f)
values = yaml.load(f)
cfg = _merge_dictionaries(cfg, values)
else:
print(f"No config at {path}")
Expand Down
15 changes: 10 additions & 5 deletions helm-chart/binderhub/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,12 @@ jupyterhub:
Helpers for creating BinderSpawners

FIXME:
This file is defined in helm-chart/binderhub/values.yaml and is copied to
binderhub/binderspawner_mixin.py by setup.py so that it can be used by other JupyterHub
container spawners.
This file is defined in binderhub/binderspawner_mixin.py
and is copied to helm-chart/binderhub/values.yaml
by ci/check_embedded_chart_code.py

The BinderHub repo is just used as the distribution mechanism for this spawner, BinderHub
itself doesn't require this code.
The BinderHub repo is just used as the distribution mechanism for this spawner,
BinderHub itself doesn't require this code.

Longer term options include:
- Move BinderSpawnerMixin to a separate Python package and include it in the Z2JH Hub
Expand Down Expand Up @@ -153,6 +153,11 @@ jupyterhub:
return args

def start(self):
annotations = self.extra_annotations
if "repo_url" in self.user_options:
annotations["binder.hub.jupyter.org/repo_url"]: self.user_options["repo_url"]
if "binder_ref_url" in self.user_options:
annotations["binder.hub.jupyter.org/ref_url"]: self.user_options["binder_ref_url"]
if not self.auth_enabled:
if 'token' not in self.user_options:
raise web.HTTPError(400, "token required")
Expand Down
5 changes: 3 additions & 2 deletions tools/generate-json-schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@

from collections.abc import MutableMapping

import yaml
from ruamel.yaml import YAML
yaml = YAML(typ="safe")

here_dir = os.path.abspath(os.path.dirname(__file__))
schema_yaml = os.path.join(
Expand Down Expand Up @@ -50,7 +51,7 @@ def run():
# Using these sets, we can validate further manually by printing the results
# of set operations.
with open(schema_yaml) as f:
schema = yaml.safe_load(f)
schema = yaml.load(f)

# Drop what isn't relevant for a values.schema.json file packaged with the
# Helm chart, such as the description keys only relevant for our
Expand Down
14 changes: 8 additions & 6 deletions tools/validate-against-schema.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
#!/usr/bin/env python3
import jsonschema
import os
import yaml

import jsonschema
from ruamel.yaml import YAML
yaml = YAML(typ="safe")

here_dir = os.path.abspath(os.path.dirname(__file__))
schema_yaml = os.path.join(
Expand All @@ -18,13 +20,13 @@
)

with open(schema_yaml) as f:
schema = yaml.safe_load(f)
schema = yaml.load(f)
with open(values_yaml) as f:
values = yaml.safe_load(f)
values = yaml.load(f)
with open(lint_and_validate_values_yaml) as f:
lint_and_validate_values = yaml.safe_load(f)
lint_and_validate_values = yaml.load(f)
with open(binderhub_chart_config_yaml) as f:
binderhub_chart_config_yaml = yaml.safe_load(f)
binderhub_chart_config_yaml = yaml.load(f)

# Validate values.yaml against schema
print("Validating values.yaml against schema.yaml...")
Expand Down