Merge remote-tracking branch 'upstream/main' into log-elapsed-time
hugovk committed Sep 17, 2024
2 parents f2d2ffb + e1b1138 commit d32c793
Showing 2 changed files with 95 additions and 36 deletions.
129 changes: 94 additions & 35 deletions build_docs.py
@@ -45,11 +45,10 @@
from typing import Iterable
from urllib.parse import urljoin

import zc.lockfile
import jinja2
import requests
import tomlkit

import urllib3
import zc.lockfile

try:
from os import EX_OK, EX_SOFTWARE as EX_FAILURE
@@ -433,7 +432,8 @@ def build_robots_txt(
www_root: Path,
group,
skip_cache_invalidation,
):
http: urllib3.PoolManager,
) -> None:
"""Disallow crawl of EOL versions in robots.txt."""
if not www_root.exists():
logging.info("Skipping robots.txt generation (www root does not even exist).")
@@ -448,7 +448,7 @@
robots_file.chmod(0o775)
run(["chgrp", group, robots_file])
if not skip_cache_invalidation:
purge("robots.txt")
purge(http, "robots.txt")


def build_sitemap(
@@ -641,7 +641,7 @@ def full_build(self):
"""
return not self.quick and not self.language.html_only

def run(self) -> bool:
def run(self, http: urllib3.PoolManager) -> bool:
"""Build and publish a Python doc, for a language, and a version."""
start_time = perf_counter()
logging.info("Running.")
@@ -652,7 +652,7 @@ def run(self) -> bool:
if self.should_rebuild():
self.build_venv()
self.build()
self.copy_build_to_webroot()
self.copy_build_to_webroot(http)
self.save_state(build_duration=perf_counter() - start_time)
except Exception as err:
logging.exception("Badly handled exception, human, please help.")
@@ -798,7 +798,7 @@ def build_venv(self):
run([venv_path / "bin" / "python", "-m", "pip", "freeze", "--all"])
self.venv = venv_path

def copy_build_to_webroot(self):
def copy_build_to_webroot(self, http: urllib3.PoolManager) -> None:
"""Copy a given build to the appropriate webroot with appropriate rights."""
logging.info("Publishing start.")
start_time = perf_counter()
@@ -911,9 +911,9 @@ def copy_build_to_webroot(self):
prefixes = run(["find", "-L", targets_dir, "-samefile", target]).stdout
prefixes = prefixes.replace(targets_dir + "/", "")
prefixes = [prefix + "/" for prefix in prefixes.split("\n") if prefix]
purge(*prefixes)
purge(http, *prefixes)
for prefix in prefixes:
purge(*[prefix + p for p in changed])
purge(http, *[prefix + p for p in changed])
logging.info(
"Publishing done (%s).", format_seconds(perf_counter() - start_time)
)
@@ -981,7 +981,15 @@ def save_state(self, build_duration: float):
state_file.write_text(tomlkit.dumps(states), encoding="UTF-8")


def symlink(www_root: Path, language: Language, directory: str, name: str, group: str, skip_cache_invalidation: bool):
def symlink(
www_root: Path,
language: Language,
directory: str,
name: str,
group: str,
skip_cache_invalidation: bool,
http: urllib3.PoolManager,
) -> None:
"""Used by major_symlinks and dev_symlink to maintain symlinks."""
if language.tag == "en": # English is rooted on /, no /en/
path = www_root
@@ -998,12 +1006,17 @@ def symlink(www_root: Path, language: Language, directory: str, name: str, group
link.symlink_to(directory)
run(["chown", "-h", ":" + group, str(link)])
if not skip_cache_invalidation:
purge_path(www_root, link)
purge_path(http, www_root, link)


def major_symlinks(
www_root: Path, group, versions: Iterable[Version], languages: Iterable[Language], skip_cache_invalidation: bool
):
www_root: Path,
group: str,
versions: Iterable[Version],
languages: Iterable[Language],
skip_cache_invalidation: bool,
http: urllib3.PoolManager,
) -> None:
"""Maintains the /2/ and /3/ symlinks for each language.
Like:
@@ -1013,11 +1026,26 @@
"""
current_stable = Version.current_stable(versions).name
for language in languages:
symlink(www_root, language, current_stable, "3", group, skip_cache_invalidation)
symlink(www_root, language, "2.7", "2", group, skip_cache_invalidation)
symlink(
www_root,
language,
current_stable,
"3",
group,
skip_cache_invalidation,
http,
)
symlink(www_root, language, "2.7", "2", group, skip_cache_invalidation, http)


def dev_symlink(www_root: Path, group, versions, languages, skip_cache_invalidation: bool):
def dev_symlink(
www_root: Path,
group,
versions,
languages,
skip_cache_invalidation: bool,
http: urllib3.PoolManager,
) -> None:
"""Maintains the /dev/ symlinks for each language.
Like:
@@ -1027,10 +1055,18 @@ def dev_symlink(www_root: Path, group, versions, languages, skip_cache_invalidat
"""
current_dev = Version.current_dev(versions).name
for language in languages:
symlink(www_root, language, current_dev, "dev", group, skip_cache_invalidation)
symlink(
www_root,
language,
current_dev,
"dev",
group,
skip_cache_invalidation,
http,
)


def purge(*paths):
def purge(http: urllib3.PoolManager, *paths: Path | str) -> None:
"""Remove one or many paths from docs.python.org's CDN.
To be used when a file changes, so the CDN fetches the new one.
@@ -1039,20 +1075,22 @@ def purge(*paths):
for path in paths:
url = urljoin(base, str(path))
logging.debug("Purging %s from CDN", url)
requests.request("PURGE", url, timeout=30)
http.request("PURGE", url, timeout=30)


def purge_path(www_root: Path, path: Path):
def purge_path(http: urllib3.PoolManager, www_root: Path, path: Path) -> None:
"""Recursively remove a path from docs.python.org's CDN.
To be used when a directory changes, so the CDN fetches the new one.
"""
purge(*[file.relative_to(www_root) for file in path.glob("**/*")])
purge(path.relative_to(www_root))
purge(str(path.relative_to(www_root)) + "/")
purge(http, *[file.relative_to(www_root) for file in path.glob("**/*")])
purge(http, path.relative_to(www_root))
purge(http, str(path.relative_to(www_root)) + "/")


def proofread_canonicals(www_root: Path, skip_cache_invalidation: bool) -> None:
def proofread_canonicals(
www_root: Path, skip_cache_invalidation: bool, http: urllib3.PoolManager
) -> None:
"""In www_root we check that all canonical links point to existing contents.
It can happen that a canonical is "broken":
@@ -1074,11 +1112,12 @@ def proofread_canonicals(www_root: Path, skip_cache_invalidation: bool) -> None:
html = html.replace(canonical.group(0), "")
file.write_text(html, encoding="UTF-8", errors="surrogateescape")
if not skip_cache_invalidation:
purge(str(file).replace("/srv/docs.python.org/", ""))
purge(http, str(file).replace("/srv/docs.python.org/", ""))


def parse_versions_from_devguide():
releases = requests.get(
def parse_versions_from_devguide(http: urllib3.PoolManager) -> list[Version]:
releases = http.request(
"GET",
"https://raw.githubusercontent.com/"
"python/devguide/main/include/release-cycle.json",
timeout=30,
@@ -1124,7 +1163,8 @@ def build_docs(args) -> bool:
"""Build all docs (each language and each version)."""
logging.info("Full build start.")
start_time = perf_counter()
versions = parse_versions_from_devguide()
http = urllib3.PoolManager()
versions = parse_versions_from_devguide(http)
languages = parse_languages_from_config()
todo = [
(version, language)
@@ -1137,7 +1177,6 @@
cpython_repo = Repository(
"https://github.com/python/cpython.git", args.build_root / "cpython"
)
cpython_repo.update()
while todo:
version, language = todo.pop()
logging.root.handlers[0].setFormatter(
@@ -1149,22 +1188,42 @@
scope = sentry_sdk.get_isolation_scope()
scope.set_tag("version", version.name)
scope.set_tag("language", language.tag)
cpython_repo.update()
builder = DocBuilder(
version, versions, language, languages, cpython_repo, **vars(args)
)
all_built_successfully &= builder.run()
all_built_successfully &= builder.run(http)
logging.root.handlers[0].setFormatter(
logging.Formatter("%(asctime)s %(levelname)s: %(message)s")
)

build_sitemap(versions, languages, args.www_root, args.group)
build_404(args.www_root, args.group)
build_robots_txt(
versions, languages, args.www_root, args.group, args.skip_cache_invalidation
versions,
languages,
args.www_root,
args.group,
args.skip_cache_invalidation,
http,
)
major_symlinks(
args.www_root,
args.group,
versions,
languages,
args.skip_cache_invalidation,
http,
)
dev_symlink(
args.www_root,
args.group,
versions,
languages,
args.skip_cache_invalidation,
http,
)
major_symlinks(args.www_root, args.group, versions, languages, args.skip_cache_invalidation)
dev_symlink(args.www_root, args.group, versions, languages, args.skip_cache_invalidation)
proofread_canonicals(args.www_root, args.skip_cache_invalidation)
proofread_canonicals(args.www_root, args.skip_cache_invalidation, http)

logging.info("Full build done (%s).", format_seconds(perf_counter() - start_time))

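The common thread in the build_docs.py hunks above is that the module-level requests calls are replaced by a single urllib3.PoolManager, created once in build_docs() and passed explicitly to every helper that talks to the network (the CDN purges, the devguide fetch). Below is a minimal sketch of that pattern, not the real script: the CDN base URL and the example paths are illustrative only.

# Minimal sketch of the pattern this commit adopts: one shared PoolManager,
# created at the top level and threaded through as an explicit argument.
import urllib3


def purge(http: urllib3.PoolManager, *paths: str) -> None:
    """Ask the CDN to drop its cached copy of each path."""
    base = "https://docs.python.org/"  # illustrative; the script builds URLs with urljoin()
    for path in paths:
        # urllib3 passes arbitrary HTTP verbs through, so the non-standard
        # PURGE method used by the CDN works directly.
        http.request("PURGE", base + path, timeout=30)


def build_docs() -> None:
    http = urllib3.PoolManager()  # one shared connection pool for the whole run
    # ... clone, build and publish steps elided ...
    purge(http, "robots.txt", "3/", "dev/")


if __name__ == "__main__":
    build_docs()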
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,5 +1,5 @@
 jinja2
-requests
 sentry-sdk>=2
 tomlkit
+urllib3>=2
 zc.lockfile
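The requirements change pins urllib3>=2, likely because response helpers such as HTTPResponse.json() only exist in the 2.x line. A hedged sketch of the devguide fetch from parse_versions_from_devguide follows; the hunk above is cut off before the decoding step, so the exact call in the real script may differ.

# Hedged sketch: decoding the devguide release-cycle JSON with urllib3 >= 2.
import urllib3

http = urllib3.PoolManager()
resp = http.request(
    "GET",
    "https://raw.githubusercontent.com/"
    "python/devguide/main/include/release-cycle.json",
    timeout=30,
)
releases = resp.json()  # HTTPResponse.json() exists from urllib3 2.0 onward
for name, info in releases.items():
    # The JSON shape (a version -> metadata mapping) is assumed from the devguide file.
    print(name, info.get("status"))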
