diff --git a/CHANGES.rst b/CHANGES.rst index 471d99626f0..73cb7409de0 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -53,6 +53,10 @@ Bugs fixed * #11715: Apply ``tls_verify`` and ``tls_cacerts`` config to ``ImageDownloader``. Patch by Nick Touran. +* #11433: Added the ``linkcheck_allow_unauthorized`` configuration option. + Set this option to ``False`` to report HTTP 401 (unauthorized) server + responses as broken. + Patch by James Addison. Testing ------- diff --git a/doc/usage/configuration.rst b/doc/usage/configuration.rst index f2367a8e54c..5eff8eb5728 100644 --- a/doc/usage/configuration.rst +++ b/doc/usage/configuration.rst @@ -2915,6 +2915,18 @@ Options for the linkcheck builder .. versionadded:: 4.4 +.. confval:: linkcheck_allow_unauthorized + + When a webserver responds with an HTTP 401 (unauthorized) response, the + current default behaviour of Sphinx is to treat the link as "working". To + change that behaviour, set this option to ``False``. + + The default value for this option will be changed in Sphinx 8.0; from that + version onwards, HTTP 401 responses to checked hyperlinks will be treated + as "broken" by default. + + .. 
versionadded:: 7.3 + Options for the XML builder --------------------------- diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index 460598ef24e..85351b948db 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -7,6 +7,7 @@ import re import socket import time +import warnings from html.parser import HTMLParser from os import path from queue import PriorityQueue, Queue @@ -18,6 +19,7 @@ from requests.exceptions import ConnectionError, HTTPError, SSLError, TooManyRedirects from sphinx.builders.dummy import DummyBuilder +from sphinx.deprecation import RemovedInSphinx80Warning from sphinx.locale import __ from sphinx.transforms.post_transforms import SphinxPostTransform from sphinx.util import encode_uri, logging, requests @@ -66,6 +68,15 @@ def init(self) -> None: # set a timeout for non-responding servers socket.setdefaulttimeout(5.0) + if self.config.linkcheck_allow_unauthorized: + deprecation_msg = ( + "The default value for 'linkcheck_allow_unauthorized' will change " + "from `True` in Sphinx 7.3+ to `False`, meaning that HTTP 401 " + "unauthorized responses will be reported as broken by default. " + "See https://github.com/sphinx-doc/sphinx/issues/11433 for details." 
+ ) + warnings.warn(deprecation_msg, RemovedInSphinx80Warning, stacklevel=1) + def finish(self) -> None: checker = HyperlinkAvailabilityChecker(self.config) logger.info('') @@ -283,6 +294,7 @@ def __init__(self, config: Config, self.allowed_redirects = config.linkcheck_allowed_redirects self.retries: int = config.linkcheck_retries self.rate_limit_timeout = config.linkcheck_rate_limit_timeout + self._allow_unauthorized = config.linkcheck_allow_unauthorized self.user_agent = config.user_agent self.tls_verify = config.tls_verify @@ -437,9 +449,30 @@ def _check_uri(self, uri: str, hyperlink: Hyperlink) -> tuple[str, str, int]: except HTTPError as err: error_message = str(err) - # Unauthorised: the reference probably exists + # Unauthorized: the client did not provide required credentials if status_code == 401: - return 'working', 'unauthorized', 0 + if self._allow_unauthorized: + deprecation_msg = ( + "\n---\n" + "The linkcheck builder encountered an HTTP 401 " + "(unauthorized) response, and will report it as " + "'working' in this version of Sphinx to maintain " + "backwards-compatibility." + "\n" + "This logic will change in Sphinx 8.0 which will " + "report the hyperlink as 'broken'." + "\n" + "To explicitly continue treating unauthorized " + "hyperlink responses as 'working', set the " + "'linkcheck_allow_unauthorized' config option to " + "``True``." + "\n" + "See sphinx-doc/sphinx#11433 for details." 
+ "\n---" + ) + warnings.warn(deprecation_msg, RemovedInSphinx80Warning, stacklevel=1) + status = 'working' if self._allow_unauthorized else 'broken' + return status, 'unauthorized', 0 # Rate limiting; back-off if allowed, or report failure otherwise if status_code == 429: @@ -625,6 +658,7 @@ def setup(app: Sphinx) -> dict[str, Any]: app.add_config_value('linkcheck_anchors_ignore', ['^!'], '') app.add_config_value('linkcheck_anchors_ignore_for_url', (), '', (tuple, list)) app.add_config_value('linkcheck_rate_limit_timeout', 300.0, '') + app.add_config_value('linkcheck_allow_unauthorized', True, '') app.add_event('linkcheck-process-uri') diff --git a/tests/test_build_linkcheck.py b/tests/test_build_linkcheck.py index 2ba817579d4..28529d9b0cb 100644 --- a/tests/test_build_linkcheck.py +++ b/tests/test_build_linkcheck.py @@ -348,8 +348,12 @@ class CustomHandler(http.server.BaseHTTPRequestHandler): def authenticated(method): def method_if_authenticated(self): - if (expected_token is None - or self.headers["Authorization"] == f"Basic {expected_token}"): + if expected_token is None: + return method(self) + elif not self.headers["Authorization"]: + self.send_response(401, "Unauthorized") + self.end_headers() + elif self.headers["Authorization"] == f"Basic {expected_token}": return method(self) else: self.send_response(403, "Forbidden") @@ -392,6 +396,21 @@ def test_auth_header_uses_first_match(app): assert content["status"] == "working" +@pytest.mark.filterwarnings('ignore::sphinx.deprecation.RemovedInSphinx80Warning') +@pytest.mark.sphinx( + 'linkcheck', testroot='linkcheck-localserver', freshenv=True, + confoverrides={'linkcheck_allow_unauthorized': False}) +def test_unauthorized_broken(app): + with http_server(custom_handler(valid_credentials=("user1", "password"))): + app.build() + + with open(app.outdir / "output.json", encoding="utf-8") as fp: + content = json.load(fp) + + assert content["info"] == "unauthorized" + assert content["status"] == "broken" + + 
@pytest.mark.sphinx( 'linkcheck', testroot='linkcheck-localserver', freshenv=True, confoverrides={'linkcheck_auth': [(r'^$', ('user1', 'password'))]}) @@ -402,10 +421,9 @@ def test_auth_header_no_match(app): with open(app.outdir / "output.json", encoding="utf-8") as fp: content = json.load(fp) - # TODO: should this test's webserver return HTTP 401 here? - # https://github.com/sphinx-doc/sphinx/issues/11433 - assert content["info"] == "403 Client Error: Forbidden for url: http://localhost:7777/" - assert content["status"] == "broken" + # This link is considered working based on the default linkcheck_allow_unauthorized=True + assert content["info"] == "unauthorized" + assert content["status"] == "working" @pytest.mark.sphinx(