diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index b2bde5a9b1d696..950290ada38b3b 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -3,6 +3,7 @@ import unittest import urllib.parse from test import support +from string import ascii_letters, digits RFC1808_BASE = "http://a/b/c/d;p?q#f" RFC2396_BASE = "http://a/b/c/d;p?q" @@ -1419,6 +1420,17 @@ def test_invalid_bracketed_hosts(self): self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix]v6a.ip[suffix') self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix]v6a.ip') self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://v6a.ip[suffix') + # unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + unreserved = ascii_letters + digits + "-" + "." + "_" + "~" + # sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" + sub_delims = "!" + "$" + "&" + "'" + "(" + ")" + "*" + "+" + "," + ";" + "=" + ipvfuture_authorized_characters = unreserved + sub_delims + ":" + removed_characters = "\t\n\r" + for character in range(256): + character = chr(character) + if character in ipvfuture_authorized_characters or character in removed_characters: + continue + self.assertRaises(ValueError, urllib.parse.urlsplit, f'scheme://[v7.invalid{character}invalid]/') def test_splitting_bracketed_hosts(self): p1 = urllib.parse.urlsplit('scheme://user@[v6a.ip]:1234/path?query') diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 67d9bbea0d3150..1ff5fa419ed421 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -460,7 +460,7 @@ def _check_bracketed_netloc(netloc): # https://www.rfc-editor.org/rfc/rfc3986#page-49 and https://url.spec.whatwg.org/ def _check_bracketed_host(hostname): if hostname.startswith('v'): - if not re.match(r"\Av[a-fA-F0-9]+\..+\z", hostname): + if not re.match(r"\Av[a-fA-F0-9]+\.[\w\.~!$&*+,;=:'()-]+\z", hostname, flags=re.ASCII): raise ValueError(f"IPvFuture address is invalid") else: ip = 
ipaddress.ip_address(hostname) # Throws Value Error if not IPv6 or IPv4 diff --git a/Misc/NEWS.d/next/Library/2025-07-27-15-17-43.gh-issue-137146.4QsMAT.rst b/Misc/NEWS.d/next/Library/2025-07-27-15-17-43.gh-issue-137146.4QsMAT.rst new file mode 100644 index 00000000000000..14e057ec1efdb1 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-27-15-17-43.gh-issue-137146.4QsMAT.rst @@ -0,0 +1 @@ +Fix overly permissive validation of IPvFuture hostnames in :func:`urllib.parse.urlsplit` and :func:`urllib.parse.urlparse`, in compliance with :rfc:`3986`. Characters that are not permitted in an IPvFuture address now raise :exc:`ValueError`.