diff --git a/python/pydantic_core/_pydantic_core.pyi b/python/pydantic_core/_pydantic_core.pyi
index 2b06ee54c..d2ef07e3b 100644
--- a/python/pydantic_core/_pydantic_core.pyi
+++ b/python/pydantic_core/_pydantic_core.pyi
@@ -606,6 +606,32 @@ class Url(SupportsAllComparisons):
             An instance of URL
         """
 
+    def join(self, path: str, append_trailing_slash: bool = False) -> Self:
+        """
+        Parse a string `path` as a URL, using this URL as the base.
+
+        Args:
+            path: The string (typically a relative URL) to parse and join with the base URL.
+            append_trailing_slash: Whether to append a trailing slash to the path of the resulting URL.
+                Ignored when the resulting URL has a query string or a fragment.
+
+        Returns:
+            A new `Url` instance
+
+        Raises:
+            ValueError: If `path` cannot be joined with this URL.
+        """
+
+    def __truediv__(self, other: str) -> Self:
+        """
+        Shorthand for `self.join(other, append_trailing_slash=True)`.
+        """
+
+    def __floordiv__(self, other: str) -> Self:
+        """
+        Shorthand for `self.join(other, append_trailing_slash=False)`.
+        """
+
 class MultiHostUrl(SupportsAllComparisons):
     """
     A URL type with support for multiple hosts, as used by some databases for DSNs, e.g. `https://foo.com,bar.com/path`.
diff --git a/src/url.rs b/src/url.rs
index 881347f25..4b3e66f58 100644
--- a/src/url.rs
+++ b/src/url.rs
@@ -155,6 +155,16 @@ impl PyUrl {
         (self.__str__(),)
     }
 
+    // `url / path`: join `path` onto this URL and append a trailing slash (see `join`).
+    fn __truediv__(&self, other: &str) -> PyResult<Self> {
+        self.join(other, true)
+    }
+
+    // `url // path`: join `path` onto this URL without appending a trailing slash.
+    fn __floordiv__(&self, other: &str) -> PyResult<Self> {
+        self.join(other, false)
+    }
+
     #[classmethod]
     #[pyo3(signature=(*, scheme, host, username=None, password=None, port=None, path=None, query=None, fragment=None))]
     #[allow(clippy::too_many_arguments)]
@@ -190,6 +200,31 @@ impl PyUrl {
         }
         cls.call1((url,))
     }
+
+    // Resolve `path` against this URL and return the result as a new `Url`. When `append_trailing_slash`
+    // is set, the resulting path is made to end in `/` unless the result carries a query or fragment.
+    #[pyo3(signature=(path, append_trailing_slash=false))]
+    pub fn join(&self, path: &str, append_trailing_slash: bool) -> PyResult<Self> {
+        let mut new_url = self
+            .lib_url
+            .join(path)
+            .map_err(|err| PyValueError::new_err(err.to_string()))?;
+
+        if append_trailing_slash && new_url.query().is_none() && new_url.fragment().is_none() {
+            // `pop_if_empty` drops an existing trailing empty segment first, so the slash is never doubled.
+            let path_segments_result = new_url.path_segments_mut().map(|mut segments| {
+                segments.pop_if_empty().push("");
+            });
+
+            // `path_segments_mut` fails for cannot-be-a-base URLs (e.g. `g:h`): fall back to editing
+            // the raw path, again taking care not to double an existing trailing slash.
+            if path_segments_result.is_err() && !new_url.path().ends_with('/') {
+                let new_path = format!("{}/", new_url.path());
+                new_url.set_path(&new_path);
+            }
+        }
+        Ok(PyUrl::new(new_url))
+    }
 }
 
 #[pyclass(name = "MultiHostUrl", module = "pydantic_core._pydantic_core", subclass, frozen)]
diff --git a/tests/validators/test_url.py b/tests/validators/test_url.py
index 59489dd00..1156c3e99 100644
--- a/tests/validators/test_url.py
+++ b/tests/validators/test_url.py
@@ -9,6 +9,10 @@
 
 from ..conftest import Err, PyAndJson
 
+SIMPLE_BASE = 'http://a/b/c/d'
+QUERY_BASE = 'http://a/b/c/d;p?q'
+QUERY_FRAGMENT_BASE = 'http://a/b/c/d;p?q#f'
+
 
 def test_url_ok(py_and_json: PyAndJson):
     v = py_and_json(core_schema.url_schema())
@@ -1305,3 +1309,149 @@ def test_url_build() -> None:
     )
     assert url == Url('postgresql://testuser:testpassword@127.0.0.1:5432/database?sslmode=require#test')
     assert str(url) == 'postgresql://testuser:testpassword@127.0.0.1:5432/database?sslmode=require#test'
+
+
+@pytest.mark.parametrize(
+    'base_url,join_path,expected_with_slash,expected_without_slash',
+    [
+        # Tests are based on the URL specification from https://url.spec.whatwg.org/
+        # Joining empty path with or without trailing slash should not affect the base URL.
+        ('http://example.com/', '', 'http://example.com/', 'http://example.com/'),
+        ('svn://pathtorepo/dir1', 'dir2', 'svn://pathtorepo/dir2/', 'svn://pathtorepo/dir2'),
+        ('svn+ssh://pathtorepo/dir1', 'dir2', 'svn+ssh://pathtorepo/dir2/', 'svn+ssh://pathtorepo/dir2'),
+        ('ws://a/b', 'g', 'ws://a/g/', 'ws://a/g'),
+        ('wss://a/b', 'g', 'wss://a/g/', 'wss://a/g'),
+        ('http://a/b/c/de', ';x', 'http://a/b/c/;x/', 'http://a/b/c/;x'),
+        # Non-RFC-defined tests, covering variations of base and trailing
+        # slashes
+        ('http://a/b/c/d/e/', '../../f/g/', 'http://a/b/c/f/g/', 'http://a/b/c/f/g/'),
+        ('http://a/b/c/d/e', '../../f/g/', 'http://a/b/f/g/', 'http://a/b/f/g/'),
+        ('http://a/b/c/d/e/', '/../../f/g/', 'http://a/f/g/', 'http://a/f/g/'),
+        ('http://a/b/c/d/e', '/../../f/g/', 'http://a/f/g/', 'http://a/f/g/'),
+        ('http://a/b/c/d/e/', '../../f/g', 'http://a/b/c/f/g/', 'http://a/b/c/f/g'),
+        ('http://a/b/', '../../f/g/', 'http://a/f/g/', 'http://a/f/g/'),
+        # An existing trailing slash is never doubled, even for URLs that cannot be a base (e.g. `g:h/`).
+        (SIMPLE_BASE, 'g:h/', 'g:h/', 'g:h/'),
+        (SIMPLE_BASE, 'g:h', 'g:h/', 'g:h'),
+        (SIMPLE_BASE, 'g', 'http://a/b/c/g/', 'http://a/b/c/g'),
+        (SIMPLE_BASE, './g', 'http://a/b/c/g/', 'http://a/b/c/g'),
+        (SIMPLE_BASE, 'g/', 'http://a/b/c/g/', 'http://a/b/c/g/'),
+        (SIMPLE_BASE, '/g', 'http://a/g/', 'http://a/g'),
+        (SIMPLE_BASE, '//g', 'http://g/', 'http://g/'),
+        (SIMPLE_BASE, '?y', 'http://a/b/c/d?y', 'http://a/b/c/d?y'),
+        (SIMPLE_BASE, 'g?y', 'http://a/b/c/g?y', 'http://a/b/c/g?y'),
+        (SIMPLE_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x', 'http://a/b/c/g?y/./x'),
+        (SIMPLE_BASE, '.', 'http://a/b/c/', 'http://a/b/c/'),
+        (SIMPLE_BASE, './', 'http://a/b/c/', 'http://a/b/c/'),
+        (SIMPLE_BASE, '..', 'http://a/b/', 'http://a/b/'),
+        (SIMPLE_BASE, '../', 'http://a/b/', 'http://a/b/'),
+        (SIMPLE_BASE, '../g', 'http://a/b/g/', 'http://a/b/g'),
+        (SIMPLE_BASE, '../..', 'http://a/', 'http://a/'),
+        (SIMPLE_BASE, '../../g', 'http://a/g/', 'http://a/g'),
+        (SIMPLE_BASE, './../g', 'http://a/b/g/', 'http://a/b/g'),
+        (SIMPLE_BASE, './g/.', 'http://a/b/c/g/', 'http://a/b/c/g/'),
+        (SIMPLE_BASE, 'g/./h', 'http://a/b/c/g/h/', 'http://a/b/c/g/h'),
+        (SIMPLE_BASE, 'g/../h', 'http://a/b/c/h/', 'http://a/b/c/h'),
+        (SIMPLE_BASE, 'http:g', 'http://a/b/c/g/', 'http://a/b/c/g'),
+        (SIMPLE_BASE, 'http:g?y', 'http://a/b/c/g?y', 'http://a/b/c/g?y'),
+        (SIMPLE_BASE, 'http:g?y/./x', 'http://a/b/c/g?y/./x', 'http://a/b/c/g?y/./x'),
+        (SIMPLE_BASE + '/', 'foo', SIMPLE_BASE + '/foo/', SIMPLE_BASE + '/foo'),
+        (QUERY_BASE, '?y', 'http://a/b/c/d;p?y', 'http://a/b/c/d;p?y'),
+        (QUERY_BASE, ';x', 'http://a/b/c/;x/', 'http://a/b/c/;x'),
+        (QUERY_BASE, 'g:h', 'g:h/', 'g:h'),
+        (QUERY_BASE, 'g', 'http://a/b/c/g/', 'http://a/b/c/g'),
+        (QUERY_BASE, './g', 'http://a/b/c/g/', 'http://a/b/c/g'),
+        (QUERY_BASE, 'g/', 'http://a/b/c/g/', 'http://a/b/c/g/'),
+        (QUERY_BASE, '/g', 'http://a/g/', 'http://a/g'),
+        (QUERY_BASE, '//g', 'http://g/', 'http://g/'),
+        (QUERY_BASE, 'g?y', 'http://a/b/c/g?y', 'http://a/b/c/g?y'),
+        (QUERY_BASE, '#s', 'http://a/b/c/d;p?q#s', 'http://a/b/c/d;p?q#s'),
+        (QUERY_BASE, 'g#s', 'http://a/b/c/g#s', 'http://a/b/c/g#s'),
+        (QUERY_BASE, 'g?y#s', 'http://a/b/c/g?y#s', 'http://a/b/c/g?y#s'),
+        (QUERY_BASE, 'g;x', 'http://a/b/c/g;x/', 'http://a/b/c/g;x'),
+        (QUERY_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s', 'http://a/b/c/g;x?y#s'),
+        (QUERY_BASE, '', 'http://a/b/c/d;p?q', 'http://a/b/c/d;p?q'),
+        (QUERY_BASE, '.', 'http://a/b/c/', 'http://a/b/c/'),
+        (QUERY_BASE, './', 'http://a/b/c/', 'http://a/b/c/'),
+        (QUERY_BASE, '..', 'http://a/b/', 'http://a/b/'),
+        (QUERY_BASE, '../', 'http://a/b/', 'http://a/b/'),
+        (QUERY_BASE, '../g', 'http://a/b/g/', 'http://a/b/g'),
+        (QUERY_BASE, '../..', 'http://a/', 'http://a/'),
+        (QUERY_BASE, '../../', 'http://a/', 'http://a/'),
+        (QUERY_BASE, '../../g', 'http://a/g/', 'http://a/g'),
+        # Abnormal Examples
+        (QUERY_BASE, '../../../g', 'http://a/g/', 'http://a/g'),
+        (QUERY_BASE, '../../../../g', 'http://a/g/', 'http://a/g'),
+        (QUERY_BASE, '/./g', 'http://a/g/', 'http://a/g'),
+        (QUERY_BASE, '/../g', 'http://a/g/', 'http://a/g'),
+        (QUERY_BASE, 'g.', 'http://a/b/c/g./', 'http://a/b/c/g.'),
+        (QUERY_BASE, '.g', 'http://a/b/c/.g/', 'http://a/b/c/.g'),
+        (QUERY_BASE, 'g..', 'http://a/b/c/g../', 'http://a/b/c/g..'),
+        (QUERY_BASE, '..g', 'http://a/b/c/..g/', 'http://a/b/c/..g'),
+        (QUERY_BASE, './../g', 'http://a/b/g/', 'http://a/b/g'),
+        (QUERY_BASE, './g/.', 'http://a/b/c/g/', 'http://a/b/c/g/'),
+        (QUERY_BASE, 'g/./h', 'http://a/b/c/g/h/', 'http://a/b/c/g/h'),
+        (QUERY_BASE, 'g/../h', 'http://a/b/c/h/', 'http://a/b/c/h'),
+        (QUERY_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y/', 'http://a/b/c/g;x=1/y'),
+        (QUERY_BASE, 'g;x=1/../y', 'http://a/b/c/y/', 'http://a/b/c/y'),
+        (QUERY_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x', 'http://a/b/c/g?y/./x'),
+        (QUERY_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x', 'http://a/b/c/g?y/../x'),
+        (QUERY_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x', 'http://a/b/c/g#s/./x'),
+        (QUERY_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x', 'http://a/b/c/g#s/../x'),
+        (QUERY_BASE, 'http:g', 'http://a/b/c/g/', 'http://a/b/c/g'),
+        # Test with empty (but defined) components.
+        (QUERY_FRAGMENT_BASE, '', 'http://a/b/c/d;p?q', 'http://a/b/c/d;p?q'),
+        (QUERY_FRAGMENT_BASE, '#', 'http://a/b/c/d;p?q#', 'http://a/b/c/d;p?q#'),
+        (QUERY_FRAGMENT_BASE, '#z', 'http://a/b/c/d;p?q#z', 'http://a/b/c/d;p?q#z'),
+        (QUERY_FRAGMENT_BASE, '?', 'http://a/b/c/d;p?', 'http://a/b/c/d;p?'),
+        (QUERY_FRAGMENT_BASE, '?#z', 'http://a/b/c/d;p?#z', 'http://a/b/c/d;p?#z'),
+        (QUERY_FRAGMENT_BASE, '?y', 'http://a/b/c/d;p?y', 'http://a/b/c/d;p?y'),
+        (QUERY_FRAGMENT_BASE, ';', 'http://a/b/c/;/', 'http://a/b/c/;'),
+        (QUERY_FRAGMENT_BASE, ';?y', 'http://a/b/c/;?y', 'http://a/b/c/;?y'),
+        (QUERY_FRAGMENT_BASE, ';#z', 'http://a/b/c/;#z', 'http://a/b/c/;#z'),
+        (QUERY_FRAGMENT_BASE, ';x', 'http://a/b/c/;x/', 'http://a/b/c/;x'),
+        (QUERY_FRAGMENT_BASE, '/w', 'http://a/w/', 'http://a/w'),
+        (QUERY_FRAGMENT_BASE, '//;x', 'http://;x/', 'http://;x/'),
+        (QUERY_FRAGMENT_BASE, '//v', 'http://v/', 'http://v/'),
+        # For backward compatibility with RFC1630, the scheme name is allowed
+        # to be present in a relative reference if it is the same as the base
+        # URI scheme.
+        (QUERY_FRAGMENT_BASE, 'http:', 'http://a/b/c/d;p?q', 'http://a/b/c/d;p?q'),
+        (QUERY_FRAGMENT_BASE, 'http:#', 'http://a/b/c/d;p?q#', 'http://a/b/c/d;p?q#'),
+        (QUERY_FRAGMENT_BASE, 'http:#z', 'http://a/b/c/d;p?q#z', 'http://a/b/c/d;p?q#z'),
+        (QUERY_FRAGMENT_BASE, 'http:?', 'http://a/b/c/d;p?', 'http://a/b/c/d;p?'),
+        (QUERY_FRAGMENT_BASE, 'http:?#z', 'http://a/b/c/d;p?#z', 'http://a/b/c/d;p?#z'),
+        (QUERY_FRAGMENT_BASE, 'http:?y', 'http://a/b/c/d;p?y', 'http://a/b/c/d;p?y'),
+        (QUERY_FRAGMENT_BASE, 'http:;', 'http://a/b/c/;/', 'http://a/b/c/;'),
+        (QUERY_FRAGMENT_BASE, 'http:;?y', 'http://a/b/c/;?y', 'http://a/b/c/;?y'),
+        (QUERY_FRAGMENT_BASE, 'http:;#z', 'http://a/b/c/;#z', 'http://a/b/c/;#z'),
+        (QUERY_FRAGMENT_BASE, 'http:;x', 'http://a/b/c/;x/', 'http://a/b/c/;x'),
+        (QUERY_FRAGMENT_BASE, 'http:/w', 'http://a/w/', 'http://a/w'),
+        (QUERY_FRAGMENT_BASE, 'http://;x', 'http://;x/', 'http://;x/'),
+        (QUERY_FRAGMENT_BASE, 'http:///w', 'http://w/', 'http://w/'),
+        (QUERY_FRAGMENT_BASE, 'http://v', 'http://v/', 'http://v/'),
+        # Different scheme is not ignored.
+        (QUERY_FRAGMENT_BASE, 'https:;', 'https://;/', 'https://;/'),
+        (QUERY_FRAGMENT_BASE, 'https:;x', 'https://;x/', 'https://;x/'),
+    ],
+)
+def test_url_join(base_url, join_path, expected_with_slash, expected_without_slash) -> None:
+    """Tests are based on
+    https://github.com/python/cpython/blob/3a0e7f57628466aedcaaf6c5ff7c8224f5155a2c/Lib/test/test_urlparse.py
+    and the URL specification from https://url.spec.whatwg.org/
+    """
+    url = Url(base_url)
+    assert str(url.join(join_path, append_trailing_slash=True)) == expected_with_slash
+    assert str(url.join(join_path, append_trailing_slash=False)) == expected_without_slash
+
+
+def test_url_join_operators() -> None:
+    url = Url('http://a/b/c/d')
+    assert str(url / 'e' / 'f') == 'http://a/b/c/e/f/'
+    assert str(url / 'e' // 'f') == 'http://a/b/c/e/f'
+    assert str(url // 'e' // 'f') == 'http://a/b/c/f'
+    assert str(url / 'e' / '?x=1') == 'http://a/b/c/e/?x=1'
+    assert str(url / 'e' / '?x=1' / '#y') == 'http://a/b/c/e/?x=1#y'
+    assert str(url / 'e' / '?x=1' // '#y') == 'http://a/b/c/e/?x=1#y'
+    assert str(url / 'e' // '?x=1' / '#y') == 'http://a/b/c/e/?x=1#y'
+    assert str(url // 'e' / '?x=1' / '#y') == 'http://a/b/c/e?x=1#y'