diff --git a/readthedocs/core/unresolver.py b/readthedocs/core/unresolver.py index aaaeffa0f26..e0386816cf6 100644 --- a/readthedocs/core/unresolver.py +++ b/readthedocs/core/unresolver.py @@ -208,7 +208,7 @@ def unresolve_url(self, url, append_indexhtml=True): parsed_url = urlparse(url) if parsed_url.scheme not in ["http", "https"]: raise InvalidSchemeError(parsed_url.scheme) - domain = self.get_domain_from_host(parsed_url.netloc) + domain = parsed_url.hostname unresolved_domain = self.unresolve_domain(domain) return self._unresolve( unresolved_domain=unresolved_domain, @@ -551,13 +551,23 @@ def unresolve_domain(self, domain): Unresolve domain by extracting relevant information from it. :param str domain: Domain to extract the information from. + It can be a full URL; in that case, only its hostname is used. :returns: A UnresolvedDomain object. """ + parsed_domain = urlparse(domain) + if parsed_domain.scheme: + if parsed_domain.scheme not in ["http", "https"]: + raise InvalidSchemeError(parsed_domain.scheme) + domain = parsed_domain.hostname + public_domain = self.get_domain_from_host(settings.PUBLIC_DOMAIN) external_domain = self.get_domain_from_host( settings.RTD_EXTERNAL_VERSION_DOMAIN ) + if not domain: + raise InvalidSubdomainError(domain) + subdomain, *root_domain = domain.split(".", maxsplit=1) root_domain = root_domain[0] if root_domain else "" diff --git a/readthedocs/rtd_tests/tests/test_unresolver.py b/readthedocs/rtd_tests/tests/test_unresolver.py index 62c69f902e5..f74421976c2 100644 --- a/readthedocs/rtd_tests/tests/test_unresolver.py +++ b/readthedocs/rtd_tests/tests/test_unresolver.py @@ -11,11 +11,13 @@ InvalidExternalVersionError, InvalidPathForVersionedProjectError, InvalidSchemeError, + InvalidSubdomainError, SuspiciousHostnameError, TranslationNotFoundError, TranslationWithoutVersionError, VersionNotFoundError, unresolve, + unresolver, ) from readthedocs.projects.constants import SINGLE_VERSION_WITHOUT_TRANSLATIONS from
readthedocs.projects.models import Domain @@ -372,8 +374,32 @@ def test_unresolve_invalid_scheme(self): "fttp://pip.readthedocs.io/en/latest/", "fttps://pip.readthedocs.io/en/latest/", "ssh://pip.readthedocs.io/en/latest/", + "javascript://pip.readthedocs.io/en/latest/", "://pip.readthedocs.io/en/latest/", ] for url in invalid_urls: with pytest.raises(InvalidSchemeError): unresolve(url) + + with pytest.raises(InvalidSubdomainError): + unresolve("https:///pip.readthedocs.io/en/latest/") + + def test_unresolve_domain_with_full_url(self): + result = unresolver.unresolve_domain("https://pip.readthedocs.io/en/latest/") + self.assertIsNone(result.domain) + self.assertEqual(result.project, self.pip) + self.assertTrue(result.is_from_public_domain) + self.assertEqual(result.source_domain, "pip.readthedocs.io") + + def test_unresolve_domain_with_full_url_invalid_protocol(self): + invalid_protocols = [ + "fttp", + "fttps", + "ssh", + "javascript", + ] + for protocol in invalid_protocols: + with pytest.raises(InvalidSchemeError): + unresolver.unresolve_domain( + f"{protocol}://pip.readthedocs.io/en/latest/" + )