From 034766743f0a00a6df880c9dd550aced69d5f3e2 Mon Sep 17 00:00:00 2001 From: Greg Kempe Date: Wed, 17 Jan 2024 16:25:45 +0200 Subject: [PATCH 1/2] cross-origin popups --- peachjam/resolver.py | 114 ++++++++++++++++++ peachjam/settings.py | 6 + .../templates/peachjam/document_popup.html | 14 ++- peachjam/views/documents.py | 113 +---------------- peachjam/views/widgets.py | 92 ++++++++++++-- pyproject.toml | 1 + 6 files changed, 220 insertions(+), 120 deletions(-) create mode 100644 peachjam/resolver.py diff --git a/peachjam/resolver.py b/peachjam/resolver.py new file mode 100644 index 000000000..f1906ea13 --- /dev/null +++ b/peachjam/resolver.py @@ -0,0 +1,114 @@ +from django.conf import settings + + +class RedirectResolver: + RESOLVER_MAPPINGS = { + "africanlii": { + "country_code": "aa", + "domain": "africanlii.org", + }, + "eswatinilii": { + "country_code": "sz", + "domain": "eswatinilii.org", + }, + "ghalii": { + "country_code": "gh", + "domain": "ghalii.org", + }, + "lawlibrary": { + "country_code": "za", + "domain": "lawlibrary.org.za", + }, + "leslii": { + "country_code": "ls", + "domain": "lesotholii.org", + }, + "malawilii": { + "country_code": "mw", + "domain": "malawilii.org", + }, + "mauritiuslii": { + "country_code": "mu", + "domain": "mauritiuslii.org", + }, + "namiblii": { + "country_code": "na", + "domain": "namiblii.org", + }, + "nigerialii": { + "country_code": "ng", + "domain": "nigerialii.org", + }, + "open by-laws": { + "place_code": [], + "domain": "openbylaws.org.za", + }, + "rwandalii": { + "country_code": "rw", + "domain": "rwandalii.org", + }, + "seylii": { + "country_code": "sc", + "domain": "seylii.org", + }, + "sierralii": { + "country_code": "sl", + "domain": "sierralii.org", + }, + "tanzlii": { + "country_code": "tz", + "domain": "tanzlii.org", + }, + "tcilii": { + "country_code": "tc", + "domain": "tcilii.org", + }, + "ulii": { + "country_code": "ug", + "domain": "ulii.org", + }, + "zambialii": { + "country_code": "zm", + "domain": "zambialii.org", + }, + "zanzibarlii": { + "place_code": "tz-znz", + "domain": "zanzibarlii.org", + }, + "zimlii": { + "country_code": "zw", + "domain": "zimlii.org", + }, + } + + def __init__(self, app_name): + self.current_authority = self.RESOLVER_MAPPINGS[app_name.lower()] + + def get_domain_for_frbr_uri(self, parsed_frbr_uri): + best_domain = self.get_best_domain(parsed_frbr_uri) + if best_domain != self.current_authority["domain"]: + return best_domain + return None + + def get_url_for_frbr_uri(self, parsed_frbr_uri, raw_frbr_uri): + domain = self.get_domain_for_frbr_uri(parsed_frbr_uri) + if domain: + return f"https://{domain}{raw_frbr_uri}" + + def get_best_domain(self, parsed_uri): + country_code = parsed_uri.country + place_code = parsed_uri.place + + if country_code != place_code: + for key, mapping in self.RESOLVER_MAPPINGS.items(): + if mapping.get("place_code") == place_code: + return mapping.get("domain") + + # if no domain matching with place code is found use country code + for key, mapping in self.RESOLVER_MAPPINGS.items(): + if mapping.get("country_code") == country_code: + return mapping.get("domain") + return None + + +resolver = RedirectResolver(settings.PEACHJAM["APP_NAME"]) diff --git a/peachjam/settings.py b/peachjam/settings.py index 5e8ebaafe..56608962c 100644 --- a/peachjam/settings.py +++ b/peachjam/settings.py @@ -77,6 +77,7 @@ "drf_spectacular", "django_advanced_password_validation", "martor", + "corsheaders", ] MIDDLEWARE = [ @@ -87,6 +88,7 @@ "whitenoise.middleware.WhiteNoiseMiddleware", "django.contrib.sessions.middleware.SessionMiddleware", "django.middleware.locale.LocaleMiddleware", + "corsheaders.middleware.CorsMiddleware", "django.middleware.common.CommonMiddleware", "django.middleware.csrf.CsrfViewMiddleware", "django.contrib.auth.middleware.AuthenticationMiddleware", @@ -607,3 +609,7 @@ def before_send(event, hint): } # disable the normal martor theme which pulls in another bootstrap version MARTOR_ALTERNATIVE_CSS_FILE_THEME = "martor/css/peachjam.css" + +# CORS +# disable regex matches, we do matching using signals +CORS_URLS_REGEX = r"^$" diff --git a/peachjam/templates/peachjam/document_popup.html b/peachjam/templates/peachjam/document_popup.html index 14f0d179a..f8a7b8f9c 100644 --- a/peachjam/templates/peachjam/document_popup.html +++ b/peachjam/templates/peachjam/document_popup.html @@ -7,7 +7,7 @@ {% endblock %} {% else %} -
+
{% block title %} {{ document.title }} @@ -19,13 +19,21 @@ {% endblock %}
{% block citation %} - {% if document.citation %} + {% if document.citation and document.citation != document.title %}
{{ document.citation }}
{% endif %} {% endblock %} + {% block date %}
{{ document.date }}
{% endblock %}
- {% block date %}
{{ document.date }}
{% endblock %} + {% block offsite_request %} + {% if offsite_request %} + + {% endif %} + {% endblock %} {% endif %}
diff --git a/peachjam/views/documents.py b/peachjam/views/documents.py index 535a038a6..64a3a4560 100644 --- a/peachjam/views/documents.py +++ b/peachjam/views/documents.py @@ -1,5 +1,4 @@ from cobalt import FrbrUri -from django.conf import settings from django.http import Http404, HttpResponse from django.shortcuts import get_object_or_404, redirect, reverse from django.utils.decorators import method_decorator @@ -9,111 +8,7 @@ from peachjam.helpers import add_slash, add_slash_to_frbr_uri from peachjam.models import CoreDocument from peachjam.registry import registry - - -class RedirectResolver: - RESOLVER_MAPPINGS = { - "africanlii": { - "country_code": "aa", - "domain": "africanlii.org", - }, - "eswatinilii": { - "country_code": "sz", - "domain": "eswatinilii.org", - }, - "ghalii": { - "country_code": "gh", - "domain": "ghalii.org", - }, - "lawlibrary": { - "country_code": "za", - "domain": "lawlibrary.org.za", - }, - "leslii": { - "country_code": "ls", - "domain": "lesotholii.org", - }, - "malawilii": { - "country_code": "mw", - "domain": "malawilii.org", - }, - "mauritiuslii": { - "country_code": "mu", - "domain": "mauritiuslii.org", - }, - "namiblii": { - "country_code": "na", - "domain": "namiblii.org", - }, - "nigerialii": { - "country_code": "ng", - "domain": "nigerialii.org", - }, - "open by-laws": { - "place_code": [], - "domain": "openbylaws.org.za", - }, - "rwandalii": { - "country_code": "rw", - "domain": "rwandalii.org", - }, - "seylii": { - "country_code": "sc", - "domain": "seylii.org", - }, - "sierralii": { - "country_code": "sl", - "domain": "sierralii.org", - }, - "tanzlii": { - "country_code": "tz", - "domain": "tanzlii.org", - }, - "tcilii": { - "country_code": "tc", - "domain": "tcilii.org", - }, - "ulii": { - "country_code": "ug", - "domain": "ulii.org", - }, - "zambialii": { - "country_code": "zm", - "domain": "zambialii.org", - }, - "zanzibarlii": { - "place_code": "tz-znz", - "domain": "zanzibarlii.org", - }, - "zimlii": { - "country_code": "zw", - "domain": "zimlii.org", - }, - } - - def __init__(self, app_name): - self.current_authority = self.RESOLVER_MAPPINGS[app_name.lower()] - - def get_domain_for_frbr_uri(self, parsed_uri): - best_domain = self.get_best_domain(parsed_uri) - if best_domain != self.current_authority["domain"]: - return best_domain - return None - - def get_best_domain(self, parsed_uri): - country_code = parsed_uri.country - place_code = parsed_uri.place - - if country_code != place_code: - for key, mapping in self.RESOLVER_MAPPINGS.items(): - if mapping.get("place_code") == place_code: - return mapping.get("domain") - - # if no domain matching with place code is found use country code - for key, mapping in self.RESOLVER_MAPPINGS.items(): - if mapping.get("country_code") == country_code: - return mapping.get("domain") - return None +from peachjam.resolver import resolver class DocumentDetailViewResolver(View): @@ -145,10 +40,8 @@ def dispatch(self, request, *args, **kwargs): ) if not obj: - resolver = RedirectResolver(settings.PEACHJAM["APP_NAME"]) - domain = resolver.get_domain_for_frbr_uri(parsed_frbr_uri) - if domain: - url = f"https://{domain}{frbr_uri}" + url = resolver.get_url_for_frbr_uri(parsed_frbr_uri, frbr_uri) + if url: return redirect(url) raise Http404() diff --git a/peachjam/views/widgets.py b/peachjam/views/widgets.py index c7f2d4cd1..1187a4f47 100644 --- a/peachjam/views/widgets.py +++ b/peachjam/views/widgets.py @@ -1,32 +1,87 @@ +import re +from urllib.parse import urlparse + import lxml.html from cobalt.uri import FrbrUri +from corsheaders.signals import check_request_enabled +from django.conf import settings from django.http import Http404 +from django.shortcuts import redirect from django.utils.translation import get_language from django.views.generic import DetailView from peachjam.helpers import add_slash, parse_utf8_html from peachjam.models import CoreDocument +from peachjam.resolver import RedirectResolver, resolver class DocumentPopupView(DetailView): - """Shows a popup with basic details for a document.""" + """Shows a popup with basic details for a document. + + An affiliate site may use this by redirecting a local popup to a popup on (this) LII website. + So we allow CORS requests, provided the origin matches the partner website. + + For example: + + 1. The user hovers over a link to /akn/xx/act/2009/1 on africanlii.org + 2. The browser asks africanlii.org for the popup, but it doesn't exist on africanlii.org + 3. So africanlii.org uses peachjam's resolver logic to identify that xxlii.org is responsible for /akn/xx/... + and redirects the user's browser to xxlii.org/p/africanlii.org/e/popup/akn/xx/act/2009/1 + 4. This view loads on xxlii.org and shows the popup, because the request came from africanlii.org + which matches the partner code in the URL + """ model = CoreDocument context_object_name = "document" template_name = "peachjam/document_popup.html" + partner_domains = [x["domain"] for x in RedirectResolver.RESOLVER_MAPPINGS.values()] + localhost = ["localhost", "127.0.0.1"] + frbr_uri = None + + def get(self, request, partner, frbr_uri, *args, **kwargs): + # check partner matches requesting host + if not self.valid_partner(request, partner): + raise Http404() + + try: + self.object = self.get_object() + except Http404: + if self.frbr_uri: + # use the resolver to send a redirect if it's probably off-site somewhere + domain = resolver.get_domain_for_frbr_uri(self.frbr_uri) + if domain: + return redirect(f"https://{domain}{self.request.path}") + raise + + context = self.get_context_data(object=self.object) + return self.render_to_response(context) + + def valid_partner(self, request, partner): + # only allow this page to be embedded from valid partners + # first, the partner must match the referer (or origin, for CORS requests) + referrer = request.META.get("HTTP_REFERER") or request.META.get("HTTP_ORIGIN") + if referrer and not settings.DEBUG: + try: + parsed = urlparse(referrer) + if parsed.hostname != partner and parsed.hostname not in self.localhost: + return False + except ValueError: + return False + # second, the partner must be in the list of valid partners + return partner in self.partner_domains or partner in self.localhost def get_object(self, *args, **kwargs): try: - frbr_uri = FrbrUri.parse(add_slash(self.kwargs["frbr_uri"])) + self.frbr_uri = FrbrUri.parse(add_slash(self.kwargs["frbr_uri"])) except ValueError: raise Http404() - self.portion = frbr_uri.portion - frbr_uri.portion = None - if frbr_uri.expression_date: - uri = frbr_uri.expression_uri() + self.portion = self.frbr_uri.portion + self.frbr_uri.portion = None + if self.frbr_uri.expression_date: + uri = self.frbr_uri.expression_uri() else: - uri = frbr_uri.work_uri() + uri = self.frbr_uri.work_uri() obj = self.model.objects.best_for_frbr_uri(uri, get_language())[0] if not obj: @@ -51,4 +106,27 @@ def get_context_data(self, **kwargs): except ValueError: raise Http404() + # is this a CORS request from off-site? (the partner host is not the same as the local host) + context["offsite_request"] = self.request.get_host() != self.kwargs["partner"] + return context + + +url_re = re.compile("^/p/([^/]+)/e/.*") + + +def check_cors_and_partner(sender, request, **kwargs): + """Check if we should mark this request as CORS-enabled. We do so if it's popup URL and + the origin matches the partner domain.""" + match = url_re.match(request.path_info) + if match: + # allow a CORS request if the partner portion of the URL matches the origin + origin = request.META.get("HTTP_ORIGIN") + if origin: + try: + return urlparse(origin).hostname == match.group(1) + except ValueError: + return False + + +check_request_enabled.connect(check_cors_and_partner) diff --git a/pyproject.toml b/pyproject.toml index f039ec1a0..1f5738797 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,6 +32,7 @@ dependencies = [ "django-background-tasks>=1.2.5", "django-ckeditor>=6.4.2", "django-compressor>=3.1", + "django-cors-headers>=4.3.1", "django-countries-plus>=1.3.2", "django-debug-toolbar>=3.2.4,<4.2.0", "django-elasticsearch-debug-toolbar>=3.0.2", From d6b59602a772dcfe37baedb01cb2e98806d74628 Mon Sep 17 00:00:00 2001 From: Greg Kempe Date: Sat, 20 Jan 2024 17:26:00 +0200 Subject: [PATCH 2/2] handle resolver without current authority; tests --- peachjam/resolver.py | 4 ++-- peachjam/tests/test_resolver.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/peachjam/resolver.py b/peachjam/resolver.py index f1906ea13..c92900812 100644 --- a/peachjam/resolver.py +++ b/peachjam/resolver.py @@ -82,11 +82,11 @@ class RedirectResolver: } def __init__(self, app_name): - self.current_authority = self.RESOLVER_MAPPINGS[app_name.lower()] + self.current_authority = self.RESOLVER_MAPPINGS.get(app_name.lower()) def get_domain_for_frbr_uri(self, parsed_frbr_uri): best_domain = self.get_best_domain(parsed_frbr_uri) - if best_domain != self.current_authority["domain"]: + if self.current_authority and best_domain != self.current_authority["domain"]: return best_domain return None diff --git a/peachjam/tests/test_resolver.py b/peachjam/tests/test_resolver.py index d170ed195..4796b4880 100644 --- a/peachjam/tests/test_resolver.py +++ b/peachjam/tests/test_resolver.py @@ -1,7 +1,7 @@ from cobalt import FrbrUri from django.test import TestCase -from peachjam.views.documents import RedirectResolver +from peachjam.resolver import RedirectResolver urls = [ "/akn/zm/judgment/zmsc/2021/7/eng@2021-01-19",