Skip to content

Commit

Permalink
feat: render html meta tags in Django, with video metadata for entity…
Browse files Browse the repository at this point in the history
… pages (#2037)

* feat: render html meta tags in Django to add video metadata in entity pages

* fix nginx config: use rewrite instead of proxy_pass uri

* fix test_api_preview

* fix rewrite rule in nginx configuration

* clean up redundant or conflicting security headers between django and nginx
  • Loading branch information
amatissart authored Dec 12, 2024
1 parent 59f57e7 commit fea1fbb
Show file tree
Hide file tree
Showing 15 changed files with 182 additions and 38 deletions.
11 changes: 8 additions & 3 deletions backend/settings/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@
# Build paths inside the project like this: BASE_DIR / 'subdir'.
BASE_DIR = Path(__file__).resolve().parent.parent

# On deployed environments, Django needs to access the index.html template
# built separately by the frontend toolchain.
FRONTEND_STATIC_FILES_PATH = Path("/srv/tournesol-frontend")

load_dotenv()

server_settings = {}
Expand Down Expand Up @@ -71,6 +75,7 @@
MEDIA_ROOT = server_settings.get("MEDIA_ROOT", f"{base_folder}{MEDIA_URL}")

MAIN_URL = server_settings.get("MAIN_URL", "http://localhost:8000/")
TOURNESOL_MAIN_URL = server_settings.get("TOURNESOL_MAIN_URL", "http://localhost:3000/")

TOURNESOL_VERSION = server_settings.get("TOURNESOL_VERSION", "")

Expand All @@ -95,16 +100,15 @@
"drf_spectacular",
"rest_registration",
"vouch",
"ssr",
]

# Workaround for tests using TransactionTestCase with `serialized_rollback=True`
# (e.g. tests running ml and depending on the default Poll defined by migrations)
# See bug https://code.djangoproject.com/ticket/30751
TEST_NON_SERIALIZED_APPS = ["django.contrib.contenttypes", "django.contrib.auth"]

REST_REGISTRATION_MAIN_URL = server_settings.get(
"REST_REGISTRATION_MAIN_URL", "http://localhost:3000/"
)
REST_REGISTRATION_MAIN_URL = TOURNESOL_MAIN_URL
REST_REGISTRATION = {
"REGISTER_VERIFICATION_ENABLED": True,
"REGISTER_VERIFICATION_URL": REST_REGISTRATION_MAIN_URL + "verify-user/",
Expand Down Expand Up @@ -170,6 +174,7 @@
"django.middleware.clickjacking.XFrameOptionsMiddleware",
"django_prometheus.middleware.PrometheusAfterMiddleware",
]
X_FRAME_OPTIONS = "SAMEORIGIN"

ROOT_URLCONF = "settings.urls"

Expand Down
1 change: 1 addition & 0 deletions backend/settings/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,5 @@
SpectacularSwaggerView.as_view(url_name="schema"),
name="swagger-ui",
),
path("ssr/", include("ssr.urls")),
]
Empty file added backend/ssr/__init__.py
Empty file.
6 changes: 6 additions & 0 deletions backend/ssr/apps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from django.apps import AppConfig


class SsrConfig(AppConfig):
    """Django application configuration for the `ssr` app."""

    default_auto_field = "django.db.models.BigAutoField"
    # Must match the "ssr" entry listed in INSTALLED_APPS.
    name = "ssr"
3 changes: 3 additions & 0 deletions backend/ssr/templates/opengraph/meta_tags.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{# Emit one <meta> tag per (key, value) entry of the `meta_tags` context dict. #}{% for key, value in meta_tags.items %}
<meta property="{{ key }}" content="{{ value }}">
{% endfor %}
46 changes: 46 additions & 0 deletions backend/ssr/tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from unittest.mock import patch

from django.test import Client, TestCase

from tournesol.tests.factories.entity import VideoFactory


def mock_get_static_index_html():
    """
    Return a minimal HTML page containing the `<!--DJANGO_META_TAGS-->`
    placeholder.

    Used by the tests below as a replacement for
    `ssr.views.get_static_index_html`.
    """
    return """
<!DOCTYPE html>
<html lang="en">
<head>
<title>Tournesol</title>
<!--DJANGO_META_TAGS-->
</head>
<body>
<h1>Mocked html page</h1>
</body>
</html>
"""


@patch("ssr.views.get_static_index_html", new=mock_get_static_index_html)
class RenderedHtmlTestCase(TestCase):
    """
    Check the meta tags injected in the HTML pages served under `/ssr/`.

    The lookup of the static `index.html` is replaced by
    `mock_get_static_index_html` for every test of this class.
    """

    def setUp(self):
        self.client = Client()

    def _get_ok(self, url):
        """Request `url` and return the response once its status is checked to be 200."""
        resp = self.client.get(url)
        self.assertEqual(resp.status_code, 200)
        return resp

    def test_index_html_root(self):
        resp = self._get_ok("/ssr/")
        self.assertContains(resp, '<meta property="og:title" content="Tournesol">')

    def test_index_html_arbitrary_path(self):
        resp = self._get_ok("/ssr/faq")
        self.assertContains(resp, '<meta property="og:title" content="Tournesol">')

    def test_index_html_video_entity(self):
        video = VideoFactory()
        resp = self._get_ok(f"/ssr/entities/{video.uid}")

        # Entity pages override the default title and type with the video's own.
        expected_title = video.metadata["name"]
        self.assertContains(
            resp, f'<meta property="og:title" content="{expected_title}">'
        )
        self.assertContains(resp, '<meta property="og:type" content="video">')
17 changes: 17 additions & 0 deletions backend/ssr/urls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from django.urls import path, re_path

from . import views


urlpatterns = [
    # Entity pages: the captured `uid` is passed to the view, which uses it
    # to add entity-specific meta tags on top of the default ones.
    path(
        "entities/<str:uid>",
        views.render_tournesol_html_with_dynamic_tags,
        name="ssr_entities",
    ),
    # Catch-all: every other path is served by the same view without `uid`,
    # i.e. with the default meta tags only. Must stay last in the list.
    re_path(
        r".*",
        views.render_tournesol_html_with_dynamic_tags,
        name="ssr_default",
    ),
]
80 changes: 80 additions & 0 deletions backend/ssr/views.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import typing as tp

import requests
from django.http import HttpResponse, HttpRequest
from django.conf import settings
from django.template.loader import render_to_string

from tournesol.models import Entity
from tournesol.models.entity import TYPE_VIDEO


def get_static_index_html() -> str:
    """
    Return the content of the frontend `index.html`, used as the base page
    in which the dynamic meta tags are injected.

    When `settings.DEBUG` is enabled, the page is fetched over HTTP: first
    from the dev-env frontend container, then from
    `settings.TOURNESOL_MAIN_URL` if the connection to the container fails.
    Otherwise, `index.html` is read from `settings.FRONTEND_STATIC_FILES_PATH`.

    Raises:
        requests.RequestException: in DEBUG mode, if the page cannot be
            fetched (connection failure on the fallback URL, timeout, or
            HTTP error status).
        OSError: outside DEBUG mode, if `index.html` cannot be read.
    """
    if settings.DEBUG:
        # `requests` waits forever by default: always bound the wait so that
        # an unresponsive frontend cannot block the worker indefinitely.
        timeout_seconds = 10
        try:
            # Try to get index.html from dev-env frontend container
            resp = requests.get("http://tournesol-dev-front:3000", timeout=timeout_seconds)
        except requests.ConnectionError:
            resp = requests.get(settings.TOURNESOL_MAIN_URL, timeout=timeout_seconds)
        resp.raise_for_status()
        return resp.text

    # Decode explicitly as UTF-8 rather than relying on the process locale.
    index_path = settings.FRONTEND_STATIC_FILES_PATH / "index.html"
    return index_path.read_text(encoding="utf-8")


def get_default_meta_tags(request: HttpRequest) -> dict[str, str]:
    """
    Build the meta tags common to every page served by the `ssr` app.

    The frontend path is the value of `request.get_full_path()` without its
    leading "/ssr/" prefix. It is appended to `settings.MAIN_URL` (preview
    image) and to `settings.TOURNESOL_MAIN_URL` (canonical page URL).
    """
    frontend_path = request.get_full_path().removeprefix("/ssr/")
    preview_image_url = f"{settings.MAIN_URL}preview/{frontend_path}"
    page_url = f"{settings.TOURNESOL_MAIN_URL}{frontend_path}"
    description = (
        "Compare online content and contribute to the development of "
        "responsible content recommendations."
    )

    return {
        "og:site_name": "Tournesol",
        "og:type": "website",
        "og:title": "Tournesol",
        "og:description": description,
        "og:image": preview_image_url,
        "og:url": page_url,
        "twitter:card": "summary_large_image",
    }


def get_entity_meta_tags(uid: str) -> dict[str, str]:
    """
    Build the meta tags specific to the entity identified by `uid`.

    Returns an empty dict when no entity matches `uid`, or when the entity
    is not a video. For a video, the title and description tags are set only
    if the "name" and "uploader" metadata are present and non-empty.
    """
    try:
        entity: Entity = Entity.objects.get(uid=uid)
    except Entity.DoesNotExist:
        return {}

    if entity.type != TYPE_VIDEO:
        return {}

    tags = {
        "og:type": "video",
        "og:video:url": f"https://youtube.com/embed/{entity.video_id}",
        "og:video:type": "text/html",
    }

    # Optional tags, each one filled from a metadata field when available.
    optional_tags = (
        ("og:title", "name"),
        ("og:description", "uploader"),
    )
    for tag_name, metadata_key in optional_tags:
        if metadata_value := entity.metadata.get(metadata_key):
            tags[tag_name] = metadata_value

    return tags


def render_tournesol_html_with_dynamic_tags(request: HttpRequest, uid: tp.Optional[str] = None):
    """
    Serve the frontend `index.html` with its meta tags rendered by Django.

    The default meta tags are always used. When `uid` is provided, they are
    overridden or completed by the tags of the matching entity. The rendered
    tags replace the first `<!--DJANGO_META_TAGS-->` placeholder of the page.
    """
    base_html = get_static_index_html()

    tags = get_default_meta_tags(request)
    if uid is not None:
        tags.update(get_entity_meta_tags(uid))

    tags_html = render_to_string("opengraph/meta_tags.html", {"meta_tags": tags})
    page_html = base_html.replace("<!--DJANGO_META_TAGS-->", tags_html, 1)
    return HttpResponse(page_html)
2 changes: 1 addition & 1 deletion backend/tournesol/models/entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@ def link_to_tournesol(self):
return None

video_uri = urljoin(
settings.REST_REGISTRATION_MAIN_URL, f"entities/yt:{self.video_id}"
settings.TOURNESOL_MAIN_URL, f"entities/yt:{self.video_id}"
)
return format_html('<a href="{}" target="_blank">Play ▶</a>', video_uri)

Expand Down
17 changes: 9 additions & 8 deletions backend/tournesol/tests/test_api_preview.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from unittest.mock import patch

import requests
from django.test import TestCase
from PIL import Image
from requests import Response
Expand All @@ -11,14 +12,14 @@
from tournesol.entities.video import TYPE_VIDEO
from tournesol.models import Entity, EntityPollRating

from .factories.entity import EntityFactory, VideoCriteriaScoreFactory, VideoFactory
from .factories.entity import VideoCriteriaScoreFactory, VideoFactory


def raise_(exception):
    """Raise `exception`; lets an expression such as a lambda raise an error."""
    raise exception


def mock_yt_thumbnail_response(url, timeout=None) -> Response:
def mock_yt_thumbnail_response(self, url, timeout=None) -> Response:
resp = Response()
resp.status_code = 200
resp._content = Image.new("1", (1, 1)).tobitmap()
Expand Down Expand Up @@ -104,7 +105,7 @@ def setUp(self):
self.preview_url = "/preview/entities/"
self.valid_uid = "yt:sDPk-r18sb0"

@patch("requests.get", mock_yt_thumbnail_response)
@patch.object(requests.Session, "get", mock_yt_thumbnail_response)
@patch("tournesol.entities.video.VideoEntity.update_search_vector", lambda x: None)
def test_auth_200_get(self):
"""
Expand All @@ -131,7 +132,7 @@ def test_auth_200_get(self):
# check is not very robust.
self.assertNotIn("Content-Disposition", response.headers)

@patch("requests.get", mock_yt_thumbnail_response)
@patch.object(requests.Session, "get", mock_yt_thumbnail_response)
@patch("tournesol.entities.video.VideoEntity.update_search_vector", lambda x: None)
def test_anon_200_get_existing_entity(self):
"""
Expand Down Expand Up @@ -188,7 +189,7 @@ def test_get_preview_no_duration(self):
self.assertEqual(response.status_code, status.HTTP_200_OK)
self.assertEqual(response.headers["Content-Type"], "image/jpeg")

@patch("requests.get", lambda x, timeout=None: raise_(ConnectionError))
@patch.object(requests.Session, "get", lambda *args, **kwargs: raise_(ConnectionError))
@patch("tournesol.entities.video.VideoEntity.update_search_vector", lambda x: None)
def test_anon_200_get_with_yt_connection_error(self):
"""
Expand Down Expand Up @@ -231,7 +232,7 @@ def setUp(self):
self.valid_uid = "yt:sDPk-r18sb0"
self.valid_uid2 = "yt:VKsekCHBuHI"

@patch("requests.get", mock_yt_thumbnail_response)
@patch.object(requests.Session, "get", mock_yt_thumbnail_response)
@patch("tournesol.entities.video.VideoEntity.update_search_vector", lambda x: None)
def test_auth_200_get(self):
"""
Expand Down Expand Up @@ -279,7 +280,7 @@ def test_auth_200_get(self):
self.assertEqual(response.headers["Content-Type"], "image/jpeg")
self.assertNotIn("Content-Disposition", response.headers)

@patch("requests.get", mock_yt_thumbnail_response)
@patch.object(requests.Session, "get", mock_yt_thumbnail_response)
@patch("tournesol.entities.video.VideoEntity.update_search_vector", lambda x: None)
def test_anon_200_get_existing_entities(self):
"""
Expand Down Expand Up @@ -412,7 +413,7 @@ def test_anon_200_get_invalid_entity_type(self):
'inline; filename="tournesol_screenshot_og.png"',
)

@patch("requests.get", lambda x, timeout=None: raise_(ConnectionError))
@patch.object(requests.Session, "get", lambda *args, **kwargs: raise_(ConnectionError))
@patch("tournesol.entities.video.VideoEntity.update_search_vector", lambda x: None)
def test_anon_200_get_with_yt_connection_error(self):
"""
Expand Down
4 changes: 3 additions & 1 deletion backend/tournesol/views/previews/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@

YT_THUMBNAIL_MQ_SIZE = (320, 180)

session = requests.Session()


class BasePreviewAPIView(APIView):
"""
Expand Down Expand Up @@ -102,7 +104,7 @@ def get_yt_thumbnail(
# Quality can be: hq, mq, sd, or maxres (https://stackoverflow.com/a/34784842/188760)
url = f"https://img.youtube.com/vi/{entity.video_id}/{quality}default.jpg"
try:
thumbnail_response = requests.get(url, timeout=REQUEST_TIMEOUT)
thumbnail_response = session.get(url, timeout=REQUEST_TIMEOUT)
except (ConnectionError, Timeout) as exc:
logger.error("Preview failed for entity with UID %s.", entity.uid)
logger.error("Exception caught: %s", exc)
Expand Down
2 changes: 1 addition & 1 deletion backend/twitterbot/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,6 @@ def get_video_link(obj):
Return the Tournesol front end URI of the video, in the poll `videos`.
"""
video_uri = urljoin(
settings.REST_REGISTRATION_MAIN_URL, f"entities/yt:{obj.video.video_id}"
settings.TOURNESOL_MAIN_URL, f"entities/yt:{obj.video.video_id}"
)
return format_html('<a href="{}" target="_blank">Play ▶</a>', video_uri)
11 changes: 1 addition & 10 deletions frontend/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,7 @@
name="description"
content="Tournesol is an open source platform which aims to collaboratively identify top videos of public interest by eliciting contributors' judgements on content quality. We hope to contribute to making today's and tomorrow's large-scale algorithms robustly beneficial for all of humanity."
/>
<meta property="og:type" content="website">
<meta property="og:site_name" content="Tournesol">
<meta property="og:title" content="Tournesol">
<meta property="og:description" content="Compare online content and contribute to the development of responsible content recommendations.">
<!--
og:image and og:url will be built dynamically by Nginx, based on the request URL
-->
<meta property="og:image" content="__META_OG_IMAGE__">
<meta property="og:url" content="__META_OG_URL__">
<meta property="twitter:card" content="summary_large_image">
<!--DJANGO_META_TAGS-->
<link rel="apple-touch-icon" href="/icons/maskable-icon-512x512.png" />
<!--
manifest.json provides metadata used when your web app is installed on a
Expand Down
5 changes: 3 additions & 2 deletions infra/ansible/roles/django/templates/settings.yaml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ ALLOWED_HOSTS:
- 127.0.0.1
- 0.0.0.0
- {{api_domain_name}}
- {{domain_name}} # django also serves HTML with dynamic meta tags in "ssr" app

CORS_ALLOWED_ORIGINS:
- "{{frontend_scheme}}://{{domain_name}}"
Expand Down Expand Up @@ -33,9 +34,9 @@ OIDC_ISS_ENDPOINT: {{django_main_url}}
OIDC_RSA_PRIVATE_KEY: |
{{django_oidc_rsa_private_key | b64decode | indent(2)}}

{% if django_email_backend == "smtp" %}
REST_REGISTRATION_MAIN_URL: "{{frontend_scheme}}://{{domain_name}}/"
TOURNESOL_MAIN_URL: "{{frontend_scheme}}://{{domain_name}}/"

{% if django_email_backend == "smtp" %}
EMAIL_BACKEND: 'django.core.mail.backends.smtp.EmailBackend'
EMAIL_HOST: {{django_email_host}}
EMAIL_PORT: {{django_email_port}}
Expand Down
15 changes: 3 additions & 12 deletions infra/ansible/roles/django/templates/tournesol.j2
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,9 @@ server {
}

location @index {
root /srv/tournesol-frontend;
sub_filter '__META_OG_IMAGE__' '$scheme://{{api_domain_name}}/preview$request_uri';
sub_filter '__META_OG_URL__' '$scheme://$server_name$request_uri';
try_files /index.html =404;
# The HTML content is served by Django to inject dynamic meta tags
rewrite ^ /ssr$uri break;
include /etc/nginx/snippets/gunicorn_proxy.conf;
}

{% if letsencrypt_email is defined %}
Expand All @@ -61,10 +60,6 @@ server {
ssl_certificate_key /etc/letsencrypt/live/{{domain_name}}/privkey.pem;
ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem;

add_header X-Frame-Options "SAMEORIGIN";
add_header X-XSS-Protection "1; mode=block";
add_header X-Content-Type-Options nosniff;

# HSTS set to 180 days (15552000 seconds)
# For information, according to Mozilla 1 year is acceptable, and 2 year
# is recommended.
Expand Down Expand Up @@ -146,10 +141,6 @@ server {
ssl_certificate_key /etc/letsencrypt/live/{{api_domain_name}}/privkey.pem;
ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem;

add_header X-Frame-Options "SAMEORIGIN";
add_header X-XSS-Protection "1; mode=block";
add_header X-Content-Type-Options nosniff;

# HSTS set to 180 days (15552000 seconds)
# For information, according to Mozilla 1 year is acceptable, and 2 year
# is recommended.
Expand Down

0 comments on commit fea1fbb

Please sign in to comment.