From 9e1c72bbd9c00481f664d5720145497cd77243b1 Mon Sep 17 00:00:00 2001
From: Greg Kempe <greg@kempe.net>
Date: Thu, 29 Aug 2024 10:34:34 +0200
Subject: [PATCH] always rewrite images in document content

This ensures that all images, even those in AKN HTML, have the correct URL,
including the document's full expression FRBR URI. Image sources that
already start with "media/" are not given a second "media/" prefix.
---
 peachjam/views/generic_views.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/peachjam/views/generic_views.py b/peachjam/views/generic_views.py
index b2d586605..884d6db41 100644
--- a/peachjam/views/generic_views.py
+++ b/peachjam/views/generic_views.py
@@ -19,6 +19,7 @@
     Predicate,
     Relationship,
 )
+from peachjam.xmlutils import parse_html_str
 from peachjam_api.serializers import (
     CitationLinkSerializer,
     PredicateSerializer,
@@ -280,8 +281,7 @@ def get_context_data(self, **kwargs):
             context["display_type"] = (
                 "akn" if context["document"].content_html_is_akn else "html"
             )
-            if not context["document"].content_html_is_akn:
-                self.prefix_images(context["document"])
+            self.prefix_images(context["document"])
         elif hasattr(context["document"], "source_file"):
             context["display_type"] = "pdf"
         else:
@@ -408,14 +408,14 @@ def get_notices(self):
 
     def prefix_images(self, document):
         """Rewrite image URLs so that we can serve them correctly."""
-        root = html.fromstring(document.content_html)
+        root = parse_html_str(document.content_html)
 
         for img in root.xpath(".//img[@src]"):
             src = img.attrib["src"]
             if not src.startswith("/") and not src.startswith("data:"):
-                img.attrib["src"] = (
-                    document.expression_frbr_uri + "/media/" + img.attrib["src"]
-                )
+                if not src.startswith("media/"):
+                    src = "media/" + src
+                img.attrib["src"] = document.expression_frbr_uri + "/" + src
 
         document.content_html = html.tostring(root, encoding="unicode")