Skip to content

Commit

Permalink
Merge pull request #2055 from rebeccacremona/export-images
Browse files: browse the repository at this point in the history
Fix export of newly-uploaded images
  • Loading branch information
rebeccacremona authored Aug 1, 2024
2 parents eaeab81 + d962c45 commit 4da4c5e
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 0 deletions.
4 changes: 4 additions & 0 deletions web/frontend/libs/tinymce_extensions.js
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,10 @@ export function getInitConfig(selector, enhanced, code) {
paste_remove_styles: true,
paste_remove_styles_if_webkit: true,
paste_strip_class_attributes: "all",
// image URLs
relative_urls: false,
convert_urls: false,
remove_script_host : false,
media_dimensions: false,
extended_valid_elements: extend_valid_elements,
setup: (editor) => {
Expand Down
33 changes: 33 additions & 0 deletions web/main/test/test_export.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from io import BytesIO
import itertools
from pathlib import Path
from zipfile import ZipFile

Expand Down Expand Up @@ -366,3 +367,35 @@ def test_annotated_export_invalid_clamped(annotations_factory):
resource = annotations_factory("LegalDocument", input)[1]
resource.annotations.update(global_end_offset=1000) # move end offset past end of text
assert annotated_content_for_export(resource) == expected


def test_disallowed_images_stripped(rf, text_block_factory, resource_factory):
    """Check which ``<img>`` sources survive an HTML export.

    A text block is built containing one image per source. Exporting
    without a request must leave every source in the output; exporting
    with a spoofed request in ``export_options`` must keep only the
    sources that point at the request's own host.
    """
    request = rf.get("/spoof-export-request")

    disallowed_srcs = ["/etc/hosts", "../../images/foo", "http://example.com"]
    # Absolute URLs on the request's host, over both schemes, are expected to survive.
    allowed_srcs = [
        f"{scheme}://{request.get_host()}/foo" for scheme in ("http", "https")
    ]
    every_src = [*disallowed_srcs, *allowed_srcs]

    markup = "".join(f'<img src="{src}">' for src in every_src)
    resource = resource_factory(
        resource=text_block_factory(content=markup),
        resource_type="TextBlock",
    )

    # Baseline: with no request supplied, nothing is filtered out.
    unaltered_html = resource.export(False, None, file_type="html")
    absent_from_baseline = [src for src in every_src if src not in unaltered_html]
    assert not absent_from_baseline

    # A spoofed `request` object is a required argument for proper export of rich text:
    # https://github.com/harvard-lil/h2o/blob/dd67276720fe3a7af7e110da958448399a92399f/web/main/utils.py#L282
    # With it in place, only the allowed image sources may remain.
    html = resource.export(
        False, None, file_type="html", export_options={"request": request}
    )
    leaked = [src for src in disallowed_srcs if src in html]
    assert not leaked
    dropped = [src for src in allowed_srcs if src not in html]
    assert not dropped

0 comments on commit 4da4c5e

Please sign in to comment.