Skip to content

Commit

Permalink
fix: don't wrap HTML data with newlines when serializing for LC
Browse files Browse the repository at this point in the history
  • Loading branch information
kdmccormick committed Sep 24, 2024
1 parent 75d111a commit 639b45f
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 7 deletions.
81 changes: 81 additions & 0 deletions openedx/core/djangoapps/content_libraries/tests/test_runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""
import json
from gettext import GNUTranslations
from django.test import TestCase

from completion.test_utils import CompletionWaffleTestMixin
from django.db import connections, transaction
Expand All @@ -24,6 +25,7 @@
from openedx.core.djangoapps.dark_lang.models import DarkLangConfig
from openedx.core.djangoapps.xblock import api as xblock_api
from openedx.core.djangolib.testing.utils import skip_unless_lms, skip_unless_cms
from openedx.core.lib.xblock_serializer import api as serializer_api
from common.djangoapps.student.tests.factories import UserFactory


Expand Down Expand Up @@ -59,6 +61,85 @@ def setUp(self):
)


@skip_unless_cms
class ContentLibraryOlxTests(ContentLibraryContentTestMixin, TestCase):
"""
Basic tests of the Learning-Core-based XBlock serialization-deserializatin using XBlocks in a content library.
"""

def test_round_trip(self):
"""
Test that if we deserialized and serialize a block repeatedly, two things hold true:
1. Even if the OLX changes format, the inner content does not change format.
2. The OLX settles into a stable state after 1 round trip.
"""
# Let's use HTML, since it's easy, but the general concept here should apply to other blocks.
usage_key = library_api.create_library_block(self.library.key, "html", "roundtrip").usage_key

# The block's actual content (HTML in this case) has some extraneous spaces and newlines, as well as comment.
# We expect this to be preserved through the round-trips.
block_content = '''\
<div class="double quot attr">
<div class='single quot attr'>
<p> There is a space on either side of this sentence. </p>
<p>\tThere is a tab on either side of this sentence.\t<p>
<p>🙃There is an emoji on either side of this sentence.🙂</p>
<p>There is nothing on either side of this sentence.</p>
</div>
<p><![CDATA[ This is an inner CDATA 🤯 Technically illegal in HTML, but let's test it anyway. ?!&<>\t ]]&gt;</p>
<!-- This is a comment within the HTML. -->
</div>'''

# Outside the block content, our original OLX has some extraneous spaces and newlines, as well as a comment.
olx_1 = f'''\
<html
display_name="Round Trip Test HTML Block"
some_fake_attribute="some fake value"
><![CDATA[{block_content}]]><!--
I am an OLX comment.
--></html>
'''

# Here is what we expect the OLX to settle down to. Notable changes:
# * The url_name is added.
# * The fake field is gone.
# * The OLX comment is gone.
# * A trailing newline is added at the end of the export.
# DEVS: If you are purposefully tweaking the formatting of the xblock serializer, then it's fine to
# update the value of this variable, as long as:
# 1. the {block_content} remains unchanged, and
# 2. the canonical_olx remains stable through the 2nd round trip.
canonical_olx = f'''\
<html url_name="roundtrip" display_name="Round Trip Test HTML Block"><![CDATA[{block_content}]]></html>
'''

# Save the block to LC, and re-load it.
library_api.set_library_block_olx(usage_key, olx_1)
library_api.publish_changes(self.library.key)
block_saved_1 = xblock_api.load_block(usage_key, self.staff_user)

# Content should be preserved...
assert block_saved_1.data == block_content

# ...but the serialized OLX will have changed to match the 'canonical' OLX.
olx_2 = serializer_api.serialize_xblock_to_olx(block_saved_1).olx_str
assert olx_2 == canonical_olx

# Now, save that OLX back to LC, and re-load it again.
library_api.set_library_block_olx(usage_key, olx_2)
library_api.publish_changes(self.library.key)
block_saved_2 = xblock_api.load_block(usage_key, self.staff_user)

# Content should be preserved...
assert block_saved_2.data == block_content

# ...and this time, the OLX should have settled too.
olx_3 = serializer_api.serialize_xblock_to_olx(block_saved_2).olx_str
assert olx_3 == canonical_olx


class ContentLibraryRuntimeTests(ContentLibraryContentTestMixin):
"""
Basic tests of the Learning-Core-based XBlock runtime using XBlocks in a
Expand Down
11 changes: 5 additions & 6 deletions openedx/core/djangoapps/content_staging/tests/test_clipboard.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""
Tests for the clipboard functionality
"""
from textwrap import dedent
from xml.etree import ElementTree

from rest_framework.test import APIClient
Expand Down Expand Up @@ -155,11 +154,11 @@ def test_copy_html(self):
assert olx_response.get("Content-Type") == "application/vnd.openedx.xblock.v1.html+xml"
# For HTML, we really want to be sure that the OLX is serialized in this exact format (using CDATA), so we check
# the actual string directly rather than using assertXmlEqual():
assert olx_response.content.decode() == dedent("""
<html url_name="toyhtml" display_name="Text"><![CDATA[
<a href='/static/handouts/sample_handout.txt'>Sample</a>
]]></html>
""").lstrip()
assert olx_response.content.decode() == (
"<html url_name=\"toyhtml\" display_name=\"Text\"><![CDATA["
"<a href='/static/handouts/sample_handout.txt'>Sample</a>"
"]]></html>\n"
)

# Now if we GET the clipboard again, the GET response should exactly equal the last POST response:
assert client.get(CLIPBOARD_ENDPOINT).json() == response_data
Expand Down
2 changes: 1 addition & 1 deletion openedx/core/lib/xblock_serializer/block_serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def _serialize_html_block(self, block) -> etree.Element:

# Escape any CDATA special chars
escaped_block_data = block.data.replace("]]>", "]]&gt;")
olx_node.text = etree.CDATA("\n" + escaped_block_data + "\n")
olx_node.text = etree.CDATA(escaped_block_data)
return olx_node


Expand Down

0 comments on commit 639b45f

Please sign in to comment.