Skip to content

Commit

Permalink
Markdown link fix (#8328)
Browse files Browse the repository at this point in the history
* Improve cleaning of markdown content

* Update unit test with new check
  • Loading branch information
SchrodingersGat authored Oct 22, 2024
1 parent ddea9fa commit cb0248d
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 13 deletions.
32 changes: 29 additions & 3 deletions src/backend/InvenTree/InvenTree/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from django.utils import timezone
from django.utils.translation import gettext_lazy as _

import bleach
import pytz
import regex
from bleach import clean
Expand Down Expand Up @@ -829,7 +830,6 @@ def clean_markdown(value: str):
This function will remove javascript and other potentially harmful content from the markdown string.
"""
import markdown
from markdownify.templatetags.markdownify import markdownify

try:
markdownify_settings = settings.MARKDOWNIFY['default']
Expand All @@ -848,8 +848,34 @@ def clean_markdown(value: str):
output_format='html',
)

# Clean the HTML content (for comparison). Ideally, this should be the same as the original content
clean_html = markdownify(value)
# Bleach settings
whitelist_tags = markdownify_settings.get(
'WHITELIST_TAGS', bleach.sanitizer.ALLOWED_TAGS
)
whitelist_attrs = markdownify_settings.get(
'WHITELIST_ATTRS', bleach.sanitizer.ALLOWED_ATTRIBUTES
)
whitelist_styles = markdownify_settings.get(
'WHITELIST_STYLES', bleach.css_sanitizer.ALLOWED_CSS_PROPERTIES
)
whitelist_protocols = markdownify_settings.get(
'WHITELIST_PROTOCOLS', bleach.sanitizer.ALLOWED_PROTOCOLS
)
strip = markdownify_settings.get('STRIP', True)

css_sanitizer = bleach.css_sanitizer.CSSSanitizer(
allowed_css_properties=whitelist_styles
)
cleaner = bleach.Cleaner(
tags=whitelist_tags,
attributes=whitelist_attrs,
css_sanitizer=css_sanitizer,
protocols=whitelist_protocols,
strip=strip,
)

# Clean the HTML content (for comparison). This must be the same as the original content
clean_html = cleaner.clean(html)

if html != clean_html:
raise ValidationError(_('Data contains prohibited markdown content'))
Expand Down
25 changes: 15 additions & 10 deletions src/backend/InvenTree/company/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ def test_company_active(self):
def test_company_notes(self):
"""Test the markdown 'notes' field for the Company model."""
pk = Company.objects.first().pk
url = reverse('api-company-detail', kwargs={'pk': pk})

# Attempt to inject malicious markdown into the "notes" field
xss = [
Expand All @@ -166,16 +167,23 @@ def test_company_notes(self):
]

for note in xss:
response = self.patch(
reverse('api-company-detail', kwargs={'pk': pk}),
{'notes': note},
expected_code=400,
)
response = self.patch(url, {'notes': note}, expected_code=400)

self.assertIn(
'Data contains prohibited markdown content', str(response.data)
)

# Tests with disallowed tags
invalid_tags = [
'<iframe src="javascript:alert(123)"></iframe>',
'<canvas>A disallowed tag!</canvas>',
]

for note in invalid_tags:
response = self.patch(url, {'notes': note}, expected_code=400)

self.assertIn('Remove HTML tags from this value', str(response.data))

# The following markdown is safe, and should be accepted
good = [
'This is a **bold** statement',
Expand All @@ -184,14 +192,11 @@ def test_company_notes(self):
'This is an ![image](https://www.google.com/test.jpg)',
'This is a `code` block',
'This text has ~~strikethrough~~ formatting',
'This text has a raw link - https://www.google.com - and should still pass the test',
]

for note in good:
response = self.patch(
reverse('api-company-detail', kwargs={'pk': pk}),
{'notes': note},
expected_code=200,
)
response = self.patch(url, {'notes': note}, expected_code=200)

self.assertEqual(response.data['notes'], note)

Expand Down

0 comments on commit cb0248d

Please sign in to comment.