Skip to content

Commit

Permalink
fixup! improve regex efficiency
Browse files Browse the repository at this point in the history
  • Loading branch information
vincentporte committed Sep 30, 2024
1 parent a0a4cb1 commit f54d5a1
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 20 deletions.
14 changes: 8 additions & 6 deletions lacommunaute/utils/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@


def wrap_iframe_in_div_tag(text):
    """
    Given a markdown text, wrap every iframe tag in its own div tag.

    This is required for iframes to be displayed correctly.

    An iframe already enclosed in ``<div>...</div>`` is left as is. A
    ``<div>`` directly before or a ``</div>`` directly after an iframe is
    absorbed into the new wrapper, so a half-wrapped iframe is completed
    instead of being wrapped a second time.

    Only iframes without inner content (``<iframe ...></iframe>``) are
    matched.

    :param text: markdown/HTML text, possibly user-provided
    :return: the text with every matched iframe wrapped in exactly one div
    """
    # Code scanning (CodeQL, "Polynomial regular expression used on
    # uncontrolled data") flagged the attribute part `[^>]*` as slow on
    # strings with many repetitions of '<iframe'. Excluding "<" as well makes
    # a failed attempt stop at the next tag instead of rescanning the rest of
    # the text from every '<iframe'.
    # NOTE(review): an iframe with a raw "<" inside an attribute value is no
    # longer matched - confirm this is acceptable for the rendered markdown.
    iframe_regex = r"(?:<div>)?(<iframe[^<>]*></iframe>)(?:</div>)?"

    # A single substitution pass rewrites each occurrence exactly once.
    # Looping over matches and calling text.replace() rewrote *every* copy of
    # a match on each iteration, so a repeated iframe was wrapped twice.
    return re.sub(iframe_regex, r"<div>\g<1></div>", text)
37 changes: 23 additions & 14 deletions lacommunaute/utils/tests/tests_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -711,17 +711,26 @@ def test_the_last_sunday(self, day, expected_sunday):


class TestWrapIframeInDiv:
    """Check that wrap_iframe_in_div_tag puts every iframe in exactly one div."""

    @pytest.mark.parametrize(
        ("text", "expected"),
        [
            # bare iframe
            ("<iframe src='xxx'></iframe>", "<div><iframe src='xxx'></iframe></div>"),
            # iframe surrounded by markdown text
            (
                "markdown text <iframe src='xxx'></iframe> markdown text",
                "markdown text <div><iframe src='xxx'></iframe></div> markdown text",
            ),
            # already wrapped: unchanged
            ("<div><iframe src='xxx'></iframe></div>", "<div><iframe src='xxx'></iframe></div>"),
            # only the opening div is present: the wrapper is completed
            ("<div><iframe src='xxx'></iframe> text", "<div><iframe src='xxx'></iframe></div> text"),
            # only the closing div is present: the wrapper is completed
            ("<iframe src='xxx'></iframe></div>", "<div><iframe src='xxx'></iframe></div>"),
            # adjacent iframes are wrapped separately
            (
                "<iframe src='xxx'></iframe><iframe src='yyy'></iframe>",
                "<div><iframe src='xxx'></iframe></div><div><iframe src='yyy'></iframe></div>",
            ),
            # two iframes sharing one div each get their own div
            (
                "<div><iframe src='xxx'></iframe><iframe src='yyy'></iframe></div>",
                "<div><iframe src='xxx'></iframe></div><div><iframe src='yyy'></iframe></div>",
            ),
        ],
    )
    def test_wrap_iframe_in_div_tag(self, text, expected):
        # Arguments are named text/expected so the builtin input() is not shadowed.
        assert wrap_iframe_in_div_tag(text) == expected

0 comments on commit f54d5a1

Please sign in to comment.