Skip to content

Commit 5354daf

Browse files
facelessuser authored and waylan committed
Fix an HTML comment parsing case that can cause an infinite loop
Fixes #1554
1 parent f39cf84 commit 5354daf

File tree

3 files changed

+42
-1
lines changed

3 files changed

+42
-1
lines changed

docs/changelog.md

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,12 @@ and this project adheres to the
1010
[Python Version Specification](https://packaging.python.org/en/latest/specifications/version-specifiers/).
1111
See the [Contributing Guide](contributing.md) for details.
1212

13+
## [Unreleased]
14+
15+
### Fixed
16+
17+
* Fix an HTML comment parsing case in some Python versions that can cause an infinite loop (#1554).
18+
1319
## [3.9.0] - 2025-09-04
1420

1521
### Changed

markdown/htmlparser.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@
3333
if TYPE_CHECKING: # pragma: no cover
3434
from markdown import Markdown
3535

36+
# Fallback patterns for Python versions whose `html.parser` does not include the current comment parsing fix.
37+
# Matches a regular comment terminator: `-->` or `--!>`.
commentclose = re.compile(r'--!?>')
38+
# Matches an abrupt comment terminator: `>` or `->`. `parse_comment` applies it with
# `.match()` at the position right after `<!--`, so it only recognizes `<!-->` and `<!--->`.
commentabruptclose = re.compile(r'-?>')
3639

3740
# Import a copy of the html.parser lib as `htmlparser` so we can monkeypatch it.
3841
# Users can still do `from html import parser` and get the default behavior.
@@ -302,6 +305,22 @@ def parse_pi(self, i: int) -> int:
302305
self.handle_data('<?')
303306
return i + 2
304307

308+
if not hasattr(htmlparser, 'commentabruptclose'):
    # Only defined when the imported `htmlparser` module has no `commentabruptclose` attribute.
    # see https://html.spec.whatwg.org/multipage/parsing.html#comment-start-state
    def parse_comment(self, i, report=True):
        """
        Internal -- parse the comment which starts at index `i` of `self.rawdata`.

        The first `-->` or `--!>` found after the opening `<!--` terminates the comment. When
        there is none, a `>` or `->` placed directly after the opening `<!--` is accepted
        as an abrupt terminator.

        Arguments:
            i: Index in `self.rawdata` at which the `<!--` begins.
            report: When true, pass the text between the `<!--` and the terminator to
                `self.handle_comment`.

        Returns:
            The index just past the terminator, or `-1` if the comment is not terminated.
        """
        text = self.rawdata
        assert text.startswith('<!--', i), 'unexpected call to parse_comment()'
        content_start = i + 4
        # Match objects are always truthy, so `or` only falls through when the search fails.
        closer = (
            commentclose.search(text, content_start)
            or commentabruptclose.match(text, content_start)
        )
        if closer is None:
            return -1
        if report:
            self.handle_comment(text[content_start:closer.start()])
        return closer.end()
323+
305324
def parse_html_declaration(self, i: int) -> int:
306325
if self.at_line_start() or self.intail:
307326
if self.rawdata[i:i+3] == '<![' and not self.rawdata[i:i+9] == '<![CDATA[':

tests/test_syntax/blocks/test_html_blocks.py

Lines changed: 17 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1018,7 +1018,7 @@ def test_comment_in_code_block(self):
10181018
# Note: This is a change in behavior. Previously, Python-Markdown interpreted this in the same manner
10191019
# as browsers and all text after the opening comment tag was considered to be in a comment. However,
10201020
# that did not match the reference implementation. The new behavior does.
1021-
def test_unclosed_comment_(self):
1021+
def test_unclosed_comment(self):
10221022
self.assertMarkdownRenders(
10231023
self.dedent(
10241024
"""
@@ -1035,6 +1035,22 @@ def test_unclosed_comment_(self):
10351035
)
10361036
)
10371037

1038+
def test_invalid_comment_end(self):
    # The `-- >` sequence (note the space) is not a valid comment terminator, so the
    # expected output treats the whole input as escaped paragraph text.
    source = self.dedent(
        """
        <!-- This comment is malformed and never closes -- >
        Some content after the bad comment.
        """
    )
    expected = self.dedent(
        """
        <p>&lt;!-- This comment is malformed and never closes -- &gt;
        Some content after the bad comment.</p>
        """
    )
    self.assertMarkdownRenders(source, expected)
1053+
10381054
def test_raw_processing_instruction_one_line(self):
10391055
self.assertMarkdownRenders(
10401056
"<?php echo '>'; ?>",

0 commit comments

Comments
 (0)