File tree Expand file tree Collapse file tree 3 files changed +42
-1
lines changed Expand file tree Collapse file tree 3 files changed +42
-1
lines changed Original file line number Diff line number Diff line change @@ -10,6 +10,12 @@ and this project adheres to the
10
10
[Python Version Specification](https://packaging.python.org/en/latest/specifications/version-specifiers/).
11
11
See the [Contributing Guide](contributing.md) for details.
12
12
13
+ ## [Unreleased]
14
+
15
+ ### Fixed
16
+
17
+ * Fix an HTML comment parsing case in some Python versions that can cause an infinite loop (#1554).
18
+
13
19
## [3.9.0] - 2025-09-04
14
20
15
21
### Changed
Original file line number Diff line number Diff line change 33
33
if TYPE_CHECKING : # pragma: no cover
34
34
from markdown import Markdown
35
35
36
+ # Fallback patterns for Python versions whose `html.parser` does not include the current comment parsing fix.
37
+ commentclose = re .compile (r'--!?>' )
38
+ commentabruptclose = re .compile (r'-?>' )
36
39
37
40
# Import a copy of the html.parser lib as `htmlparser` so we can monkeypatch it.
38
41
# Users can still do `from html import parser` and get the default behavior.
@@ -302,6 +305,22 @@ def parse_pi(self, i: int) -> int:
302
305
self .handle_data ('<?' )
303
306
return i + 2
304
307
308
if not hasattr(htmlparser, 'commentabruptclose'):
    # The copied parser module does not define `commentabruptclose`, so supply
    # a local `parse_comment` built on the fallback patterns defined in this module.
    # see https://html.spec.whatwg.org/multipage/parsing.html#comment-start-state
    def parse_comment(self, i, report=True):
        """
        Parse the comment that starts at index `i` of `self.rawdata`.

        Return the index just past the comment's closing delimiter, or `-1` if
        the comment is not terminated. When `report` is true, the text between
        the opening `<!--` and the closing delimiter is passed to
        `self.handle_comment`.
        """
        text = self.rawdata
        assert text.startswith('<!--', i), 'unexpected call to parse_comment()'
        body_start = i + 4
        # Look for a `-->` / `--!>` terminator anywhere after the opener first;
        # failing that, accept a `>` / `->` that immediately follows the opener.
        closer = commentclose.search(text, body_start)
        if closer is None:
            closer = commentabruptclose.match(text, body_start)
        if closer is None:
            return -1
        if report:
            self.handle_comment(text[body_start:closer.start()])
        return closer.end()
323
+
305
324
def parse_html_declaration (self , i : int ) -> int :
306
325
if self .at_line_start () or self .intail :
307
326
if self .rawdata [i :i + 3 ] == '<![' and not self .rawdata [i :i + 9 ] == '<![CDATA[' :
Original file line number Diff line number Diff line change @@ -1018,7 +1018,7 @@ def test_comment_in_code_block(self):
1018
1018
# Note: This is a change in behavior. Previously, Python-Markdown interpreted this in the same manner
1019
1019
# as browsers and all text after the opening comment tag was considered to be in a comment. However,
1020
1020
# that did not match the reference implementation. The new behavior does.
1021
- def test_unclosed_comment_ (self ):
1021
+ def test_unclosed_comment (self ):
1022
1022
self .assertMarkdownRenders (
1023
1023
self .dedent (
1024
1024
"""
@@ -1035,6 +1035,22 @@ def test_unclosed_comment_(self):
1035
1035
)
1036
1036
)
1037
1037
1038
def test_invalid_comment_end(self):
    # A comment opener whose ending is the malformed `-- >` is not a comment.
    # The text is expected to render as ordinary paragraph content, so the
    # angle brackets appear entity-escaped in the output.
    self.assertMarkdownRenders(
        self.dedent(
            """
            <!-- This comment is malformed and never closes -- >
            Some content after the bad comment.
            """
        ),
        self.dedent(
            """
            <p>&lt;!-- This comment is malformed and never closes -- &gt;
            Some content after the bad comment.</p>
            """
        )
    )
1053
+
1038
1054
def test_raw_processing_instruction_one_line (self ):
1039
1055
self .assertMarkdownRenders (
1040
1056
"<?php echo '>'; ?>" ,
You can’t perform that action at this time.
0 commit comments