From 26eda4eb55916db049a5873ca82510c1fc8e4201 Mon Sep 17 00:00:00 2001 From: Simon Cross Date: Sun, 25 Aug 2024 12:55:13 +0200 Subject: [PATCH 1/3] Fix HTMLParser error handling which referenced Python's html.HTMLParseError which was never raised and removed in Python 3.5. --- genshi/input.py | 10 +++++++--- genshi/tests/test_input.py | 11 +++++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/genshi/input.py b/genshi/input.py index fa18c38..4277057 100644 --- a/genshi/input.py +++ b/genshi/input.py @@ -346,9 +346,13 @@ def _generate(): for tag in open_tags: yield END, QName(tag), pos break - except html.HTMLParseError as e: - msg = '%s: line %d, column %d' % (e.msg, e.lineno, e.offset) - raise ParseError(msg, self.filename, e.lineno, e.offset) + except Exception as e: + # Python simple HTMLParser does not raise detailed + # errors except in strict mode which was deprecated + # in Python 3.3 and removed in Python 3.5 and which in + # any case is not used is this code. + msg = str(e) + raise ParseError(msg, self.filename) return Stream(_generate()).filter(_coalesce) def __iter__(self): diff --git a/genshi/tests/test_input.py b/genshi/tests/test_input.py index 44b7442..28840d0 100644 --- a/genshi/tests/test_input.py +++ b/genshi/tests/test_input.py @@ -294,6 +294,17 @@ def test_convert_ElementTree_to_markup_stream(self): self.assertEqual((Stream.END, QName("span")), events[4][:2]) self.assertEqual((Stream.END, QName("div")), events[5][:2]) + def test_parsing_error(self): + text = u'
'.encode('utf-8') + events = HTMLParser(BytesIO(text)) + self.assertRaisesRegex( + ParseError, + r"source returned bytes, but no encoding specified", + list, + events, + ) + + def suite(): suite = unittest.TestSuite() suite.addTest(doctest_suite(XMLParser.__module__)) From 0993bf649fe1053abe94d495a7c501921345721b Mon Sep 17 00:00:00 2001 From: Simon Cross Date: Sun, 25 Aug 2024 13:03:09 +0200 Subject: [PATCH 2/3] Don't use assertRaisesRegex on Python 2. --- genshi/tests/test_input.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/genshi/tests/test_input.py b/genshi/tests/test_input.py index 28840d0..e68515d 100644 --- a/genshi/tests/test_input.py +++ b/genshi/tests/test_input.py @@ -15,7 +15,7 @@ from genshi.core import Attrs, QName, Stream from genshi.input import XMLParser, HTMLParser, ParseError, ET -from genshi.compat import StringIO, BytesIO +from genshi.compat import IS_PYTHON2, StringIO, BytesIO from genshi.tests.utils import doctest_suite from xml.etree import ElementTree @@ -297,12 +297,15 @@ def test_convert_ElementTree_to_markup_stream(self): def test_parsing_error(self): text = u'
'.encode('utf-8') events = HTMLParser(BytesIO(text)) - self.assertRaisesRegex( - ParseError, - r"source returned bytes, but no encoding specified", - list, - events, - ) + if IS_PYTHON2: + self.assertRaises(ParseError, list, events) + else: + self.assertRaisesRegex( + ParseError, + r"source returned bytes, but no encoding specified", + list, + events, + ) def suite(): From 8a32fa1212532afbb74ac7cfeb9471c31aca17df Mon Sep 17 00:00:00 2001 From: Simon Cross Date: Sun, 25 Aug 2024 13:03:30 +0200 Subject: [PATCH 3/3] Fix typo in HTMLParser error handling comment. --- genshi/input.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/genshi/input.py b/genshi/input.py index 4277057..c21990a 100644 --- a/genshi/input.py +++ b/genshi/input.py @@ -347,7 +347,7 @@ def _generate(): yield END, QName(tag), pos break except Exception as e: - # Python simple HTMLParser does not raise detailed + # Python's simple HTMLParser does not raise detailed # errors except in strict mode which was deprecated # in Python 3.3 and removed in Python 3.5 and which in # any case is not used is this code.