@@ -109,12 +109,13 @@ def get_events(self):
109109
110110class  TestCaseBase (unittest .TestCase ):
111111
112-     def  get_collector (self ):
113-         return  EventCollector (convert_charrefs = False )
112+     def  get_collector (self ,  convert_charrefs = False ):
113+         return  EventCollector (convert_charrefs = convert_charrefs )
114114
115-     def  _run_check (self , source , expected_events , collector = None ):
115+     def  _run_check (self , source , expected_events ,
116+                    * , collector = None , convert_charrefs = False ):
116117        if  collector  is  None :
117-             collector  =  self .get_collector ()
118+             collector  =  self .get_collector (convert_charrefs = convert_charrefs )
118119        parser  =  collector 
119120        for  s  in  source :
120121            parser .feed (s )
@@ -128,7 +129,7 @@ def _run_check(self, source, expected_events, collector=None):
128129
129130    def  _run_check_extra (self , source , events ):
130131        self ._run_check (source , events ,
131-                          EventCollectorExtra (convert_charrefs = False ))
132+             collector = EventCollectorExtra (convert_charrefs = False ))
132133
133134
134135class  HTMLParserTestCase (TestCaseBase ):
@@ -187,10 +188,75 @@ def test_malformatted_charref(self):
187188        ])
188189
189190    def  test_unclosed_entityref (self ):
190-         self ._run_check ("&entityref foo" , [
191-             ("entityref" , "entityref" ),
192-             ("data" , " foo" ),
193-             ])
191+         self ._run_check ('> z' , [('entityref' , 'gt' ), ('data' , ' z' )],
192+                         convert_charrefs = False )
193+         self ._run_check ('> z' , [('data' , '> z' )], convert_charrefs = True )
194+ 
195+         self ._run_check ('&undefined z' ,
196+                         [('entityref' , 'undefined' ), ('data' , ' z' )],
197+                         convert_charrefs = False )
198+         self ._run_check ('&undefined z' , [('data' , '&undefined z' )],
199+                         convert_charrefs = True )
200+ 
201+         self ._run_check ('& z' , [('data' , '& z' )], convert_charrefs = False )
202+         self ._run_check ('& z' , [('data' , '& z' )], convert_charrefs = True )
203+ 
204+     def  test_eof_in_entityref (self ):
205+         self ._run_check ('>' , [('entityref' , 'gt' )], convert_charrefs = False )
206+         self ._run_check ('>' , [('data' , '>' )], convert_charrefs = True )
207+ 
208+         self ._run_check ('&g' , [('entityref' , 'g' )], convert_charrefs = False )
209+         self ._run_check ('&g' , [('data' , '&g' )], convert_charrefs = True )
210+ 
211+         self ._run_check ('&undefined' , [('entityref' , 'undefined' )],
212+                         convert_charrefs = False )
213+         self ._run_check ('&undefined' , [('data' , '&undefined' )],
214+                         convert_charrefs = True )
215+ 
216+         self ._run_check ('&' , [('data' , '&' )], convert_charrefs = False )
217+         self ._run_check ('&' , [('data' , '&' )], convert_charrefs = True )
218+ 
219+     def  test_unclosed_charref (self ):
220+         self ._run_check ('{ z' , [('charref' , '123' ), ('data' , ' z' )],
221+                         convert_charrefs = False )
222+         self ._run_check ('{ z' , [('data' , '{ z' )], convert_charrefs = True )
223+         self ._run_check ('« z' , [('charref' , 'xab' ), ('data' , ' z' )],
224+                         convert_charrefs = False )
225+         self ._run_check ('« z' , [('data' , '\xab  z' )], convert_charrefs = True )
226+ 
227+         self ._run_check ('� z' ,
228+                         [('charref' , '123456789' ), ('data' , ' z' )],
229+                         convert_charrefs = False )
230+         self ._run_check ('� z' , [('data' , '\ufffd  z' )],
231+                         convert_charrefs = True )
232+         self ._run_check ('� z' ,
233+                         [('charref' , 'x123456789' ), ('data' , ' z' )],
234+                         convert_charrefs = False )
235+         self ._run_check ('� z' , [('data' , '\ufffd  z' )],
236+                         convert_charrefs = True )
237+ 
238+         self ._run_check ('&# z' , [('data' , '&# z' )], convert_charrefs = False )
239+         self ._run_check ('&# z' , [('data' , '&# z' )], convert_charrefs = True )
240+         self ._run_check ('&#x z' , [('data' , '&#x z' )], convert_charrefs = False )
241+         self ._run_check ('&#x z' , [('data' , '&#x z' )], convert_charrefs = True )
242+ 
243+     def  test_eof_in_charref (self ):
244+         self ._run_check ('{' , [('charref' , '123' )], convert_charrefs = False )
245+         self ._run_check ('{' , [('data' , '{' )], convert_charrefs = True )
246+         self ._run_check ('«' , [('charref' , 'xab' )], convert_charrefs = False )
247+         self ._run_check ('«' , [('data' , '\xab ' )], convert_charrefs = True )
248+ 
249+         self ._run_check ('�' , [('charref' , '123456789' )],
250+                         convert_charrefs = False )
251+         self ._run_check ('�' , [('data' , '\ufffd ' )], convert_charrefs = True )
252+         self ._run_check ('�' , [('charref' , 'x123456789' )],
253+                         convert_charrefs = False )
254+         self ._run_check ('�' , [('data' , '\ufffd ' )], convert_charrefs = True )
255+ 
256+         self ._run_check ('&#' , [('data' , '&#' )], convert_charrefs = False )
257+         self ._run_check ('&#' , [('data' , '&#' )], convert_charrefs = True )
258+         self ._run_check ('&#x' , [('data' , '&#x' )], convert_charrefs = False )
259+         self ._run_check ('&#x' , [('data' , '&#x' )], convert_charrefs = True )
194260
195261    def  test_bad_nesting (self ):
196262        # Strangely, this *is* supposed to test that overlapping 
@@ -762,20 +828,6 @@ def test_correct_detection_of_start_tags(self):
762828        ]
763829        self ._run_check (html , expected )
764830
765-     def  test_EOF_in_charref (self ):
766-         # see #17802 
767-         # This test checks that the UnboundLocalError reported in the issue 
768-         # is not raised, however I'm not sure the returned values are correct. 
769-         # Maybe HTMLParser should use self.unescape for these 
770-         data  =  [
771-             ('a&' , [('data' , 'a&' )]),
772-             ('a&b' , [('data' , 'ab' )]),
773-             ('a&b ' , [('data' , 'a' ), ('entityref' , 'b' ), ('data' , ' ' )]),
774-             ('a&b;' , [('data' , 'a' ), ('entityref' , 'b' )]),
775-         ]
776-         for  html , expected  in  data :
777-             self ._run_check (html , expected )
778- 
779831    def  test_eof_in_comments (self ):
780832        data  =  [
781833            ('<!--' , [('comment' , '' )]),
0 commit comments