From e2b72bfa4bcc68ac232e4bec7e9a0087da8305b6 Mon Sep 17 00:00:00 2001 From: Jano Svitok Date: Fri, 10 Nov 2017 08:59:57 +0100 Subject: [PATCH] FSM generator: replace repr() with simplified hard coded version This should avoid problem with non-unicode systems (like Windows with cp1250): UnicodeEncodeError: 'cp1250' codec can't encode character '\xa1' --- src/htmlparser/generate_fsm.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/htmlparser/generate_fsm.py b/src/htmlparser/generate_fsm.py index 6202c36..6369cb0 100755 --- a/src/htmlparser/generate_fsm.py +++ b/src/htmlparser/generate_fsm.py @@ -244,6 +244,19 @@ def ListNext(input_list): return ''.join(out) + def _Repr(self, c): + """Simplified repr() to avoid problems in non-Unicode systems.""" + if c == 9: + return "'\\t'" + elif c == 10: + return "'\\n'" + elif c == 13: + return "'\\r'" + elif c >= 0x20 and c < 0x7f: + return repr(chr(c)) + else: + return "'\\x%02x'" % c + def _CreateTransitionTable(self): """Print the state transition list. @@ -282,8 +295,8 @@ def _CreateTransitionTable(self): for state in self._config.states: transition_row = [] for c in range(0, 255): - transition_row.append(' /* %06s */ %s' % (repr(chr(c)), - state_table[state][c])) + transition_row.append(' /* %06s */ %s' % (self._Repr(c), + state_table[state][c])) out.append(self._CreateStructList('%stransition_row_%s' % (self._Prefix(),