diff --git a/third_party/ply/DIR_METADATA b/third_party/ply/DIR_METADATA new file mode 100644 index 000000000000..983ea0140f45 --- /dev/null +++ b/third_party/ply/DIR_METADATA @@ -0,0 +1,3 @@ +monorail { + component: "Tools" +} diff --git a/third_party/ply/METADATA b/third_party/ply/METADATA index 3ff16701952c..63d375c159bb 100644 --- a/third_party/ply/METADATA +++ b/third_party/ply/METADATA @@ -6,12 +6,12 @@ description: third_party { identifier { type: "ChromiumVersion" - value: "74.0.3729.169" # from https://chromereleases.googleblog.com/2019/05/stable-channel-update-for-desktop_21.html + value: "114.0.5735.358" # from https://chromereleases.googleblog.com/2024/03/long-term-support-channel-update-for_26.html } identifier { type: "Git" value: "https://chromium.googlesource.com/chromium/src.git" - version: "84108231f6e6e0772fb9a4643679ce76aa771e67" + version: "1759c6ae9316996b9f150c0ce9d0ca78a3d15c02" } identifier { type: "UpstreamSubdir" diff --git a/third_party/ply/OWNERS b/third_party/ply/OWNERS new file mode 100644 index 000000000000..93a641dd7a3e --- /dev/null +++ b/third_party/ply/OWNERS @@ -0,0 +1,3 @@ +rockot@google.com + +file://tools/idl_parser/OWNERS diff --git a/third_party/ply/README.chromium b/third_party/ply/README.chromium index 6466e5428a5e..45e10b87b6b6 100644 --- a/third_party/ply/README.chromium +++ b/third_party/ply/README.chromium @@ -1,21 +1,25 @@ Name: PLY (Python Lex-Yacc) -Current version: 3.4 -URL: http://www.dabeaz.com/ply/ply-3.4.tar.gz +Current version: 3.11 +URL: http://www.dabeaz.com/ply/ply-3.11.tar.gz License: BSD License File: LICENSE Security Critical: no -Version: 3.4 +Version: 3.11 -This directory contains a copy of these ply-3.4 components: +PLY is used by (at least) the Mojo python bindings, the PPAPI +IDL generator, and the Blink IDL generator. -README ply-3.4/README -Sources ply-3.4/ply/__init__.py - ply-3.4/ply/lex.py - ply-3.4/ply/yacc.py +This directory contains a copy of these ply-3.11 components: + +README ply-3.11/README.md +Sources ply-3.11/ply/__init__.py + ply-3.11/ply/lex.py + ply-3.11/ply/yacc.py The license is in LICENSE. -Modifications made with initial commit: +Modifications made: - Added the file README.chromium (this file) - Applies license.patch +- Added ply.gni to list sources diff --git a/third_party/ply/README b/third_party/ply/README.md similarity index 64% rename from third_party/ply/README rename to third_party/ply/README.md index f384d1a93853..05df32a5b9fc 100644 --- a/third_party/ply/README +++ b/third_party/ply/README.md @@ -1,6 +1,8 @@ -PLY (Python Lex-Yacc) Version 3.4 +# PLY (Python Lex-Yacc) Version 3.11 -Copyright (C) 2001-2011, +[![Build Status](https://travis-ci.org/dabeaz/ply.svg?branch=master)](https://travis-ci.org/dabeaz/ply) + +Copyright (C) 2001-2018 David M. Beazley (Dabeaz LLC) All rights reserved. @@ -96,7 +98,7 @@ A simple example is found at the end of this document Requirements ============ -PLY requires the use of Python 2.2 or greater. However, you should +PLY requires the use of Python 2.6 or greater. However, you should use the latest Python release if possible. It should work on just about any platform. PLY has been tested with both CPython and Jython. It also seems to work with IronPython. @@ -112,7 +114,11 @@ book "Compilers : Principles, Techniques, and Tools" by Aho, Sethi, and Ullman. The topics found in "Lex & Yacc" by Levine, Mason, and Brown may also be useful. -A Google group for PLY can be found at +The GitHub page for PLY can be found at: + + https://github.com/dabeaz/ply + +An old and relatively inactive discussion group for PLY is found at: http://groups.google.com/group/ply-hack @@ -130,7 +136,7 @@ and testing a revised LALR(1) implementation for PLY-2.0. Special Note for PLY-3.0 ======================== PLY-3.0 the first PLY release to support Python 3. However, backwards -compatibility with Python 2.2 is still preserved. PLY provides dual +compatibility with Python 2.6 is still preserved. PLY provides dual Python 2/3 compatibility by restricting its implementation to a common subset of basic language features. You should not convert PLY using 2to3--it is not necessary and may in fact break the implementation. @@ -141,109 +147,109 @@ Example Here is a simple example showing a PLY implementation of a calculator with variables. -# ----------------------------------------------------------------------------- -# calc.py -# -# A simple calculator with variables. -# ----------------------------------------------------------------------------- - -tokens = ( - 'NAME','NUMBER', - 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', - ) - -# Tokens - -t_PLUS = r'\+' -t_MINUS = r'-' -t_TIMES = r'\*' -t_DIVIDE = r'/' -t_EQUALS = r'=' -t_LPAREN = r'\(' -t_RPAREN = r'\)' -t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' - -def t_NUMBER(t): - r'\d+' - t.value = int(t.value) - return t - -# Ignored characters -t_ignore = " \t" - -def t_newline(t): - r'\n+' - t.lexer.lineno += t.value.count("\n") - -def t_error(t): - print("Illegal character '%s'" % t.value[0]) - t.lexer.skip(1) - -# Build the lexer -import ply.lex as lex -lex.lex() - -# Precedence rules for the arithmetic operators -precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), - ('right','UMINUS'), - ) - -# dictionary of names (for storing variables) -names = { } - -def p_statement_assign(p): - 'statement : NAME EQUALS expression' - names[p[1]] = p[3] - -def p_statement_expr(p): - 'statement : expression' - print(p[1]) - -def p_expression_binop(p): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if p[2] == '+' : p[0] = p[1] + p[3] - elif p[2] == '-': p[0] = p[1] - p[3] - elif p[2] == '*': p[0] = p[1] * p[3] - elif p[2] == '/': p[0] = p[1] / p[3] - -def p_expression_uminus(p): - 'expression : MINUS expression %prec UMINUS' - p[0] = -p[2] - -def p_expression_group(p): - 'expression : LPAREN expression RPAREN' - p[0] = p[2] - -def p_expression_number(p): - 'expression : NUMBER' - p[0] = p[1] - -def p_expression_name(p): - 'expression : NAME' - try: - p[0] = names[p[1]] - except LookupError: - print("Undefined name '%s'" % p[1]) - p[0] = 0 - -def p_error(p): - print("Syntax error at '%s'" % p.value) - -import ply.yacc as yacc -yacc.yacc() - -while 1: - try: - s = raw_input('calc > ') # use input() on Python 3 - except EOFError: - break - yacc.parse(s) + # ----------------------------------------------------------------------------- + # calc.py + # + # A simple calculator with variables. + # ----------------------------------------------------------------------------- + + tokens = ( + 'NAME','NUMBER', + 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', + 'LPAREN','RPAREN', + ) + + # Tokens + + t_PLUS = r'\+' + t_MINUS = r'-' + t_TIMES = r'\*' + t_DIVIDE = r'/' + t_EQUALS = r'=' + t_LPAREN = r'\(' + t_RPAREN = r'\)' + t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + + def t_NUMBER(t): + r'\d+' + t.value = int(t.value) + return t + + # Ignored characters + t_ignore = " \t" + + def t_newline(t): + r'\n+' + t.lexer.lineno += t.value.count("\n") + + def t_error(t): + print("Illegal character '%s'" % t.value[0]) + t.lexer.skip(1) + + # Build the lexer + import ply.lex as lex + lex.lex() + + # Precedence rules for the arithmetic operators + precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + + # dictionary of names (for storing variables) + names = { } + + def p_statement_assign(p): + 'statement : NAME EQUALS expression' + names[p[1]] = p[3] + + def p_statement_expr(p): + 'statement : expression' + print(p[1]) + + def p_expression_binop(p): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if p[2] == '+' : p[0] = p[1] + p[3] + elif p[2] == '-': p[0] = p[1] - p[3] + elif p[2] == '*': p[0] = p[1] * p[3] + elif p[2] == '/': p[0] = p[1] / p[3] + + def p_expression_uminus(p): + 'expression : MINUS expression %prec UMINUS' + p[0] = -p[2] + + def p_expression_group(p): + 'expression : LPAREN expression RPAREN' + p[0] = p[2] + + def p_expression_number(p): + 'expression : NUMBER' + p[0] = p[1] + + def p_expression_name(p): + 'expression : NAME' + try: + p[0] = names[p[1]] + except LookupError: + print("Undefined name '%s'" % p[1]) + p[0] = 0 + + def p_error(p): + print("Syntax error at '%s'" % p.value) + + import ply.yacc as yacc + yacc.yacc() + + while True: + try: + s = raw_input('calc > ') # use input() on Python 3 + except EOFError: + break + yacc.parse(s) Bug Reports and Patches @@ -252,12 +258,10 @@ My goal with PLY is to simply have a decent lex/yacc implementation for Python. As a general rule, I don't spend huge amounts of time working on it unless I receive very specific bug reports and/or patches to fix problems. I also try to incorporate submitted feature -requests and enhancements into each new version. To contact me about -bugs and/or new features, please send email to dave@dabeaz.com. - -In addition there is a Google group for discussing PLY related issues at - - http://groups.google.com/group/ply-hack +requests and enhancements into each new version. Please visit the PLY +github page at https://github.com/dabeaz/ply to submit issues and pull +requests. To contact me about bugs and/or new features, please send +email to dave@dabeaz.com. -- Dave diff --git a/third_party/ply/__init__.py b/third_party/ply/__init__.py index f3da03eade07..e48880f50fbc 100644 --- a/third_party/ply/__init__.py +++ b/third_party/ply/__init__.py @@ -1,9 +1,9 @@ # PLY package # Author: David Beazley (dave@dabeaz.com) # ----------------------------------------------------------------------------- -# ply: yacc.py +# ply: __init__.py # -# Copyright (C) 2001-2011, +# Copyright (C) 2001-2018 # David M. Beazley (Dabeaz LLC) # All rights reserved. # @@ -33,4 +33,5 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # ----------------------------------------------------------------------------- +__version__ = '3.11' __all__ = ['lex','yacc'] diff --git a/third_party/ply/lex.py b/third_party/ply/lex.py index bd32da932762..f95bcdbf1bb5 100644 --- a/third_party/ply/lex.py +++ b/third_party/ply/lex.py @@ -1,22 +1,22 @@ # ----------------------------------------------------------------------------- # ply: lex.py # -# Copyright (C) 2001-2011, +# Copyright (C) 2001-2018 # David M. Beazley (Dabeaz LLC) # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: -# +# # * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. +# and/or other materials provided with the distribution. # * Neither the name of the David Beazley or Dabeaz LLC may be used to # endorse or promote products derived from this software without -# specific prior written permission. +# specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT @@ -31,10 +31,15 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # ----------------------------------------------------------------------------- -__version__ = "3.4" -__tabversion__ = "3.2" # Version of table file used +__version__ = '3.11' +__tabversion__ = '3.10' -import re, sys, types, copy, os +import re +import sys +import types +import copy +import os +import inspect # This tuple contains known string types try: @@ -44,59 +49,55 @@ # Python 3.0 StringTypes = (str, bytes) -# Extract the code attribute of a function. Different implementations -# are for Python 2/3 compatibility. - -if sys.version_info[0] < 3: - def func_code(f): - return f.func_code -else: - def func_code(f): - return f.__code__ - # This regular expression is used to match valid token names _is_identifier = re.compile(r'^[a-zA-Z0-9_]+$') # Exception thrown when invalid token encountered and no default error # handler is defined. - class LexError(Exception): - def __init__(self,message,s): - self.args = (message,) - self.text = s + def __init__(self, message, s): + self.args = (message,) + self.text = s + # Token class. This class is used to represent the tokens produced. class LexToken(object): def __str__(self): - return "LexToken(%s,%r,%d,%d)" % (self.type,self.value,self.lineno,self.lexpos) + return 'LexToken(%s,%r,%d,%d)' % (self.type, self.value, self.lineno, self.lexpos) + def __repr__(self): return str(self) -# This object is a stand-in for a logging object created by the -# logging module. + +# This object is a stand-in for a logging object created by the +# logging module. class PlyLogger(object): - def __init__(self,f): + def __init__(self, f): self.f = f - def critical(self,msg,*args,**kwargs): - self.f.write((msg % args) + "\n") - def warning(self,msg,*args,**kwargs): - self.f.write("WARNING: "+ (msg % args) + "\n") + def critical(self, msg, *args, **kwargs): + self.f.write((msg % args) + '\n') + + def warning(self, msg, *args, **kwargs): + self.f.write('WARNING: ' + (msg % args) + '\n') - def error(self,msg,*args,**kwargs): - self.f.write("ERROR: " + (msg % args) + "\n") + def error(self, msg, *args, **kwargs): + self.f.write('ERROR: ' + (msg % args) + '\n') info = critical debug = critical + # Null logger is used when no output is generated. Does nothing. class NullLogger(object): - def __getattribute__(self,name): + def __getattribute__(self, name): return self - def __call__(self,*args,**kwargs): + + def __call__(self, *args, **kwargs): return self + # ----------------------------------------------------------------------------- # === Lexing Engine === # @@ -114,31 +115,33 @@ def __call__(self,*args,**kwargs): class Lexer: def __init__(self): self.lexre = None # Master regular expression. This is a list of - # tuples (re,findex) where re is a compiled + # tuples (re, findex) where re is a compiled # regular expression and findex is a list # mapping regex group numbers to rules self.lexretext = None # Current regular expression strings self.lexstatere = {} # Dictionary mapping lexer states to master regexs self.lexstateretext = {} # Dictionary mapping lexer states to regex strings self.lexstaterenames = {} # Dictionary mapping lexer states to symbol names - self.lexstate = "INITIAL" # Current lexer state + self.lexstate = 'INITIAL' # Current lexer state self.lexstatestack = [] # Stack of lexer states self.lexstateinfo = None # State information self.lexstateignore = {} # Dictionary of ignored characters for each state self.lexstateerrorf = {} # Dictionary of error functions for each state + self.lexstateeoff = {} # Dictionary of eof functions for each state self.lexreflags = 0 # Optional re compile flags self.lexdata = None # Actual input data (as a string) self.lexpos = 0 # Current position in input text self.lexlen = 0 # Length of the input text self.lexerrorf = None # Error rule (if any) + self.lexeoff = None # EOF rule (if any) self.lextokens = None # List of valid tokens - self.lexignore = "" # Ignored characters - self.lexliterals = "" # Literal characters that can be passed through + self.lexignore = '' # Ignored characters + self.lexliterals = '' # Literal characters that can be passed through self.lexmodule = None # Module self.lineno = 1 # Current line number - self.lexoptimize = 0 # Optimized mode + self.lexoptimize = False # Optimized mode - def clone(self,object=None): + def clone(self, object=None): c = copy.copy(self) # If the object parameter has been supplied, it means we are attaching the @@ -146,113 +149,110 @@ def clone(self,object=None): # the lexstatere and lexstateerrorf tables. if object: - newtab = { } + newtab = {} for key, ritem in self.lexstatere.items(): newre = [] for cre, findex in ritem: - newfindex = [] - for f in findex: - if not f or not f[0]: - newfindex.append(f) - continue - newfindex.append((getattr(object,f[0].__name__),f[1])) - newre.append((cre,newfindex)) + newfindex = [] + for f in findex: + if not f or not f[0]: + newfindex.append(f) + continue + newfindex.append((getattr(object, f[0].__name__), f[1])) + newre.append((cre, newfindex)) newtab[key] = newre c.lexstatere = newtab - c.lexstateerrorf = { } + c.lexstateerrorf = {} for key, ef in self.lexstateerrorf.items(): - c.lexstateerrorf[key] = getattr(object,ef.__name__) + c.lexstateerrorf[key] = getattr(object, ef.__name__) c.lexmodule = object return c # ------------------------------------------------------------ # writetab() - Write lexer information to a table file # ------------------------------------------------------------ - def writetab(self,tabfile,outputdir=""): - if isinstance(tabfile,types.ModuleType): - return - basetabfilename = tabfile.split(".")[-1] - filename = os.path.join(outputdir,basetabfilename)+".py" - tf = open(filename,"w") - tf.write("# %s.py. This file automatically created by PLY (version %s). Don't edit!\n" % (tabfile,__version__)) - tf.write("_tabversion = %s\n" % repr(__version__)) - tf.write("_lextokens = %s\n" % repr(self.lextokens)) - tf.write("_lexreflags = %s\n" % repr(self.lexreflags)) - tf.write("_lexliterals = %s\n" % repr(self.lexliterals)) - tf.write("_lexstateinfo = %s\n" % repr(self.lexstateinfo)) - - tabre = { } - # Collect all functions in the initial state - initial = self.lexstatere["INITIAL"] - initialfuncs = [] - for part in initial: - for f in part[1]: - if f and f[0]: - initialfuncs.append(f) - - for key, lre in self.lexstatere.items(): - titem = [] - for i in range(len(lre)): - titem.append((self.lexstateretext[key][i],_funcs_to_names(lre[i][1],self.lexstaterenames[key][i]))) - tabre[key] = titem - - tf.write("_lexstatere = %s\n" % repr(tabre)) - tf.write("_lexstateignore = %s\n" % repr(self.lexstateignore)) - - taberr = { } - for key, ef in self.lexstateerrorf.items(): - if ef: - taberr[key] = ef.__name__ - else: - taberr[key] = None - tf.write("_lexstateerrorf = %s\n" % repr(taberr)) - tf.close() + def writetab(self, lextab, outputdir=''): + if isinstance(lextab, types.ModuleType): + raise IOError("Won't overwrite existing lextab module") + basetabmodule = lextab.split('.')[-1] + filename = os.path.join(outputdir, basetabmodule) + '.py' + with open(filename, 'w') as tf: + tf.write('# %s.py. This file automatically created by PLY (version %s). Don\'t edit!\n' % (basetabmodule, __version__)) + tf.write('_tabversion = %s\n' % repr(__tabversion__)) + tf.write('_lextokens = set(%s)\n' % repr(tuple(sorted(self.lextokens)))) + tf.write('_lexreflags = %s\n' % repr(int(self.lexreflags))) + tf.write('_lexliterals = %s\n' % repr(self.lexliterals)) + tf.write('_lexstateinfo = %s\n' % repr(self.lexstateinfo)) + + # Rewrite the lexstatere table, replacing function objects with function names + tabre = {} + for statename, lre in self.lexstatere.items(): + titem = [] + for (pat, func), retext, renames in zip(lre, self.lexstateretext[statename], self.lexstaterenames[statename]): + titem.append((retext, _funcs_to_names(func, renames))) + tabre[statename] = titem + + tf.write('_lexstatere = %s\n' % repr(tabre)) + tf.write('_lexstateignore = %s\n' % repr(self.lexstateignore)) + + taberr = {} + for statename, ef in self.lexstateerrorf.items(): + taberr[statename] = ef.__name__ if ef else None + tf.write('_lexstateerrorf = %s\n' % repr(taberr)) + + tabeof = {} + for statename, ef in self.lexstateeoff.items(): + tabeof[statename] = ef.__name__ if ef else None + tf.write('_lexstateeoff = %s\n' % repr(tabeof)) # ------------------------------------------------------------ # readtab() - Read lexer information from a tab file # ------------------------------------------------------------ - def readtab(self,tabfile,fdict): - if isinstance(tabfile,types.ModuleType): + def readtab(self, tabfile, fdict): + if isinstance(tabfile, types.ModuleType): lextab = tabfile else: - if sys.version_info[0] < 3: - exec("import %s as lextab" % tabfile) - else: - env = { } - exec("import %s as lextab" % tabfile, env,env) - lextab = env['lextab'] + exec('import %s' % tabfile) + lextab = sys.modules[tabfile] - if getattr(lextab,"_tabversion","0.0") != __version__: - raise ImportError("Inconsistent PLY version") + if getattr(lextab, '_tabversion', '0.0') != __tabversion__: + raise ImportError('Inconsistent PLY version') self.lextokens = lextab._lextokens self.lexreflags = lextab._lexreflags self.lexliterals = lextab._lexliterals + self.lextokens_all = self.lextokens | set(self.lexliterals) self.lexstateinfo = lextab._lexstateinfo self.lexstateignore = lextab._lexstateignore - self.lexstatere = { } - self.lexstateretext = { } - for key,lre in lextab._lexstatere.items(): - titem = [] - txtitem = [] - for i in range(len(lre)): - titem.append((re.compile(lre[i][0],lextab._lexreflags | re.VERBOSE),_names_to_funcs(lre[i][1],fdict))) - txtitem.append(lre[i][0]) - self.lexstatere[key] = titem - self.lexstateretext[key] = txtitem - self.lexstateerrorf = { } - for key,ef in lextab._lexstateerrorf.items(): - self.lexstateerrorf[key] = fdict[ef] + self.lexstatere = {} + self.lexstateretext = {} + for statename, lre in lextab._lexstatere.items(): + titem = [] + txtitem = [] + for pat, func_name in lre: + titem.append((re.compile(pat, lextab._lexreflags), _names_to_funcs(func_name, fdict))) + + self.lexstatere[statename] = titem + self.lexstateretext[statename] = txtitem + + self.lexstateerrorf = {} + for statename, ef in lextab._lexstateerrorf.items(): + self.lexstateerrorf[statename] = fdict[ef] + + self.lexstateeoff = {} + for statename, ef in lextab._lexstateeoff.items(): + self.lexstateeoff[statename] = fdict[ef] + self.begin('INITIAL') # ------------------------------------------------------------ # input() - Push a new string into the lexer # ------------------------------------------------------------ - def input(self,s): + def input(self, s): # Pull off the first character to see if s looks like a string c = s[:1] - if not isinstance(c,StringTypes): - raise ValueError("Expected a string") + if not isinstance(c, StringTypes): + raise ValueError('Expected a string') self.lexdata = s self.lexpos = 0 self.lexlen = len(s) @@ -260,19 +260,20 @@ def input(self,s): # ------------------------------------------------------------ # begin() - Changes the lexing state # ------------------------------------------------------------ - def begin(self,state): - if not state in self.lexstatere: - raise ValueError("Undefined state") + def begin(self, state): + if state not in self.lexstatere: + raise ValueError('Undefined state') self.lexre = self.lexstatere[state] self.lexretext = self.lexstateretext[state] - self.lexignore = self.lexstateignore.get(state,"") - self.lexerrorf = self.lexstateerrorf.get(state,None) + self.lexignore = self.lexstateignore.get(state, '') + self.lexerrorf = self.lexstateerrorf.get(state, None) + self.lexeoff = self.lexstateeoff.get(state, None) self.lexstate = state # ------------------------------------------------------------ # push_state() - Changes the lexing state and saves old on stack # ------------------------------------------------------------ - def push_state(self,state): + def push_state(self, state): self.lexstatestack.append(self.lexstate) self.begin(state) @@ -291,7 +292,7 @@ def current_state(self): # ------------------------------------------------------------ # skip() - Skip ahead n characters # ------------------------------------------------------------ - def skip(self,n): + def skip(self, n): self.lexpos += n # ------------------------------------------------------------ @@ -315,9 +316,10 @@ def token(self): continue # Look for a regular expression match - for lexre,lexindexfunc in self.lexre: - m = lexre.match(lexdata,lexpos) - if not m: continue + for lexre, lexindexfunc in self.lexre: + m = lexre.match(lexdata, lexpos) + if not m: + continue # Create a token for return tok = LexToken() @@ -326,16 +328,16 @@ def token(self): tok.lexpos = lexpos i = m.lastindex - func,tok.type = lexindexfunc[i] + func, tok.type = lexindexfunc[i] if not func: - # If no token type was set, it's an ignored token - if tok.type: - self.lexpos = m.end() - return tok - else: - lexpos = m.end() - break + # If no token type was set, it's an ignored token + if tok.type: + self.lexpos = m.end() + return tok + else: + lexpos = m.end() + break lexpos = m.end() @@ -355,10 +357,10 @@ def token(self): # Verify type of the token. If not in the token map, raise an error if not self.lexoptimize: - if not newtok.type in self.lextokens: + if newtok.type not in self.lextokens_all: raise LexError("%s:%d: Rule '%s' returned an unknown token type '%s'" % ( - func_code(func).co_filename, func_code(func).co_firstlineno, - func.__name__, newtok.type),lexdata[lexpos:]) + func.__code__.co_filename, func.__code__.co_firstlineno, + func.__name__, newtok.type), lexdata[lexpos:]) return newtok else: @@ -377,7 +379,7 @@ def token(self): tok = LexToken() tok.value = self.lexdata[lexpos:] tok.lineno = self.lineno - tok.type = "error" + tok.type = 'error' tok.lexer = self tok.lexpos = lexpos self.lexpos = lexpos @@ -386,15 +388,27 @@ def token(self): # Error method didn't change text position at all. This is an error. raise LexError("Scanning error. Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:]) lexpos = self.lexpos - if not newtok: continue + if not newtok: + continue return newtok self.lexpos = lexpos - raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos],lexpos), lexdata[lexpos:]) + raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos], lexpos), lexdata[lexpos:]) + + if self.lexeoff: + tok = LexToken() + tok.type = 'eof' + tok.value = '' + tok.lineno = self.lineno + tok.lexpos = lexpos + tok.lexer = self + self.lexpos = lexpos + newtok = self.lexeoff(tok) + return newtok self.lexpos = lexpos + 1 if self.lexdata is None: - raise RuntimeError("No input string given with input()") + raise RuntimeError('No input string given with input()') return None # Iterator interface @@ -416,6 +430,15 @@ def next(self): # and build a Lexer object from it. # ----------------------------------------------------------------------------- +# ----------------------------------------------------------------------------- +# _get_regex(func) +# +# Returns the regular expression assigned to a function either as a doc string +# or as a .regex attribute attached by the @TOKEN decorator. +# ----------------------------------------------------------------------------- +def _get_regex(func): + return getattr(func, 'regex', func.__doc__) + # ----------------------------------------------------------------------------- # get_caller_module_dict() # @@ -423,21 +446,12 @@ def next(self): # a caller further down the call stack. This is used to get the environment # associated with the yacc() call if none was provided. # ----------------------------------------------------------------------------- - def get_caller_module_dict(levels): - try: - raise RuntimeError - except RuntimeError: - e,b,t = sys.exc_info() - f = t.tb_frame - while levels > 0: - f = f.f_back - levels -= 1 - ldict = f.f_globals.copy() - if f.f_globals != f.f_locals: - ldict.update(f.f_locals) - - return ldict + f = sys._getframe(levels) + ldict = f.f_globals.copy() + if f.f_globals != f.f_locals: + ldict.update(f.f_locals) + return ldict # ----------------------------------------------------------------------------- # _funcs_to_names() @@ -445,14 +459,13 @@ def get_caller_module_dict(levels): # Given a list of regular expression functions, this converts it to a list # suitable for output to a table file # ----------------------------------------------------------------------------- - -def _funcs_to_names(funclist,namelist): +def _funcs_to_names(funclist, namelist): result = [] - for f,name in zip(funclist,namelist): - if f and f[0]: - result.append((name, f[1])) - else: - result.append(f) + for f, name in zip(funclist, namelist): + if f and f[0]: + result.append((name, f[1])) + else: + result.append(f) return result # ----------------------------------------------------------------------------- @@ -461,15 +474,14 @@ def _funcs_to_names(funclist,namelist): # Given a list of regular expression function names, this converts it back to # functions. # ----------------------------------------------------------------------------- - -def _names_to_funcs(namelist,fdict): - result = [] - for n in namelist: - if n and n[0]: - result.append((fdict[n[0]],n[1])) - else: - result.append(n) - return result +def _names_to_funcs(namelist, fdict): + result = [] + for n in namelist: + if n and n[0]: + result.append((fdict[n[0]], n[1])) + else: + result.append(n) + return result # ----------------------------------------------------------------------------- # _form_master_re() @@ -478,36 +490,37 @@ def _names_to_funcs(namelist,fdict): # form the master regular expression. Given limitations in the Python re # module, it may be necessary to break the master regex into separate expressions. # ----------------------------------------------------------------------------- - -def _form_master_re(relist,reflags,ldict,toknames): - if not relist: return [] - regex = "|".join(relist) +def _form_master_re(relist, reflags, ldict, toknames): + if not relist: + return [] + regex = '|'.join(relist) try: - lexre = re.compile(regex,re.VERBOSE | reflags) + lexre = re.compile(regex, reflags) # Build the index to function map for the matching engine - lexindexfunc = [ None ] * (max(lexre.groupindex.values())+1) + lexindexfunc = [None] * (max(lexre.groupindex.values()) + 1) lexindexnames = lexindexfunc[:] - for f,i in lexre.groupindex.items(): - handle = ldict.get(f,None) + for f, i in lexre.groupindex.items(): + handle = ldict.get(f, None) if type(handle) in (types.FunctionType, types.MethodType): - lexindexfunc[i] = (handle,toknames[f]) + lexindexfunc[i] = (handle, toknames[f]) lexindexnames[i] = f elif handle is not None: lexindexnames[i] = f - if f.find("ignore_") > 0: - lexindexfunc[i] = (None,None) + if f.find('ignore_') > 0: + lexindexfunc[i] = (None, None) else: lexindexfunc[i] = (None, toknames[f]) - - return [(lexre,lexindexfunc)],[regex],[lexindexnames] + + return [(lexre, lexindexfunc)], [regex], [lexindexnames] except Exception: m = int(len(relist)/2) - if m == 0: m = 1 - llist, lre, lnames = _form_master_re(relist[:m],reflags,ldict,toknames) - rlist, rre, rnames = _form_master_re(relist[m:],reflags,ldict,toknames) - return llist+rlist, lre+rre, lnames+rnames + if m == 0: + m = 1 + llist, lre, lnames = _form_master_re(relist[:m], reflags, ldict, toknames) + rlist, rre, rnames = _form_master_re(relist[m:], reflags, ldict, toknames) + return (llist+rlist), (lre+rre), (lnames+rnames) # ----------------------------------------------------------------------------- # def _statetoken(s,names) @@ -517,22 +530,22 @@ def _form_master_re(relist,reflags,ldict,toknames): # is a tuple of state names and tokenname is the name of the token. For example, # calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM') # ----------------------------------------------------------------------------- +def _statetoken(s, names): + parts = s.split('_') + for i, part in enumerate(parts[1:], 1): + if part not in names and part != 'ANY': + break -def _statetoken(s,names): - nonstate = 1 - parts = s.split("_") - for i in range(1,len(parts)): - if not parts[i] in names and parts[i] != 'ANY': break if i > 1: - states = tuple(parts[1:i]) + states = tuple(parts[1:i]) else: - states = ('INITIAL',) + states = ('INITIAL',) if 'ANY' in states: - states = tuple(names) + states = tuple(names) - tokenname = "_".join(parts[i:]) - return (states,tokenname) + tokenname = '_'.join(parts[i:]) + return (states, tokenname) # ----------------------------------------------------------------------------- @@ -542,19 +555,15 @@ def _statetoken(s,names): # user's input file. # ----------------------------------------------------------------------------- class LexerReflect(object): - def __init__(self,ldict,log=None,reflags=0): + def __init__(self, ldict, log=None, reflags=0): self.ldict = ldict self.error_func = None self.tokens = [] self.reflags = reflags - self.stateinfo = { 'INITIAL' : 'inclusive'} - self.files = {} - self.error = 0 - - if log is None: - self.log = PlyLogger(sys.stderr) - else: - self.log = log + self.stateinfo = {'INITIAL': 'inclusive'} + self.modules = set() + self.error = False + self.log = PlyLogger(sys.stderr) if log is None else log # Get all of the basic information def get_all(self): @@ -562,7 +571,7 @@ def get_all(self): self.get_literals() self.get_states() self.get_rules() - + # Validate all of the information def validate_all(self): self.validate_tokens() @@ -572,20 +581,20 @@ def validate_all(self): # Get the tokens map def get_tokens(self): - tokens = self.ldict.get("tokens",None) + tokens = self.ldict.get('tokens', None) if not tokens: - self.log.error("No token list is defined") - self.error = 1 + self.log.error('No token list is defined') + self.error = True return - if not isinstance(tokens,(list, tuple)): - self.log.error("tokens must be a list or tuple") - self.error = 1 + if not isinstance(tokens, (list, tuple)): + self.log.error('tokens must be a list or tuple') + self.error = True return - + if not tokens: - self.log.error("tokens is empty") - self.error = 1 + self.log.error('tokens is empty') + self.error = True return self.tokens = tokens @@ -595,280 +604,274 @@ def validate_tokens(self): terminals = {} for n in self.tokens: if not _is_identifier.match(n): - self.log.error("Bad token name '%s'",n) - self.error = 1 + self.log.error("Bad token name '%s'", n) + self.error = True if n in terminals: self.log.warning("Token '%s' multiply defined", n) terminals[n] = 1 # Get the literals specifier def get_literals(self): - self.literals = self.ldict.get("literals","") + self.literals = self.ldict.get('literals', '') + if not self.literals: + self.literals = '' # Validate literals def validate_literals(self): try: for c in self.literals: - if not isinstance(c,StringTypes) or len(c) > 1: - self.log.error("Invalid literal %s. Must be a single character", repr(c)) - self.error = 1 - continue + if not isinstance(c, StringTypes) or len(c) > 1: + self.log.error('Invalid literal %s. Must be a single character', repr(c)) + self.error = True except TypeError: - self.log.error("Invalid literals specification. literals must be a sequence of characters") - self.error = 1 + self.log.error('Invalid literals specification. literals must be a sequence of characters') + self.error = True def get_states(self): - self.states = self.ldict.get("states",None) + self.states = self.ldict.get('states', None) # Build statemap if self.states: - if not isinstance(self.states,(tuple,list)): - self.log.error("states must be defined as a tuple or list") - self.error = 1 - else: - for s in self.states: - if not isinstance(s,tuple) or len(s) != 2: - self.log.error("Invalid state specifier %s. Must be a tuple (statename,'exclusive|inclusive')",repr(s)) - self.error = 1 - continue - name, statetype = s - if not isinstance(name,StringTypes): - self.log.error("State name %s must be a string", repr(name)) - self.error = 1 - continue - if not (statetype == 'inclusive' or statetype == 'exclusive'): - self.log.error("State type for state %s must be 'inclusive' or 'exclusive'",name) - self.error = 1 - continue - if name in self.stateinfo: - self.log.error("State '%s' already defined",name) - self.error = 1 - continue - self.stateinfo[name] = statetype + if not isinstance(self.states, (tuple, list)): + self.log.error('states must be defined as a tuple or list') + self.error = True + else: + for s in self.states: + if not isinstance(s, tuple) or len(s) != 2: + self.log.error("Invalid state specifier %s. Must be a tuple (statename,'exclusive|inclusive')", repr(s)) + self.error = True + continue + name, statetype = s + if not isinstance(name, StringTypes): + self.log.error('State name %s must be a string', repr(name)) + self.error = True + continue + if not (statetype == 'inclusive' or statetype == 'exclusive'): + self.log.error("State type for state %s must be 'inclusive' or 'exclusive'", name) + self.error = True + continue + if name in self.stateinfo: + self.log.error("State '%s' already defined", name) + self.error = True + continue + self.stateinfo[name] = statetype # Get all of the symbols with a t_ prefix and sort them into various # categories (functions, strings, error functions, and ignore characters) def get_rules(self): - tsymbols = [f for f in self.ldict if f[:2] == 't_' ] + tsymbols = [f for f in self.ldict if f[:2] == 't_'] # Now build up a list of functions and a list of strings - - self.toknames = { } # Mapping of symbols to token names - self.funcsym = { } # Symbols defined as functions - self.strsym = { } # Symbols defined as strings - self.ignore = { } # Ignore strings by state - self.errorf = { } # Error functions by state + self.toknames = {} # Mapping of symbols to token names + self.funcsym = {} # Symbols defined as functions + self.strsym = {} # Symbols defined as strings + self.ignore = {} # Ignore strings by state + self.errorf = {} # Error functions by state + self.eoff = {} # EOF functions by state for s in self.stateinfo: - self.funcsym[s] = [] - self.strsym[s] = [] + self.funcsym[s] = [] + self.strsym[s] = [] if len(tsymbols) == 0: - self.log.error("No rules of the form t_rulename are defined") - self.error = 1 + self.log.error('No rules of the form t_rulename are defined') + self.error = True return for f in tsymbols: t = self.ldict[f] - states, tokname = _statetoken(f,self.stateinfo) + states, tokname = _statetoken(f, self.stateinfo) self.toknames[f] = tokname - if hasattr(t,"__call__"): + if hasattr(t, '__call__'): if tokname == 'error': for s in states: self.errorf[s] = t + elif tokname == 'eof': + for s in states: + self.eoff[s] = t elif tokname == 'ignore': - line = func_code(t).co_firstlineno - file = func_code(t).co_filename - self.log.error("%s:%d: Rule '%s' must be defined as a string",file,line,t.__name__) - self.error = 1 + line = t.__code__.co_firstlineno + file = t.__code__.co_filename + self.log.error("%s:%d: Rule '%s' must be defined as a string", file, line, t.__name__) + self.error = True else: - for s in states: - self.funcsym[s].append((f,t)) + for s in states: + self.funcsym[s].append((f, t)) elif isinstance(t, StringTypes): if tokname == 'ignore': for s in states: self.ignore[s] = t - if "\\" in t: - self.log.warning("%s contains a literal backslash '\\'",f) + if '\\' in t: + self.log.warning("%s contains a literal backslash '\\'", f) elif tokname == 'error': self.log.error("Rule '%s' must be defined as a function", f) - self.error = 1 + self.error = True else: - for s in states: - self.strsym[s].append((f,t)) + for s in states: + self.strsym[s].append((f, t)) else: - self.log.error("%s not defined as a function or string", f) - self.error = 1 + self.log.error('%s not defined as a function or string', f) + self.error = True # Sort the functions by line number for f in self.funcsym.values(): - if sys.version_info[0] < 3: - f.sort(lambda x,y: cmp(func_code(x[1]).co_firstlineno,func_code(y[1]).co_firstlineno)) - else: - # Python 3.0 - f.sort(key=lambda x: func_code(x[1]).co_firstlineno) + f.sort(key=lambda x: x[1].__code__.co_firstlineno) # Sort the strings by regular expression length for s in self.strsym.values(): - if sys.version_info[0] < 3: - s.sort(lambda x,y: (len(x[1]) < len(y[1])) - (len(x[1]) > len(y[1]))) - else: - # Python 3.0 - s.sort(key=lambda x: len(x[1]),reverse=True) + s.sort(key=lambda x: len(x[1]), reverse=True) - # Validate all of the t_rules collected + # Validate all of the t_rules collected def validate_rules(self): for state in self.stateinfo: # Validate all rules defined by functions - - for fname, f in self.funcsym[state]: - line = func_code(f).co_firstlineno - file = func_code(f).co_filename - self.files[file] = 1 + line = f.__code__.co_firstlineno + file = f.__code__.co_filename + module = inspect.getmodule(f) + self.modules.add(module) tokname = self.toknames[fname] if isinstance(f, types.MethodType): reqargs = 2 else: reqargs = 1 - nargs = func_code(f).co_argcount + nargs = f.__code__.co_argcount if nargs > reqargs: - self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,f.__name__) - self.error = 1 + self.log.error("%s:%d: Rule '%s' has too many arguments", file, line, f.__name__) + self.error = True continue if nargs < reqargs: - self.log.error("%s:%d: Rule '%s' requires an argument", file,line,f.__name__) - self.error = 1 + self.log.error("%s:%d: Rule '%s' requires an argument", file, line, f.__name__) + self.error = True continue - if not f.__doc__: - self.log.error("%s:%d: No regular expression defined for rule '%s'",file,line,f.__name__) - self.error = 1 + if not _get_regex(f): + self.log.error("%s:%d: No regular expression defined for rule '%s'", file, line, f.__name__) + self.error = True continue try: - c = re.compile("(?P<%s>%s)" % (fname,f.__doc__), re.VERBOSE | self.reflags) - if c.match(""): - self.log.error("%s:%d: Regular expression for rule '%s' matches empty string", file,line,f.__name__) - self.error = 1 - except re.error: - _etype, e, _etrace = sys.exc_info() - self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file,line,f.__name__,e) - if '#' in f.__doc__: - self.log.error("%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'",file,line, f.__name__) - self.error = 1 + c = re.compile('(?P<%s>%s)' % (fname, _get_regex(f)), self.reflags) + if c.match(''): + self.log.error("%s:%d: Regular expression for rule '%s' matches empty string", file, line, f.__name__) + self.error = True + except re.error as e: + self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file, line, f.__name__, e) + if '#' in _get_regex(f): + self.log.error("%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'", file, line, f.__name__) + self.error = True # Validate all rules defined by strings - for name,r in self.strsym[state]: + for name, r in self.strsym[state]: tokname = self.toknames[name] if tokname == 'error': self.log.error("Rule '%s' must be defined as a function", name) - self.error = 1 + self.error = True continue - if not tokname in self.tokens and tokname.find("ignore_") < 0: - self.log.error("Rule '%s' defined for an unspecified token %s",name,tokname) - self.error = 1 + if tokname not in self.tokens and tokname.find('ignore_') < 0: + self.log.error("Rule '%s' defined for an unspecified token %s", name, tokname) + self.error = True continue try: - c = re.compile("(?P<%s>%s)" % (name,r),re.VERBOSE | self.reflags) - if (c.match("")): - self.log.error("Regular expression for rule '%s' matches empty string",name) - self.error = 1 - except re.error: - _etype, e, _etrace = sys.exc_info() - self.log.error("Invalid regular expression for rule '%s'. %s",name,e) + c = re.compile('(?P<%s>%s)' % (name, r), self.reflags) + if (c.match('')): + self.log.error("Regular expression for rule '%s' matches empty string", name) + self.error = True + except re.error as e: + self.log.error("Invalid regular expression for rule '%s'. %s", name, e) if '#' in r: - self.log.error("Make sure '#' in rule '%s' is escaped with '\\#'",name) - self.error = 1 + self.log.error("Make sure '#' in rule '%s' is escaped with '\\#'", name) + self.error = True if not self.funcsym[state] and not self.strsym[state]: - self.log.error("No rules defined for state '%s'",state) - self.error = 1 + self.log.error("No rules defined for state '%s'", state) + self.error = True # Validate the error function - efunc = self.errorf.get(state,None) + efunc = self.errorf.get(state, None) if efunc: f = efunc - line = func_code(f).co_firstlineno - file = func_code(f).co_filename - self.files[file] = 1 + line = f.__code__.co_firstlineno + file = f.__code__.co_filename + module = inspect.getmodule(f) + self.modules.add(module) if isinstance(f, types.MethodType): reqargs = 2 else: reqargs = 1 - nargs = func_code(f).co_argcount + nargs = f.__code__.co_argcount if nargs > reqargs: - self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,f.__name__) - self.error = 1 + self.log.error("%s:%d: Rule '%s' has too many arguments", file, line, f.__name__) + self.error = True if nargs < reqargs: - self.log.error("%s:%d: Rule '%s' requires an argument", file,line,f.__name__) - self.error = 1 - - for f in self.files: - self.validate_file(f) + self.log.error("%s:%d: Rule '%s' requires an argument", file, line, f.__name__) + self.error = True + for module in self.modules: + self.validate_module(module) # ----------------------------------------------------------------------------- - # validate_file() + # validate_module() # # This checks to see if there are duplicated t_rulename() functions or strings # in the parser input file. This is done using a simple regular expression - # match on each line in the given file. + # match on each line in the source code of the given module. # ----------------------------------------------------------------------------- - def validate_file(self,filename): - import os.path - base,ext = os.path.splitext(filename) - if ext != '.py': return # No idea what the file is. Return OK - + def validate_module(self, module): try: - f = open(filename) - lines = f.readlines() - f.close() + lines, linen = inspect.getsourcelines(module) except IOError: - return # Couldn't find the file. Don't worry about it + return fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(') sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=') - counthash = { } - linen = 1 - for l in lines: - m = fre.match(l) + counthash = {} + linen += 1 + for line in lines: + m = fre.match(line) if not m: - m = sre.match(l) + m = sre.match(line) if m: name = m.group(1) prev = counthash.get(name) if not prev: counthash[name] = linen else: - self.log.error("%s:%d: Rule %s redefined. Previously defined on line %d",filename,linen,name,prev) - self.error = 1 + filename = inspect.getsourcefile(module) + self.log.error('%s:%d: Rule %s redefined. Previously defined on line %d', filename, linen, name, prev) + self.error = True linen += 1 - + # ----------------------------------------------------------------------------- # lex(module) # # Build all of the regular expression rules from definitions in the supplied module # ----------------------------------------------------------------------------- -def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,nowarn=0,outputdir="", debuglog=None, errorlog=None): +def lex(module=None, object=None, debug=False, optimize=False, lextab='lextab', + reflags=int(re.VERBOSE), nowarn=False, outputdir=None, debuglog=None, errorlog=None): + + if lextab is None: + lextab = 'lextab' + global lexer + ldict = None - stateinfo = { 'INITIAL' : 'inclusive'} + stateinfo = {'INITIAL': 'inclusive'} lexobj = Lexer() lexobj.lexoptimize = optimize - global token,input + global token, input if errorlog is None: errorlog = PlyLogger(sys.stderr) @@ -878,16 +881,28 @@ def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,now debuglog = PlyLogger(sys.stderr) # Get the module dictionary used for the lexer - if object: module = object + if object: + module = object + # Get the module dictionary used for the parser if module: - _items = [(k,getattr(module,k)) for k in dir(module)] + _items = [(k, getattr(module, k)) for k in dir(module)] ldict = dict(_items) + # If no __file__ attribute is available, try to obtain it from the __module__ instead + if '__file__' not in ldict: + ldict['__file__'] = sys.modules[ldict['__module__']].__file__ else: ldict = get_caller_module_dict(2) + # Determine if the module is package of a package or not. + # If so, fix the tabmodule setting so that tables load correctly + pkg = ldict.get('__package__') + if pkg and isinstance(lextab, str): + if '.' not in lextab: + lextab = pkg + '.' + lextab + # Collect parser information from the dictionary - linfo = LexerReflect(ldict,log=errorlog,reflags=reflags) + linfo = LexerReflect(ldict, log=errorlog, reflags=reflags) linfo.get_all() if not optimize: if linfo.validate_all(): @@ -895,7 +910,7 @@ def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,now if optimize and lextab: try: - lexobj.readtab(lextab,ldict) + lexobj.readtab(lextab, ldict) token = lexobj.token input = lexobj.input lexer = lexobj @@ -906,93 +921,97 @@ def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,now # Dump some basic debugging information if debug: - debuglog.info("lex: tokens = %r", linfo.tokens) - debuglog.info("lex: literals = %r", linfo.literals) - debuglog.info("lex: states = %r", linfo.stateinfo) + debuglog.info('lex: tokens = %r', linfo.tokens) + debuglog.info('lex: literals = %r', linfo.literals) + debuglog.info('lex: states = %r', linfo.stateinfo) # Build a dictionary of valid token names - lexobj.lextokens = { } + lexobj.lextokens = set() for n in linfo.tokens: - lexobj.lextokens[n] = 1 + lexobj.lextokens.add(n) # Get literals specification - if isinstance(linfo.literals,(list,tuple)): + if isinstance(linfo.literals, (list, tuple)): lexobj.lexliterals = type(linfo.literals[0])().join(linfo.literals) else: lexobj.lexliterals = linfo.literals + lexobj.lextokens_all = lexobj.lextokens | set(lexobj.lexliterals) + # Get the stateinfo dictionary stateinfo = linfo.stateinfo - regexs = { } + regexs = {} # Build the master regular expressions for state in stateinfo: regex_list = [] # Add rules defined by functions first for fname, f in linfo.funcsym[state]: - line = func_code(f).co_firstlineno - file = func_code(f).co_filename - regex_list.append("(?P<%s>%s)" % (fname,f.__doc__)) + regex_list.append('(?P<%s>%s)' % (fname, _get_regex(f))) if debug: - debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",fname,f.__doc__, state) + debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", fname, _get_regex(f), state) # Now add all of the simple rules - for name,r in linfo.strsym[state]: - regex_list.append("(?P<%s>%s)" % (name,r)) + for name, r in linfo.strsym[state]: + regex_list.append('(?P<%s>%s)' % (name, r)) if debug: - debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",name,r, state) + debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", name, r, state) regexs[state] = regex_list # Build the master regular expressions if debug: - debuglog.info("lex: ==== MASTER REGEXS FOLLOW ====") + debuglog.info('lex: ==== MASTER REGEXS FOLLOW ====') for state in regexs: - lexre, re_text, re_names = _form_master_re(regexs[state],reflags,ldict,linfo.toknames) + lexre, re_text, re_names = _form_master_re(regexs[state], reflags, ldict, linfo.toknames) lexobj.lexstatere[state] = lexre lexobj.lexstateretext[state] = re_text lexobj.lexstaterenames[state] = re_names if debug: - for i in range(len(re_text)): - debuglog.info("lex: state '%s' : regex[%d] = '%s'",state, i, re_text[i]) + for i, text in enumerate(re_text): + debuglog.info("lex: state '%s' : regex[%d] = '%s'", state, i, text) # For inclusive states, we need to add the regular expressions from the INITIAL state - for state,stype in stateinfo.items(): - if state != "INITIAL" and stype == 'inclusive': - lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL']) - lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL']) - lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL']) + for state, stype in stateinfo.items(): + if state != 'INITIAL' and stype == 'inclusive': + lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL']) + lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL']) + lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL']) lexobj.lexstateinfo = stateinfo - lexobj.lexre = lexobj.lexstatere["INITIAL"] - lexobj.lexretext = lexobj.lexstateretext["INITIAL"] + lexobj.lexre = lexobj.lexstatere['INITIAL'] + lexobj.lexretext = lexobj.lexstateretext['INITIAL'] lexobj.lexreflags = reflags # Set up ignore variables lexobj.lexstateignore = linfo.ignore - lexobj.lexignore = lexobj.lexstateignore.get("INITIAL","") + lexobj.lexignore = lexobj.lexstateignore.get('INITIAL', '') # Set up error functions lexobj.lexstateerrorf = linfo.errorf - lexobj.lexerrorf = linfo.errorf.get("INITIAL",None) + lexobj.lexerrorf = linfo.errorf.get('INITIAL', None) if not lexobj.lexerrorf: - errorlog.warning("No t_error rule is defined") + errorlog.warning('No t_error rule is defined') + + # Set up eof functions + lexobj.lexstateeoff = linfo.eoff + lexobj.lexeoff = linfo.eoff.get('INITIAL', None) # Check state information for ignore and error rules - for s,stype in stateinfo.items(): + for s, stype in stateinfo.items(): if stype == 'exclusive': - if not s in linfo.errorf: - errorlog.warning("No error rule is defined for exclusive state '%s'", s) - if not s in linfo.ignore and lexobj.lexignore: - errorlog.warning("No ignore rule is defined for exclusive state '%s'", s) + if s not in linfo.errorf: + errorlog.warning("No error rule is defined for exclusive state '%s'", s) + if s not in linfo.ignore and lexobj.lexignore: + errorlog.warning("No ignore rule is defined for exclusive state '%s'", s) elif stype == 'inclusive': - if not s in linfo.errorf: - linfo.errorf[s] = linfo.errorf.get("INITIAL",None) - if not s in linfo.ignore: - linfo.ignore[s] = linfo.ignore.get("INITIAL","") + if s not in linfo.errorf: + linfo.errorf[s] = linfo.errorf.get('INITIAL', None) + if s not in linfo.ignore: + linfo.ignore[s] = linfo.ignore.get('INITIAL', '') # Create global versions of the token() and input() functions token = lexobj.token @@ -1001,7 +1020,28 @@ def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,now # If in optimize mode, we write the lextab if lextab and optimize: - lexobj.writetab(lextab,outputdir) + if outputdir is None: + # If no output directory is set, the location of the output files + # is determined according to the following rules: + # - If lextab specifies a package, files go into that package directory + # - Otherwise, files go in the same directory as the specifying module + if isinstance(lextab, types.ModuleType): + srcfile = lextab.__file__ + else: + if '.' not in lextab: + srcfile = ldict['__file__'] + else: + parts = lextab.split('.') + pkgname = '.'.join(parts[:-1]) + exec('import %s' % pkgname) + srcfile = getattr(sys.modules[pkgname], '__file__', '') + outputdir = os.path.dirname(srcfile) + try: + lexobj.writetab(lextab, outputdir) + if lextab in sys.modules: + del sys.modules[lextab] + except IOError as e: + errorlog.warning("Couldn't write lextab module %r. %s" % (lextab, e)) return lexobj @@ -1011,7 +1051,7 @@ def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,now # This runs the lexer as a main program # ----------------------------------------------------------------------------- -def runmain(lexer=None,data=None): +def runmain(lexer=None, data=None): if not data: try: filename = sys.argv[1] @@ -1019,7 +1059,7 @@ def runmain(lexer=None,data=None): data = f.read() f.close() except IndexError: - sys.stdout.write("Reading from standard input (type EOF to end):\n") + sys.stdout.write('Reading from standard input (type EOF to end):\n') data = sys.stdin.read() if lexer: @@ -1032,10 +1072,11 @@ def runmain(lexer=None,data=None): else: _token = token - while 1: + while True: tok = _token() - if not tok: break - sys.stdout.write("(%s,%r,%d,%d)\n" % (tok.type, tok.value, tok.lineno,tok.lexpos)) + if not tok: + break + sys.stdout.write('(%s,%r,%d,%d)\n' % (tok.type, tok.value, tok.lineno, tok.lexpos)) # ----------------------------------------------------------------------------- # @TOKEN(regex) @@ -1045,14 +1086,13 @@ def runmain(lexer=None,data=None): # ----------------------------------------------------------------------------- def TOKEN(r): - def set_doc(f): - if hasattr(r,"__call__"): - f.__doc__ = r.__doc__ + def set_regex(f): + if hasattr(r, '__call__'): + f.regex = _get_regex(r) else: - f.__doc__ = r + f.regex = r return f - return set_doc + return set_regex # Alternative spelling of the TOKEN decorator Token = TOKEN - diff --git a/third_party/ply/license.patch b/third_party/ply/license.patch index 7b2621fc46b0..b6cdff196c4b 100644 --- a/third_party/ply/license.patch +++ b/third_party/ply/license.patch @@ -6,9 +6,9 @@ index 853a985..f3da03e 100644 # PLY package # Author: David Beazley (dave@dabeaz.com) +# ----------------------------------------------------------------------------- -+# ply: yacc.py ++# ply: __init__.py +# -+# Copyright (C) 2001-2011, ++# Copyright (C) 2001-2018 +# David M. Beazley (Dabeaz LLC) +# All rights reserved. +# @@ -38,4 +38,5 @@ index 853a985..f3da03e 100644 +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ----------------------------------------------------------------------------- + __version__ = '3.11' __all__ = ['lex','yacc'] diff --git a/third_party/ply/ply.gni b/third_party/ply/ply.gni new file mode 100644 index 000000000000..6a2db436f125 --- /dev/null +++ b/third_party/ply/ply.gni @@ -0,0 +1,5 @@ +ply_sources = [ + "//third_party/ply/__init__.py", + "//third_party/ply/lex.py", + "//third_party/ply/yacc.py", +] diff --git a/third_party/ply/yacc.py b/third_party/ply/yacc.py index f70439ea5e1c..88188a1e8ead 100644 --- a/third_party/ply/yacc.py +++ b/third_party/ply/yacc.py @@ -1,22 +1,22 @@ # ----------------------------------------------------------------------------- # ply: yacc.py # -# Copyright (C) 2001-2011, +# Copyright (C) 2001-2018 # David M. Beazley (Dabeaz LLC) # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: -# +# # * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. +# and/or other materials provided with the distribution. # * Neither the name of the David Beazley or Dabeaz LLC may be used to # endorse or promote products derived from this software without -# specific prior written permission. +# specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT @@ -32,7 +32,7 @@ # ----------------------------------------------------------------------------- # # This implements an LR parser that is constructed from grammar rules defined -# as Python functions. The grammer is specified by supplying the BNF inside +# as Python functions. The grammar is specified by supplying the BNF inside # Python documentation strings. The inspiration for this technique was borrowed # from John Aycock's Spark parsing system. PLY might be viewed as cross between # Spark and the GNU bison utility. @@ -59,8 +59,15 @@ # own risk! # ---------------------------------------------------------------------------- -__version__ = "3.4" -__tabversion__ = "3.2" # Table version +import re +import types +import sys +import os.path +import inspect +import warnings + +__version__ = '3.11' +__tabversion__ = '3.10' #----------------------------------------------------------------------------- # === User configurable parameters === @@ -68,7 +75,7 @@ # Change these to modify the default behavior of yacc (if you wish) #----------------------------------------------------------------------------- -yaccdebug = 1 # Debugging mode. If set, yacc generates a +yaccdebug = True # Debugging mode. If set, yacc generates a # a 'parser.out' file in the current directory debug_file = 'parser.out' # Default name of the debugging file @@ -77,86 +84,117 @@ error_count = 3 # Number of symbols that must be shifted to leave recovery mode -yaccdevel = 0 # Set to True if developing yacc. This turns off optimized +yaccdevel = False # Set to True if developing yacc. This turns off optimized # implementations of certain functions. resultlimit = 40 # Size limit of results when running in debug mode. pickle_protocol = 0 # Protocol to use when writing pickle files -import re, types, sys, os.path - -# Compatibility function for python 2.6/3.0 +# String type-checking compatibility if sys.version_info[0] < 3: - def func_code(f): - return f.func_code + string_types = basestring else: - def func_code(f): - return f.__code__ - -# Compatibility -try: - MAXINT = sys.maxint -except AttributeError: - MAXINT = sys.maxsize - -# Python 2.x/3.0 compatibility. -def load_ply_lex(): - if sys.version_info[0] < 3: - import lex - else: - import ply.lex as lex - return lex + string_types = str + +MAXINT = sys.maxsize -# This object is a stand-in for a logging object created by the +# This object is a stand-in for a logging object created by the # logging module. PLY will use this by default to create things # such as the parser.out file. If a user wants more detailed # information, they can create their own logging object and pass # it into PLY. class PlyLogger(object): - def __init__(self,f): + def __init__(self, f): self.f = f - def debug(self,msg,*args,**kwargs): - self.f.write((msg % args) + "\n") - info = debug - def warning(self,msg,*args,**kwargs): - self.f.write("WARNING: "+ (msg % args) + "\n") + def debug(self, msg, *args, **kwargs): + self.f.write((msg % args) + '\n') + + info = debug - def error(self,msg,*args,**kwargs): - self.f.write("ERROR: " + (msg % args) + "\n") + def warning(self, msg, *args, **kwargs): + self.f.write('WARNING: ' + (msg % args) + '\n') + + def error(self, msg, *args, **kwargs): + self.f.write('ERROR: ' + (msg % args) + '\n') critical = debug # Null logger is used when no output is generated. Does nothing. class NullLogger(object): - def __getattribute__(self,name): + def __getattribute__(self, name): return self - def __call__(self,*args,**kwargs): + + def __call__(self, *args, **kwargs): return self - + # Exception raised for yacc-related errors -class YaccError(Exception): pass +class YaccError(Exception): + pass # Format the result message that the parser produces when running in debug mode. def format_result(r): repr_str = repr(r) - if '\n' in repr_str: repr_str = repr(repr_str) + if '\n' in repr_str: + repr_str = repr(repr_str) if len(repr_str) > resultlimit: - repr_str = repr_str[:resultlimit]+" ..." - result = "<%s @ 0x%x> (%s)" % (type(r).__name__,id(r),repr_str) + repr_str = repr_str[:resultlimit] + ' ...' + result = '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), repr_str) return result - # Format stack entries when the parser is running in debug mode def format_stack_entry(r): repr_str = repr(r) - if '\n' in repr_str: repr_str = repr(repr_str) + if '\n' in repr_str: + repr_str = repr(repr_str) if len(repr_str) < 16: return repr_str else: - return "<%s @ 0x%x>" % (type(r).__name__,id(r)) + return '<%s @ 0x%x>' % (type(r).__name__, id(r)) + +# Panic mode error recovery support. This feature is being reworked--much of the +# code here is to offer a deprecation/backwards compatible transition + +_errok = None +_token = None +_restart = None +_warnmsg = '''PLY: Don't use global functions errok(), token(), and restart() in p_error(). +Instead, invoke the methods on the associated parser instance: + + def p_error(p): + ... + # Use parser.errok(), parser.token(), parser.restart() + ... + + parser = yacc.yacc() +''' + +def errok(): + warnings.warn(_warnmsg) + return _errok() + +def restart(): + warnings.warn(_warnmsg) + return _restart() + +def token(): + warnings.warn(_warnmsg) + return _token() + +# Utility function to call the p_error() function with some deprecation hacks +def call_errorfunc(errorfunc, token, parser): + global _errok, _token, _restart + _errok = parser.errok + _token = parser.token + _restart = parser.restart + r = errorfunc(token) + try: + del _errok, _token, _restart + except NameError: + pass + return r #----------------------------------------------------------------------------- # === LR Parsing Engine === @@ -176,8 +214,11 @@ def format_stack_entry(r): # .endlexpos = Ending lex position (optional, set automatically) class YaccSymbol: - def __str__(self): return self.type - def __repr__(self): return str(self) + def __str__(self): + return self.type + + def __repr__(self): + return str(self) # This class is a wrapper around the objects actually passed to each # grammar rule. Index lookup and assignment actually assign the @@ -189,46 +230,53 @@ def __repr__(self): return str(self) # representing the range of positional information for a symbol. class YaccProduction: - def __init__(self,s,stack=None): + def __init__(self, s, stack=None): self.slice = s self.stack = stack self.lexer = None - self.parser= None - def __getitem__(self,n): - if n >= 0: return self.slice[n].value - else: return self.stack[n].value + self.parser = None - def __setitem__(self,n,v): + def __getitem__(self, n): + if isinstance(n, slice): + return [s.value for s in self.slice[n]] + elif n >= 0: + return self.slice[n].value + else: + return self.stack[n].value + + def __setitem__(self, n, v): self.slice[n].value = v - def __getslice__(self,i,j): + def __getslice__(self, i, j): return [s.value for s in self.slice[i:j]] def __len__(self): return len(self.slice) - def lineno(self,n): - return getattr(self.slice[n],"lineno",0) + def lineno(self, n): + return getattr(self.slice[n], 'lineno', 0) - def set_lineno(self,n,lineno): + def set_lineno(self, n, lineno): self.slice[n].lineno = lineno - def linespan(self,n): - startline = getattr(self.slice[n],"lineno",0) - endline = getattr(self.slice[n],"endlineno",startline) - return startline,endline + def linespan(self, n): + startline = getattr(self.slice[n], 'lineno', 0) + endline = getattr(self.slice[n], 'endlineno', startline) + return startline, endline - def lexpos(self,n): - return getattr(self.slice[n],"lexpos",0) + def lexpos(self, n): + return getattr(self.slice[n], 'lexpos', 0) - def lexspan(self,n): - startpos = getattr(self.slice[n],"lexpos",0) - endpos = getattr(self.slice[n],"endlexpos",startpos) - return startpos,endpos + def set_lexpos(self, n, lexpos): + self.slice[n].lexpos = lexpos - def error(self): - raise SyntaxError + def lexspan(self, n): + startpos = getattr(self.slice[n], 'lexpos', 0) + endpos = getattr(self.slice[n], 'endlexpos', startpos) + return startpos, endpos + def error(self): + raise SyntaxError # ----------------------------------------------------------------------------- # == LRParser == @@ -237,14 +285,16 @@ def error(self): # ----------------------------------------------------------------------------- class LRParser: - def __init__(self,lrtab,errorf): + def __init__(self, lrtab, errorf): self.productions = lrtab.lr_productions - self.action = lrtab.lr_action - self.goto = lrtab.lr_goto - self.errorfunc = errorf + self.action = lrtab.lr_action + self.goto = lrtab.lr_goto + self.errorfunc = errorf + self.set_defaulted_states() + self.errorok = True def errok(self): - self.errorok = 1 + self.errorok = True def restart(self): del self.statestack[:] @@ -254,24 +304,42 @@ def restart(self): self.symstack.append(sym) self.statestack.append(0) - def parse(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): + # Defaulted state support. + # This method identifies parser states where there is only one possible reduction action. + # For such states, the parser can make a choose to make a rule reduction without consuming + # the next look-ahead token. This delayed invocation of the tokenizer can be useful in + # certain kinds of advanced parsing situations where the lexer and parser interact with + # each other or change states (i.e., manipulation of scope, lexer states, etc.). + # + # See: http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions + def set_defaulted_states(self): + self.defaulted_states = {} + for state, actions in self.action.items(): + rules = list(actions.values()) + if len(rules) == 1 and rules[0] < 0: + self.defaulted_states[state] = rules[0] + + def disable_defaulted_states(self): + self.defaulted_states = {} + + def parse(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): if debug or yaccdevel: - if isinstance(debug,int): + if isinstance(debug, int): debug = PlyLogger(sys.stderr) - return self.parsedebug(input,lexer,debug,tracking,tokenfunc) + return self.parsedebug(input, lexer, debug, tracking, tokenfunc) elif tracking: - return self.parseopt(input,lexer,debug,tracking,tokenfunc) + return self.parseopt(input, lexer, debug, tracking, tokenfunc) else: - return self.parseopt_notrack(input,lexer,debug,tracking,tokenfunc) - + return self.parseopt_notrack(input, lexer, debug, tracking, tokenfunc) + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! # parsedebug(). # # This is the debugging enabled version of parse(). All changes made to the - # parsing engine should be made here. For the non-debugging version, - # copy this code to a method parseopt() and delete all of the sections - # enclosed in: + # parsing engine should be made here. Optimized versions of this function + # are automatically created by the ply/ygen.py script. This script cuts out + # sections enclosed in markers such as this: # # #--! DEBUG # statements @@ -279,22 +347,24 @@ def parse(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): # # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None): - lookahead = None # Current lookahead symbol - lookaheadstack = [ ] # Stack of lookahead symbols - actions = self.action # Local reference to action table (to avoid lookup on self.) - goto = self.goto # Local reference to goto table (to avoid lookup on self.) - prod = self.productions # Local reference to production list (to avoid lookup on self.) - pslice = YaccProduction(None) # Production object passed to grammar rules - errorcount = 0 # Used during error recovery - - # --! DEBUG - debug.info("PLY: PARSE DEBUG START") - # --! DEBUG + def parsedebug(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): + #--! parsedebug-start + lookahead = None # Current lookahead symbol + lookaheadstack = [] # Stack of lookahead symbols + actions = self.action # Local reference to action table (to avoid lookup on self.) + goto = self.goto # Local reference to goto table (to avoid lookup on self.) + prod = self.productions # Local reference to production list (to avoid lookup on self.) + defaulted_states = self.defaulted_states # Local reference to defaulted states + pslice = YaccProduction(None) # Production object passed to grammar rules + errorcount = 0 # Used during error recovery + + #--! DEBUG + debug.info('PLY: PARSE DEBUG START') + #--! DEBUG # If no lexer was given, we will try to use the lex module if not lexer: - lex = load_ply_lex() + from . import lex lexer = lex.lexer # Set up the lexer and parser objects on pslice @@ -306,16 +376,19 @@ def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None): lexer.input(input) if tokenfunc is None: - # Tokenize function - get_token = lexer.token + # Tokenize function + get_token = lexer.token else: - get_token = tokenfunc + get_token = tokenfunc + + # Set the parser() token method (sometimes used in error recovery) + self.token = get_token # Set up the state and symbol stacks - statestack = [ ] # Stack of parsing states + statestack = [] # Stack of parsing states self.statestack = statestack - symstack = [ ] # Stack of grammar symbols + symstack = [] # Stack of grammar symbols self.symstack = symstack pslice.stack = symstack # Put in the production @@ -325,52 +398,59 @@ def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None): statestack.append(0) sym = YaccSymbol() - sym.type = "$end" + sym.type = '$end' symstack.append(sym) state = 0 - while 1: + while True: # Get the next symbol on the input. If a lookahead symbol # is already set, we just use that. Otherwise, we'll pull # the next token off of the lookaheadstack or from the lexer - # --! DEBUG + #--! DEBUG debug.debug('') debug.debug('State : %s', state) - # --! DEBUG + #--! DEBUG - if not lookahead: - if not lookaheadstack: - lookahead = get_token() # Get the next token - else: - lookahead = lookaheadstack.pop() + if state not in defaulted_states: if not lookahead: - lookahead = YaccSymbol() - lookahead.type = "$end" + if not lookaheadstack: + lookahead = get_token() # Get the next token + else: + lookahead = lookaheadstack.pop() + if not lookahead: + lookahead = YaccSymbol() + lookahead.type = '$end' + + # Check the action table + ltype = lookahead.type + t = actions[state].get(ltype) + else: + t = defaulted_states[state] + #--! DEBUG + debug.debug('Defaulted state %s: Reduce using %d', state, -t) + #--! DEBUG - # --! DEBUG + #--! DEBUG debug.debug('Stack : %s', - ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) - # --! DEBUG - - # Check the action table - ltype = lookahead.type - t = actions[state].get(ltype) + ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) + #--! DEBUG if t is not None: if t > 0: # shift a symbol on the stack statestack.append(t) state = t - - # --! DEBUG - debug.debug("Action : Shift and goto state %s", t) - # --! DEBUG + + #--! DEBUG + debug.debug('Action : Shift and goto state %s', t) + #--! DEBUG symstack.append(lookahead) lookahead = None # Decrease error count on successful shift - if errorcount: errorcount -=1 + if errorcount: + errorcount -= 1 continue if t < 0: @@ -384,72 +464,77 @@ def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None): sym.type = pname # Production name sym.value = None - # --! DEBUG + #--! DEBUG if plen: - debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+"]",-t) + debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, + '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']', + goto[statestack[-1-plen]][pname]) else: - debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [],-t) - - # --! DEBUG + debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [], + goto[statestack[-1]][pname]) + + #--! DEBUG if plen: targ = symstack[-plen-1:] targ[0] = sym - # --! TRACKING + #--! TRACKING if tracking: - t1 = targ[1] - sym.lineno = t1.lineno - sym.lexpos = t1.lexpos - t1 = targ[-1] - sym.endlineno = getattr(t1,"endlineno",t1.lineno) - sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos) - - # --! TRACKING + t1 = targ[1] + sym.lineno = t1.lineno + sym.lexpos = t1.lexpos + t1 = targ[-1] + sym.endlineno = getattr(t1, 'endlineno', t1.lineno) + sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos) + #--! TRACKING # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated + # The code enclosed in this section is duplicated # below as a performance optimization. Make sure # changes get made in both locations. pslice.slice = targ - + try: # Call the grammar rule with our special slice object del symstack[-plen:] - del statestack[-plen:] + self.state = state p.callable(pslice) - # --! DEBUG - debug.info("Result : %s", format_result(pslice[0])) - # --! DEBUG + del statestack[-plen:] + #--! DEBUG + debug.info('Result : %s', format_result(pslice[0])) + #--! DEBUG symstack.append(sym) state = goto[statestack[-1]][pname] statestack.append(state) except SyntaxError: # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() + lookaheadstack.append(lookahead) # Save the current lookahead token + symstack.extend(targ[1:-1]) # Put the production slice back on the stack + statestack.pop() # Pop back one state (before the reduce) state = statestack[-1] sym.type = 'error' + sym.value = 'error' lookahead = sym errorcount = error_count - self.errorok = 0 + self.errorok = False + continue # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - + else: - # --! TRACKING + #--! TRACKING if tracking: - sym.lineno = lexer.lineno - sym.lexpos = lexer.lexpos - # --! TRACKING + sym.lineno = lexer.lineno + sym.lexpos = lexer.lexpos + #--! TRACKING - targ = [ sym ] + targ = [sym] # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated + # The code enclosed in this section is duplicated # above as a performance optimization. Make sure # changes get made in both locations. @@ -457,41 +542,43 @@ def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None): try: # Call the grammar rule with our special slice object + self.state = state p.callable(pslice) - # --! DEBUG - debug.info("Result : %s", format_result(pslice[0])) - # --! DEBUG + #--! DEBUG + debug.info('Result : %s', format_result(pslice[0])) + #--! DEBUG symstack.append(sym) state = goto[statestack[-1]][pname] statestack.append(state) except SyntaxError: # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() + lookaheadstack.append(lookahead) # Save the current lookahead token + statestack.pop() # Pop back one state (before the reduce) state = statestack[-1] sym.type = 'error' + sym.value = 'error' lookahead = sym errorcount = error_count - self.errorok = 0 + self.errorok = False + continue # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! if t == 0: n = symstack[-1] - result = getattr(n,"value",None) - # --! DEBUG - debug.info("Done : Returning %s", format_result(result)) - debug.info("PLY: PARSE DEBUG END") - # --! DEBUG + result = getattr(n, 'value', None) + #--! DEBUG + debug.info('Done : Returning %s', format_result(result)) + debug.info('PLY: PARSE DEBUG END') + #--! DEBUG return result - if t == None: + if t is None: - # --! DEBUG + #--! DEBUG debug.error('Error : %s', - ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) - # --! DEBUG + ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) + #--! DEBUG # We have some kind of parsing error here. To handle # this, we are going to push the current token onto @@ -505,20 +592,15 @@ def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None): # errorcount == 0. if errorcount == 0 or self.errorok: errorcount = error_count - self.errorok = 0 + self.errorok = False errtoken = lookahead - if errtoken.type == "$end": + if errtoken.type == '$end': errtoken = None # End of file! if self.errorfunc: - global errok,token,restart - errok = self.errok # Set some special functions available in error recovery - token = get_token - restart = self.restart - if errtoken and not hasattr(errtoken,'lexer'): + if errtoken and not hasattr(errtoken, 'lexer'): errtoken.lexer = lexer - tok = self.errorfunc(errtoken) - del errok, token, restart # Delete special functions - + self.state = state + tok = call_errorfunc(self.errorfunc, errtoken, self) if self.errorok: # User must have done some kind of panic # mode recovery on their own. The @@ -528,14 +610,16 @@ def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None): continue else: if errtoken: - if hasattr(errtoken,"lineno"): lineno = lookahead.lineno - else: lineno = 0 + if hasattr(errtoken, 'lineno'): + lineno = lookahead.lineno + else: + lineno = 0 if lineno: - sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) + sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type)) else: - sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) + sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type) else: - sys.stderr.write("yacc: Parse error in input. EOF\n") + sys.stderr.write('yacc: Parse error in input. EOF\n') return else: @@ -545,7 +629,7 @@ def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None): # entire parse has been rolled back and we're completely hosed. The token is # discarded and we just keep going. - if len(statestack) <= 1 and lookahead.type != "$end": + if len(statestack) <= 1 and lookahead.type != '$end': lookahead = None errtoken = None state = 0 @@ -557,7 +641,7 @@ def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None): # at the end of the file. nuke the top entry and generate an error token # Start nuking entries on the stack - if lookahead.type == "$end": + if lookahead.type == '$end': # Whoa. We're really hosed here. Bail out return @@ -566,48 +650,67 @@ def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None): if sym.type == 'error': # Hmmm. Error is on top of stack, we'll just nuke input # symbol and continue + #--! TRACKING + if tracking: + sym.endlineno = getattr(lookahead, 'lineno', sym.lineno) + sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos) + #--! TRACKING lookahead = None continue + + # Create the error symbol for the first time and make it the new lookahead symbol t = YaccSymbol() t.type = 'error' - if hasattr(lookahead,"lineno"): - t.lineno = lookahead.lineno + + if hasattr(lookahead, 'lineno'): + t.lineno = t.endlineno = lookahead.lineno + if hasattr(lookahead, 'lexpos'): + t.lexpos = t.endlexpos = lookahead.lexpos t.value = lookahead lookaheadstack.append(lookahead) lookahead = t else: - symstack.pop() + sym = symstack.pop() + #--! TRACKING + if tracking: + lookahead.lineno = sym.lineno + lookahead.lexpos = sym.lexpos + #--! TRACKING statestack.pop() - state = statestack[-1] # Potential bug fix + state = statestack[-1] continue # Call an error function here - raise RuntimeError("yacc: internal parser error!!!\n") + raise RuntimeError('yacc: internal parser error!!!\n') + + #--! parsedebug-end # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! # parseopt(). # - # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY. - # Edit the debug version above, then copy any modifications to the method - # below while removing #--! DEBUG sections. + # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY! + # This code is automatically generated by the ply/ygen.py script. Make + # changes to the parsedebug() method instead. # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + def parseopt(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): + #--! parseopt-start + lookahead = None # Current lookahead symbol + lookaheadstack = [] # Stack of lookahead symbols + actions = self.action # Local reference to action table (to avoid lookup on self.) + goto = self.goto # Local reference to goto table (to avoid lookup on self.) + prod = self.productions # Local reference to production list (to avoid lookup on self.) + defaulted_states = self.defaulted_states # Local reference to defaulted states + pslice = YaccProduction(None) # Production object passed to grammar rules + errorcount = 0 # Used during error recovery - def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): - lookahead = None # Current lookahead symbol - lookaheadstack = [ ] # Stack of lookahead symbols - actions = self.action # Local reference to action table (to avoid lookup on self.) - goto = self.goto # Local reference to goto table (to avoid lookup on self.) - prod = self.productions # Local reference to production list (to avoid lookup on self.) - pslice = YaccProduction(None) # Production object passed to grammar rules - errorcount = 0 # Used during error recovery # If no lexer was given, we will try to use the lex module if not lexer: - lex = load_ply_lex() + from . import lex lexer = lex.lexer - + # Set up the lexer and parser objects on pslice pslice.lexer = lexer pslice.parser = self @@ -617,16 +720,19 @@ def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): lexer.input(input) if tokenfunc is None: - # Tokenize function - get_token = lexer.token + # Tokenize function + get_token = lexer.token else: - get_token = tokenfunc + get_token = tokenfunc + + # Set the parser() token method (sometimes used in error recovery) + self.token = get_token # Set up the state and symbol stacks - statestack = [ ] # Stack of parsing states + statestack = [] # Stack of parsing states self.statestack = statestack - symstack = [ ] # Stack of grammar symbols + symstack = [] # Stack of grammar symbols self.symstack = symstack pslice.stack = symstack # Put in the production @@ -639,23 +745,28 @@ def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): sym.type = '$end' symstack.append(sym) state = 0 - while 1: + while True: # Get the next symbol on the input. If a lookahead symbol # is already set, we just use that. Otherwise, we'll pull # the next token off of the lookaheadstack or from the lexer - if not lookahead: - if not lookaheadstack: - lookahead = get_token() # Get the next token - else: - lookahead = lookaheadstack.pop() + + if state not in defaulted_states: if not lookahead: - lookahead = YaccSymbol() - lookahead.type = '$end' + if not lookaheadstack: + lookahead = get_token() # Get the next token + else: + lookahead = lookaheadstack.pop() + if not lookahead: + lookahead = YaccSymbol() + lookahead.type = '$end' + + # Check the action table + ltype = lookahead.type + t = actions[state].get(ltype) + else: + t = defaulted_states[state] - # Check the action table - ltype = lookahead.type - t = actions[state].get(ltype) if t is not None: if t > 0: @@ -663,11 +774,13 @@ def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): statestack.append(t) state = t + symstack.append(lookahead) lookahead = None # Decrease error count on successful shift - if errorcount: errorcount -=1 + if errorcount: + errorcount -= 1 continue if t < 0: @@ -681,61 +794,64 @@ def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): sym.type = pname # Production name sym.value = None + if plen: targ = symstack[-plen-1:] targ[0] = sym - # --! TRACKING + #--! TRACKING if tracking: - t1 = targ[1] - sym.lineno = t1.lineno - sym.lexpos = t1.lexpos - t1 = targ[-1] - sym.endlineno = getattr(t1,"endlineno",t1.lineno) - sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos) - - # --! TRACKING + t1 = targ[1] + sym.lineno = t1.lineno + sym.lexpos = t1.lexpos + t1 = targ[-1] + sym.endlineno = getattr(t1, 'endlineno', t1.lineno) + sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos) + #--! TRACKING # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated + # The code enclosed in this section is duplicated # below as a performance optimization. Make sure # changes get made in both locations. pslice.slice = targ - + try: # Call the grammar rule with our special slice object del symstack[-plen:] - del statestack[-plen:] + self.state = state p.callable(pslice) + del statestack[-plen:] symstack.append(sym) state = goto[statestack[-1]][pname] statestack.append(state) except SyntaxError: # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() + lookaheadstack.append(lookahead) # Save the current lookahead token + symstack.extend(targ[1:-1]) # Put the production slice back on the stack + statestack.pop() # Pop back one state (before the reduce) state = statestack[-1] sym.type = 'error' + sym.value = 'error' lookahead = sym errorcount = error_count - self.errorok = 0 + self.errorok = False + continue # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - + else: - # --! TRACKING + #--! TRACKING if tracking: - sym.lineno = lexer.lineno - sym.lexpos = lexer.lexpos - # --! TRACKING + sym.lineno = lexer.lineno + sym.lexpos = lexer.lexpos + #--! TRACKING - targ = [ sym ] + targ = [sym] # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated + # The code enclosed in this section is duplicated # above as a performance optimization. Make sure # changes get made in both locations. @@ -743,28 +859,32 @@ def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): try: # Call the grammar rule with our special slice object + self.state = state p.callable(pslice) symstack.append(sym) state = goto[statestack[-1]][pname] statestack.append(state) except SyntaxError: # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() + lookaheadstack.append(lookahead) # Save the current lookahead token + statestack.pop() # Pop back one state (before the reduce) state = statestack[-1] sym.type = 'error' + sym.value = 'error' lookahead = sym errorcount = error_count - self.errorok = 0 + self.errorok = False + continue # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! if t == 0: n = symstack[-1] - return getattr(n,"value",None) + result = getattr(n, 'value', None) + return result + + if t is None: - if t == None: # We have some kind of parsing error here. To handle # this, we are going to push the current token onto @@ -778,20 +898,15 @@ def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): # errorcount == 0. if errorcount == 0 or self.errorok: errorcount = error_count - self.errorok = 0 + self.errorok = False errtoken = lookahead if errtoken.type == '$end': errtoken = None # End of file! if self.errorfunc: - global errok,token,restart - errok = self.errok # Set some special functions available in error recovery - token = get_token - restart = self.restart - if errtoken and not hasattr(errtoken,'lexer'): + if errtoken and not hasattr(errtoken, 'lexer'): errtoken.lexer = lexer - tok = self.errorfunc(errtoken) - del errok, token, restart # Delete special functions - + self.state = state + tok = call_errorfunc(self.errorfunc, errtoken, self) if self.errorok: # User must have done some kind of panic # mode recovery on their own. The @@ -801,14 +916,16 @@ def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): continue else: if errtoken: - if hasattr(errtoken,"lineno"): lineno = lookahead.lineno - else: lineno = 0 + if hasattr(errtoken, 'lineno'): + lineno = lookahead.lineno + else: + lineno = 0 if lineno: - sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) + sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type)) else: - sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) + sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type) else: - sys.stderr.write("yacc: Parse error in input. EOF\n") + sys.stderr.write('yacc: Parse error in input. EOF\n') return else: @@ -839,47 +956,67 @@ def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): if sym.type == 'error': # Hmmm. Error is on top of stack, we'll just nuke input # symbol and continue + #--! TRACKING + if tracking: + sym.endlineno = getattr(lookahead, 'lineno', sym.lineno) + sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos) + #--! TRACKING lookahead = None continue + + # Create the error symbol for the first time and make it the new lookahead symbol t = YaccSymbol() t.type = 'error' - if hasattr(lookahead,"lineno"): - t.lineno = lookahead.lineno + + if hasattr(lookahead, 'lineno'): + t.lineno = t.endlineno = lookahead.lineno + if hasattr(lookahead, 'lexpos'): + t.lexpos = t.endlexpos = lookahead.lexpos t.value = lookahead lookaheadstack.append(lookahead) lookahead = t else: - symstack.pop() + sym = symstack.pop() + #--! TRACKING + if tracking: + lookahead.lineno = sym.lineno + lookahead.lexpos = sym.lexpos + #--! TRACKING statestack.pop() - state = statestack[-1] # Potential bug fix + state = statestack[-1] continue # Call an error function here - raise RuntimeError("yacc: internal parser error!!!\n") + raise RuntimeError('yacc: internal parser error!!!\n') + + #--! parseopt-end # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! # parseopt_notrack(). # - # Optimized version of parseopt() with line number tracking removed. - # DO NOT EDIT THIS CODE DIRECTLY. Copy the optimized version and remove - # code in the #--! TRACKING sections + # Optimized version of parseopt() with line number tracking removed. + # DO NOT EDIT THIS CODE DIRECTLY. This code is automatically generated + # by the ply/ygen.py script. Make changes to the parsedebug() method instead. # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): - lookahead = None # Current lookahead symbol - lookaheadstack = [ ] # Stack of lookahead symbols - actions = self.action # Local reference to action table (to avoid lookup on self.) - goto = self.goto # Local reference to goto table (to avoid lookup on self.) - prod = self.productions # Local reference to production list (to avoid lookup on self.) - pslice = YaccProduction(None) # Production object passed to grammar rules - errorcount = 0 # Used during error recovery + def parseopt_notrack(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): + #--! parseopt-notrack-start + lookahead = None # Current lookahead symbol + lookaheadstack = [] # Stack of lookahead symbols + actions = self.action # Local reference to action table (to avoid lookup on self.) + goto = self.goto # Local reference to goto table (to avoid lookup on self.) + prod = self.productions # Local reference to production list (to avoid lookup on self.) + defaulted_states = self.defaulted_states # Local reference to defaulted states + pslice = YaccProduction(None) # Production object passed to grammar rules + errorcount = 0 # Used during error recovery + # If no lexer was given, we will try to use the lex module if not lexer: - lex = load_ply_lex() + from . import lex lexer = lex.lexer - + # Set up the lexer and parser objects on pslice pslice.lexer = lexer pslice.parser = self @@ -889,16 +1026,19 @@ def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=Non lexer.input(input) if tokenfunc is None: - # Tokenize function - get_token = lexer.token + # Tokenize function + get_token = lexer.token else: - get_token = tokenfunc + get_token = tokenfunc + + # Set the parser() token method (sometimes used in error recovery) + self.token = get_token # Set up the state and symbol stacks - statestack = [ ] # Stack of parsing states + statestack = [] # Stack of parsing states self.statestack = statestack - symstack = [ ] # Stack of grammar symbols + symstack = [] # Stack of grammar symbols self.symstack = symstack pslice.stack = symstack # Put in the production @@ -911,23 +1051,28 @@ def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=Non sym.type = '$end' symstack.append(sym) state = 0 - while 1: + while True: # Get the next symbol on the input. If a lookahead symbol # is already set, we just use that. Otherwise, we'll pull # the next token off of the lookaheadstack or from the lexer - if not lookahead: - if not lookaheadstack: - lookahead = get_token() # Get the next token - else: - lookahead = lookaheadstack.pop() + + if state not in defaulted_states: if not lookahead: - lookahead = YaccSymbol() - lookahead.type = '$end' + if not lookaheadstack: + lookahead = get_token() # Get the next token + else: + lookahead = lookaheadstack.pop() + if not lookahead: + lookahead = YaccSymbol() + lookahead.type = '$end' + + # Check the action table + ltype = lookahead.type + t = actions[state].get(ltype) + else: + t = defaulted_states[state] - # Check the action table - ltype = lookahead.type - t = actions[state].get(ltype) if t is not None: if t > 0: @@ -935,11 +1080,13 @@ def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=Non statestack.append(t) state = t + symstack.append(lookahead) lookahead = None # Decrease error count on successful shift - if errorcount: errorcount -=1 + if errorcount: + errorcount -= 1 continue if t < 0: @@ -953,44 +1100,50 @@ def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=Non sym.type = pname # Production name sym.value = None + if plen: targ = symstack[-plen-1:] targ[0] = sym + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated + # The code enclosed in this section is duplicated # below as a performance optimization. Make sure # changes get made in both locations. pslice.slice = targ - + try: # Call the grammar rule with our special slice object del symstack[-plen:] - del statestack[-plen:] + self.state = state p.callable(pslice) + del statestack[-plen:] symstack.append(sym) state = goto[statestack[-1]][pname] statestack.append(state) except SyntaxError: # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() + lookaheadstack.append(lookahead) # Save the current lookahead token + symstack.extend(targ[1:-1]) # Put the production slice back on the stack + statestack.pop() # Pop back one state (before the reduce) state = statestack[-1] sym.type = 'error' + sym.value = 'error' lookahead = sym errorcount = error_count - self.errorok = 0 + self.errorok = False + continue # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - + else: - targ = [ sym ] + + targ = [sym] # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated + # The code enclosed in this section is duplicated # above as a performance optimization. Make sure # changes get made in both locations. @@ -998,28 +1151,32 @@ def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=Non try: # Call the grammar rule with our special slice object + self.state = state p.callable(pslice) symstack.append(sym) state = goto[statestack[-1]][pname] statestack.append(state) except SyntaxError: # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() + lookaheadstack.append(lookahead) # Save the current lookahead token + statestack.pop() # Pop back one state (before the reduce) state = statestack[-1] sym.type = 'error' + sym.value = 'error' lookahead = sym errorcount = error_count - self.errorok = 0 + self.errorok = False + continue # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! if t == 0: n = symstack[-1] - return getattr(n,"value",None) + result = getattr(n, 'value', None) + return result + + if t is None: - if t == None: # We have some kind of parsing error here. To handle # this, we are going to push the current token onto @@ -1033,20 +1190,15 @@ def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=Non # errorcount == 0. if errorcount == 0 or self.errorok: errorcount = error_count - self.errorok = 0 + self.errorok = False errtoken = lookahead if errtoken.type == '$end': errtoken = None # End of file! if self.errorfunc: - global errok,token,restart - errok = self.errok # Set some special functions available in error recovery - token = get_token - restart = self.restart - if errtoken and not hasattr(errtoken,'lexer'): + if errtoken and not hasattr(errtoken, 'lexer'): errtoken.lexer = lexer - tok = self.errorfunc(errtoken) - del errok, token, restart # Delete special functions - + self.state = state + tok = call_errorfunc(self.errorfunc, errtoken, self) if self.errorok: # User must have done some kind of panic # mode recovery on their own. The @@ -1056,14 +1208,16 @@ def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=Non continue else: if errtoken: - if hasattr(errtoken,"lineno"): lineno = lookahead.lineno - else: lineno = 0 + if hasattr(errtoken, 'lineno'): + lineno = lookahead.lineno + else: + lineno = 0 if lineno: - sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) + sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type)) else: - sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) + sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type) else: - sys.stderr.write("yacc: Parse error in input. EOF\n") + sys.stderr.write('yacc: Parse error in input. EOF\n') return else: @@ -1096,32 +1250,37 @@ def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=Non # symbol and continue lookahead = None continue + + # Create the error symbol for the first time and make it the new lookahead symbol t = YaccSymbol() t.type = 'error' - if hasattr(lookahead,"lineno"): - t.lineno = lookahead.lineno + + if hasattr(lookahead, 'lineno'): + t.lineno = t.endlineno = lookahead.lineno + if hasattr(lookahead, 'lexpos'): + t.lexpos = t.endlexpos = lookahead.lexpos t.value = lookahead lookaheadstack.append(lookahead) lookahead = t else: - symstack.pop() + sym = symstack.pop() statestack.pop() - state = statestack[-1] # Potential bug fix + state = statestack[-1] continue # Call an error function here - raise RuntimeError("yacc: internal parser error!!!\n") + raise RuntimeError('yacc: internal parser error!!!\n') + + #--! parseopt-notrack-end # ----------------------------------------------------------------------------- # === Grammar Representation === # # The following functions, classes, and variables are used to represent and -# manipulate the rules that make up a grammar. +# manipulate the rules that make up a grammar. # ----------------------------------------------------------------------------- -import re - # regex matching identifiers _is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') @@ -1131,7 +1290,7 @@ def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=Non # This class stores the raw information about a single production or grammar rule. # A grammar rule refers to a specification such as this: # -# expr : expr PLUS term +# expr : expr PLUS term # # Here are the basic attributes defined on all productions # @@ -1151,7 +1310,7 @@ def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=Non class Production(object): reduced = 0 - def __init__(self,number,name,prod,precedence=('right',0),func=None,file='',line=0): + def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0): self.name = name self.prod = tuple(prod) self.number = number @@ -1162,11 +1321,11 @@ def __init__(self,number,name,prod,precedence=('right',0),func=None,file='',line self.prec = precedence # Internal settings used during table construction - + self.len = len(self.prod) # Length of the production # Create a list of unique production symbols used in the production - self.usyms = [ ] + self.usyms = [] for s in self.prod: if s not in self.usyms: self.usyms.append(s) @@ -1177,15 +1336,15 @@ def __init__(self,number,name,prod,precedence=('right',0),func=None,file='',line # Create a string representation if self.prod: - self.str = "%s -> %s" % (self.name," ".join(self.prod)) + self.str = '%s -> %s' % (self.name, ' '.join(self.prod)) else: - self.str = "%s -> " % self.name + self.str = '%s -> ' % self.name def __str__(self): return self.str def __repr__(self): - return "Production("+str(self)+")" + return 'Production(' + str(self) + ')' def __len__(self): return len(self.prod) @@ -1193,28 +1352,27 @@ def __len__(self): def __nonzero__(self): return 1 - def __getitem__(self,index): + def __getitem__(self, index): return self.prod[index] - - # Return the nth lr_item from the production (or None if at the end) - def lr_item(self,n): - if n > len(self.prod): return None - p = LRItem(self,n) - # Precompute the list of productions immediately following. Hack. Remove later + # Return the nth lr_item from the production (or None if at the end) + def lr_item(self, n): + if n > len(self.prod): + return None + p = LRItem(self, n) + # Precompute the list of productions immediately following. try: - p.lr_after = Prodnames[p.prod[n+1]] - except (IndexError,KeyError): + p.lr_after = self.Prodnames[p.prod[n+1]] + except (IndexError, KeyError): p.lr_after = [] try: p.lr_before = p.prod[n-1] except IndexError: p.lr_before = None - return p - + # Bind the production function name to a callable - def bind(self,pdict): + def bind(self, pdict): if self.func: self.callable = pdict[self.func] @@ -1223,7 +1381,7 @@ def bind(self,pdict): # actually used by the LR parsing engine, plus some additional # debugging information. class MiniProduction(object): - def __init__(self,str,name,len,func,file,line): + def __init__(self, str, name, len, func, file, line): self.name = name self.len = len self.func = func @@ -1231,13 +1389,15 @@ def __init__(self,str,name,len,func,file,line): self.file = file self.line = line self.str = str + def __str__(self): return self.str + def __repr__(self): - return "MiniProduction(%s)" % self.str + return 'MiniProduction(%s)' % self.str # Bind the production function name to a callable - def bind(self,pdict): + def bind(self, pdict): if self.func: self.callable = pdict[self.func] @@ -1246,9 +1406,9 @@ def bind(self,pdict): # class LRItem # # This class represents a specific stage of parsing a production rule. For -# example: +# example: # -# expr : expr . PLUS term +# expr : expr . PLUS term # # In the above, the "." represents the current location of the parse. Here # basic attributes: @@ -1267,26 +1427,26 @@ def bind(self,pdict): # ----------------------------------------------------------------------------- class LRItem(object): - def __init__(self,p,n): + def __init__(self, p, n): self.name = p.name self.prod = list(p.prod) self.number = p.number self.lr_index = n - self.lookaheads = { } - self.prod.insert(n,".") + self.lookaheads = {} + self.prod.insert(n, '.') self.prod = tuple(self.prod) self.len = len(self.prod) self.usyms = p.usyms def __str__(self): if self.prod: - s = "%s -> %s" % (self.name," ".join(self.prod)) + s = '%s -> %s' % (self.name, ' '.join(self.prod)) else: - s = "%s -> " % self.name + s = '%s -> ' % self.name return s def __repr__(self): - return "LRItem("+str(self)+")" + return 'LRItem(' + str(self) + ')' # ----------------------------------------------------------------------------- # rightmost_terminal() @@ -1309,21 +1469,22 @@ def rightmost_terminal(symbols, terminals): # This data is used for critical parts of the table generation process later. # ----------------------------------------------------------------------------- -class GrammarError(YaccError): pass +class GrammarError(YaccError): + pass class Grammar(object): - def __init__(self,terminals): + def __init__(self, terminals): self.Productions = [None] # A list of all of the productions. The first # entry is always reserved for the purpose of # building an augmented grammar - self.Prodnames = { } # A dictionary mapping the names of nonterminals to a list of all + self.Prodnames = {} # A dictionary mapping the names of nonterminals to a list of all # productions of that nonterminal. - self.Prodmap = { } # A dictionary that is only used to detect duplicate + self.Prodmap = {} # A dictionary that is only used to detect duplicate # productions. - self.Terminals = { } # A dictionary mapping the names of terminal symbols to a + self.Terminals = {} # A dictionary mapping the names of terminal symbols to a # list of the rules where they are used. for term in terminals: @@ -1331,17 +1492,17 @@ def __init__(self,terminals): self.Terminals['error'] = [] - self.Nonterminals = { } # A dictionary mapping names of nonterminals to a list + self.Nonterminals = {} # A dictionary mapping names of nonterminals to a list # of rule numbers where they are used. - self.First = { } # A dictionary of precomputed FIRST(x) symbols + self.First = {} # A dictionary of precomputed FIRST(x) symbols - self.Follow = { } # A dictionary of precomputed FOLLOW(x) symbols + self.Follow = {} # A dictionary of precomputed FOLLOW(x) symbols - self.Precedence = { } # Precedence rules for each terminal. Contains tuples of the + self.Precedence = {} # Precedence rules for each terminal. Contains tuples of the # form ('right',level) or ('nonassoc', level) or ('left',level) - self.UsedPrecedence = { } # Precedence rules that were actually used by the grammer. + self.UsedPrecedence = set() # Precedence rules that were actually used by the grammer. # This is only used to provide error checking and to generate # a warning about unused precedence rules. @@ -1351,7 +1512,7 @@ def __init__(self,terminals): def __len__(self): return len(self.Productions) - def __getitem__(self,index): + def __getitem__(self, index): return self.Productions[index] # ----------------------------------------------------------------------------- @@ -1362,14 +1523,14 @@ def __getitem__(self,index): # # ----------------------------------------------------------------------------- - def set_precedence(self,term,assoc,level): - assert self.Productions == [None],"Must call set_precedence() before add_production()" + def set_precedence(self, term, assoc, level): + assert self.Productions == [None], 'Must call set_precedence() before add_production()' if term in self.Precedence: - raise GrammarError("Precedence already specified for terminal '%s'" % term) - if assoc not in ['left','right','nonassoc']: + raise GrammarError('Precedence already specified for terminal %r' % term) + if assoc not in ['left', 'right', 'nonassoc']: raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'") - self.Precedence[term] = (assoc,level) - + self.Precedence[term] = (assoc, level) + # ----------------------------------------------------------------------------- # add_production() # @@ -1387,72 +1548,74 @@ def set_precedence(self,term,assoc,level): # are valid and that %prec is used correctly. # ----------------------------------------------------------------------------- - def add_production(self,prodname,syms,func=None,file='',line=0): + def add_production(self, prodname, syms, func=None, file='', line=0): if prodname in self.Terminals: - raise GrammarError("%s:%d: Illegal rule name '%s'. Already defined as a token" % (file,line,prodname)) + raise GrammarError('%s:%d: Illegal rule name %r. Already defined as a token' % (file, line, prodname)) if prodname == 'error': - raise GrammarError("%s:%d: Illegal rule name '%s'. error is a reserved word" % (file,line,prodname)) + raise GrammarError('%s:%d: Illegal rule name %r. error is a reserved word' % (file, line, prodname)) if not _is_identifier.match(prodname): - raise GrammarError("%s:%d: Illegal rule name '%s'" % (file,line,prodname)) + raise GrammarError('%s:%d: Illegal rule name %r' % (file, line, prodname)) - # Look for literal tokens - for n,s in enumerate(syms): + # Look for literal tokens + for n, s in enumerate(syms): if s[0] in "'\"": - try: - c = eval(s) - if (len(c) > 1): - raise GrammarError("%s:%d: Literal token %s in rule '%s' may only be a single character" % (file,line,s, prodname)) - if not c in self.Terminals: - self.Terminals[c] = [] - syms[n] = c - continue - except SyntaxError: - pass + try: + c = eval(s) + if (len(c) > 1): + raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' % + (file, line, s, prodname)) + if c not in self.Terminals: + self.Terminals[c] = [] + syms[n] = c + continue + except SyntaxError: + pass if not _is_identifier.match(s) and s != '%prec': - raise GrammarError("%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname)) - + raise GrammarError('%s:%d: Illegal name %r in rule %r' % (file, line, s, prodname)) + # Determine the precedence level if '%prec' in syms: if syms[-1] == '%prec': - raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file,line)) + raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line)) if syms[-2] != '%prec': - raise GrammarError("%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file,line)) + raise GrammarError('%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule' % + (file, line)) precname = syms[-1] - prodprec = self.Precedence.get(precname,None) + prodprec = self.Precedence.get(precname) if not prodprec: - raise GrammarError("%s:%d: Nothing known about the precedence of '%s'" % (file,line,precname)) + raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname)) else: - self.UsedPrecedence[precname] = 1 + self.UsedPrecedence.add(precname) del syms[-2:] # Drop %prec from the rule else: # If no %prec, precedence is determined by the rightmost terminal symbol - precname = rightmost_terminal(syms,self.Terminals) - prodprec = self.Precedence.get(precname,('right',0)) - + precname = rightmost_terminal(syms, self.Terminals) + prodprec = self.Precedence.get(precname, ('right', 0)) + # See if the rule is already in the rulemap - map = "%s -> %s" % (prodname,syms) + map = '%s -> %s' % (prodname, syms) if map in self.Prodmap: m = self.Prodmap[map] - raise GrammarError("%s:%d: Duplicate rule %s. " % (file,line, m) + - "Previous definition at %s:%d" % (m.file, m.line)) + raise GrammarError('%s:%d: Duplicate rule %s. ' % (file, line, m) + + 'Previous definition at %s:%d' % (m.file, m.line)) # From this point on, everything is valid. Create a new Production instance pnumber = len(self.Productions) - if not prodname in self.Nonterminals: - self.Nonterminals[prodname] = [ ] + if prodname not in self.Nonterminals: + self.Nonterminals[prodname] = [] # Add the production number to Terminals and Nonterminals for t in syms: if t in self.Terminals: self.Terminals[t].append(pnumber) else: - if not t in self.Nonterminals: - self.Nonterminals[t] = [ ] + if t not in self.Nonterminals: + self.Nonterminals[t] = [] self.Nonterminals[t].append(pnumber) # Create a production and add it to the list of productions - p = Production(pnumber,prodname,syms,prodprec,func,file,line) + p = Production(pnumber, prodname, syms, prodprec, func, file, line) self.Productions.append(p) self.Prodmap[map] = p @@ -1460,22 +1623,21 @@ def add_production(self,prodname,syms,func=None,file='',line=0): try: self.Prodnames[prodname].append(p) except KeyError: - self.Prodnames[prodname] = [ p ] - return 0 + self.Prodnames[prodname] = [p] # ----------------------------------------------------------------------------- # set_start() # - # Sets the starting symbol and creates the augmented grammar. Production + # Sets the starting symbol and creates the augmented grammar. Production # rule 0 is S' -> start where start is the start symbol. # ----------------------------------------------------------------------------- - def set_start(self,start=None): + def set_start(self, start=None): if not start: start = self.Productions[1].name if start not in self.Nonterminals: - raise GrammarError("start symbol %s undefined" % start) - self.Productions[0] = Production(0,"S'",[start]) + raise GrammarError('start symbol %s undefined' % start) + self.Productions[0] = Production(0, "S'", [start]) self.Nonterminals[start].append(0) self.Start = start @@ -1487,26 +1649,20 @@ def set_start(self,start=None): # ----------------------------------------------------------------------------- def find_unreachable(self): - + # Mark all symbols that are reachable from a symbol s def mark_reachable_from(s): - if reachable[s]: - # We've already reached symbol s. + if s in reachable: return - reachable[s] = 1 - for p in self.Prodnames.get(s,[]): + reachable.add(s) + for p in self.Prodnames.get(s, []): for r in p.prod: mark_reachable_from(r) - reachable = { } - for s in list(self.Terminals) + list(self.Nonterminals): - reachable[s] = 0 - - mark_reachable_from( self.Productions[0].prod[0] ) + reachable = set() + mark_reachable_from(self.Productions[0].prod[0]) + return [s for s in self.Nonterminals if s not in reachable] - return [s for s in list(self.Nonterminals) - if not reachable[s]] - # ----------------------------------------------------------------------------- # infinite_cycles() # @@ -1520,20 +1676,20 @@ def infinite_cycles(self): # Terminals: for t in self.Terminals: - terminates[t] = 1 + terminates[t] = True - terminates['$end'] = 1 + terminates['$end'] = True # Nonterminals: # Initialize to false: for n in self.Nonterminals: - terminates[n] = 0 + terminates[n] = False # Then propagate termination until no change: - while 1: - some_change = 0 - for (n,pl) in self.Prodnames.items(): + while True: + some_change = False + for (n, pl) in self.Prodnames.items(): # Nonterminal n terminates iff any of its productions terminates. for p in pl: # Production p terminates iff all of its rhs symbols terminate. @@ -1541,19 +1697,19 @@ def infinite_cycles(self): if not terminates[s]: # The symbol s does not terminate, # so production p does not terminate. - p_terminates = 0 + p_terminates = False break else: # didn't break from the loop, # so every symbol s terminates # so production p terminates. - p_terminates = 1 + p_terminates = True if p_terminates: # symbol n terminates! if not terminates[n]: - terminates[n] = 1 - some_change = 1 + terminates[n] = True + some_change = True # Don't need to consider any more productions for this n. break @@ -1561,9 +1717,9 @@ def infinite_cycles(self): break infinite = [] - for (s,term) in terminates.items(): + for (s, term) in terminates.items(): if not term: - if not s in self.Prodnames and not s in self.Terminals and s != 'error': + if s not in self.Prodnames and s not in self.Terminals and s != 'error': # s is used-but-not-defined, and we've already warned of that, # so it would be overkill to say that it's also non-terminating. pass @@ -1572,22 +1728,22 @@ def infinite_cycles(self): return infinite - # ----------------------------------------------------------------------------- # undefined_symbols() # # Find all symbols that were used the grammar, but not defined as tokens or # grammar rules. Returns a list of tuples (sym, prod) where sym in the symbol - # and prod is the production where the symbol was used. + # and prod is the production where the symbol was used. # ----------------------------------------------------------------------------- def undefined_symbols(self): result = [] for p in self.Productions: - if not p: continue + if not p: + continue for s in p.prod: - if not s in self.Prodnames and not s in self.Terminals and s != 'error': - result.append((s,p)) + if s not in self.Prodnames and s not in self.Terminals and s != 'error': + result.append((s, p)) return result # ----------------------------------------------------------------------------- @@ -1598,7 +1754,7 @@ def undefined_symbols(self): # ----------------------------------------------------------------------------- def unused_terminals(self): unused_tok = [] - for s,v in self.Terminals.items(): + for s, v in self.Terminals.items(): if s != 'error' and not v: unused_tok.append(s) @@ -1613,7 +1769,7 @@ def unused_terminals(self): def unused_rules(self): unused_prod = [] - for s,v in self.Nonterminals.items(): + for s, v in self.Nonterminals.items(): if not v: p = self.Prodnames[s][0] unused_prod.append(p) @@ -1625,15 +1781,15 @@ def unused_rules(self): # Returns a list of tuples (term,precedence) corresponding to precedence # rules that were never used by the grammar. term is the name of the terminal # on which precedence was applied and precedence is a string such as 'left' or - # 'right' corresponding to the type of precedence. + # 'right' corresponding to the type of precedence. # ----------------------------------------------------------------------------- def unused_precedence(self): unused = [] for termname in self.Precedence: if not (termname in self.Terminals or termname in self.UsedPrecedence): - unused.append((termname,self.Precedence[termname][0])) - + unused.append((termname, self.Precedence[termname][0])) + return unused # ------------------------------------------------------------------------- @@ -1644,19 +1800,20 @@ def unused_precedence(self): # During execution of compute_first1, the result may be incomplete. # Afterward (e.g., when called from compute_follow()), it will be complete. # ------------------------------------------------------------------------- - def _first(self,beta): + def _first(self, beta): # We are computing First(x1,x2,x3,...,xn) - result = [ ] + result = [] for x in beta: - x_produces_empty = 0 + x_produces_empty = False # Add all the non- symbols of First[x] to the result. for f in self.First[x]: if f == '': - x_produces_empty = 1 + x_produces_empty = True else: - if f not in result: result.append(f) + if f not in result: + result.append(f) if x_produces_empty: # We have to consider the next x in beta, @@ -1695,17 +1852,17 @@ def compute_first(self): self.First[n] = [] # Then propagate symbols until no change: - while 1: - some_change = 0 + while True: + some_change = False for n in self.Nonterminals: for p in self.Prodnames[n]: for f in self._first(p.prod): if f not in self.First[n]: - self.First[n].append( f ) - some_change = 1 + self.First[n].append(f) + some_change = True if not some_change: break - + return self.First # --------------------------------------------------------------------- @@ -1715,7 +1872,7 @@ def compute_first(self): # follow set is the set of all symbols that might follow a given # non-terminal. See the Dragon book, 2nd Ed. p. 189. # --------------------------------------------------------------------- - def compute_follow(self,start=None): + def compute_follow(self, start=None): # If already computed, return the result if self.Follow: return self.Follow @@ -1726,36 +1883,36 @@ def compute_follow(self,start=None): # Add '$end' to the follow list of the start symbol for k in self.Nonterminals: - self.Follow[k] = [ ] + self.Follow[k] = [] if not start: start = self.Productions[1].name - self.Follow[start] = [ '$end' ] + self.Follow[start] = ['$end'] - while 1: - didadd = 0 + while True: + didadd = False for p in self.Productions[1:]: # Here is the production set - for i in range(len(p.prod)): - B = p.prod[i] + for i, B in enumerate(p.prod): if B in self.Nonterminals: # Okay. We got a non-terminal in a production fst = self._first(p.prod[i+1:]) - hasempty = 0 + hasempty = False for f in fst: if f != '' and f not in self.Follow[B]: self.Follow[B].append(f) - didadd = 1 + didadd = True if f == '': - hasempty = 1 + hasempty = True if hasempty or i == (len(p.prod)-1): # Add elements of follow(a) to follow(b) for f in self.Follow[p.name]: if f not in self.Follow[B]: self.Follow[B].append(f) - didadd = 1 - if not didadd: break + didadd = True + if not didadd: + break return self.Follow @@ -1779,15 +1936,15 @@ def build_lritems(self): lastlri = p i = 0 lr_items = [] - while 1: + while True: if i > len(p): lri = None else: - lri = LRItem(p,i) + lri = LRItem(p, i) # Precompute the list of productions immediately following try: lri.lr_after = self.Prodnames[lri.prod[i+1]] - except (IndexError,KeyError): + except (IndexError, KeyError): lri.lr_after = [] try: lri.lr_before = lri.prod[i-1] @@ -1795,7 +1952,8 @@ def build_lritems(self): lri.lr_before = None lastlri.lr_next = lri - if not lri: break + if not lri: + break lr_items.append(lri) lastlri = lri i += 1 @@ -1804,12 +1962,13 @@ def build_lritems(self): # ----------------------------------------------------------------------------- # == Class LRTable == # -# This basic class represents a basic table of LR parsing information. +# This basic class represents a basic table of LR parsing information. # Methods for generating the tables are not defined here. They are defined # in the derived class LRGeneratedTable. # ----------------------------------------------------------------------------- -class VersionError(YaccError): pass +class VersionError(YaccError): + pass class LRTable(object): def __init__(self): @@ -1818,19 +1977,15 @@ def __init__(self): self.lr_productions = None self.lr_method = None - def read_table(self,module): - if isinstance(module,types.ModuleType): + def read_table(self, module): + if isinstance(module, types.ModuleType): parsetab = module else: - if sys.version_info[0] < 3: - exec("import %s as parsetab" % module) - else: - env = { } - exec("import %s as parsetab" % module, env, env) - parsetab = env['parsetab'] + exec('import %s' % module) + parsetab = sys.modules[module] if parsetab._tabversion != __tabversion__: - raise VersionError("yacc table file version is out of date") + raise VersionError('yacc table file version is out of date') self.lr_action = parsetab._lr_action self.lr_goto = parsetab._lr_goto @@ -1842,17 +1997,20 @@ def read_table(self,module): self.lr_method = parsetab._lr_method return parsetab._lr_signature - def read_pickle(self,filename): + def read_pickle(self, filename): try: import cPickle as pickle except ImportError: import pickle - in_f = open(filename,"rb") + if not os.path.exists(filename): + raise ImportError + + in_f = open(filename, 'rb') tabversion = pickle.load(in_f) if tabversion != __tabversion__: - raise VersionError("yacc table file version is out of date") + raise VersionError('yacc table file version is out of date') self.lr_method = pickle.load(in_f) signature = pickle.load(in_f) self.lr_action = pickle.load(in_f) @@ -1867,14 +2025,15 @@ def read_pickle(self,filename): return signature # Bind all production function names to callable objects in pdict - def bind_callables(self,pdict): + def bind_callables(self, pdict): for p in self.lr_productions: p.bind(pdict) - + + # ----------------------------------------------------------------------------- # === LR Generator === # -# The following classes and functions are used to generate LR parsing tables on +# The following classes and functions are used to generate LR parsing tables on # a grammar. # ----------------------------------------------------------------------------- @@ -1895,17 +2054,18 @@ def bind_callables(self,pdict): # FP - Set-valued function # ------------------------------------------------------------------------------ -def digraph(X,R,FP): - N = { } +def digraph(X, R, FP): + N = {} for x in X: - N[x] = 0 + N[x] = 0 stack = [] - F = { } + F = {} for x in X: - if N[x] == 0: traverse(x,N,stack,F,X,R,FP) + if N[x] == 0: + traverse(x, N, stack, F, X, R, FP) return F -def traverse(x,N,stack,F,X,R,FP): +def traverse(x, N, stack, F, X, R, FP): stack.append(x) d = len(stack) N[x] = d @@ -1914,20 +2074,22 @@ def traverse(x,N,stack,F,X,R,FP): rel = R(x) # Get y's related to x for y in rel: if N[y] == 0: - traverse(y,N,stack,F,X,R,FP) - N[x] = min(N[x],N[y]) - for a in F.get(y,[]): - if a not in F[x]: F[x].append(a) + traverse(y, N, stack, F, X, R, FP) + N[x] = min(N[x], N[y]) + for a in F.get(y, []): + if a not in F[x]: + F[x].append(a) if N[x] == d: - N[stack[-1]] = MAXINT - F[stack[-1]] = F[x] - element = stack.pop() - while element != x: - N[stack[-1]] = MAXINT - F[stack[-1]] = F[x] - element = stack.pop() + N[stack[-1]] = MAXINT + F[stack[-1]] = F[x] + element = stack.pop() + while element != x: + N[stack[-1]] = MAXINT + F[stack[-1]] = F[x] + element = stack.pop() -class LALRError(YaccError): pass +class LALRError(YaccError): + pass # ----------------------------------------------------------------------------- # == LRGeneratedTable == @@ -1937,9 +2099,9 @@ class LALRError(YaccError): pass # ----------------------------------------------------------------------------- class LRGeneratedTable(LRTable): - def __init__(self,grammar,method='LALR',log=None): - if method not in ['SLR','LALR']: - raise LALRError("Unsupported method %s" % method) + def __init__(self, grammar, method='LALR', log=None): + if method not in ['SLR', 'LALR']: + raise LALRError('Unsupported method %s' % method) self.grammar = grammar self.lr_method = method @@ -1974,21 +2136,22 @@ def __init__(self,grammar,method='LALR',log=None): # Compute the LR(0) closure operation on I, where I is a set of LR(0) items. - def lr0_closure(self,I): + def lr0_closure(self, I): self._add_count += 1 # Add everything in I to J J = I[:] - didadd = 1 + didadd = True while didadd: - didadd = 0 + didadd = False for j in J: for x in j.lr_after: - if getattr(x,"lr0_added",0) == self._add_count: continue + if getattr(x, 'lr0_added', 0) == self._add_count: + continue # Add B --> .G to J J.append(x.lr_next) x.lr0_added = self._add_count - didadd = 1 + didadd = True return J @@ -1999,43 +2162,43 @@ def lr0_closure(self,I): # objects). With uniqueness, we can later do fast set comparisons using # id(obj) instead of element-wise comparison. - def lr0_goto(self,I,x): + def lr0_goto(self, I, x): # First we look for a previously cached entry - g = self.lr_goto_cache.get((id(I),x),None) - if g: return g + g = self.lr_goto_cache.get((id(I), x)) + if g: + return g # Now we generate the goto set in a way that guarantees uniqueness # of the result - s = self.lr_goto_cache.get(x,None) + s = self.lr_goto_cache.get(x) if not s: - s = { } + s = {} self.lr_goto_cache[x] = s - gs = [ ] + gs = [] for p in I: n = p.lr_next if n and n.lr_before == x: - s1 = s.get(id(n),None) + s1 = s.get(id(n)) if not s1: - s1 = { } + s1 = {} s[id(n)] = s1 gs.append(n) s = s1 - g = s.get('$end',None) + g = s.get('$end') if not g: if gs: g = self.lr0_closure(gs) s['$end'] = g else: s['$end'] = gs - self.lr_goto_cache[(id(I),x)] = g + self.lr_goto_cache[(id(I), x)] = g return g # Compute the LR(0) sets of item function def lr0_items(self): - - C = [ self.lr0_closure([self.grammar.Productions[0].lr_next]) ] + C = [self.lr0_closure([self.grammar.Productions[0].lr_next])] i = 0 for I in C: self.lr0_cidhash[id(I)] = i @@ -2048,15 +2211,15 @@ def lr0_items(self): i += 1 # Collect all of the symbols that could possibly be in the goto(I,X) sets - asyms = { } + asyms = {} for ii in I: for s in ii.usyms: asyms[s] = None for x in asyms: - g = self.lr0_goto(I,x) - if not g: continue - if id(g) in self.lr0_cidhash: continue + g = self.lr0_goto(I, x) + if not g or id(g) in self.lr0_cidhash: + continue self.lr0_cidhash[id(g)] = len(C) C.append(g) @@ -2091,19 +2254,21 @@ def lr0_items(self): # ----------------------------------------------------------------------------- def compute_nullable_nonterminals(self): - nullable = {} + nullable = set() num_nullable = 0 - while 1: - for p in self.grammar.Productions[1:]: - if p.len == 0: - nullable[p.name] = 1 + while True: + for p in self.grammar.Productions[1:]: + if p.len == 0: + nullable.add(p.name) continue - for t in p.prod: - if not t in nullable: break - else: - nullable[p.name] = 1 - if len(nullable) == num_nullable: break - num_nullable = len(nullable) + for t in p.prod: + if t not in nullable: + break + else: + nullable.add(p.name) + if len(nullable) == num_nullable: + break + num_nullable = len(nullable) return nullable # ----------------------------------------------------------------------------- @@ -2117,16 +2282,16 @@ def compute_nullable_nonterminals(self): # The input C is the set of LR(0) items. # ----------------------------------------------------------------------------- - def find_nonterminal_transitions(self,C): - trans = [] - for state in range(len(C)): - for p in C[state]: - if p.lr_index < p.len - 1: - t = (state,p.prod[p.lr_index+1]) - if t[1] in self.grammar.Nonterminals: - if t not in trans: trans.append(t) - state = state + 1 - return trans + def find_nonterminal_transitions(self, C): + trans = [] + for stateno, state in enumerate(C): + for p in state: + if p.lr_index < p.len - 1: + t = (stateno, p.prod[p.lr_index+1]) + if t[1] in self.grammar.Nonterminals: + if t not in trans: + trans.append(t) + return trans # ----------------------------------------------------------------------------- # dr_relation() @@ -2137,21 +2302,21 @@ def find_nonterminal_transitions(self,C): # Returns a list of terminals. # ----------------------------------------------------------------------------- - def dr_relation(self,C,trans,nullable): - dr_set = { } - state,N = trans + def dr_relation(self, C, trans, nullable): + state, N = trans terms = [] - g = self.lr0_goto(C[state],N) + g = self.lr0_goto(C[state], N) for p in g: - if p.lr_index < p.len - 1: - a = p.prod[p.lr_index+1] - if a in self.grammar.Terminals: - if a not in terms: terms.append(a) + if p.lr_index < p.len - 1: + a = p.prod[p.lr_index+1] + if a in self.grammar.Terminals: + if a not in terms: + terms.append(a) # This extra bit is to handle the start state if state == 0 and N == self.grammar.Productions[0].prod[0]: - terms.append('$end') + terms.append('$end') return terms @@ -2161,18 +2326,18 @@ def dr_relation(self,C,trans,nullable): # Computes the READS() relation (p,A) READS (t,C). # ----------------------------------------------------------------------------- - def reads_relation(self,C, trans, empty): + def reads_relation(self, C, trans, empty): # Look for empty transitions rel = [] state, N = trans - g = self.lr0_goto(C[state],N) - j = self.lr0_cidhash.get(id(g),-1) + g = self.lr0_goto(C[state], N) + j = self.lr0_cidhash.get(id(g), -1) for p in g: if p.lr_index < p.len - 1: - a = p.prod[p.lr_index + 1] - if a in empty: - rel.append((j,a)) + a = p.prod[p.lr_index + 1] + if a in empty: + rel.append((j, a)) return rel @@ -2204,8 +2369,7 @@ def reads_relation(self,C, trans, empty): # # ----------------------------------------------------------------------------- - def compute_lookback_includes(self,C,trans,nullable): - + def compute_lookback_includes(self, C, trans, nullable): lookdict = {} # Dictionary of lookback relations includedict = {} # Dictionary of include relations @@ -2215,11 +2379,12 @@ def compute_lookback_includes(self,C,trans,nullable): dtrans[t] = 1 # Loop over all transitions and compute lookbacks and includes - for state,N in trans: + for state, N in trans: lookb = [] includes = [] for p in C[state]: - if p.name != N: continue + if p.name != N: + continue # Okay, we have a name match. We now follow the production all the way # through the state machine until we get the . on the right hand side @@ -2227,44 +2392,50 @@ def compute_lookback_includes(self,C,trans,nullable): lr_index = p.lr_index j = state while lr_index < p.len - 1: - lr_index = lr_index + 1 - t = p.prod[lr_index] - - # Check to see if this symbol and state are a non-terminal transition - if (j,t) in dtrans: - # Yes. Okay, there is some chance that this is an includes relation - # the only way to know for certain is whether the rest of the - # production derives empty - - li = lr_index + 1 - while li < p.len: - if p.prod[li] in self.grammar.Terminals: break # No forget it - if not p.prod[li] in nullable: break - li = li + 1 - else: - # Appears to be a relation between (j,t) and (state,N) - includes.append((j,t)) - - g = self.lr0_goto(C[j],t) # Go to next set - j = self.lr0_cidhash.get(id(g),-1) # Go to next state + lr_index = lr_index + 1 + t = p.prod[lr_index] + + # Check to see if this symbol and state are a non-terminal transition + if (j, t) in dtrans: + # Yes. Okay, there is some chance that this is an includes relation + # the only way to know for certain is whether the rest of the + # production derives empty + + li = lr_index + 1 + while li < p.len: + if p.prod[li] in self.grammar.Terminals: + break # No forget it + if p.prod[li] not in nullable: + break + li = li + 1 + else: + # Appears to be a relation between (j,t) and (state,N) + includes.append((j, t)) + + g = self.lr0_goto(C[j], t) # Go to next set + j = self.lr0_cidhash.get(id(g), -1) # Go to next state # When we get here, j is the final state, now we have to locate the production for r in C[j]: - if r.name != p.name: continue - if r.len != p.len: continue - i = 0 - # This look is comparing a production ". A B C" with "A B C ." - while i < r.lr_index: - if r.prod[i] != p.prod[i+1]: break - i = i + 1 - else: - lookb.append((j,r)) + if r.name != p.name: + continue + if r.len != p.len: + continue + i = 0 + # This look is comparing a production ". A B C" with "A B C ." + while i < r.lr_index: + if r.prod[i] != p.prod[i+1]: + break + i = i + 1 + else: + lookb.append((j, r)) for i in includes: - if not i in includedict: includedict[i] = [] - includedict[i].append((state,N)) - lookdict[(state,N)] = lookb + if i not in includedict: + includedict[i] = [] + includedict[i].append((state, N)) + lookdict[(state, N)] = lookb - return lookdict,includedict + return lookdict, includedict # ----------------------------------------------------------------------------- # compute_read_sets() @@ -2278,10 +2449,10 @@ def compute_lookback_includes(self,C,trans,nullable): # Returns a set containing the read sets # ----------------------------------------------------------------------------- - def compute_read_sets(self,C, ntrans, nullable): - FP = lambda x: self.dr_relation(C,x,nullable) - R = lambda x: self.reads_relation(C,x,nullable) - F = digraph(ntrans,R,FP) + def compute_read_sets(self, C, ntrans, nullable): + FP = lambda x: self.dr_relation(C, x, nullable) + R = lambda x: self.reads_relation(C, x, nullable) + F = digraph(ntrans, R, FP) return F # ----------------------------------------------------------------------------- @@ -2300,11 +2471,11 @@ def compute_read_sets(self,C, ntrans, nullable): # Returns a set containing the follow sets # ----------------------------------------------------------------------------- - def compute_follow_sets(self,ntrans,readsets,inclsets): - FP = lambda x: readsets[x] - R = lambda x: inclsets.get(x,[]) - F = digraph(ntrans,R,FP) - return F + def compute_follow_sets(self, ntrans, readsets, inclsets): + FP = lambda x: readsets[x] + R = lambda x: inclsets.get(x, []) + F = digraph(ntrans, R, FP) + return F # ----------------------------------------------------------------------------- # add_lookaheads() @@ -2318,15 +2489,16 @@ def compute_follow_sets(self,ntrans,readsets,inclsets): # in the lookbacks set # ----------------------------------------------------------------------------- - def add_lookaheads(self,lookbacks,followset): - for trans,lb in lookbacks.items(): + def add_lookaheads(self, lookbacks, followset): + for trans, lb in lookbacks.items(): # Loop over productions in lookback - for state,p in lb: - if not state in p.lookaheads: - p.lookaheads[state] = [] - f = followset.get(trans,[]) - for a in f: - if a not in p.lookaheads[state]: p.lookaheads[state].append(a) + for state, p in lb: + if state not in p.lookaheads: + p.lookaheads[state] = [] + f = followset.get(trans, []) + for a in f: + if a not in p.lookaheads[state]: + p.lookaheads[state].append(a) # ----------------------------------------------------------------------------- # add_lalr_lookaheads() @@ -2335,7 +2507,7 @@ def add_lookaheads(self,lookbacks,followset): # with LALR parsing # ----------------------------------------------------------------------------- - def add_lalr_lookaheads(self,C): + def add_lalr_lookaheads(self, C): # Determine all of the nullable nonterminals nullable = self.compute_nullable_nonterminals() @@ -2343,16 +2515,16 @@ def add_lalr_lookaheads(self,C): trans = self.find_nonterminal_transitions(C) # Compute read sets - readsets = self.compute_read_sets(C,trans,nullable) + readsets = self.compute_read_sets(C, trans, nullable) # Compute lookback/includes relations - lookd, included = self.compute_lookback_includes(C,trans,nullable) + lookd, included = self.compute_lookback_includes(C, trans, nullable) # Compute LALR FOLLOW sets - followsets = self.compute_follow_sets(trans,readsets,included) + followsets = self.compute_follow_sets(trans, readsets, included) # Add all of the lookaheads - self.add_lookaheads(lookd,followsets) + self.add_lookaheads(lookd, followsets) # ----------------------------------------------------------------------------- # lr_parse_table() @@ -2366,9 +2538,9 @@ def lr_parse_table(self): action = self.lr_action # Action array log = self.log # Logger for output - actionp = { } # Action production array (temporary) - - log.info("Parsing method: %s", self.lr_method) + actionp = {} # Action production array (temporary) + + log.info('Parsing method: %s', self.lr_method) # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items # This determines the number of states @@ -2382,23 +2554,23 @@ def lr_parse_table(self): st = 0 for I in C: # Loop over each production in I - actlist = [ ] # List of actions - st_action = { } - st_actionp = { } - st_goto = { } - log.info("") - log.info("state %d", st) - log.info("") + actlist = [] # List of actions + st_action = {} + st_actionp = {} + st_goto = {} + log.info('') + log.info('state %d', st) + log.info('') for p in I: - log.info(" (%d) %s", p.number, str(p)) - log.info("") + log.info(' (%d) %s', p.number, p) + log.info('') for p in I: if p.len == p.lr_index + 1: if p.name == "S'": # Start symbol. Accept! - st_action["$end"] = 0 - st_actionp["$end"] = p + st_action['$end'] = 0 + st_actionp['$end'] = p else: # We are at the end of a production. Reduce! if self.lr_method == 'LALR': @@ -2406,31 +2578,36 @@ def lr_parse_table(self): else: laheads = self.grammar.Follow[p.name] for a in laheads: - actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p))) - r = st_action.get(a,None) + actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p))) + r = st_action.get(a) if r is not None: # Whoa. Have a shift/reduce or reduce/reduce conflict if r > 0: # Need to decide on shift or reduce here # By default we favor shifting. Need to add # some precedence rules here. - sprec,slevel = Productions[st_actionp[a].number].prec - rprec,rlevel = Precedence.get(a,('right',0)) + + # Shift precedence comes from the token + sprec, slevel = Precedence.get(a, ('right', 0)) + + # Reduce precedence comes from rule being reduced (p) + rprec, rlevel = Productions[p.number].prec + if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): # We really need to reduce here. st_action[a] = -p.number st_actionp[a] = p if not slevel and not rlevel: - log.info(" ! shift/reduce conflict for %s resolved as reduce",a) - self.sr_conflicts.append((st,a,'reduce')) + log.info(' ! shift/reduce conflict for %s resolved as reduce', a) + self.sr_conflicts.append((st, a, 'reduce')) Productions[p.number].reduced += 1 elif (slevel == rlevel) and (rprec == 'nonassoc'): st_action[a] = None else: # Hmmm. Guess we'll keep the shift if not rlevel: - log.info(" ! shift/reduce conflict for %s resolved as shift",a) - self.sr_conflicts.append((st,a,'shift')) + log.info(' ! shift/reduce conflict for %s resolved as shift', a) + self.sr_conflicts.append((st, a, 'shift')) elif r < 0: # Reduce/reduce conflict. In this case, we favor the rule # that was defined first in the grammar file @@ -2439,15 +2616,16 @@ def lr_parse_table(self): if oldp.line > pp.line: st_action[a] = -p.number st_actionp[a] = p - chosenp,rejectp = pp,oldp + chosenp, rejectp = pp, oldp Productions[p.number].reduced += 1 Productions[oldp.number].reduced -= 1 else: - chosenp,rejectp = oldp,pp - self.rr_conflicts.append((st,chosenp,rejectp)) - log.info(" ! reduce/reduce conflict for %s resolved using rule %d (%s)", a,st_actionp[a].number, st_actionp[a]) + chosenp, rejectp = oldp, pp + self.rr_conflicts.append((st, chosenp, rejectp)) + log.info(' ! reduce/reduce conflict for %s resolved using rule %d (%s)', + a, st_actionp[a].number, st_actionp[a]) else: - raise LALRError("Unknown conflict in state %d" % st) + raise LALRError('Unknown conflict in state %d' % st) else: st_action[a] = -p.number st_actionp[a] = p @@ -2456,205 +2634,211 @@ def lr_parse_table(self): i = p.lr_index a = p.prod[i+1] # Get symbol right after the "." if a in self.grammar.Terminals: - g = self.lr0_goto(I,a) - j = self.lr0_cidhash.get(id(g),-1) + g = self.lr0_goto(I, a) + j = self.lr0_cidhash.get(id(g), -1) if j >= 0: # We are in a shift state - actlist.append((a,p,"shift and go to state %d" % j)) - r = st_action.get(a,None) + actlist.append((a, p, 'shift and go to state %d' % j)) + r = st_action.get(a) if r is not None: # Whoa have a shift/reduce or shift/shift conflict if r > 0: if r != j: - raise LALRError("Shift/shift conflict in state %d" % st) + raise LALRError('Shift/shift conflict in state %d' % st) elif r < 0: # Do a precedence check. # - if precedence of reduce rule is higher, we reduce. # - if precedence of reduce is same and left assoc, we reduce. # - otherwise we shift - rprec,rlevel = Productions[st_actionp[a].number].prec - sprec,slevel = Precedence.get(a,('right',0)) + + # Shift precedence comes from the token + sprec, slevel = Precedence.get(a, ('right', 0)) + + # Reduce precedence comes from the rule that could have been reduced + rprec, rlevel = Productions[st_actionp[a].number].prec + if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')): # We decide to shift here... highest precedence to shift Productions[st_actionp[a].number].reduced -= 1 st_action[a] = j st_actionp[a] = p if not rlevel: - log.info(" ! shift/reduce conflict for %s resolved as shift",a) - self.sr_conflicts.append((st,a,'shift')) + log.info(' ! shift/reduce conflict for %s resolved as shift', a) + self.sr_conflicts.append((st, a, 'shift')) elif (slevel == rlevel) and (rprec == 'nonassoc'): st_action[a] = None else: # Hmmm. Guess we'll keep the reduce if not slevel and not rlevel: - log.info(" ! shift/reduce conflict for %s resolved as reduce",a) - self.sr_conflicts.append((st,a,'reduce')) + log.info(' ! shift/reduce conflict for %s resolved as reduce', a) + self.sr_conflicts.append((st, a, 'reduce')) else: - raise LALRError("Unknown conflict in state %d" % st) + raise LALRError('Unknown conflict in state %d' % st) else: st_action[a] = j st_actionp[a] = p # Print the actions associated with each terminal - _actprint = { } - for a,p,m in actlist: + _actprint = {} + for a, p, m in actlist: if a in st_action: if p is st_actionp[a]: - log.info(" %-15s %s",a,m) - _actprint[(a,m)] = 1 - log.info("") + log.info(' %-15s %s', a, m) + _actprint[(a, m)] = 1 + log.info('') # Print the actions that were not used. (debugging) not_used = 0 - for a,p,m in actlist: + for a, p, m in actlist: if a in st_action: if p is not st_actionp[a]: - if not (a,m) in _actprint: - log.debug(" ! %-15s [ %s ]",a,m) + if not (a, m) in _actprint: + log.debug(' ! %-15s [ %s ]', a, m) not_used = 1 - _actprint[(a,m)] = 1 + _actprint[(a, m)] = 1 if not_used: - log.debug("") + log.debug('') # Construct the goto table for this state - nkeys = { } + nkeys = {} for ii in I: for s in ii.usyms: if s in self.grammar.Nonterminals: nkeys[s] = None for n in nkeys: - g = self.lr0_goto(I,n) - j = self.lr0_cidhash.get(id(g),-1) + g = self.lr0_goto(I, n) + j = self.lr0_cidhash.get(id(g), -1) if j >= 0: st_goto[n] = j - log.info(" %-30s shift and go to state %d",n,j) + log.info(' %-30s shift and go to state %d', n, j) action[st] = st_action actionp[st] = st_actionp goto[st] = st_goto st += 1 - # ----------------------------------------------------------------------------- # write() # # This function writes the LR parsing tables to a file # ----------------------------------------------------------------------------- - def write_table(self,modulename,outputdir='',signature=""): - basemodulename = modulename.split(".")[-1] - filename = os.path.join(outputdir,basemodulename) + ".py" + def write_table(self, tabmodule, outputdir='', signature=''): + if isinstance(tabmodule, types.ModuleType): + raise IOError("Won't overwrite existing tabmodule") + + basemodulename = tabmodule.split('.')[-1] + filename = os.path.join(outputdir, basemodulename) + '.py' try: - f = open(filename,"w") + f = open(filename, 'w') - f.write(""" + f.write(''' # %s # This file is automatically generated. Do not edit. +# pylint: disable=W,C,R _tabversion = %r _lr_method = %r _lr_signature = %r - """ % (filename, __tabversion__, self.lr_method, signature)) + ''' % (os.path.basename(filename), __tabversion__, self.lr_method, signature)) # Change smaller to 0 to go back to original tables smaller = 1 # Factor out names to try and make smaller if smaller: - items = { } - - for s,nd in self.lr_action.items(): - for name,v in nd.items(): - i = items.get(name) - if not i: - i = ([],[]) - items[name] = i - i[0].append(s) - i[1].append(v) - - f.write("\n_lr_action_items = {") - for k,v in items.items(): - f.write("%r:([" % k) + items = {} + + for s, nd in self.lr_action.items(): + for name, v in nd.items(): + i = items.get(name) + if not i: + i = ([], []) + items[name] = i + i[0].append(s) + i[1].append(v) + + f.write('\n_lr_action_items = {') + for k, v in items.items(): + f.write('%r:([' % k) for i in v[0]: - f.write("%r," % i) - f.write("],[") + f.write('%r,' % i) + f.write('],[') for i in v[1]: - f.write("%r," % i) + f.write('%r,' % i) - f.write("]),") - f.write("}\n") + f.write(']),') + f.write('}\n') - f.write(""" -_lr_action = { } + f.write(''' +_lr_action = {} for _k, _v in _lr_action_items.items(): for _x,_y in zip(_v[0],_v[1]): - if not _x in _lr_action: _lr_action[_x] = { } + if not _x in _lr_action: _lr_action[_x] = {} _lr_action[_x][_k] = _y del _lr_action_items -""") +''') else: - f.write("\n_lr_action = { "); - for k,v in self.lr_action.items(): - f.write("(%r,%r):%r," % (k[0],k[1],v)) - f.write("}\n"); + f.write('\n_lr_action = { ') + for k, v in self.lr_action.items(): + f.write('(%r,%r):%r,' % (k[0], k[1], v)) + f.write('}\n') if smaller: # Factor out names to try and make smaller - items = { } - - for s,nd in self.lr_goto.items(): - for name,v in nd.items(): - i = items.get(name) - if not i: - i = ([],[]) - items[name] = i - i[0].append(s) - i[1].append(v) - - f.write("\n_lr_goto_items = {") - for k,v in items.items(): - f.write("%r:([" % k) + items = {} + + for s, nd in self.lr_goto.items(): + for name, v in nd.items(): + i = items.get(name) + if not i: + i = ([], []) + items[name] = i + i[0].append(s) + i[1].append(v) + + f.write('\n_lr_goto_items = {') + for k, v in items.items(): + f.write('%r:([' % k) for i in v[0]: - f.write("%r," % i) - f.write("],[") + f.write('%r,' % i) + f.write('],[') for i in v[1]: - f.write("%r," % i) + f.write('%r,' % i) - f.write("]),") - f.write("}\n") + f.write(']),') + f.write('}\n') - f.write(""" -_lr_goto = { } + f.write(''' +_lr_goto = {} for _k, _v in _lr_goto_items.items(): - for _x,_y in zip(_v[0],_v[1]): - if not _x in _lr_goto: _lr_goto[_x] = { } + for _x, _y in zip(_v[0], _v[1]): + if not _x in _lr_goto: _lr_goto[_x] = {} _lr_goto[_x][_k] = _y del _lr_goto_items -""") +''') else: - f.write("\n_lr_goto = { "); - for k,v in self.lr_goto.items(): - f.write("(%r,%r):%r," % (k[0],k[1],v)) - f.write("}\n"); + f.write('\n_lr_goto = { ') + for k, v in self.lr_goto.items(): + f.write('(%r,%r):%r,' % (k[0], k[1], v)) + f.write('}\n') # Write production table - f.write("_lr_productions = [\n") + f.write('_lr_productions = [\n') for p in self.lr_productions: if p.func: - f.write(" (%r,%r,%d,%r,%r,%d),\n" % (p.str,p.name, p.len, p.func,p.file,p.line)) + f.write(' (%r,%r,%d,%r,%r,%d),\n' % (p.str, p.name, p.len, + p.func, os.path.basename(p.file), p.line)) else: - f.write(" (%r,%r,%d,None,None,None),\n" % (str(p),p.name, p.len)) - f.write("]\n") + f.write(' (%r,%r,%d,None,None,None),\n' % (str(p), p.name, p.len)) + f.write(']\n') f.close() - except IOError: - e = sys.exc_info()[1] - sys.stderr.write("Unable to create '%s'\n" % filename) - sys.stderr.write(str(e)+"\n") - return + except IOError as e: + raise # ----------------------------------------------------------------------------- @@ -2663,26 +2847,25 @@ def write_table(self,modulename,outputdir='',signature=""): # This function pickles the LR parsing tables to a supplied file object # ----------------------------------------------------------------------------- - def pickle_table(self,filename,signature=""): + def pickle_table(self, filename, signature=''): try: import cPickle as pickle except ImportError: import pickle - outf = open(filename,"wb") - pickle.dump(__tabversion__,outf,pickle_protocol) - pickle.dump(self.lr_method,outf,pickle_protocol) - pickle.dump(signature,outf,pickle_protocol) - pickle.dump(self.lr_action,outf,pickle_protocol) - pickle.dump(self.lr_goto,outf,pickle_protocol) - - outp = [] - for p in self.lr_productions: - if p.func: - outp.append((p.str,p.name, p.len, p.func,p.file,p.line)) - else: - outp.append((str(p),p.name,p.len,None,None,None)) - pickle.dump(outp,outf,pickle_protocol) - outf.close() + with open(filename, 'wb') as outf: + pickle.dump(__tabversion__, outf, pickle_protocol) + pickle.dump(self.lr_method, outf, pickle_protocol) + pickle.dump(signature, outf, pickle_protocol) + pickle.dump(self.lr_action, outf, pickle_protocol) + pickle.dump(self.lr_goto, outf, pickle_protocol) + + outp = [] + for p in self.lr_productions: + if p.func: + outp.append((p.str, p.name, p.len, p.func, os.path.basename(p.file), p.line)) + else: + outp.append((str(p), p.name, p.len, None, None, None)) + pickle.dump(outp, outf, pickle_protocol) # ----------------------------------------------------------------------------- # === INTROSPECTION === @@ -2700,26 +2883,18 @@ def pickle_table(self,filename,signature=""): # ----------------------------------------------------------------------------- def get_caller_module_dict(levels): - try: - raise RuntimeError - except RuntimeError: - e,b,t = sys.exc_info() - f = t.tb_frame - while levels > 0: - f = f.f_back - levels -= 1 - ldict = f.f_globals.copy() - if f.f_globals != f.f_locals: - ldict.update(f.f_locals) - - return ldict + f = sys._getframe(levels) + ldict = f.f_globals.copy() + if f.f_globals != f.f_locals: + ldict.update(f.f_locals) + return ldict # ----------------------------------------------------------------------------- # parse_grammar() # # This takes a raw grammar rule string and parses it into production data # ----------------------------------------------------------------------------- -def parse_grammar(doc,file,line): +def parse_grammar(doc, file, line): grammar = [] # Split the doc string into lines pstrings = doc.splitlines() @@ -2728,12 +2903,13 @@ def parse_grammar(doc,file,line): for ps in pstrings: dline += 1 p = ps.split() - if not p: continue + if not p: + continue try: if p[0] == '|': # This is a continuation of a previous rule if not lastp: - raise SyntaxError("%s:%d: Misplaced '|'" % (file,dline)) + raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline)) prodname = lastp syms = p[1:] else: @@ -2742,13 +2918,13 @@ def parse_grammar(doc,file,line): syms = p[2:] assign = p[1] if assign != ':' and assign != '::=': - raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file,dline)) + raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, dline)) - grammar.append((file,dline,prodname,syms)) + grammar.append((file, dline, prodname, syms)) except SyntaxError: raise except Exception: - raise SyntaxError("%s:%d: Syntax error in rule '%s'" % (file,dline,ps.strip())) + raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, dline, ps.strip())) return grammar @@ -2760,14 +2936,14 @@ def parse_grammar(doc,file,line): # etc. # ----------------------------------------------------------------------------- class ParserReflect(object): - def __init__(self,pdict,log=None): + def __init__(self, pdict, log=None): self.pdict = pdict self.start = None self.error_func = None self.tokens = None - self.files = {} + self.modules = set() self.grammar = [] - self.error = 0 + self.error = False if log is None: self.log = PlyLogger(sys.stderr) @@ -2781,7 +2957,7 @@ def get_all(self): self.get_tokens() self.get_precedence() self.get_pfunctions() - + # Validate all of the information def validate_all(self): self.validate_start() @@ -2789,32 +2965,28 @@ def validate_all(self): self.validate_tokens() self.validate_precedence() self.validate_pfunctions() - self.validate_files() + self.validate_modules() return self.error # Compute a signature over the grammar def signature(self): + parts = [] try: - from hashlib import md5 - except ImportError: - from md5 import md5 - try: - sig = md5() if self.start: - sig.update(self.start.encode('latin-1')) + parts.append(self.start) if self.prec: - sig.update("".join(["".join(p) for p in self.prec]).encode('latin-1')) + parts.append(''.join([''.join(p) for p in self.prec])) if self.tokens: - sig.update(" ".join(self.tokens).encode('latin-1')) + parts.append(' '.join(self.tokens)) for f in self.pfuncs: if f[3]: - sig.update(f[3].encode('latin-1')) - except (TypeError,ValueError): + parts.append(f[3]) + except (TypeError, ValueError): pass - return sig.digest() + return ''.join(parts) # ----------------------------------------------------------------------------- - # validate_file() + # validate_modules() # # This method checks to see if there are duplicated p_rulename() functions # in the parser module file. Without this function, it is really easy for @@ -2824,32 +2996,29 @@ def signature(self): # to try and detect duplicates. # ----------------------------------------------------------------------------- - def validate_files(self): + def validate_modules(self): # Match def p_funcname( fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(') - for filename in self.files.keys(): - base,ext = os.path.splitext(filename) - if ext != '.py': return 1 # No idea. Assume it's okay. - + for module in self.modules: try: - f = open(filename) - lines = f.readlines() - f.close() + lines, linen = inspect.getsourcelines(module) except IOError: continue - counthash = { } - for linen,l in enumerate(lines): + counthash = {} + for linen, line in enumerate(lines): linen += 1 - m = fre.match(l) + m = fre.match(line) if m: name = m.group(1) prev = counthash.get(name) if not prev: counthash[name] = linen else: - self.log.warning("%s:%d: Function %s redefined. Previously defined on line %d", filename,linen,name,prev) + filename = inspect.getsourcefile(module) + self.log.warning('%s:%d: Function %s redefined. Previously defined on line %d', + filename, linen, name, prev) # Get the start symbol def get_start(self): @@ -2858,7 +3027,7 @@ def get_start(self): # Validate the start symbol def validate_start(self): if self.start is not None: - if not isinstance(self.start,str): + if not isinstance(self.start, string_types): self.log.error("'start' must be a string") # Look for error handler @@ -2868,162 +3037,173 @@ def get_error_func(self): # Validate the error function def validate_error_func(self): if self.error_func: - if isinstance(self.error_func,types.FunctionType): + if isinstance(self.error_func, types.FunctionType): ismethod = 0 elif isinstance(self.error_func, types.MethodType): ismethod = 1 else: self.log.error("'p_error' defined, but is not a function or method") - self.error = 1 + self.error = True return - eline = func_code(self.error_func).co_firstlineno - efile = func_code(self.error_func).co_filename - self.files[efile] = 1 + eline = self.error_func.__code__.co_firstlineno + efile = self.error_func.__code__.co_filename + module = inspect.getmodule(self.error_func) + self.modules.add(module) - if (func_code(self.error_func).co_argcount != 1+ismethod): - self.log.error("%s:%d: p_error() requires 1 argument",efile,eline) - self.error = 1 + argcount = self.error_func.__code__.co_argcount - ismethod + if argcount != 1: + self.log.error('%s:%d: p_error() requires 1 argument', efile, eline) + self.error = True # Get the tokens map def get_tokens(self): - tokens = self.pdict.get("tokens",None) + tokens = self.pdict.get('tokens') if not tokens: - self.log.error("No token list is defined") - self.error = 1 + self.log.error('No token list is defined') + self.error = True return - if not isinstance(tokens,(list, tuple)): - self.log.error("tokens must be a list or tuple") - self.error = 1 + if not isinstance(tokens, (list, tuple)): + self.log.error('tokens must be a list or tuple') + self.error = True return - + if not tokens: - self.log.error("tokens is empty") - self.error = 1 + self.log.error('tokens is empty') + self.error = True return - self.tokens = tokens + self.tokens = sorted(tokens) # Validate the tokens def validate_tokens(self): # Validate the tokens. if 'error' in self.tokens: self.log.error("Illegal token name 'error'. Is a reserved word") - self.error = 1 + self.error = True return - terminals = {} + terminals = set() for n in self.tokens: if n in terminals: - self.log.warning("Token '%s' multiply defined", n) - terminals[n] = 1 + self.log.warning('Token %r multiply defined', n) + terminals.add(n) # Get the precedence map (if any) def get_precedence(self): - self.prec = self.pdict.get("precedence",None) + self.prec = self.pdict.get('precedence') # Validate and parse the precedence map def validate_precedence(self): preclist = [] if self.prec: - if not isinstance(self.prec,(list,tuple)): - self.log.error("precedence must be a list or tuple") - self.error = 1 + if not isinstance(self.prec, (list, tuple)): + self.log.error('precedence must be a list or tuple') + self.error = True return - for level,p in enumerate(self.prec): - if not isinstance(p,(list,tuple)): - self.log.error("Bad precedence table") - self.error = 1 + for level, p in enumerate(self.prec): + if not isinstance(p, (list, tuple)): + self.log.error('Bad precedence table') + self.error = True return if len(p) < 2: - self.log.error("Malformed precedence entry %s. Must be (assoc, term, ..., term)",p) - self.error = 1 + self.log.error('Malformed precedence entry %s. Must be (assoc, term, ..., term)', p) + self.error = True return assoc = p[0] - if not isinstance(assoc,str): - self.log.error("precedence associativity must be a string") - self.error = 1 + if not isinstance(assoc, string_types): + self.log.error('precedence associativity must be a string') + self.error = True return for term in p[1:]: - if not isinstance(term,str): - self.log.error("precedence items must be strings") - self.error = 1 + if not isinstance(term, string_types): + self.log.error('precedence items must be strings') + self.error = True return - preclist.append((term,assoc,level+1)) + preclist.append((term, assoc, level+1)) self.preclist = preclist # Get all p_functions from the grammar def get_pfunctions(self): p_functions = [] for name, item in self.pdict.items(): - if name[:2] != 'p_': continue - if name == 'p_error': continue - if isinstance(item,(types.FunctionType,types.MethodType)): - line = func_code(item).co_firstlineno - file = func_code(item).co_filename - p_functions.append((line,file,name,item.__doc__)) - - # Sort all of the actions by line number - p_functions.sort() + if not name.startswith('p_') or name == 'p_error': + continue + if isinstance(item, (types.FunctionType, types.MethodType)): + line = getattr(item, 'co_firstlineno', item.__code__.co_firstlineno) + module = inspect.getmodule(item) + p_functions.append((line, module, name, item.__doc__)) + + # Sort all of the actions by line number; make sure to stringify + # modules to make them sortable, since `line` may not uniquely sort all + # p functions + p_functions.sort(key=lambda p_function: ( + p_function[0], + str(p_function[1]), + p_function[2], + p_function[3])) self.pfuncs = p_functions - # Validate all of the p_functions def validate_pfunctions(self): grammar = [] # Check for non-empty symbols if len(self.pfuncs) == 0: - self.log.error("no rules of the form p_rulename are defined") - self.error = 1 - return - - for line, file, name, doc in self.pfuncs: + self.log.error('no rules of the form p_rulename are defined') + self.error = True + return + + for line, module, name, doc in self.pfuncs: + file = inspect.getsourcefile(module) func = self.pdict[name] if isinstance(func, types.MethodType): reqargs = 2 else: reqargs = 1 - if func_code(func).co_argcount > reqargs: - self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,func.__name__) - self.error = 1 - elif func_code(func).co_argcount < reqargs: - self.log.error("%s:%d: Rule '%s' requires an argument",file,line,func.__name__) - self.error = 1 + if func.__code__.co_argcount > reqargs: + self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__) + self.error = True + elif func.__code__.co_argcount < reqargs: + self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__) + self.error = True elif not func.__doc__: - self.log.warning("%s:%d: No documentation string specified in function '%s' (ignored)",file,line,func.__name__) + self.log.warning('%s:%d: No documentation string specified in function %r (ignored)', + file, line, func.__name__) else: try: - parsed_g = parse_grammar(doc,file,line) + parsed_g = parse_grammar(doc, file, line) for g in parsed_g: grammar.append((name, g)) - except SyntaxError: - e = sys.exc_info()[1] + except SyntaxError as e: self.log.error(str(e)) - self.error = 1 + self.error = True # Looks like a valid grammar rule # Mark the file in which defined. - self.files[file] = 1 + self.modules.add(module) # Secondary validation step that looks for p_ definitions that are not functions # or functions that look like they might be grammar rules. - for n,v in self.pdict.items(): - if n[0:2] == 'p_' and isinstance(v, (types.FunctionType, types.MethodType)): continue - if n[0:2] == 't_': continue - if n[0:2] == 'p_' and n != 'p_error': - self.log.warning("'%s' not defined as a function", n) - if ((isinstance(v,types.FunctionType) and func_code(v).co_argcount == 1) or - (isinstance(v,types.MethodType) and func_code(v).co_argcount == 2)): - try: - doc = v.__doc__.split(" ") - if doc[1] == ':': - self.log.warning("%s:%d: Possible grammar rule '%s' defined without p_ prefix", - func_code(v).co_filename, func_code(v).co_firstlineno,n) - except Exception: - pass + for n, v in self.pdict.items(): + if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)): + continue + if n.startswith('t_'): + continue + if n.startswith('p_') and n != 'p_error': + self.log.warning('%r not defined as a function', n) + if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or + (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)): + if v.__doc__: + try: + doc = v.__doc__.split(' ') + if doc[1] == ':': + self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix', + v.__code__.co_filename, v.__code__.co_firstlineno, n) + except IndexError: + pass self.grammar = grammar @@ -3033,14 +3213,17 @@ def validate_pfunctions(self): # Build a parser # ----------------------------------------------------------------------------- -def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None, - check_recursion=1, optimize=0, write_tables=1, debugfile=debug_file,outputdir='', - debuglog=None, errorlog = None, picklefile=None): +def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None, + check_recursion=True, optimize=False, write_tables=True, debugfile=debug_file, + outputdir=None, debuglog=None, errorlog=None, picklefile=None): - global parse # Reference to the parsing method of the last built parser + if tabmodule is None: + tabmodule = tab_module - # If pickling is enabled, table files are not created + # Reference to the parsing method of the last built parser + global parse + # If pickling is enabled, table files are not created if picklefile: write_tables = 0 @@ -3049,17 +3232,54 @@ def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, star # Get the module dictionary used for the parser if module: - _items = [(k,getattr(module,k)) for k in dir(module)] + _items = [(k, getattr(module, k)) for k in dir(module)] pdict = dict(_items) + # If no __file__ or __package__ attributes are available, try to obtain them + # from the __module__ instead + if '__file__' not in pdict: + pdict['__file__'] = sys.modules[pdict['__module__']].__file__ + if '__package__' not in pdict and '__module__' in pdict: + if hasattr(sys.modules[pdict['__module__']], '__package__'): + pdict['__package__'] = sys.modules[pdict['__module__']].__package__ else: pdict = get_caller_module_dict(2) + if outputdir is None: + # If no output directory is set, the location of the output files + # is determined according to the following rules: + # - If tabmodule specifies a package, files go into that package directory + # - Otherwise, files go in the same directory as the specifying module + if isinstance(tabmodule, types.ModuleType): + srcfile = tabmodule.__file__ + else: + if '.' not in tabmodule: + srcfile = pdict['__file__'] + else: + parts = tabmodule.split('.') + pkgname = '.'.join(parts[:-1]) + exec('import %s' % pkgname) + srcfile = getattr(sys.modules[pkgname], '__file__', '') + outputdir = os.path.dirname(srcfile) + + # Determine if the module is package of a package or not. + # If so, fix the tabmodule setting so that tables load correctly + pkg = pdict.get('__package__') + if pkg and isinstance(tabmodule, str): + if '.' not in tabmodule: + tabmodule = pkg + '.' + tabmodule + + + + # Set start symbol if it's specified directly using an argument + if start is not None: + pdict['start'] = start + # Collect parser information from the dictionary - pinfo = ParserReflect(pdict,log=errorlog) + pinfo = ParserReflect(pdict, log=errorlog) pinfo.get_all() if pinfo.error: - raise YaccError("Unable to build parser") + raise YaccError('Unable to build parser') # Check signature against table files (if any) signature = pinfo.signature() @@ -3074,35 +3294,36 @@ def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, star if optimize or (read_signature == signature): try: lr.bind_callables(pinfo.pdict) - parser = LRParser(lr,pinfo.error_func) + parser = LRParser(lr, pinfo.error_func) parse = parser.parse return parser - except Exception: - e = sys.exc_info()[1] - errorlog.warning("There was a problem loading the table file: %s", repr(e)) - except VersionError: - e = sys.exc_info() + except Exception as e: + errorlog.warning('There was a problem loading the table file: %r', e) + except VersionError as e: errorlog.warning(str(e)) - except Exception: + except ImportError: pass if debuglog is None: if debug: - debuglog = PlyLogger(open(debugfile,"w")) + try: + debuglog = PlyLogger(open(os.path.join(outputdir, debugfile), 'w')) + except IOError as e: + errorlog.warning("Couldn't open %r. %s" % (debugfile, e)) + debuglog = NullLogger() else: debuglog = NullLogger() - debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__) - + debuglog.info('Created by PLY version %s (http://www.dabeaz.com/ply)', __version__) - errors = 0 + errors = False # Validate the parser information if pinfo.validate_all(): - raise YaccError("Unable to build parser") - + raise YaccError('Unable to build parser') + if not pinfo.error_func: - errorlog.warning("no p_error() function is defined") + errorlog.warning('no p_error() function is defined') # Create a grammar object grammar = Grammar(pinfo.tokens) @@ -3110,20 +3331,18 @@ def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, star # Set precedence level for terminals for term, assoc, level in pinfo.preclist: try: - grammar.set_precedence(term,assoc,level) - except GrammarError: - e = sys.exc_info()[1] - errorlog.warning("%s",str(e)) + grammar.set_precedence(term, assoc, level) + except GrammarError as e: + errorlog.warning('%s', e) # Add productions to the grammar for funcname, gram in pinfo.grammar: file, line, prodname, syms = gram try: - grammar.add_production(prodname,syms,funcname,file,line) - except GrammarError: - e = sys.exc_info()[1] - errorlog.error("%s",str(e)) - errors = 1 + grammar.add_production(prodname, syms, funcname, file, line) + except GrammarError as e: + errorlog.error('%s', e) + errors = True # Set the grammar start symbols try: @@ -3131,146 +3350,153 @@ def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, star grammar.set_start(pinfo.start) else: grammar.set_start(start) - except GrammarError: - e = sys.exc_info()[1] + except GrammarError as e: errorlog.error(str(e)) - errors = 1 + errors = True if errors: - raise YaccError("Unable to build parser") + raise YaccError('Unable to build parser') # Verify the grammar structure undefined_symbols = grammar.undefined_symbols() for sym, prod in undefined_symbols: - errorlog.error("%s:%d: Symbol '%s' used, but not defined as a token or a rule",prod.file,prod.line,sym) - errors = 1 + errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym) + errors = True unused_terminals = grammar.unused_terminals() if unused_terminals: - debuglog.info("") - debuglog.info("Unused terminals:") - debuglog.info("") + debuglog.info('') + debuglog.info('Unused terminals:') + debuglog.info('') for term in unused_terminals: - errorlog.warning("Token '%s' defined, but not used", term) - debuglog.info(" %s", term) + errorlog.warning('Token %r defined, but not used', term) + debuglog.info(' %s', term) # Print out all productions to the debug log if debug: - debuglog.info("") - debuglog.info("Grammar") - debuglog.info("") - for n,p in enumerate(grammar.Productions): - debuglog.info("Rule %-5d %s", n, p) + debuglog.info('') + debuglog.info('Grammar') + debuglog.info('') + for n, p in enumerate(grammar.Productions): + debuglog.info('Rule %-5d %s', n, p) # Find unused non-terminals unused_rules = grammar.unused_rules() for prod in unused_rules: - errorlog.warning("%s:%d: Rule '%s' defined, but not used", prod.file, prod.line, prod.name) + errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name) if len(unused_terminals) == 1: - errorlog.warning("There is 1 unused token") + errorlog.warning('There is 1 unused token') if len(unused_terminals) > 1: - errorlog.warning("There are %d unused tokens", len(unused_terminals)) + errorlog.warning('There are %d unused tokens', len(unused_terminals)) if len(unused_rules) == 1: - errorlog.warning("There is 1 unused rule") + errorlog.warning('There is 1 unused rule') if len(unused_rules) > 1: - errorlog.warning("There are %d unused rules", len(unused_rules)) + errorlog.warning('There are %d unused rules', len(unused_rules)) if debug: - debuglog.info("") - debuglog.info("Terminals, with rules where they appear") - debuglog.info("") + debuglog.info('') + debuglog.info('Terminals, with rules where they appear') + debuglog.info('') terms = list(grammar.Terminals) terms.sort() for term in terms: - debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]])) - - debuglog.info("") - debuglog.info("Nonterminals, with rules where they appear") - debuglog.info("") + debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.Terminals[term]])) + + debuglog.info('') + debuglog.info('Nonterminals, with rules where they appear') + debuglog.info('') nonterms = list(grammar.Nonterminals) nonterms.sort() for nonterm in nonterms: - debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]])) - debuglog.info("") + debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.Nonterminals[nonterm]])) + debuglog.info('') if check_recursion: unreachable = grammar.find_unreachable() for u in unreachable: - errorlog.warning("Symbol '%s' is unreachable",u) + errorlog.warning('Symbol %r is unreachable', u) infinite = grammar.infinite_cycles() for inf in infinite: - errorlog.error("Infinite recursion detected for symbol '%s'", inf) - errors = 1 - + errorlog.error('Infinite recursion detected for symbol %r', inf) + errors = True + unused_prec = grammar.unused_precedence() for term, assoc in unused_prec: - errorlog.error("Precedence rule '%s' defined for unknown symbol '%s'", assoc, term) - errors = 1 + errorlog.error('Precedence rule %r defined for unknown symbol %r', assoc, term) + errors = True if errors: - raise YaccError("Unable to build parser") - + raise YaccError('Unable to build parser') + # Run the LRGeneratedTable on the grammar if debug: - errorlog.debug("Generating %s tables", method) - - lr = LRGeneratedTable(grammar,method,debuglog) + errorlog.debug('Generating %s tables', method) + + lr = LRGeneratedTable(grammar, method, debuglog) if debug: num_sr = len(lr.sr_conflicts) # Report shift/reduce and reduce/reduce conflicts if num_sr == 1: - errorlog.warning("1 shift/reduce conflict") + errorlog.warning('1 shift/reduce conflict') elif num_sr > 1: - errorlog.warning("%d shift/reduce conflicts", num_sr) + errorlog.warning('%d shift/reduce conflicts', num_sr) num_rr = len(lr.rr_conflicts) if num_rr == 1: - errorlog.warning("1 reduce/reduce conflict") + errorlog.warning('1 reduce/reduce conflict') elif num_rr > 1: - errorlog.warning("%d reduce/reduce conflicts", num_rr) + errorlog.warning('%d reduce/reduce conflicts', num_rr) # Write out conflicts to the output file if debug and (lr.sr_conflicts or lr.rr_conflicts): - debuglog.warning("") - debuglog.warning("Conflicts:") - debuglog.warning("") + debuglog.warning('') + debuglog.warning('Conflicts:') + debuglog.warning('') for state, tok, resolution in lr.sr_conflicts: - debuglog.warning("shift/reduce conflict for %s in state %d resolved as %s", tok, state, resolution) - - already_reported = {} + debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s', tok, state, resolution) + + already_reported = set() for state, rule, rejected in lr.rr_conflicts: - if (state,id(rule),id(rejected)) in already_reported: + if (state, id(rule), id(rejected)) in already_reported: continue - debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule) - debuglog.warning("rejected rule (%s) in state %d", rejected,state) - errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule) - errorlog.warning("rejected rule (%s) in state %d", rejected, state) - already_reported[state,id(rule),id(rejected)] = 1 - + debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule) + debuglog.warning('rejected rule (%s) in state %d', rejected, state) + errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule) + errorlog.warning('rejected rule (%s) in state %d', rejected, state) + already_reported.add((state, id(rule), id(rejected))) + warned_never = [] for state, rule, rejected in lr.rr_conflicts: if not rejected.reduced and (rejected not in warned_never): - debuglog.warning("Rule (%s) is never reduced", rejected) - errorlog.warning("Rule (%s) is never reduced", rejected) + debuglog.warning('Rule (%s) is never reduced', rejected) + errorlog.warning('Rule (%s) is never reduced', rejected) warned_never.append(rejected) # Write the table file if requested if write_tables: - lr.write_table(tabmodule,outputdir,signature) + try: + lr.write_table(tabmodule, outputdir, signature) + if tabmodule in sys.modules: + del sys.modules[tabmodule] + except IOError as e: + errorlog.warning("Couldn't create %r. %s" % (tabmodule, e)) # Write a pickled version of the tables if picklefile: - lr.pickle_table(picklefile,signature) + try: + lr.pickle_table(picklefile, signature) + except IOError as e: + errorlog.warning("Couldn't create %r. %s" % (picklefile, e)) # Build the parser lr.bind_callables(pinfo.pdict) - parser = LRParser(lr,pinfo.error_func) + parser = LRParser(lr, pinfo.error_func) parse = parser.parse return parser