From 6c544d774533d2b4a6218f8be43bb283fabb4622 Mon Sep 17 00:00:00 2001 From: Dirk Pranke Date: Sun, 17 Nov 2024 19:50:43 -0800 Subject: [PATCH] First pass at supporting global vars. Everything works in the interpreter and the python generator, but not the javascript generator yet. This is roughly feature parity with the recent changes in glop. This adds a new pragma, '%globals' so that a grammar can explicitly declare the globals that it should be looking for. We should add a check in parse that every expected global is being passed in. Also, we still need to add new tests to cover this functionality. Updates the JSON5 grammar to support the `_strict` option as requested over in pyjson5-land. --- grammars/json5.g | 4 +++ src/pyfloyd/analyzer.py | 16 ++++++--- src/pyfloyd/api.py | 16 ++++++--- src/pyfloyd/interpreter.py | 18 ++++++---- src/pyfloyd/parser.py | 10 +++--- src/pyfloyd/printer.py | 4 ++- src/pyfloyd/python_generator.py | 29 +++++++++++---- src/pyfloyd/tool.py | 15 ++++++-- tests/grammar_test.py | 64 ++++++++++++++++++++------------- tests/printer_test.py | 2 +- 10 files changed, 123 insertions(+), 55 deletions(-) diff --git a/grammars/json5.g b/grammars/json5.g index f838fe5..76c4f41 100644 --- a/grammars/json5.g +++ b/grammars/json5.g @@ -14,6 +14,8 @@ %tokens = ident num_literal string +%globals = _strict + grammar = value end -> $1 value = 'null' -> null @@ -36,10 +38,12 @@ string = squote sqchar* squote -> cat($2) sqchar = bslash esc_char -> $2 | bslash eol -> '' | ~bslash ~squote ~eol any -> $4 + | ~(?{ _strict }) '\x00'..'\x1f' dqchar = bslash esc_char -> $2 | bslash eol -> '' | ~bslash ~dquote ~eol any -> $4 + | ~(?{ _strict }) '\x00'..'\x1f' bslash = '\\' diff --git a/src/pyfloyd/analyzer.py b/src/pyfloyd/analyzer.py index 4cc3310..cae4210 100644 --- a/src/pyfloyd/analyzer.py +++ b/src/pyfloyd/analyzer.py @@ -51,6 +51,7 @@ def __init__(self, ast): self.operators = {} self.leftrec_rules = set() self.outer_scope_rules = set() + self.global_vars = set() has_starting_rule = False for n in self.ast[2]: @@ -87,7 +88,7 @@ def __init__(self): self.choices = {} -def analyze(ast, rewrite_filler: bool, rewrite_subrules: bool) -> Grammar: +def analyze(ast, rewrite_filler: bool, rewrite_subrules: bool, global_vars=None) -> Grammar: """Analyze and optimize the AST. This runs any static analysis we can do over the grammars and @@ -215,6 +216,10 @@ def check_pragma(self, node): operator = seq[2][0][1] direction = seq[2][1][1] self.grammar.assoc[operator] = direction + elif pragma == '%globals': + for choice in node[2][0][2]: + for t in choice[2]: + self.grammar.global_vars.add(t[1]) else: self.errors.append(f'Unknown pragma "{pragma}"') @@ -325,6 +330,8 @@ def _check_named_vars(self, node, labels, references): if node[0] == 'e_var': if node[1] in labels: references.add(node[1]) + elif node[1] in self.grammar.global_vars: + pass elif not node[1][0] == '$': self.errors.append(f'Unknown variable "{node[1]}" referenced') @@ -353,7 +360,7 @@ def rewrite_node(node): def _rewrite_recursion(grammar): """Rewrite the AST to insert leftrec and operator nodes as needed.""" for node in grammar.ast[2]: - if node[0] == 'pragma': + if node[1][0] == '%': continue name = node[1] assert node[2][0][0] == 'choice' @@ -423,6 +430,7 @@ def _check_lr(name, node, grammar, seen): return False seen.add(node[1]) return _check_lr(name, grammar.rules[node[1]], grammar, seen) + if ty == 'seq': for subnode in node[2]: if subnode[0] == 'lit': @@ -488,7 +496,7 @@ def _rewrite_filler(grammar): grammar.ast[2] = [ rule for rule in grammar.ast[2] - if rule[1] not in ('%whitespace', '%comment', '%token', '%tokens') + if rule[1] not in ('%whitespace', '%comment', '%globals', '%token', '%tokens') ] grammar.comment = None grammar.whitespace = None @@ -496,7 +504,7 @@ def _rewrite_filler(grammar): grammar.pragmas = [ n for n in grammar.pragmas - if n[1] not in ('%whitespace', '%comment', '%token', '%tokens') + if n[1] not in ('%whitespace', '%comment', '%globals', '%token', '%tokens') ] diff --git a/src/pyfloyd/api.py b/src/pyfloyd/api.py index 955ce04..eae1512 100644 --- a/src/pyfloyd/api.py +++ b/src/pyfloyd/api.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import NamedTuple, Optional, Protocol, Tuple +from typing import Any, Dict, NamedTuple, Optional, Protocol, Tuple from pyfloyd import analyzer from pyfloyd.interpreter import Interpreter @@ -32,7 +32,7 @@ class ParserInterface(Protocol): `compile()`. """ - def parse(self, text: str, path: str = '') -> Result: + def parse(self, text: str, path: str = '', global_vars: Optional[Dict[str, Any]] = None) -> Result: """Parse a string and return a result. `text` is the string to parse. @@ -84,6 +84,7 @@ def generate( grammar: str, path: str = '', options: Optional[GeneratorOptions] = None, + global_vars = None ) -> Result: """Generate the source code of a parser. @@ -116,11 +117,12 @@ def generate( """ result = parser.parse(grammar, path) + global_vars = global_vars or {} if result.err: return result try: grammar_obj = analyzer.analyze( - result.val, rewrite_filler=True, rewrite_subrules=True + result.val, rewrite_filler=True, rewrite_subrules=True, global_vars=global_vars ) except analyzer.AnalysisError as e: return Result(err=str(e)) @@ -141,6 +143,7 @@ def parse( text: str, grammar_path: str = '', path: str = '', + global_vars : Dict[str, Any] = None, memoize: bool = False, ) -> Result: """Match an input text against the specified grammar. @@ -165,12 +168,13 @@ def parse( if result.err: return Result(err='Error in grammar: ' + result.err, pos=result.pos) assert result.parser is not None - return result.parser.parse(text, path) + return result.parser.parse(text, path, global_vars) def pretty_print( grammar: str, path: str = '', + global_vars = None, rewrite_filler: bool = False, rewrite_subrules: bool = False, ) -> Tuple[Optional[str], Optional[str]]: @@ -197,9 +201,11 @@ def pretty_print( if result.err: return None, result.err + global_vars = global_vars or {} try: g = analyzer.analyze( result.val, + global_vars=global_vars, rewrite_filler=rewrite_filler, rewrite_subrules=rewrite_subrules, ) @@ -211,6 +217,7 @@ def pretty_print( def dump_ast( grammar: str, path: str = '', + global_vars = None, rewrite_filler: bool = False, rewrite_subrules: bool = False, ) -> Tuple[Optional[str], Optional[str]]: @@ -225,6 +232,7 @@ def dump_ast( try: g = analyzer.analyze( result.val, + global_vars=global_vars, rewrite_filler=rewrite_filler, rewrite_subrules=rewrite_subrules, ) diff --git a/src/pyfloyd/interpreter.py b/src/pyfloyd/interpreter.py index 90d8f26..e81af49 100644 --- a/src/pyfloyd/interpreter.py +++ b/src/pyfloyd/interpreter.py @@ -47,8 +47,9 @@ def __init__(self, grammar, memoize): self._blocked = set() self._operators = {} self._regexps = {} + self._global_vars = {} - def parse(self, text: str, path: str = '') -> parser.Result: + def parse(self, text: str, path: str = '', global_vars = None) -> parser.Result: self._text = text self._path = path self._failed = False @@ -58,6 +59,7 @@ def parse(self, text: str, path: str = '') -> parser.Result: self._errstr = None self._errpos = 0 self._scopes = [{}] + self._global_vars = global_vars or {} self._interpret(self._grammar.rules[self._grammar.starting_rule]) if self._failed: @@ -278,18 +280,22 @@ def _ty_e_var(self, node): return # Unknown variables should have been caught in analysis. - if node[1][0] == '$': + v = node[1] + if v[0] == '$': # Look up positional labels in the current scope. - self._succeed(self._scopes[-1][node[1]]) + self._succeed(self._scopes[-1][v]) else: # Look up named labels in any scope. i = len(self._scopes) - 1 while i >= 0: - if node[1] in self._scopes[i]: - self._succeed(self._scopes[i][node[1]]) + if v in self._scopes[i]: + self._succeed(self._scopes[i][v]) return i -= 1 - assert False, f'Unknown label "{node[1]}"' + if v in self._global_vars: + self._succeed(self._global_vars[v]) + return + assert False, f'Unknown label "{v}"' def _ty_empty(self, node): del node diff --git a/src/pyfloyd/parser.py b/src/pyfloyd/parser.py index 921c1e8..a970d23 100644 --- a/src/pyfloyd/parser.py +++ b/src/pyfloyd/parser.py @@ -1,4 +1,4 @@ -# Generated by pyfloyd version 0.18.0.dev0 +# Generated by pyfloyd version 0.18.4.dev0 # https://github.com/dpranke/pyfloyd # `pyfloyd -o src/pyfloyd/parser.py --memoize -c grammars/floyd.g` @@ -24,7 +24,7 @@ class Result(NamedTuple): pos: Optional[int] = None -def parse(text: str, path: str = '') -> Result: +def parse(text: str, path: str = '', global_vars = None) -> Result: """Parse a given text and return the result. If the parse was successful, `result.val` will be the returned value @@ -39,7 +39,7 @@ def parse(text: str, path: str = '') -> Result: messages to indicate the path to the filename containing the given text. """ - return _Parser(text, path).parse() + return _Parser(text, path).parse(global_vars) class _Parser: @@ -47,6 +47,7 @@ def __init__(self, text, path): self._text = text self._end = len(self._text) self._errpos = 0 + self._global_vars = {} self._failed = False self._path = path self._pos = 0 @@ -54,7 +55,8 @@ def __init__(self, text, path): self._cache = {} self._regexps = {} - def parse(self): + def parse(self, global_vars=None): + self._global_vars = global_vars or {} self._r_grammar() if self._failed: return Result(None, self._err_str(), self._errpos) diff --git a/src/pyfloyd/printer.py b/src/pyfloyd/printer.py index 30d07f6..c0c399e 100644 --- a/src/pyfloyd/printer.py +++ b/src/pyfloyd/printer.py @@ -39,6 +39,8 @@ def _build_rules(self): cs = [(node[0], '')] else: cs = [(' '.join(node), '')] + elif rule_name == '%globals': + cs = [(' '.join(node), '')] else: assert rule_name in ( '%comment', @@ -189,7 +191,7 @@ def _ty_plus(self, node): return self._proc(node[2][0]) + '+' def _ty_pred(self, node): - return '?{%s}' % self._proc(node[2][0]) + return '?{ %s }' % self._proc(node[2][0]) def _ty_range(self, node): return '%s..%s' % (lit.encode(node[1][0]), lit.encode(node[1][1])) diff --git a/src/pyfloyd/python_generator.py b/src/pyfloyd/python_generator.py index e73c01b..32e53e7 100644 --- a/src/pyfloyd/python_generator.py +++ b/src/pyfloyd/python_generator.py @@ -417,6 +417,8 @@ def _ty_e_qual(self, node) -> Saw: def _ty_e_var(self, node) -> str: if self._current_rule in self._grammar.outer_scope_rules: return f"self._lookup('{node[1]}')" + if node[1] in self._grammar.global_vars: + return f"self._global_vars['{node[1]}']" return 'v_' + node[1].replace('$', '_') def _ty_empty(self, node) -> List[str]: @@ -667,6 +669,7 @@ def _ty_unicat(self, node) -> List[str]: # `pyfloyd {args}` {imports} +import json import re # pylint: disable=too-many-lines @@ -681,6 +684,8 @@ def main( opener=open, ) -> int: arg_parser = argparse.ArgumentParser() + arg_parser.add_argument('-D', '--define', action='append', default=[], + help='Define a global var=value') arg_parser.add_argument('file', nargs='?') args = arg_parser.parse_args(argv) @@ -694,8 +699,13 @@ def main( path = args.file fp = opener(path) + global_vars = {{}} + for d in args.define: + k, v = d.split('=', 1) + global_vars[k] = json.loads(v) + msg = fp.read() - result = parse(msg, path) + result = parse(msg, path, global_vars) if result.err: print(result.err, file=stderr) return 1 @@ -750,7 +760,7 @@ class Result(NamedTuple): pos: Optional[int] = None -def parse(text: str, path: str = '') -> Result: +def parse(text: str, path: str = '', global_vars = None) -> Result: \"\"\"Parse a given text and return the result. If the parse was successful, `result.val` will be the returned value @@ -765,7 +775,7 @@ def parse(text: str, path: str = '') -> Result: messages to indicate the path to the filename containing the given text. \"\"\" - return _Parser(text, path).parse() + return _Parser(text, path).parse(global_vars) class _Parser: @@ -773,6 +783,7 @@ def __init__(self, text, path): self._text = text self._end = len(self._text) self._errpos = 0 + self._global_vars = {} self._failed = False self._path = path self._pos = 0 @@ -781,7 +792,8 @@ def __init__(self, text, path): _PARSE = """\ - def parse(self): + def parse(self, global_vars=None): + self._global_vars = global_vars or {{}} self._r_{starting_rule}() if self._failed: return Result(None, self._err_str(), self._errpos) @@ -790,12 +802,13 @@ def parse(self): _PARSE_WITH_EXCEPTION = """\ - def parse(self): + def parse(self, global_vars=None): + self._global_vars = global_vars or {{}} try: self._r_{starting_rule}() if self._failed: - return None, self._err_str(), self._errpos - return self._val, None, self._pos + return Result(None, self._err_str(), self._errpos) + return Result(self._val, None, self._pos) except _ParsingRuntimeError as e: # pragma: no cover lineno, _ = self._err_offsets() return ( @@ -889,6 +902,8 @@ def _lookup(self, var): if var in self._scopes[l]: return self._scopes[l][var] l -= 1 + if var in self._global_vars: + return self._global_vars[var] assert False, f'unknown var {var}' def _memoize(self, rule_name, fn): diff --git a/src/pyfloyd/tool.py b/src/pyfloyd/tool.py index def0c2c..3854dc7 100755 --- a/src/pyfloyd/tool.py +++ b/src/pyfloyd/tool.py @@ -51,6 +51,11 @@ def main(argv=None, host=None): host.print(err, file=host.stderr) return 1 + global_vars = {} + for d in args.define: + k, v = d.split('=', 1) + global_vars[k] = json.loads(v) + if args.ast: ast, err = pyfloyd.dump_ast( grammar, @@ -75,10 +80,11 @@ def main(argv=None, host=None): contents, err, _ = pyfloyd.generate( grammar, path=args.grammar, - options=options, + options=options ) else: - contents, err, _ = _interpret_grammar(host, args, grammar) + contents, err, _ = _interpret_grammar(host, args, grammar, + global_vars) if err: host.print(err, file=host.stderr) @@ -104,6 +110,8 @@ def _parse_args(host, argv): action='store_true', help='compile grammar instead of interpreting it', ) + ap.add_argument('-D', '--define', action='append', default=[], + help='Define a global var=value') ap.add_argument('-o', '--output', help='path to write output to') ap.add_argument( '-p', @@ -193,7 +201,7 @@ def _read_grammar(host, args): return None, 'Error reading "%s": %s' % (args.grammar, str(e)) -def _interpret_grammar(host, args, grammar): +def _interpret_grammar(host, args, grammar, global_vars): if args.input == '-': path, contents = ('', host.stdin.read()) else: @@ -204,6 +212,7 @@ def _interpret_grammar(host, args, grammar): contents, grammar_path=args.grammar, path=path, + global_vars=global_vars, memoize=args.memoize, ) if err: diff --git a/tests/grammar_test.py b/tests/grammar_test.py index 7bc4449..0d801a1 100644 --- a/tests/grammar_test.py +++ b/tests/grammar_test.py @@ -59,8 +59,9 @@ def check( if hasattr(p, 'cleanup'): p.cleanup() - def checkp(self, parser, text, out=None, err=None): - actual_out, actual_err, _ = parser.parse(text) + def checkp(self, parser, text, out=None, err=None, global_vars=None): + actual_out, actual_err, _ = parser.parse(text, path='', + global_vars=global_vars) # Test err before out because it's probably more helpful to display # an unexpected error than it is to display an unexpected output. self.assertMultiLineEqual(err or '', actual_err or '') @@ -392,7 +393,7 @@ def test_json(self): self.assertIsNone(err) self._common_json_checks(p) - self.checkp(p, text='"foo"', out='"foo"') + self.checkp(p, text='"foo"', out='"foo"', global_vars={'_strict': False}) if hasattr(p, 'cleanup'): p.cleanup() @@ -408,6 +409,7 @@ def test_json5(self): @skip('integration') def test_json5_special_floats(self): + gv = { '_strict': True } h = pyfloyd.host.Host() path = str(THIS_DIR / '../grammars/json5.g') p, err, _ = self.compile(h.read_text_file(path)) @@ -415,43 +417,47 @@ def test_json5_special_floats(self): # TODO: Figure out what to do with 'Infinity' and 'NaN'. # self.checkp(p, text='Infinity', out=float('inf')) - self.checkp(p, text='Infinity', out='Infinity') + self.checkp(p, text='Infinity', out='Infinity', global_vars=gv) # Can't use check() for this because NaN != NaN. # obj, err, _ = p.parse('NaN') # self.assertTrue(math.isnan(obj)) # self.assertTrue(err is None) - self.checkp(p, text='NaN', out='NaN') + self.checkp(p, text='NaN', out='NaN', global_vars=gv) if hasattr(p, 'cleanup'): p.cleanup() def _common_json_checks(self, p): - self.checkp(p, text='123', out=123) - self.checkp(p, text='1.5', out=1.5) - self.checkp(p, text='-1.5', out=-1.5) - self.checkp(p, text='1.5e2', out=150) - self.checkp(p, text='null', out=None) - self.checkp(p, text='true', out=True) - self.checkp(p, text='false', out=False) - - self.checkp(p, text='[]', out=[]) - self.checkp(p, text='[2]', out=[2]) - self.checkp(p, text='{}', out={}) + gvs = { '_strict': True } + self.checkp(p, text='123', out=123, global_vars=gvs) + self.checkp(p, text='1.5', out=1.5, global_vars=gvs) + self.checkp(p, text='-1.5', out=-1.5, global_vars=gvs) + self.checkp(p, text='1.5e2', out=150, global_vars=gvs) + self.checkp(p, text='null', out=None, global_vars=gvs) + self.checkp(p, text='true', out=True, global_vars=gvs) + self.checkp(p, text='false', out=False, global_vars=gvs) + + self.checkp(p, text='[]', out=[], global_vars=gvs) + self.checkp(p, text='[2]', out=[2], global_vars=gvs) + self.checkp(p, text='{}', out={}, global_vars=gvs) self.checkp( - p, text='[1', err=':1 Unexpected end of input at column 3' + p, text='[1', err=':1 Unexpected end of input at column 3', + global_vars=gvs ) # Check that leading whitespace is allowed. - self.checkp(p, ' {}', {}) + self.checkp(p, ' {}', {}, global_vars=gvs) def _common_json5_checks(self, p): - self.checkp(p, text='+1.5', out=1.5) - self.checkp(p, text='.5e-2', out=0.005) - self.checkp(p, text='"foo"', out='foo') + gvs = { '_strict': False } + self.checkp(p, text='+1.5', out=1.5, global_vars=gvs) + self.checkp(p, text='.5e-2', out=0.005, global_vars=gvs) + self.checkp(p, text='"foo"', out='foo', global_vars=gvs) self.checkp( - p, text='{foo: "bar", a: "b"}', out={'foo': 'bar', 'a': 'b'} + p, text='{foo: "bar", a: "b"}', out={'foo': 'bar', 'a': 'b'}, + global_vars=gvs ) @skip('integration') @@ -506,6 +512,7 @@ def test_json5_sample(self): 'trailing commas too', ], }, + global_vars={ '_strict': False } ) if hasattr(p, 'cleanup'): p.cleanup() @@ -1114,8 +1121,8 @@ class _PythonParserWrapper: def __init__(self, parse_fn): self.parse_fn = parse_fn - def parse(self, text, path=''): - return self.parse_fn(text, path) + def parse(self, text, path='', global_vars=None): + return self.parse_fn(text, path, global_vars) def cleanup(self): pass @@ -1148,6 +1155,13 @@ def test_json5_special_floats(self): # we can't read them in from output that is JSON. pass + def test_json5(self): + # TODO: implement global var support in JavaScript, get this to pass. + pass + + def test_json5_sample(self): + # TODO: implement global var support in JavaScript, get this to pass. + pass class _JavaScriptParserWrapper: def __init__(self, h, d): @@ -1155,7 +1169,7 @@ def __init__(self, h, d): self.d = d self.source = d + '/parser.js' - def parse(self, text, path=''): + def parse(self, text, path='', global_vars=None): del path inp = self.d + '/input.txt' self.h.write_text_file(inp, text) diff --git a/tests/printer_test.py b/tests/printer_test.py index b486cf9..89ef33d 100644 --- a/tests/printer_test.py +++ b/tests/printer_test.py @@ -112,7 +112,7 @@ def test_leftrec(self): self.assertIsNone(err) def test_pred(self): - grammar = 'grammar = ?{true} -> true\n' + grammar = 'grammar = ?{ true } -> true\n' out, err = pyfloyd.pretty_print(grammar) self.assertEqual(grammar, out) self.assertIsNone(err)