First pass at supporting global vars.

Everything works in the interpreter and the Python generator, but not in the JavaScript generator yet. This is roughly at feature parity with the recent changes in glop. This adds a new pragma, '%globals', so that a grammar can explicitly declare the globals that it should be looking for. We should add a check in parse that every expected global is being passed in. Also, we still need to add new tests to cover this functionality. This also updates the JSON5 grammar to support the `_strict` option as requested over in pyjson5-land.
dpranke · Nov 18, 2024 · 6c544d7 · 6c544d7
1 parent 161b14e
commit 6c544d7
Show file tree

Hide file tree

Showing 10 changed files with 123 additions and 55 deletions.
diff --git a/grammars/json5.g b/grammars/json5.g
@@ -14,6 +14,8 @@
 
 %tokens = ident num_literal string
 
+%globals       = _strict
+
 grammar        = value end                        -> $1
 
 value          = 'null'                           -> null
@@ -36,10 +38,12 @@ string         = squote sqchar* squote            -> cat($2)
 sqchar         = bslash esc_char                  -> $2
                | bslash eol                       -> ''
                | ~bslash ~squote ~eol any         -> $4
+               | ~(?{ _strict }) '\x00'..'\x1f'
 
 dqchar         = bslash esc_char                  -> $2
                | bslash eol                       -> ''
                | ~bslash ~dquote ~eol any         -> $4
+               | ~(?{ _strict }) '\x00'..'\x1f'
 
 bslash         = '\\'
 

diff --git a/src/pyfloyd/analyzer.py b/src/pyfloyd/analyzer.py
@@ -51,6 +51,7 @@ def __init__(self, ast):
         self.operators = {}
         self.leftrec_rules = set()
         self.outer_scope_rules = set()
+        self.global_vars = set()
 
         has_starting_rule = False
         for n in self.ast[2]:
@@ -87,7 +88,7 @@ def __init__(self):
         self.choices = {}
 
 
-def analyze(ast, rewrite_filler: bool, rewrite_subrules: bool) -> Grammar:
+def analyze(ast, rewrite_filler: bool, rewrite_subrules: bool, global_vars=None) -> Grammar:
     """Analyze and optimize the AST.
 
     This runs any static analysis we can do over the grammars and
@@ -215,6 +216,10 @@ def check_pragma(self, node):
             operator = seq[2][0][1]
             direction = seq[2][1][1]
             self.grammar.assoc[operator] = direction
+        elif pragma == '%globals':
+            for choice in node[2][0][2]:
+                for t in choice[2]:
+                    self.grammar.global_vars.add(t[1])
         else:
             self.errors.append(f'Unknown pragma "{pragma}"')
 
@@ -325,6 +330,8 @@ def _check_named_vars(self, node, labels, references):
         if node[0] == 'e_var':
             if node[1] in labels:
                 references.add(node[1])
+            elif node[1] in self.grammar.global_vars:
+                pass
             elif not node[1][0] == '$':
                 self.errors.append(f'Unknown variable "{node[1]}" referenced')
 
@@ -353,7 +360,7 @@ def rewrite_node(node):
 def _rewrite_recursion(grammar):
     """Rewrite the AST to insert leftrec and operator nodes as needed."""
     for node in grammar.ast[2]:
-        if node[0] == 'pragma':
+        if node[1][0] == '%':
             continue
         name = node[1]
         assert node[2][0][0] == 'choice'
@@ -423,6 +430,7 @@ def _check_lr(name, node, grammar, seen):
             return False
         seen.add(node[1])
         return _check_lr(name, grammar.rules[node[1]], grammar, seen)
+
     if ty == 'seq':
         for subnode in node[2]:
             if subnode[0] == 'lit':
@@ -488,15 +496,15 @@ def _rewrite_filler(grammar):
     grammar.ast[2] = [
         rule
         for rule in grammar.ast[2]
-        if rule[1] not in ('%whitespace', '%comment', '%token', '%tokens')
+        if rule[1] not in ('%whitespace', '%comment', '%globals', '%token', '%tokens')
     ]
     grammar.comment = None
     grammar.whitespace = None
     grammar.tokens = set()
     grammar.pragmas = [
         n
         for n in grammar.pragmas
-        if n[1] not in ('%whitespace', '%comment', '%token', '%tokens')
+        if n[1] not in ('%whitespace', '%comment', '%globals', '%token', '%tokens')
     ]
 
 

diff --git a/src/pyfloyd/api.py b/src/pyfloyd/api.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import NamedTuple, Optional, Protocol, Tuple
+from typing import Any, Dict, NamedTuple, Optional, Protocol, Tuple
 
 from pyfloyd import analyzer
 from pyfloyd.interpreter import Interpreter
@@ -32,7 +32,7 @@ class ParserInterface(Protocol):
     `compile()`.
     """
 
-    def parse(self, text: str, path: str = '<string>') -> Result:
+    def parse(self, text: str, path: str = '<string>', global_vars: Optional[Dict[str, Any]] = None) -> Result:
         """Parse a string and return a result.
 
         `text` is the string to parse.
@@ -84,6 +84,7 @@ def generate(
     grammar: str,
     path: str = '<string>',
     options: Optional[GeneratorOptions] = None,
+    global_vars = None
 ) -> Result:
     """Generate the source code of a parser.
 
@@ -116,11 +117,12 @@ def generate(
     """
 
     result = parser.parse(grammar, path)
+    global_vars = global_vars or {}
     if result.err:
         return result
     try:
         grammar_obj = analyzer.analyze(
-            result.val, rewrite_filler=True, rewrite_subrules=True
+            result.val, rewrite_filler=True, rewrite_subrules=True, global_vars=global_vars
         )
     except analyzer.AnalysisError as e:
         return Result(err=str(e))
@@ -141,6 +143,7 @@ def parse(
     text: str,
     grammar_path: str = '<string>',
     path: str = '<string>',
+    global_vars : Dict[str, Any] = None,
     memoize: bool = False,
 ) -> Result:
     """Match an input text against the specified grammar.
@@ -165,12 +168,13 @@ def parse(
     if result.err:
         return Result(err='Error in grammar: ' + result.err, pos=result.pos)
     assert result.parser is not None
-    return result.parser.parse(text, path)
+    return result.parser.parse(text, path, global_vars)
 
 
 def pretty_print(
     grammar: str,
     path: str = '<string>',
+    global_vars = None,
     rewrite_filler: bool = False,
     rewrite_subrules: bool = False,
 ) -> Tuple[Optional[str], Optional[str]]:
@@ -197,9 +201,11 @@ def pretty_print(
     if result.err:
         return None, result.err
 
+    global_vars = global_vars or {}
     try:
         g = analyzer.analyze(
             result.val,
+            global_vars=global_vars,
             rewrite_filler=rewrite_filler,
             rewrite_subrules=rewrite_subrules,
         )
@@ -211,6 +217,7 @@ def pretty_print(
 def dump_ast(
     grammar: str,
     path: str = '<string>',
+    global_vars = None,
     rewrite_filler: bool = False,
     rewrite_subrules: bool = False,
 ) -> Tuple[Optional[str], Optional[str]]:
@@ -225,6 +232,7 @@ def dump_ast(
     try:
         g = analyzer.analyze(
             result.val,
+            global_vars=global_vars,
             rewrite_filler=rewrite_filler,
             rewrite_subrules=rewrite_subrules,
         )

diff --git a/src/pyfloyd/interpreter.py b/src/pyfloyd/interpreter.py
@@ -47,8 +47,9 @@ def __init__(self, grammar, memoize):
         self._blocked = set()
         self._operators = {}
         self._regexps = {}
+        self._global_vars = {}
 
-    def parse(self, text: str, path: str = '<string>') -> parser.Result:
+    def parse(self, text: str, path: str = '<string>', global_vars = None) -> parser.Result:
         self._text = text
         self._path = path
         self._failed = False
@@ -58,6 +59,7 @@ def parse(self, text: str, path: str = '<string>') -> parser.Result:
         self._errstr = None
         self._errpos = 0
         self._scopes = [{}]
+        self._global_vars = global_vars or {}
 
         self._interpret(self._grammar.rules[self._grammar.starting_rule])
         if self._failed:
@@ -278,18 +280,22 @@ def _ty_e_var(self, node):
             return
 
         # Unknown variables should have been caught in analysis.
-        if node[1][0] == '$':
+        v = node[1]
+        if v[0] == '$':
             # Look up positional labels in the current scope.
-            self._succeed(self._scopes[-1][node[1]])
+            self._succeed(self._scopes[-1][v])
         else:
             # Look up named labels in any scope.
             i = len(self._scopes) - 1
             while i >= 0:
-                if node[1] in self._scopes[i]:
-                    self._succeed(self._scopes[i][node[1]])
+                if v in self._scopes[i]:
+                    self._succeed(self._scopes[i][v])
                     return
                 i -= 1
-            assert False, f'Unknown label "{node[1]}"'
+            if v in self._global_vars:
+                self._succeed(self._global_vars[v])
+                return
+            assert False, f'Unknown label "{v}"'
 
     def _ty_empty(self, node):
         del node

diff --git a/src/pyfloyd/parser.py b/src/pyfloyd/parser.py
@@ -1,4 +1,4 @@
-# Generated by pyfloyd version 0.18.0.dev0
+# Generated by pyfloyd version 0.18.4.dev0
 #    https://github.com/dpranke/pyfloyd
 #    `pyfloyd -o src/pyfloyd/parser.py --memoize -c grammars/floyd.g`
 
@@ -24,7 +24,7 @@ class Result(NamedTuple):
     pos: Optional[int] = None
 
 
-def parse(text: str, path: str = '<string>') -> Result:
+def parse(text: str, path: str = '<string>', global_vars = None) -> Result:
     """Parse a given text and return the result.
 
     If the parse was successful, `result.val` will be the returned value
@@ -39,22 +39,24 @@ def parse(text: str, path: str = '<string>') -> Result:
     messages to indicate the path to the filename containing the given
     text.
     """
-    return _Parser(text, path).parse()
+    return _Parser(text, path).parse(global_vars)
 
 
 class _Parser:
     def __init__(self, text, path):
         self._text = text
         self._end = len(self._text)
         self._errpos = 0
+        self._global_vars = {}
         self._failed = False
         self._path = path
         self._pos = 0
         self._val = None
         self._cache = {}
         self._regexps = {}
 
-    def parse(self):
+    def parse(self, global_vars=None):
+        self._global_vars = global_vars or {}
         self._r_grammar()
         if self._failed:
             return Result(None, self._err_str(), self._errpos)

diff --git a/src/pyfloyd/printer.py b/src/pyfloyd/printer.py
@@ -39,6 +39,8 @@ def _build_rules(self):
                         cs = [(node[0], '')]
                     else:
                         cs = [(' '.join(node), '')]
+                elif rule_name == '%globals':
+                    cs = [(' '.join(node), '')]
                 else:
                     assert rule_name in (
                         '%comment',
@@ -189,7 +191,7 @@ def _ty_plus(self, node):
         return self._proc(node[2][0]) + '+'
 
     def _ty_pred(self, node):
-        return '?{%s}' % self._proc(node[2][0])
+        return '?{ %s }' % self._proc(node[2][0])
 
     def _ty_range(self, node):
         return '%s..%s' % (lit.encode(node[1][0]), lit.encode(node[1][1]))

diff --git a/src/pyfloyd/python_generator.py b/src/pyfloyd/python_generator.py
@@ -417,6 +417,8 @@ def _ty_e_qual(self, node) -> Saw:
     def _ty_e_var(self, node) -> str:
         if self._current_rule in self._grammar.outer_scope_rules:
             return f"self._lookup('{node[1]}')"
+        if node[1] in self._grammar.global_vars:
+            return f"self._global_vars['{node[1]}']"
         return 'v_' + node[1].replace('$', '_')
 
     def _ty_empty(self, node) -> List[str]:
@@ -667,6 +669,7 @@ def _ty_unicat(self, node) -> List[str]:
 #    `pyfloyd {args}`
 
 {imports}
+import json
 import re
 
 # pylint: disable=too-many-lines
@@ -681,6 +684,8 @@ def main(
     opener=open,
 ) -> int:
     arg_parser = argparse.ArgumentParser()
+    arg_parser.add_argument('-D', '--define', action='append', default=[],
+                            help='Define a global var=value')
     arg_parser.add_argument('file', nargs='?')
     args = arg_parser.parse_args(argv)
 
@@ -694,8 +699,13 @@ def main(
         path = args.file
         fp = opener(path)
 
+    global_vars = {{}}
+    for d in args.define:
+        k, v = d.split('=', 1)
+        global_vars[k] = json.loads(v)
+
     msg = fp.read()
-    result = parse(msg, path)
+    result = parse(msg, path, global_vars)
     if result.err:
         print(result.err, file=stderr)
         return 1
@@ -750,7 +760,7 @@ class Result(NamedTuple):
     pos: Optional[int] = None
 
 
-def parse(text: str, path: str = '<string>') -> Result:
+def parse(text: str, path: str = '<string>', global_vars = None) -> Result:
     \"\"\"Parse a given text and return the result.
 
     If the parse was successful, `result.val` will be the returned value
@@ -765,14 +775,15 @@ def parse(text: str, path: str = '<string>') -> Result:
     messages to indicate the path to the filename containing the given
     text.
     \"\"\"
-    return _Parser(text, path).parse()
+    return _Parser(text, path).parse(global_vars)
 
 
 class _Parser:
     def __init__(self, text, path):
         self._text = text
         self._end = len(self._text)
         self._errpos = 0
+        self._global_vars = {}
         self._failed = False
         self._path = path
         self._pos = 0
@@ -781,7 +792,8 @@ def __init__(self, text, path):
 
 
 _PARSE = """\
-    def parse(self):
+    def parse(self, global_vars=None):
+        self._global_vars = global_vars or {{}}
         self._r_{starting_rule}()
         if self._failed:
             return Result(None, self._err_str(), self._errpos)
@@ -790,12 +802,13 @@ def parse(self):
 
 
 _PARSE_WITH_EXCEPTION = """\
-    def parse(self):
+    def parse(self, global_vars=None):
+        self._global_vars = global_vars or {{}}
         try:
             self._r_{starting_rule}()
             if self._failed:
-                return None, self._err_str(), self._errpos
-            return self._val, None, self._pos
+                return Result(None, self._err_str(), self._errpos)
+            return Result(self._val, None, self._pos)
         except _ParsingRuntimeError as e:  # pragma: no cover
             lineno, _ = self._err_offsets()
             return (
@@ -889,6 +902,8 @@ def _lookup(self, var):
             if var in self._scopes[l]:
                 return self._scopes[l][var]
             l -= 1
+        if var in self._global_vars:
+            return self._global_vars[var]
         assert False, f'unknown var {var}'
 
     def _memoize(self, rule_name, fn):