Skip to content

Commit

Permalink
First pass at supporting global vars.
Browse files Browse the repository at this point in the history
Everything works in the interpreter and the Python generator, but
not in the JavaScript generator yet. This is roughly feature parity
with the recent changes in glop.

This adds a new pragma, '%globals', so that a grammar can explicitly
declare the globals that it should be looking for. We should add
a check in parse that every expected global is being passed in.

Also, we still need to add new tests to cover this functionality.

Updates the JSON5 grammar to support the `_strict` option as requested
over in pyjson5-land.
  • Loading branch information
dpranke committed Nov 18, 2024
1 parent 161b14e commit 6c544d7
Show file tree
Hide file tree
Showing 10 changed files with 123 additions and 55 deletions.
4 changes: 4 additions & 0 deletions grammars/json5.g
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

%tokens = ident num_literal string

%globals = _strict

grammar = value end -> $1

value = 'null' -> null
Expand All @@ -36,10 +38,12 @@ string = squote sqchar* squote -> cat($2)
sqchar = bslash esc_char -> $2
| bslash eol -> ''
| ~bslash ~squote ~eol any -> $4
| ~(?{ _strict }) '\x00'..'\x1f'

dqchar = bslash esc_char -> $2
| bslash eol -> ''
| ~bslash ~dquote ~eol any -> $4
| ~(?{ _strict }) '\x00'..'\x1f'

bslash = '\\'

Expand Down
16 changes: 12 additions & 4 deletions src/pyfloyd/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ def __init__(self, ast):
self.operators = {}
self.leftrec_rules = set()
self.outer_scope_rules = set()
self.global_vars = set()

has_starting_rule = False
for n in self.ast[2]:
Expand Down Expand Up @@ -87,7 +88,7 @@ def __init__(self):
self.choices = {}


def analyze(ast, rewrite_filler: bool, rewrite_subrules: bool) -> Grammar:
def analyze(ast, rewrite_filler: bool, rewrite_subrules: bool, global_vars=None) -> Grammar:
"""Analyze and optimize the AST.
This runs any static analysis we can do over the grammars and
Expand Down Expand Up @@ -215,6 +216,10 @@ def check_pragma(self, node):
operator = seq[2][0][1]
direction = seq[2][1][1]
self.grammar.assoc[operator] = direction
elif pragma == '%globals':
for choice in node[2][0][2]:
for t in choice[2]:
self.grammar.global_vars.add(t[1])
else:
self.errors.append(f'Unknown pragma "{pragma}"')

Expand Down Expand Up @@ -325,6 +330,8 @@ def _check_named_vars(self, node, labels, references):
if node[0] == 'e_var':
if node[1] in labels:
references.add(node[1])
elif node[1] in self.grammar.global_vars:
pass
elif not node[1][0] == '$':
self.errors.append(f'Unknown variable "{node[1]}" referenced')

Expand Down Expand Up @@ -353,7 +360,7 @@ def rewrite_node(node):
def _rewrite_recursion(grammar):
"""Rewrite the AST to insert leftrec and operator nodes as needed."""
for node in grammar.ast[2]:
if node[0] == 'pragma':
if node[1][0] == '%':
continue
name = node[1]
assert node[2][0][0] == 'choice'
Expand Down Expand Up @@ -423,6 +430,7 @@ def _check_lr(name, node, grammar, seen):
return False
seen.add(node[1])
return _check_lr(name, grammar.rules[node[1]], grammar, seen)

if ty == 'seq':
for subnode in node[2]:
if subnode[0] == 'lit':
Expand Down Expand Up @@ -488,15 +496,15 @@ def _rewrite_filler(grammar):
grammar.ast[2] = [
rule
for rule in grammar.ast[2]
if rule[1] not in ('%whitespace', '%comment', '%token', '%tokens')
if rule[1] not in ('%whitespace', '%comment', '%globals', '%token', '%tokens')
]
grammar.comment = None
grammar.whitespace = None
grammar.tokens = set()
grammar.pragmas = [
n
for n in grammar.pragmas
if n[1] not in ('%whitespace', '%comment', '%token', '%tokens')
if n[1] not in ('%whitespace', '%comment', '%globals', '%token', '%tokens')
]


Expand Down
16 changes: 12 additions & 4 deletions src/pyfloyd/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import NamedTuple, Optional, Protocol, Tuple
from typing import Any, Dict, NamedTuple, Optional, Protocol, Tuple

from pyfloyd import analyzer
from pyfloyd.interpreter import Interpreter
Expand All @@ -32,7 +32,7 @@ class ParserInterface(Protocol):
`compile()`.
"""

def parse(self, text: str, path: str = '<string>') -> Result:
def parse(self, text: str, path: str = '<string>', global_vars: Optional[Dict[str, Any]] = None) -> Result:
"""Parse a string and return a result.
`text` is the string to parse.
Expand Down Expand Up @@ -84,6 +84,7 @@ def generate(
grammar: str,
path: str = '<string>',
options: Optional[GeneratorOptions] = None,
global_vars = None
) -> Result:
"""Generate the source code of a parser.
Expand Down Expand Up @@ -116,11 +117,12 @@ def generate(
"""

result = parser.parse(grammar, path)
global_vars = global_vars or {}
if result.err:
return result
try:
grammar_obj = analyzer.analyze(
result.val, rewrite_filler=True, rewrite_subrules=True
result.val, rewrite_filler=True, rewrite_subrules=True, global_vars=global_vars
)
except analyzer.AnalysisError as e:
return Result(err=str(e))
Expand All @@ -141,6 +143,7 @@ def parse(
text: str,
grammar_path: str = '<string>',
path: str = '<string>',
global_vars : Dict[str, Any] = None,
memoize: bool = False,
) -> Result:
"""Match an input text against the specified grammar.
Expand All @@ -165,12 +168,13 @@ def parse(
if result.err:
return Result(err='Error in grammar: ' + result.err, pos=result.pos)
assert result.parser is not None
return result.parser.parse(text, path)
return result.parser.parse(text, path, global_vars)


def pretty_print(
grammar: str,
path: str = '<string>',
global_vars = None,
rewrite_filler: bool = False,
rewrite_subrules: bool = False,
) -> Tuple[Optional[str], Optional[str]]:
Expand All @@ -197,9 +201,11 @@ def pretty_print(
if result.err:
return None, result.err

global_vars = global_vars or {}
try:
g = analyzer.analyze(
result.val,
global_vars=global_vars,
rewrite_filler=rewrite_filler,
rewrite_subrules=rewrite_subrules,
)
Expand All @@ -211,6 +217,7 @@ def pretty_print(
def dump_ast(
grammar: str,
path: str = '<string>',
global_vars = None,
rewrite_filler: bool = False,
rewrite_subrules: bool = False,
) -> Tuple[Optional[str], Optional[str]]:
Expand All @@ -225,6 +232,7 @@ def dump_ast(
try:
g = analyzer.analyze(
result.val,
global_vars=global_vars,
rewrite_filler=rewrite_filler,
rewrite_subrules=rewrite_subrules,
)
Expand Down
18 changes: 12 additions & 6 deletions src/pyfloyd/interpreter.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,9 @@ def __init__(self, grammar, memoize):
self._blocked = set()
self._operators = {}
self._regexps = {}
self._global_vars = {}

def parse(self, text: str, path: str = '<string>') -> parser.Result:
def parse(self, text: str, path: str = '<string>', global_vars = None) -> parser.Result:
self._text = text
self._path = path
self._failed = False
Expand All @@ -58,6 +59,7 @@ def parse(self, text: str, path: str = '<string>') -> parser.Result:
self._errstr = None
self._errpos = 0
self._scopes = [{}]
self._global_vars = global_vars or {}

self._interpret(self._grammar.rules[self._grammar.starting_rule])
if self._failed:
Expand Down Expand Up @@ -278,18 +280,22 @@ def _ty_e_var(self, node):
return

# Unknown variables should have been caught in analysis.
if node[1][0] == '$':
v = node[1]
if v[0] == '$':
# Look up positional labels in the current scope.
self._succeed(self._scopes[-1][node[1]])
self._succeed(self._scopes[-1][v])
else:
# Look up named labels in any scope.
i = len(self._scopes) - 1
while i >= 0:
if node[1] in self._scopes[i]:
self._succeed(self._scopes[i][node[1]])
if v in self._scopes[i]:
self._succeed(self._scopes[i][v])
return
i -= 1
assert False, f'Unknown label "{node[1]}"'
if v in self._global_vars:
self._succeed(self._global_vars[v])
return
assert False, f'Unknown label "{v}"'

def _ty_empty(self, node):
del node
Expand Down
10 changes: 6 additions & 4 deletions src/pyfloyd/parser.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by pyfloyd version 0.18.0.dev0
# Generated by pyfloyd version 0.18.4.dev0
# https://github.com/dpranke/pyfloyd
# `pyfloyd -o src/pyfloyd/parser.py --memoize -c grammars/floyd.g`

Expand All @@ -24,7 +24,7 @@ class Result(NamedTuple):
pos: Optional[int] = None


def parse(text: str, path: str = '<string>') -> Result:
def parse(text: str, path: str = '<string>', global_vars = None) -> Result:
"""Parse a given text and return the result.
If the parse was successful, `result.val` will be the returned value
Expand All @@ -39,22 +39,24 @@ def parse(text: str, path: str = '<string>') -> Result:
messages to indicate the path to the filename containing the given
text.
"""
return _Parser(text, path).parse()
return _Parser(text, path).parse(global_vars)


class _Parser:
def __init__(self, text, path):
self._text = text
self._end = len(self._text)
self._errpos = 0
self._global_vars = {}
self._failed = False
self._path = path
self._pos = 0
self._val = None
self._cache = {}
self._regexps = {}

def parse(self):
def parse(self, global_vars=None):
self._global_vars = global_vars or {}
self._r_grammar()
if self._failed:
return Result(None, self._err_str(), self._errpos)
Expand Down
4 changes: 3 additions & 1 deletion src/pyfloyd/printer.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ def _build_rules(self):
cs = [(node[0], '')]
else:
cs = [(' '.join(node), '')]
elif rule_name == '%globals':
cs = [(' '.join(node), '')]
else:
assert rule_name in (
'%comment',
Expand Down Expand Up @@ -189,7 +191,7 @@ def _ty_plus(self, node):
return self._proc(node[2][0]) + '+'

def _ty_pred(self, node):
return '?{%s}' % self._proc(node[2][0])
return '?{ %s }' % self._proc(node[2][0])

def _ty_range(self, node):
return '%s..%s' % (lit.encode(node[1][0]), lit.encode(node[1][1]))
Expand Down
29 changes: 22 additions & 7 deletions src/pyfloyd/python_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,8 @@ def _ty_e_qual(self, node) -> Saw:
def _ty_e_var(self, node) -> str:
if self._current_rule in self._grammar.outer_scope_rules:
return f"self._lookup('{node[1]}')"
if node[1] in self._grammar.global_vars:
return f"self._global_vars['{node[1]}']"
return 'v_' + node[1].replace('$', '_')

def _ty_empty(self, node) -> List[str]:
Expand Down Expand Up @@ -667,6 +669,7 @@ def _ty_unicat(self, node) -> List[str]:
# `pyfloyd {args}`
{imports}
import json
import re
# pylint: disable=too-many-lines
Expand All @@ -681,6 +684,8 @@ def main(
opener=open,
) -> int:
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument('-D', '--define', action='append', default=[],
help='Define a global var=value')
arg_parser.add_argument('file', nargs='?')
args = arg_parser.parse_args(argv)
Expand All @@ -694,8 +699,13 @@ def main(
path = args.file
fp = opener(path)
global_vars = {{}}
for d in args.define:
k, v = d.split('=', 1)
global_vars[k] = json.loads(v)
msg = fp.read()
result = parse(msg, path)
result = parse(msg, path, global_vars)
if result.err:
print(result.err, file=stderr)
return 1
Expand Down Expand Up @@ -750,7 +760,7 @@ class Result(NamedTuple):
pos: Optional[int] = None
def parse(text: str, path: str = '<string>') -> Result:
def parse(text: str, path: str = '<string>', global_vars = None) -> Result:
\"\"\"Parse a given text and return the result.
If the parse was successful, `result.val` will be the returned value
Expand All @@ -765,14 +775,15 @@ def parse(text: str, path: str = '<string>') -> Result:
messages to indicate the path to the filename containing the given
text.
\"\"\"
return _Parser(text, path).parse()
return _Parser(text, path).parse(global_vars)
class _Parser:
def __init__(self, text, path):
self._text = text
self._end = len(self._text)
self._errpos = 0
self._global_vars = {}
self._failed = False
self._path = path
self._pos = 0
Expand All @@ -781,7 +792,8 @@ def __init__(self, text, path):


_PARSE = """\
def parse(self):
def parse(self, global_vars=None):
self._global_vars = global_vars or {{}}
self._r_{starting_rule}()
if self._failed:
return Result(None, self._err_str(), self._errpos)
Expand All @@ -790,12 +802,13 @@ def parse(self):


_PARSE_WITH_EXCEPTION = """\
def parse(self):
def parse(self, global_vars=None):
self._global_vars = global_vars or {{}}
try:
self._r_{starting_rule}()
if self._failed:
return None, self._err_str(), self._errpos
return self._val, None, self._pos
return Result(None, self._err_str(), self._errpos)
return Result(self._val, None, self._pos)
except _ParsingRuntimeError as e: # pragma: no cover
lineno, _ = self._err_offsets()
return (
Expand Down Expand Up @@ -889,6 +902,8 @@ def _lookup(self, var):
if var in self._scopes[l]:
return self._scopes[l][var]
l -= 1
if var in self._global_vars:
return self._global_vars[var]
assert False, f'unknown var {var}'
def _memoize(self, rule_name, fn):
Expand Down
Loading

0 comments on commit 6c544d7

Please sign in to comment.