diff --git a/python/lsst/daf/butler/registry/queries/expressions/parser/parser.py b/python/lsst/daf/butler/registry/queries/expressions/parser/parser.py index aa69e0f30a..a58cb5ff77 100644 --- a/python/lsst/daf/butler/registry/queries/expressions/parser/parser.py +++ b/python/lsst/daf/butler/registry/queries/expressions/parser/parser.py @@ -28,7 +28,7 @@ from __future__ import annotations from .exprTree import Node -from .parserYacc import ParserYacc # type: ignore +from .parserYacc import ParserYacc def parse_expression(expression: str) -> Node | None: diff --git a/python/lsst/daf/butler/registry/queries/expressions/parser/parserLex.py b/python/lsst/daf/butler/registry/queries/expressions/parser/parserLex.py index b09303da7f..77fe302b8d 100644 --- a/python/lsst/daf/butler/registry/queries/expressions/parser/parserLex.py +++ b/python/lsst/daf/butler/registry/queries/expressions/parser/parserLex.py @@ -25,35 +25,30 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see <https://www.gnu.org/licenses/>. -# type: ignore - -"""Module which defines PLY lexer for user expressions parsed by pre-flight. -""" +"""Module which defines PLY lexer for user expressions parsed by pre-flight.""" __all__ = ["ParserLex", "ParserLexError"] -# ------------------------------- -# Imports of standard modules -- -# ------------------------------- import re +from typing import Any, Protocol -# ----------------------------- -# Imports for other modules -- -# ----------------------------- from .ply import lex -# ---------------------------------- -# Local non-exported definitions -- -# ---------------------------------- - _RE_RANGE = r"(?P<start>-?\d+)\s*\.\.\s*(?P<stop>-?\d+)(\s*:\s*(?P<stride>[1-9]\d*))?" """Regular expression to match range literal in the form NUM..NUM[:NUM], this must match t_RANGE_LITERAL docstring. 
""" -# ------------------------ -# Exported definitions -- -# ------------------------ + +class LexToken(Protocol): + """Protocol for LexToken defined in ``ply.lex``.""" + + value: Any + type: str + lexer: Any + lexdata: str + lexpos: int + lineno: int class ParserLexError(Exception): @@ -72,7 +67,7 @@ class ParserLexError(Exception): Current line number in the expression. """ - def __init__(self, expression, remain, pos, lineno): + def __init__(self, expression: str, remain: str, pos: int, lineno: int): Exception.__init__(self, f"Unexpected character at position {pos}") self.expression = expression self.remain = remain @@ -84,7 +79,7 @@ class ParserLex: """Class which defines PLY lexer.""" @classmethod - def make_lexer(cls, reflags=0, **kwargs): + def make_lexer(cls, reflags: int = 0, **kwargs: Any) -> Any: """Return lexer. Parameters @@ -169,19 +164,19 @@ def make_lexer(cls, reflags=0, **kwargs): t_ignore = " \t" # Define a rule so we can track line numbers - def t_newline(self, t): + def t_newline(self, t: LexToken) -> None: r"""\n+""" t.lexer.lineno += len(t.value) # quoted string prefixed with 'T' - def t_TIME_LITERAL(self, t): + def t_TIME_LITERAL(self, t: LexToken) -> LexToken: """T'.*?'""" # strip quotes t.value = t.value[2:-1] return t # quoted string - def t_STRING_LITERAL(self, t): + def t_STRING_LITERAL(self, t: LexToken) -> LexToken: """'.*?'""" # strip quotes t.value = t.value[1:-1] @@ -189,8 +184,9 @@ def t_STRING_LITERAL(self, t): # range literal in format N..M[:S], spaces allowed, see _RE_RANGE @lex.TOKEN(_RE_RANGE) - def t_RANGE_LITERAL(self, t): + def t_RANGE_LITERAL(self, t: LexToken) -> LexToken: match = re.match(_RE_RANGE, t.value) + assert match is not None, "Guaranteed by tokenization" start = int(match.group("start")) stop = int(match.group("stop")) stride = match.group("stride") @@ -200,7 +196,7 @@ def t_RANGE_LITERAL(self, t): return t # numbers are used as strings by parser, do not convert - def t_NUMERIC_LITERAL(self, t): + def 
t_NUMERIC_LITERAL(self, t: LexToken) -> LexToken: r"""\d+(\.\d*)?(e[-+]?\d+)? # 1, 1., 1.1, 1e10, 1.1e-10, etc. | \.\d+(e[-+]?\d+)? # .1, .1e10, .1e+10 @@ -208,13 +204,13 @@ def t_NUMERIC_LITERAL(self, t): return t # qualified identifiers have one or two dots - def t_QUALIFIED_IDENTIFIER(self, t): + def t_QUALIFIED_IDENTIFIER(self, t: LexToken) -> LexToken: r"""[a-zA-Z_][a-zA-Z0-9_]*(\.[a-zA-Z_][a-zA-Z0-9_]*){1,2}""" t.type = "QUALIFIED_IDENTIFIER" return t # we only support ASCII in identifier names - def t_SIMPLE_IDENTIFIER(self, t): + def t_SIMPLE_IDENTIFIER(self, t: LexToken) -> LexToken: """[a-zA-Z_][a-zA-Z0-9_]*""" # Check for reserved words and make sure they are upper case reserved = self.reserved.get(t.value.upper()) @@ -225,7 +221,7 @@ def t_SIMPLE_IDENTIFIER(self, t): t.type = "SIMPLE_IDENTIFIER" return t - def t_error(self, t): + def t_error(self, t: LexToken) -> None: """Error handling rule""" lexer = t.lexer raise ParserLexError(lexer.lexdata, t.value, lexer.lexpos, lexer.lineno) diff --git a/python/lsst/daf/butler/registry/queries/expressions/parser/parserYacc.py b/python/lsst/daf/butler/registry/queries/expressions/parser/parserYacc.py index 9e0dacf9c9..bfbfdc599c 100644 --- a/python/lsst/daf/butler/registry/queries/expressions/parser/parserYacc.py +++ b/python/lsst/daf/butler/registry/queries/expressions/parser/parserYacc.py @@ -25,22 +25,17 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see <https://www.gnu.org/licenses/>. -# type: ignore +"""Syntax definition for user expression parser.""" -"""Syntax definition for user expression parser. 
-""" +from __future__ import annotations __all__ = ["ParserYacc", "ParserYaccError", "ParseError", "ParserEOFError"] -# ------------------------------- -# Imports of standard modules -- -# ------------------------------- +import functools import re import warnings +from typing import Any, Protocol -# ----------------------------- -# Imports for other modules -- -# ----------------------------- import astropy.time # As of astropy 4.2, the erfa interface is shipped independently and @@ -54,6 +49,7 @@ BinaryOp, Identifier, IsIn, + Node, NumericLiteral, Parens, RangeLiteral, @@ -63,12 +59,21 @@ UnaryOp, function_call, ) -from .parserLex import ParserLex +from .parserLex import LexToken, ParserLex from .ply import yacc -# ---------------------------------- -# Local non-exported definitions -- -# ---------------------------------- + +class YaccProduction(Protocol): + """Protocol for YaccProduction defined in ``ply.yacc``.""" + + lexer: Any + + def __getitem__(self, n: int) -> Any: ... + def __setitem__(self, n: int, v: Any) -> None: ... + def __len__(self) -> int: ... + def lineno(self, n: int) -> int: ... + def lexpos(self, n: int) -> int: ... + # The purpose of this regex is to guess time format if it is not explicitly # provided in the string itself @@ -91,7 +96,7 @@ ) -def _parseTimeString(time_str): +def _parseTimeString(time_str: str) -> astropy.time.Time: """Try to convert time string into astropy.Time. Parameters @@ -212,7 +217,7 @@ class ParseError(ParserYaccError): Parsing position in current line, 0-based. 
""" - def __init__(self, expression, token, pos, lineno): + def __init__(self, expression: str, token: str, pos: int, lineno: int): self.expression = expression self.token = token self.pos = pos @@ -222,7 +227,7 @@ def __init__(self, expression, token, pos, lineno): msg = msg.format(token, lineno, self.posInLine + 1) ParserYaccError.__init__(self, msg) - def _posInLine(self): + def _posInLine(self) -> int: """Return position in current line""" lines = self.expression.split("\n") pos = self.pos @@ -235,7 +240,7 @@ def _posInLine(self): class ParserEOFError(ParserYaccError): """Exception raised for EOF-during-parser.""" - def __init__(self): + def __init__(self) -> None: Exception.__init__(self, "End of input reached while expecting further input") @@ -247,24 +252,22 @@ class ParserYacc: Parameters ---------- - idMap : `collections.abc.Mapping` [ `str`, `Node` ], optional - Mapping that provides substitutions for identifiers in the expression. - The key in the map is the identifier name, the value is the - `exprTree.Node` instance that will replace identifier in the full - expression. If identifier does not exist in the mapping then - `Identifier` is inserted into parse tree. **kwargs Optional keyword arguments that are passed to `yacc.yacc` constructor. """ - def __init__(self, idMap=None, **kwargs): + def __init__(self, **kwargs: Any): kw = dict(write_tables=0, debug=False) kw.update(kwargs) + self.parser = self._parser_factory(**kw) - self.parser = yacc.yacc(module=self, **kw) - self._idMap = idMap or {} + @staticmethod + @functools.cache + def _parser_factory(**kwarg: Any) -> Any: + """Make parser instance.""" + return yacc.yacc(module=ParserYacc, **kwarg) - def parse(self, input, lexer=None, debug=False, tracking=False): + def parse(self, input: str, lexer: Any = None, debug: bool = False, tracking: bool = False) -> Node: """Parse input expression ad return parsed tree object. 
This is a trivial wrapper for yacc.LRParser.parse method which @@ -302,17 +305,20 @@ def parse(self, input, lexer=None, debug=False, tracking=False): ) # this is the starting rule - def p_input(self, p): + @classmethod + def p_input(cls, p: YaccProduction) -> None: """input : expr | empty """ p[0] = p[1] - def p_empty(self, p): + @classmethod + def p_empty(cls, p: YaccProduction) -> None: """empty :""" p[0] = None - def p_expr(self, p): + @classmethod + def p_expr(cls, p: YaccProduction) -> None: """expr : expr OR expr | expr AND expr | NOT expr @@ -325,7 +331,8 @@ def p_expr(self, p): else: p[0] = p[1] - def p_bool_primary(self, p): + @classmethod + def p_bool_primary(cls, p: YaccProduction) -> None: """bool_primary : bool_primary EQ predicate | bool_primary NE predicate | bool_primary LT predicate @@ -340,7 +347,8 @@ def p_bool_primary(self, p): else: p[0] = BinaryOp(lhs=p[1], op=p[2], rhs=p[3]) - def p_predicate(self, p): + @classmethod + def p_predicate(cls, p: YaccProduction) -> None: """predicate : bit_expr IN LPAREN literal_or_id_list RPAREN | bit_expr NOT IN LPAREN literal_or_id_list RPAREN | bit_expr @@ -352,16 +360,15 @@ def p_predicate(self, p): else: p[0] = p[1] - def p_identifier(self, p): + @classmethod + def p_identifier(cls, p: YaccProduction) -> None: """identifier : SIMPLE_IDENTIFIER | QUALIFIED_IDENTIFIER """ - node = self._idMap.get(p[1]) - if node is None: - node = Identifier(p[1]) - p[0] = node + p[0] = Identifier(p[1]) - def p_literal_or_id_list(self, p): + @classmethod + def p_literal_or_id_list(cls, p: YaccProduction) -> None: """literal_or_id_list : literal_or_id_list COMMA literal | literal_or_id_list COMMA identifier | literal @@ -372,7 +379,8 @@ def p_literal_or_id_list(self, p): else: p[0] = p[1] + [p[3]] - def p_bit_expr(self, p): + @classmethod + def p_bit_expr(cls, p: YaccProduction) -> None: """bit_expr : bit_expr ADD bit_expr | bit_expr SUB bit_expr | bit_expr MUL bit_expr @@ -385,49 +393,59 @@ def p_bit_expr(self, p): else: p[0] 
= BinaryOp(lhs=p[1], op=p[2], rhs=p[3]) - def p_simple_expr_lit(self, p): + @classmethod + def p_simple_expr_lit(cls, p: YaccProduction) -> None: """simple_expr : literal""" p[0] = p[1] - def p_simple_expr_id(self, p): + @classmethod + def p_simple_expr_id(cls, p: YaccProduction) -> None: """simple_expr : identifier""" p[0] = p[1] - def p_simple_expr_function_call(self, p): + @classmethod + def p_simple_expr_function_call(cls, p: YaccProduction) -> None: """simple_expr : function_call""" p[0] = p[1] - def p_simple_expr_unary(self, p): + @classmethod + def p_simple_expr_unary(cls, p: YaccProduction) -> None: """simple_expr : ADD simple_expr %prec UPLUS | SUB simple_expr %prec UMINUS """ p[0] = UnaryOp(op=p[1], operand=p[2]) - def p_simple_expr_paren(self, p): + @classmethod + def p_simple_expr_paren(cls, p: YaccProduction) -> None: """simple_expr : LPAREN expr RPAREN""" p[0] = Parens(p[2]) - def p_simple_expr_tuple(self, p): + @classmethod + def p_simple_expr_tuple(cls, p: YaccProduction) -> None: """simple_expr : LPAREN expr COMMA expr RPAREN""" # For now we only support tuples with two items, # these are used for time ranges. 
p[0] = TupleNode((p[2], p[4])) - def p_literal_num(self, p): + @classmethod + def p_literal_num(cls, p: YaccProduction) -> None: """literal : NUMERIC_LITERAL""" p[0] = NumericLiteral(p[1]) - def p_literal_num_signed(self, p): + @classmethod + def p_literal_num_signed(cls, p: YaccProduction) -> None: """literal : ADD NUMERIC_LITERAL %prec UPLUS | SUB NUMERIC_LITERAL %prec UMINUS """ p[0] = NumericLiteral(p[1] + p[2]) - def p_literal_str(self, p): + @classmethod + def p_literal_str(cls, p: YaccProduction) -> None: """literal : STRING_LITERAL""" p[0] = StringLiteral(p[1]) - def p_literal_time(self, p): + @classmethod + def p_literal_time(cls, p: YaccProduction) -> None: """literal : TIME_LITERAL""" try: value = _parseTimeString(p[1]) @@ -435,17 +453,20 @@ def p_literal_time(self, p): raise ParseError(p.lexer.lexdata, p[1], p.lexpos(1), p.lineno(1)) from e p[0] = TimeLiteral(value) - def p_literal_range(self, p): + @classmethod + def p_literal_range(cls, p: YaccProduction) -> None: """literal : RANGE_LITERAL""" # RANGE_LITERAL value is tuple of three numbers start, stop, stride = p[1] p[0] = RangeLiteral(start, stop, stride) - def p_function_call(self, p): + @classmethod + def p_function_call(cls, p: YaccProduction) -> None: """function_call : SIMPLE_IDENTIFIER LPAREN expr_list RPAREN""" p[0] = function_call(p[1], p[3]) - def p_expr_list(self, p): + @classmethod + def p_expr_list(cls, p: YaccProduction) -> None: """expr_list : expr_list COMMA expr | expr | empty @@ -461,7 +482,8 @@ def p_expr_list(self, p): # ---------- end of all grammar rules ---------- # Error rule for syntax errors - def p_error(self, p): + @classmethod + def p_error(cls, p: LexToken | None) -> None: if p is None: raise ParserEOFError() else: diff --git a/tests/test_exprParserYacc.py b/tests/test_exprParserYacc.py index 2e52b5911e..7af6c8e8e0 100644 --- a/tests/test_exprParserYacc.py +++ b/tests/test_exprParserYacc.py @@ -437,45 +437,6 @@ def testExpression(self): 
self.assertIsInstance(tree.rhs.rhs, exprTree.StringLiteral) self.assertEqual(tree.rhs.rhs.value, "i") - def testSubstitution(self): - """Test for identifier substitution""" - # substitution is not recursive, so we can swap id2/id3 - idMap = { - "id1": exprTree.StringLiteral("id1 value"), - "id2": exprTree.Identifier("id3"), - "id3": exprTree.Identifier("id2"), - "POINT": exprTree.StringLiteral("not used"), - "OR": exprTree.StringLiteral("not used"), - } - parser = ParserYacc(idMap=idMap) - - expression = "id1 = 'v'" - tree = parser.parse(expression) - self.assertIsInstance(tree, exprTree.BinaryOp) - self.assertEqual(tree.op, "=") - self.assertIsInstance(tree.lhs, exprTree.StringLiteral) - self.assertEqual(tree.lhs.value, "id1 value") - - expression = "id2 - id3" - tree = parser.parse(expression) - self.assertIsInstance(tree, exprTree.BinaryOp) - self.assertEqual(tree.op, "-") - self.assertIsInstance(tree.lhs, exprTree.Identifier) - self.assertEqual(tree.lhs.name, "id3") - self.assertIsInstance(tree.rhs, exprTree.Identifier) - self.assertEqual(tree.rhs.name, "id2") - - # reserved words are not substituted - expression = "id2 OR id3" - tree = parser.parse(expression) - self.assertIsInstance(tree, exprTree.BinaryOp) - self.assertEqual(tree.op, "OR") - - # function names are not substituted - expression = "POINT(1, 2)" - tree = parser.parse(expression) - self.assertIsInstance(tree, exprTree.PointNode) - def testException(self): """Test for exceptional cases"""