diff --git a/src/main/java/org/perlonjava/parser/ListParser.java b/src/main/java/org/perlonjava/parser/ListParser.java index 8a12c7a..149e7d8 100644 --- a/src/main/java/org/perlonjava/parser/ListParser.java +++ b/src/main/java/org/perlonjava/parser/ListParser.java @@ -16,7 +16,7 @@ public class ListParser { // Comma is allowed after the argument: rand, rand 10, // static ListNode parseZeroOrOneList(Parser parser, int minItems) { - if (parser.looksLikeEmptyList()) { + if (looksLikeEmptyList(parser)) { // return an empty list if (minItems > 0) { throw new PerlCompilerException(parser.tokenIndex, "Syntax error", parser.ctx.errorUtil); @@ -25,11 +25,11 @@ static ListNode parseZeroOrOneList(Parser parser, int minItems) { } ListNode expr; - LexerToken token = parser.peek(); + LexerToken token = TokenUtils.peek(parser); if (token.text.equals("(")) { // argument in parentheses, can be 0 or 1 argument: rand(), rand(10) // Commas are allowed after the single argument: rand(10,) - parser.consume(); + TokenUtils.consume(parser); expr = new ListNode(parseList(parser, ")", 0), parser.tokenIndex); if (expr.elements.size() > 1) { throw new PerlCompilerException(parser.tokenIndex, "Syntax error", parser.ctx.errorUtil); @@ -71,20 +71,20 @@ static ListNode parseZeroOrMoreList(Parser parser, int minItems, boolean wantBlo if (wantRegex) { boolean matched = false; - if (parser.peek().text.equals("(")) { - parser.consume(); + if (TokenUtils.peek(parser).text.equals("(")) { + TokenUtils.consume(parser); hasParen = true; } - if (parser.peek().text.equals("/") || parser.peek().text.equals("//")) { - parser.consume(); + if (TokenUtils.peek(parser).text.equals("/") || TokenUtils.peek(parser).text.equals("//")) { + TokenUtils.consume(parser); Node regex = StringParser.parseRawString(parser, "/"); if (regex != null) { matched = true; expr.elements.add(regex); - token = parser.peek(); + token = TokenUtils.peek(parser); if (token.type != LexerTokenType.EOF && !Parser.LIST_TERMINATORS.contains(token.text)) { // consume comma - parser.consume(LexerTokenType.OPERATOR, ","); + TokenUtils.consume(parser, LexerTokenType.OPERATOR, ","); } } } @@ -96,8 +96,8 @@ static ListNode parseZeroOrMoreList(Parser parser, int minItems, boolean wantBlo } if (wantFileHandle) { - if (parser.peek().text.equals("(")) { - parser.consume(); + if (TokenUtils.peek(parser).text.equals("(")) { + TokenUtils.consume(parser); hasParen = true; } expr.handle = parser.parseFileHandle(); @@ -109,37 +109,37 @@ static ListNode parseZeroOrMoreList(Parser parser, int minItems, boolean wantBlo } if (wantBlockNode) { - if (parser.peek().text.equals("(")) { - parser.consume(); + if (TokenUtils.peek(parser).text.equals("(")) { + TokenUtils.consume(parser); hasParen = true; } - if (parser.peek().text.equals("{")) { - parser.consume(); + if (TokenUtils.peek(parser).text.equals("{")) { + TokenUtils.consume(parser); expr.handle = parser.parseBlock(); - parser.consume(LexerTokenType.OPERATOR, "}"); + TokenUtils.consume(parser, LexerTokenType.OPERATOR, "}"); } - if (!parser.isSpaceAfterPrintBlock() || parser.looksLikeEmptyList()) { + if (!parser.isSpaceAfterPrintBlock() || looksLikeEmptyList(parser)) { throw new PerlCompilerException(parser.tokenIndex, "Syntax error", parser.ctx.errorUtil); } } - if (!parser.looksLikeEmptyList()) { + if (!looksLikeEmptyList(parser)) { // it doesn't look like an empty list - token = parser.peek(); + token = TokenUtils.peek(parser); if (obeyParentheses && token.text.equals("(")) { // arguments in parentheses, can be 0 or more arguments: print(), print(10) // Commas are allowed after the arguments: print(10,) - parser.consume(); + TokenUtils.consume(parser); expr.elements.addAll(parseList(parser, ")", 0)); } else { while (token.type != LexerTokenType.EOF && !Parser.LIST_TERMINATORS.contains(token.text)) { // Argument without parentheses expr.elements.add(parser.parseExpression(parser.getPrecedence(","))); - token = parser.peek(); + token = TokenUtils.peek(parser); if (token.text.equals(",") || token.text.equals("=>")) { while (token.text.equals(",") || token.text.equals("=>")) { - parser.consume(); - token = parser.peek(); + TokenUtils.consume(parser); + token = TokenUtils.peek(parser); } } else { break; @@ -149,7 +149,7 @@ static ListNode parseZeroOrMoreList(Parser parser, int minItems, boolean wantBlo } if (hasParen) { - parser.consume(LexerTokenType.OPERATOR, ")"); + TokenUtils.consume(parser, LexerTokenType.OPERATOR, ")"); } parser.ctx.logDebug("parseZeroOrMoreList end: " + expr); @@ -174,16 +174,16 @@ static List parseList(Parser parser, String close, int minItems) { parser.ctx.logDebug("parseList start"); ListNode expr; - LexerToken token = parser.peek(); + LexerToken token = TokenUtils.peek(parser); parser.ctx.logDebug("parseList start at " + token); if (token.text.equals(close)) { // empty list - parser.consume(); + TokenUtils.consume(parser); expr = new ListNode(parser.tokenIndex); } else { expr = ListNode.makeList(parser.parseExpression(0)); - parser.ctx.logDebug("parseList end at " + parser.peek()); - parser.consume(LexerTokenType.OPERATOR, close); + parser.ctx.logDebug("parseList end at " + TokenUtils.peek(parser)); + TokenUtils.consume(parser, LexerTokenType.OPERATOR, close); } if (expr.elements.size() < minItems) { @@ -193,4 +193,43 @@ static List parseList(Parser parser, String close, int minItems) { return expr.elements; } + + public static boolean looksLikeEmptyList(Parser parser) { + boolean isEmptyList = false; + int previousIndex = parser.tokenIndex; + LexerToken token = TokenUtils.consume(parser); + LexerToken token1 = parser.tokens.get(parser.tokenIndex); // next token including spaces + LexerToken nextToken = TokenUtils.peek(parser); // after spaces + + if (token.type == LexerTokenType.EOF || Parser.LIST_TERMINATORS.contains(token.text)) { + isEmptyList = true; + } else if (token.text.equals("-") + && token1.type == LexerTokenType.IDENTIFIER + && token1.text.length() == 1) { + // -d, -e, -f, -l, -p, -x + isEmptyList = false; + } else if (Parser.INFIX_OP.contains(token.text) || token.text.equals(",")) { + // tokenIndex++; + parser.ctx.logDebug("parseZeroOrMoreList infix `" + token.text + "` followed by `" + nextToken.text + "`"); + if (token.text.equals("&")) { + // looks like a subroutine call, not an infix `&` + parser.ctx.logDebug("parseZeroOrMoreList looks like subroutine call"); + } else if (token.text.equals("%") && (nextToken.text.equals("$") || nextToken.type == LexerTokenType.IDENTIFIER)) { + // looks like a hash deref, not an infix `%` + parser.ctx.logDebug("parseZeroOrMoreList looks like Hash"); + } else if (token.text.equals(".") && token1.type == LexerTokenType.NUMBER) { + // looks like a fractional number, not an infix `.` + parser.ctx.logDebug("parseZeroOrMoreList looks like Number"); + } else { + // subroutine call with zero arguments, followed by infix operator: `pos = 3` + parser.ctx.logDebug("parseZeroOrMoreList return zero at `" + parser.tokens.get(parser.tokenIndex) + "`"); + // if (LVALUE_INFIX_OP.contains(token.text)) { + // throw new PerlCompilerException(tokenIndex, "Can't modify non-lvalue subroutine call", ctx.errorUtil); + // } + isEmptyList = true; + } + } + parser.tokenIndex = previousIndex; + return isEmptyList; + } } diff --git a/src/main/java/org/perlonjava/parser/NumberParser.java b/src/main/java/org/perlonjava/parser/NumberParser.java index bc38eb6..a0d5aa4 100644 --- a/src/main/java/org/perlonjava/parser/NumberParser.java +++ b/src/main/java/org/perlonjava/parser/NumberParser.java @@ -30,12 +30,12 @@ public static NumberNode parseNumber(Parser parser, LexerToken token) { char secondChar = letter.charAt(0); if (secondChar == 'b' || secondChar == 'B') { // Binary number: 0b... - parser.consume(); + TokenUtils.consume(parser); int num = Integer.parseInt(letter.substring(1), 2); return new NumberNode(Integer.toString(num), parser.tokenIndex); } else if (secondChar == 'x' || secondChar == 'X') { // Hexadecimal number: 0x... - parser.consume(); + TokenUtils.consume(parser); int num = Integer.parseInt(letter.substring(1), 16); return new NumberNode(Integer.toString(num), parser.tokenIndex); } @@ -48,9 +48,9 @@ public static NumberNode parseNumber(Parser parser, LexerToken token) { // Check for fractional part if (parser.tokens.get(parser.tokenIndex).text.equals(".")) { - number.append(parser.consume().text); // consume '.' + number.append(TokenUtils.consume(parser).text); // consume '.' if (parser.tokens.get(parser.tokenIndex).type == LexerTokenType.NUMBER) { - number.append(parser.consume().text); // consume digits after '.' + number.append(TokenUtils.consume(parser).text); // consume digits after '.' } } // Check for exponent part @@ -89,9 +89,9 @@ public static Node parseFractionalNumber(Parser parser) { */ public static void checkNumberExponent(Parser parser, StringBuilder number) { // Check for exponent part - String exponentPart = parser.peek().text; + String exponentPart = TokenUtils.peek(parser).text; if (exponentPart.startsWith("e") || exponentPart.startsWith("E")) { - parser.consume(); // consume 'e' or 'E' and possibly more 'E10' + TokenUtils.consume(parser); // consume 'e' or 'E' and possibly more 'E10' // Check if the rest of the token contains digits (e.g., "E10") int index = 1; @@ -106,11 +106,11 @@ public static void checkNumberExponent(Parser parser, StringBuilder number) { if (index == 1) { // Check for optional sign if (parser.tokens.get(parser.tokenIndex).text.equals("-") || parser.tokens.get(parser.tokenIndex).text.equals("+")) { - number.append(parser.consume().text); // consume '-' or '+' + number.append(TokenUtils.consume(parser).text); // consume '-' or '+' } // Consume exponent digits - number.append(parser.consume(LexerTokenType.NUMBER).text); + number.append(TokenUtils.consume(parser, LexerTokenType.NUMBER).text); } } } diff --git a/src/main/java/org/perlonjava/parser/OperatorParser.java b/src/main/java/org/perlonjava/parser/OperatorParser.java index a8d3516..72cb08f 100644 --- a/src/main/java/org/perlonjava/parser/OperatorParser.java +++ b/src/main/java/org/perlonjava/parser/OperatorParser.java @@ -48,7 +48,7 @@ static BinaryOperatorNode parseMapGrepSort(Parser parser, LexerToken token) { static Node parseRequire(Parser parser) { LexerToken token; // Handle 'require' keyword which can be followed by a version, bareword or filename - token = parser.peek(); + token = TokenUtils.peek(parser); Node operand; if (token.type == LexerTokenType.IDENTIFIER) { // TODO `require` version @@ -78,11 +78,11 @@ static Node parseDoOperator(Parser parser) { LexerToken token; Node block; // Handle 'do' keyword which can be followed by a block or filename - token = parser.peek(); + token = TokenUtils.peek(parser); if (token.type == LexerTokenType.OPERATOR && token.text.equals("{")) { - parser.consume(LexerTokenType.OPERATOR, "{"); + TokenUtils.consume(parser, LexerTokenType.OPERATOR, "{"); block = parser.parseBlock(); - parser.consume(LexerTokenType.OPERATOR, "}"); + TokenUtils.consume(parser, LexerTokenType.OPERATOR, "}"); return block; } // `do` file @@ -101,12 +101,12 @@ static AbstractNode parseEval(Parser parser) { Node operand; LexerToken token; // Handle 'eval' keyword which can be followed by a block or an expression - token = parser.peek(); + token = TokenUtils.peek(parser); if (token.type == LexerTokenType.OPERATOR && token.text.equals("{")) { // If the next token is '{', parse a block - parser.consume(LexerTokenType.OPERATOR, "{"); + TokenUtils.consume(parser, LexerTokenType.OPERATOR, "{"); block = parser.parseBlock(); - parser.consume(LexerTokenType.OPERATOR, "}"); + TokenUtils.consume(parser, LexerTokenType.OPERATOR, "}"); // transform: eval { 123 } // into: sub { 123 }->() with useTryCatch flag return new BinaryOperatorNode("->", @@ -146,7 +146,7 @@ static Node parseDiamondOperator(Parser parser, LexerToken token) { // Check if the next token is a closing angle bracket if (parser.tokens.get(parser.tokenIndex).text.equals(">")) { - parser.consume(); // Consume the '>' token + TokenUtils.consume(parser); // Consume the '>' token // Return a BinaryOperatorNode representing a readline operation return new BinaryOperatorNode("readline", var, @@ -165,7 +165,7 @@ static Node parseDiamondOperator(Parser parser, LexerToken token) { // Check if the next token is a closing angle bracket if (parser.tokens.get(parser.tokenIndex).text.equals(">")) { - parser.consume(); // Consume the '>' token + TokenUtils.consume(parser); // Consume the '>' token // Return a BinaryOperatorNode representing a readline operation return new BinaryOperatorNode("readline", new IdentifierNode("main::" + tokenText, currentTokenIndex), diff --git a/src/main/java/org/perlonjava/parser/Parser.java b/src/main/java/org/perlonjava/parser/Parser.java index 5e5060c..1e0e848 100644 --- a/src/main/java/org/perlonjava/parser/Parser.java +++ b/src/main/java/org/perlonjava/parser/Parser.java @@ -15,7 +15,7 @@ public class Parser { Set.of(":", ";", ")", "}", "]", "if", "unless", "while", "until", "for", "foreach", "when"); public static final Set LIST_TERMINATORS = Set.of(":", ";", ")", "}", "]", "if", "unless", "while", "until", "for", "foreach", "when", "not", "and", "or"); - private static final Set INFIX_OP = Set.of( + public static final Set INFIX_OP = Set.of( "or", "xor", "and", "||", "//", "&&", "|", "^", "&", "==", "!=", "<=>", "eq", "ne", "cmp", "<", ">", "<=", ">=", "lt", "gt", "le", "ge", "<<", ">>", "+", "-", "*", @@ -95,15 +95,15 @@ public BlockNode parseBlock() { int currentIndex = tokenIndex; ctx.symbolTable.enterScope(); List statements = new ArrayList<>(); - LexerToken token = peek(); + LexerToken token = TokenUtils.peek(this); while (token.type != LexerTokenType.EOF && !(token.type == LexerTokenType.OPERATOR && token.text.equals("}"))) { if (token.text.equals(";")) { - consume(); + TokenUtils.consume(this); } else { statements.add(parseStatement()); } - token = peek(); + token = TokenUtils.peek(this); } if (statements.isEmpty()) { statements.add(new ListNode(tokenIndex)); @@ -114,17 +114,17 @@ public BlockNode parseBlock() { public Node parseStatement() { int currentIndex = tokenIndex; - LexerToken token = peek(); + LexerToken token = TokenUtils.peek(this); ctx.logDebug("parseStatement `" + token.text + "`"); // check for label: String label = null; if (token.type == LexerTokenType.IDENTIFIER) { - String id = consume().text; - if (peek().text.equals(":")) { + String id = TokenUtils.consume(this).text; + if (TokenUtils.peek(this).text.equals(":")) { label = id; - consume(); - token = peek(); + TokenUtils.consume(this); + token = TokenUtils.peek(this); } else { tokenIndex = currentIndex; // backtrack } @@ -149,7 +149,7 @@ public Node parseStatement() { case "sub": // Must be followed by an identifier tokenIndex++; - if (peek().type == LexerTokenType.IDENTIFIER) { + if (TokenUtils.peek(this).type == LexerTokenType.IDENTIFIER) { return SubroutineParser.parseSubroutineDefinition(this, true); } // otherwise backtrack @@ -159,40 +159,40 @@ public Node parseStatement() { if (token.type == LexerTokenType.OPERATOR) { switch (token.text) { case "...": - consume(); + TokenUtils.consume(this); return new OperatorNode( "die", new StringNode("Unimplemented", tokenIndex), tokenIndex); case "{": if (!isHashLiteral()) { // bare-block - consume(LexerTokenType.OPERATOR, "{"); + TokenUtils.consume(this, LexerTokenType.OPERATOR, "{"); BlockNode block = parseBlock(); block.isLoop = true; block.labelName = label; - consume(LexerTokenType.OPERATOR, "}"); + TokenUtils.consume(this, LexerTokenType.OPERATOR, "}"); return block; } } } Node expression = parseExpression(0); - token = peek(); + token = TokenUtils.peek(this); if (token.type == LexerTokenType.IDENTIFIER) { // statement modifier: if, for ... switch (token.text) { case "if": - consume(); + TokenUtils.consume(this); Node modifierExpression = parseExpression(0); parseStatementTerminator(); return new BinaryOperatorNode("&&", modifierExpression, expression, tokenIndex); case "unless": - consume(); + TokenUtils.consume(this); modifierExpression = parseExpression(0); parseStatementTerminator(); return new BinaryOperatorNode("||", modifierExpression, expression, tokenIndex); case "for": case "foreach": - consume(); + TokenUtils.consume(this); modifierExpression = parseExpression(0); parseStatementTerminator(); return new For1Node( @@ -205,7 +205,7 @@ public Node parseStatement() { tokenIndex); case "while": case "until": - consume(); + TokenUtils.consume(this); modifierExpression = parseExpression(0); parseStatementTerminator(); if (token.text.equals("until")) { @@ -232,12 +232,12 @@ public Node parseStatement() { } public void parseStatementTerminator() { - LexerToken token = peek(); + LexerToken token = TokenUtils.peek(this); if (token.type != LexerTokenType.EOF && !token.text.equals("}") && !token.text.equals(";")) { throw new PerlCompilerException(tokenIndex, "Syntax error", ctx.errorUtil); } if (token.text.equals(";")) { - consume(); + TokenUtils.consume(this); } } @@ -246,12 +246,12 @@ public boolean isHashLiteral() { int currentIndex = tokenIndex; // Start after the opening '{' - consume(LexerTokenType.OPERATOR, "{"); + TokenUtils.consume(this, LexerTokenType.OPERATOR, "{"); int braceCount = 1; // Track nested braces bracesLoop: while (braceCount > 0) { - LexerToken token = consume(); + LexerToken token = TokenUtils.consume(this); ctx.logDebug("isHashLiteral " + token + " braceCount:" + braceCount); if (token.type == LexerTokenType.EOF) { break; // not a hash literal; @@ -306,7 +306,7 @@ public Node parseExpression(int precedence) { // Continuously process tokens until we reach the end of the expression. while (true) { // Peek at the next token to determine what to do next. - LexerToken token = peek(); + LexerToken token = TokenUtils.peek(this); // Check if we have reached the end of the input (EOF) or a terminator (like `;`). if (token.type == LexerTokenType.EOF || TERMINATORS.contains(token.text)) { @@ -625,12 +625,12 @@ private OperatorNode parseOperatorWithOneOptionalArgument(LexerToken token) { public Node parsePrimary() { int startIndex = tokenIndex; - LexerToken token = consume(); // Consume the next token from the input + LexerToken token = TokenUtils.consume(this); // Consume the next token from the input Node operand; switch (token.type) { case IDENTIFIER: - LexerToken nextToken = peek(); + LexerToken nextToken = TokenUtils.peek(this); if (nextToken.text.equals("=>")) { // Autoquote return new StringNode(token.text, tokenIndex); @@ -638,8 +638,8 @@ public Node parsePrimary() { // Try to parse a builtin operation; backtrack if it fails if (token.text.equals("CORE") && nextToken.text.equals("::")) { - consume(); // "::" - token = consume(); // operator + TokenUtils.consume(this); // "::" + token = TokenUtils.consume(this); // operator } Node operation = parseCoreOperator(token); if (operation != null) { @@ -707,10 +707,10 @@ public Node parsePrimary() { // Handle `-d` String operator = "-" + nextToken.text; tokenIndex++; - nextToken = peek(); + nextToken = TokenUtils.peek(this); if (nextToken.text.equals("_")) { // Handle `-f _` - consume(); + TokenUtils.consume(this); operand = new IdentifierNode("_", tokenIndex); } else { operand = parseExpression(getPrecedence("-d") + 1); @@ -747,7 +747,7 @@ public Node parseVariable(String sigil) { if (varName != null) { // Variable name is valid. // Check for illegal characters after a variable - if (peek().text.equals("(") && !sigil.equals("&") && !parsingForLoopVariable) { + if (TokenUtils.peek(this).text.equals("(") && !sigil.equals("&") && !parsingForLoopVariable) { // Parentheses are only allowed after a variable in specific cases: // - `for my $v (...` // - `&name(...` @@ -759,17 +759,17 @@ public Node parseVariable(String sigil) { Node opNode = new OperatorNode(sigil, new IdentifierNode(varName, tokenIndex), tokenIndex); // Handle auto-call: transform `&subr` to `&subr(@_)` - if (!peek().text.equals("(") && sigil.equals("&") && !parsingTakeReference) { + if (!TokenUtils.peek(this).text.equals("(") && sigil.equals("&") && !parsingTakeReference) { Node list = new OperatorNode("@", new IdentifierNode("_", tokenIndex), tokenIndex); return new BinaryOperatorNode("(", opNode, list, tokenIndex); } return opNode; - } else if (peek().text.equals("{")) { + } else if (TokenUtils.peek(this).text.equals("{")) { // Handle curly brackets to parse a nested expression `${v}` - consume(); // Consume the '{' + TokenUtils.consume(this); // Consume the '{' Node block = parseBlock(); // Parse the block inside the curly brackets - consume(LexerTokenType.OPERATOR, "}"); // Consume the '}' + TokenUtils.consume(this, LexerTokenType.OPERATOR, "}"); // Consume the '}' return new OperatorNode(sigil, block, tokenIndex); } @@ -789,7 +789,7 @@ public Node parseVariable(String sigil) { * @throws PerlCompilerException If there's an unexpected infix operator or syntax error. */ public Node parseInfixOperation(Node left, int precedence) { - LexerToken token = consume(); + LexerToken token = TokenUtils.consume(this); Node right; @@ -805,7 +805,7 @@ public Node parseInfixOperation(Node left, int precedence) { // Autoquote - Convert IdentifierNode to StringNode left = new StringNode(((IdentifierNode) left).name, ((IdentifierNode) left).tokenIndex); } - token = peek(); + token = TokenUtils.peek(this); if (token.type == LexerTokenType.EOF || LIST_TERMINATORS.contains(token.text) || token.text.equals(",") || token.text.equals("=>")) { // "postfix" comma return ListNode.makeList(left); @@ -814,22 +814,22 @@ public Node parseInfixOperation(Node left, int precedence) { return ListNode.makeList(left, right); case "?": Node middle = parseExpression(0); - consume(LexerTokenType.OPERATOR, ":"); + TokenUtils.consume(this, LexerTokenType.OPERATOR, ":"); right = parseExpression(precedence); return new TernaryOperatorNode(token.text, left, middle, right, tokenIndex); case "->": - String nextText = peek().text; + String nextText = TokenUtils.peek(this).text; switch (nextText) { case "(": - consume(); + TokenUtils.consume(this); right = new ListNode(ListParser.parseList(this, ")", 0), tokenIndex); return new BinaryOperatorNode(token.text, left, right, tokenIndex); case "{": - consume(); + TokenUtils.consume(this); right = new HashLiteralNode(parseHashSubscript(), tokenIndex); return new BinaryOperatorNode(token.text, left, right, tokenIndex); case "[": - consume(); + TokenUtils.consume(this); right = new ArrayLiteralNode(ListParser.parseList(this, "]", 1), tokenIndex); return new BinaryOperatorNode(token.text, left, right, tokenIndex); } @@ -861,125 +861,13 @@ public Node parseInfixOperation(Node left, int precedence) { throw new PerlCompilerException(tokenIndex, "Unexpected infix operator: " + token, ctx.errorUtil); } - public LexerToken peek() { - tokenIndex = Whitespace.skipWhitespace(tokenIndex, tokens); - if (tokenIndex >= tokens.size()) { - return new LexerToken(LexerTokenType.EOF, ""); - } - return tokens.get(tokenIndex); - } - - public String consumeChar() { - String str; - if (tokenIndex >= tokens.size()) { - str = ""; - } else { - LexerToken token = tokens.get(tokenIndex); - if (token.type == LexerTokenType.EOF) { - str = ""; - } else if (token.text.length() == 1) { - str = token.text; - tokenIndex++; - } else { - str = token.text.substring(0, 1); - token.text = token.text.substring(1); - } - } - return str; - } - - public String peekChar() { - String str; - if (tokenIndex >= tokens.size()) { - str = ""; - } else { - LexerToken token = tokens.get(tokenIndex); - if (token.type == LexerTokenType.EOF) { - str = ""; - } else if (token.text.length() == 1) { - str = token.text; - } else { - str = token.text.substring(0, 1); - } - } - return str; - } - - public LexerToken consume() { - tokenIndex = Whitespace.skipWhitespace(tokenIndex, tokens); - if (tokenIndex >= tokens.size()) { - return new LexerToken(LexerTokenType.EOF, ""); - } - return tokens.get(tokenIndex++); - } - - public LexerToken consume(LexerTokenType type) { - LexerToken token = consume(); - if (token.type != type) { - throw new PerlCompilerException( - tokenIndex, "Expected token " + type + " but got " + token, ctx.errorUtil); - } - return token; - } - - public void consume(LexerTokenType type, String text) { - LexerToken token = consume(); - if (token.type != type || !token.text.equals(text)) { - throw new PerlCompilerException( - tokenIndex, - "Expected token " + type + " with text " + text + " but got " + token, - ctx.errorUtil); - } - } - - // List parsers - - public boolean looksLikeEmptyList() { - boolean isEmptyList = false; - int previousIndex = tokenIndex; - LexerToken token = consume(); - LexerToken token1 = tokens.get(tokenIndex); // next token including spaces - LexerToken nextToken = peek(); // after spaces - - if (token.type == LexerTokenType.EOF || LIST_TERMINATORS.contains(token.text)) { - isEmptyList = true; - } else if (token.text.equals("-") - && token1.type == LexerTokenType.IDENTIFIER - && token1.text.length() == 1) { - // -d, -e, -f, -l, -p, -x - isEmptyList = false; - } else if (INFIX_OP.contains(token.text) || token.text.equals(",")) { - // tokenIndex++; - ctx.logDebug("parseZeroOrMoreList infix `" + token.text + "` followed by `" + nextToken.text + "`"); - if (token.text.equals("&")) { - // looks like a subroutine call, not an infix `&` - ctx.logDebug("parseZeroOrMoreList looks like subroutine call"); - } else if (token.text.equals("%") && (nextToken.text.equals("$") || nextToken.type == LexerTokenType.IDENTIFIER)) { - // looks like a hash deref, not an infix `%` - ctx.logDebug("parseZeroOrMoreList looks like Hash"); - } else if (token.text.equals(".") && token1.type == LexerTokenType.NUMBER) { - // looks like a fractional number, not an infix `.` - ctx.logDebug("parseZeroOrMoreList looks like Number"); - } else { - // subroutine call with zero arguments, followed by infix operator: `pos = 3` - ctx.logDebug("parseZeroOrMoreList return zero at `" + tokens.get(tokenIndex) + "`"); - // if (LVALUE_INFIX_OP.contains(token.text)) { - // throw new PerlCompilerException(tokenIndex, "Can't modify non-lvalue subroutine call", ctx.errorUtil); - // } - isEmptyList = true; - } - } - tokenIndex = previousIndex; - return isEmptyList; - } - public Node parseFileHandle() { boolean hasBracket = false; - if (peek().text.equals("{")) { - consume(); + if (TokenUtils.peek(this).text.equals("{")) { + TokenUtils.consume(this); hasBracket = true; } - LexerToken token = peek(); + LexerToken token = TokenUtils.peek(this); Node fileHandle = null; if (token.type == LexerTokenType.IDENTIFIER) { // bareword @@ -1001,27 +889,27 @@ public Node parseFileHandle() { fileHandle = parsePrimary(); if (!hasBracket) { // assert that is not followed by infix - String nextText = peek().text; + String nextText = TokenUtils.peek(this).text; if (INFIX_OP.contains(nextText) || "{[".contains(nextText) || "->".equals(nextText)) { // print $fh + 2; # not a file handle fileHandle = null; } // assert that list is not empty - if (looksLikeEmptyList()) { + if (ListParser.looksLikeEmptyList(this)) { // print $fh; # not a file handle fileHandle = null; } } } if (hasBracket) { - consume(LexerTokenType.OPERATOR, "}"); + TokenUtils.consume(this, LexerTokenType.OPERATOR, "}"); } return fileHandle; } public boolean isSpaceAfterPrintBlock() { int currentIndex = tokenIndex; - LexerToken token = peek(); + LexerToken token = TokenUtils.peek(this); boolean isSpace = false; switch (token.type) { case EOF: @@ -1055,7 +943,7 @@ public boolean isSpaceAfterPrintBlock() { break; case ".": // must be followed by NUMBER - consume(); + TokenUtils.consume(this); if (tokens.get(tokenIndex).type == LexerTokenType.NUMBER) { isSpace = true; } @@ -1064,7 +952,7 @@ public boolean isSpaceAfterPrintBlock() { case "(": case "'": // must have space before - consume(); + TokenUtils.consume(this); if (tokenIndex != currentIndex) { isSpace = true; } @@ -1080,8 +968,8 @@ private List parseHashSubscript() { ctx.logDebug("parseHashSubscript start"); int currentIndex = tokenIndex; - LexerToken ident = consume(); - LexerToken close = consume(); + LexerToken ident = TokenUtils.consume(this); + LexerToken close = TokenUtils.consume(this); if (ident.type == LexerTokenType.IDENTIFIER && close.text.equals("}")) { // autoquote List list = new ArrayList<>(); diff --git a/src/main/java/org/perlonjava/parser/StatementParser.java b/src/main/java/org/perlonjava/parser/StatementParser.java index e4b7636..be3a26c 100644 --- a/src/main/java/org/perlonjava/parser/StatementParser.java +++ b/src/main/java/org/perlonjava/parser/StatementParser.java @@ -23,23 +23,23 @@ public class StatementParser { * @return A For3Node representing the while/until loop */ public static Node parseWhileStatement(Parser parser, String label) { - LexerToken operator = parser.consume(LexerTokenType.IDENTIFIER); // "while" "until" + LexerToken operator = TokenUtils.consume(parser, LexerTokenType.IDENTIFIER); // "while" "until" - parser.consume(LexerTokenType.OPERATOR, "("); + TokenUtils.consume(parser, LexerTokenType.OPERATOR, "("); Node condition = parser.parseExpression(0); - parser.consume(LexerTokenType.OPERATOR, ")"); + TokenUtils.consume(parser, LexerTokenType.OPERATOR, ")"); // Parse the body of the loop - parser.consume(LexerTokenType.OPERATOR, "{"); + TokenUtils.consume(parser, LexerTokenType.OPERATOR, "{"); Node body = parser.parseBlock(); - parser.consume(LexerTokenType.OPERATOR, "}"); + TokenUtils.consume(parser, LexerTokenType.OPERATOR, "}"); Node continueNode = null; - if (parser.peek().text.equals("continue")) { - parser.consume(); - parser.consume(LexerTokenType.OPERATOR, "{"); + if (TokenUtils.peek(parser).text.equals("continue")) { + TokenUtils.consume(parser); + TokenUtils.consume(parser, LexerTokenType.OPERATOR, "{"); continueNode = parser.parseBlock(); - parser.consume(LexerTokenType.OPERATOR, "}"); + TokenUtils.consume(parser, LexerTokenType.OPERATOR, "}"); } if (operator.text.equals("until")) { @@ -57,25 +57,25 @@ public static Node parseWhileStatement(Parser parser, String label) { * @return A For1Node or For3Node representing the for/foreach loop */ public static Node parseForStatement(Parser parser, String label) { - parser.consume(LexerTokenType.IDENTIFIER); // "for" or "foreach" + TokenUtils.consume(parser, LexerTokenType.IDENTIFIER); // "for" or "foreach" // Parse optional loop variable Node varNode = null; - LexerToken token = parser.peek(); // "my" "$" "(" + LexerToken token = TokenUtils.peek(parser); // "my" "$" "(" if (token.text.equals("my") || token.text.equals("$")) { parser.parsingForLoopVariable = true; varNode = parser.parsePrimary(); parser.parsingForLoopVariable = false; } - parser.consume(LexerTokenType.OPERATOR, "("); + TokenUtils.consume(parser, LexerTokenType.OPERATOR, "("); // Parse the initialization part Node initialization = null; - if (!parser.peek().text.equals(";")) { + if (!TokenUtils.peek(parser).text.equals(";")) { initialization = parser.parseExpression(0); - token = parser.peek(); + token = TokenUtils.peek(parser); if (token.text.equals(")")) { // 1-argument for loop (foreach-like) return parseOneArgumentForLoop(parser, label, varNode, initialization); @@ -90,20 +90,20 @@ public static Node parseForStatement(Parser parser, String label) { * Helper method to parse a one-argument for loop (foreach-like). */ private static Node parseOneArgumentForLoop(Parser parser, String label, Node varNode, Node initialization) { - parser.consume(); // Consume ")" + TokenUtils.consume(parser); // Consume ")" // Parse the body of the loop - parser.consume(LexerTokenType.OPERATOR, "{"); + TokenUtils.consume(parser, LexerTokenType.OPERATOR, "{"); Node body = parser.parseBlock(); - parser.consume(LexerTokenType.OPERATOR, "}"); + TokenUtils.consume(parser, LexerTokenType.OPERATOR, "}"); // Parse optional continue block Node continueNode = null; - if (parser.peek().text.equals("continue")) { - parser.consume(); - parser.consume(LexerTokenType.OPERATOR, "{"); + if (TokenUtils.peek(parser).text.equals("continue")) { + TokenUtils.consume(parser); + TokenUtils.consume(parser, LexerTokenType.OPERATOR, "{"); continueNode = parser.parseBlock(); - parser.consume(LexerTokenType.OPERATOR, "}"); + TokenUtils.consume(parser, LexerTokenType.OPERATOR, "}"); } // Use $_ as the default loop variable if not specified @@ -123,26 +123,26 @@ private static Node parseThreeArgumentForLoop(Parser parser, String label, Node throw new PerlCompilerException(parser.tokenIndex, "Syntax error", parser.ctx.errorUtil); } - parser.consume(LexerTokenType.OPERATOR, ";"); + TokenUtils.consume(parser, LexerTokenType.OPERATOR, ";"); // Parse the condition part Node condition = null; - if (!parser.peek().text.equals(";")) { + if (!TokenUtils.peek(parser).text.equals(";")) { condition = parser.parseExpression(0); } - parser.consume(LexerTokenType.OPERATOR, ";"); + TokenUtils.consume(parser, LexerTokenType.OPERATOR, ";"); // Parse the increment part Node increment = null; - if (!parser.peek().text.equals(")")) { + if (!TokenUtils.peek(parser).text.equals(")")) { increment = parser.parseExpression(0); } - parser.consume(LexerTokenType.OPERATOR, ")"); + TokenUtils.consume(parser, LexerTokenType.OPERATOR, ")"); // Parse the body of the loop - parser.consume(LexerTokenType.OPERATOR, "{"); + TokenUtils.consume(parser, LexerTokenType.OPERATOR, "{"); Node body = parser.parseBlock(); - parser.consume(LexerTokenType.OPERATOR, "}"); + TokenUtils.consume(parser, LexerTokenType.OPERATOR, "}"); // 3-argument for doesn't have a continue block @@ -157,20 +157,20 @@ private static Node parseThreeArgumentForLoop(Parser parser, String label, Node * @return An IfNode representing the if/unless/elsif statement */ public static Node parseIfStatement(Parser parser) { - LexerToken operator = parser.consume(LexerTokenType.IDENTIFIER); // "if", "unless", "elsif" - parser.consume(LexerTokenType.OPERATOR, "("); + LexerToken operator = TokenUtils.consume(parser, LexerTokenType.IDENTIFIER); // "if", "unless", "elsif" + TokenUtils.consume(parser, LexerTokenType.OPERATOR, "("); Node condition = parser.parseExpression(0); - parser.consume(LexerTokenType.OPERATOR, ")"); - parser.consume(LexerTokenType.OPERATOR, "{"); + TokenUtils.consume(parser, LexerTokenType.OPERATOR, ")"); + TokenUtils.consume(parser, LexerTokenType.OPERATOR, "{"); Node thenBranch = parser.parseBlock(); - parser.consume(LexerTokenType.OPERATOR, "}"); + TokenUtils.consume(parser, LexerTokenType.OPERATOR, "}"); Node elseBranch = null; - LexerToken token = parser.peek(); + LexerToken token = TokenUtils.peek(parser); if (token.text.equals("else")) { - parser.consume(LexerTokenType.IDENTIFIER); // "else" - parser.consume(LexerTokenType.OPERATOR, "{"); + TokenUtils.consume(parser, LexerTokenType.IDENTIFIER); // "else" + TokenUtils.consume(parser, LexerTokenType.OPERATOR, "{"); elseBranch = parser.parseBlock(); - parser.consume(LexerTokenType.OPERATOR, "}"); + TokenUtils.consume(parser, LexerTokenType.OPERATOR, "}"); } else if (token.text.equals("elsif")) { elseBranch = parseIfStatement(parser); } @@ -189,8 +189,8 @@ public static Node parseUseDeclaration(Parser parser, LexerToken token) { ctx.logDebug("use: " + token.text); boolean isNoDeclaration = token.text.equals("no"); - parser.consume(); // "use" - token = parser.peek(); + TokenUtils.consume(parser); // "use" + token = TokenUtils.peek(parser); String fullName = null; String packageName = null; @@ -211,14 +211,14 @@ public static Node parseUseDeclaration(Parser parser, LexerToken token) { parser.ctx.logDebug("use version: " + version); if (version != null) { // `use` statement can terminate after Version - token = parser.peek(); + token = TokenUtils.peek(parser); if (token.type == LexerTokenType.EOF || token.text.equals("}") || token.text.equals(";")) { return new ListNode(parser.tokenIndex); } } // Parse the parameter list - boolean hasParentheses = parser.peek().text.equals("("); + boolean hasParentheses = TokenUtils.peek(parser).text.equals("("); Node list = ListParser.parseZeroOrMoreList(parser, 0, false, false, false, false); ctx.logDebug("Use statement list hasParentheses:" + hasParentheses + " ast:" + list); @@ -280,7 +280,7 @@ public static Node parseUseDeclaration(Parser parser, LexerToken token) { * @return An OperatorNode or BlockNode representing the package declaration */ public static Node parsePackageDeclaration(Parser parser, LexerToken token) { - parser.consume(); + TokenUtils.consume(parser); String packageName = IdentifierParser.parseSubroutineIdentifier(parser); if (packageName == null) { throw new PerlCompilerException(parser.tokenIndex, "Syntax error", parser.ctx.errorUtil); @@ -310,10 +310,10 @@ public static Node parsePackageDeclaration(Parser parser, LexerToken token) { */ public static BlockNode parseOptionalPackageBlock(Parser parser, IdentifierNode nameNode, OperatorNode packageNode) { LexerToken token; - token = parser.peek(); + token = TokenUtils.peek(parser); if (token.type == LexerTokenType.OPERATOR && token.text.equals("{")) { // package NAME BLOCK - parser.consume(LexerTokenType.OPERATOR, "{"); + TokenUtils.consume(parser, LexerTokenType.OPERATOR, "{"); parser.ctx.symbolTable.enterScope(); parser.ctx.symbolTable.setCurrentPackage(nameNode.name); BlockNode block = parser.parseBlock(); @@ -322,7 +322,7 @@ public static BlockNode parseOptionalPackageBlock(Parser parser, IdentifierNode block.elements.add(0, packageNode); parser.ctx.symbolTable.exitScope(); - parser.consume(LexerTokenType.OPERATOR, "}"); + TokenUtils.consume(parser, LexerTokenType.OPERATOR, "}"); return block; } return null; @@ -336,19 +336,19 @@ public static BlockNode parseOptionalPackageBlock(Parser parser, IdentifierNode */ public static String parseOptionalPackageVersion(Parser parser) { LexerToken token; - token = parser.peek(); + token = TokenUtils.peek(parser); if (token.type == LexerTokenType.NUMBER) { - return NumberParser.parseNumber(parser, parser.consume()).value; + return NumberParser.parseNumber(parser, TokenUtils.consume(parser)).value; } else if (token.text.startsWith("v")) { // parseDottedDecimalVersion StringBuilder version = new StringBuilder(token.text); // start with 'v' - parser.consume(); + TokenUtils.consume(parser); int componentCount = 0; // Loop through components separated by '.' while (true) { - if (!parser.peek().text.equals(".")) { + if (!TokenUtils.peek(parser).text.equals(".")) { if (componentCount < 2) { // Ensures at least 3 components (v1.2.3) throw new PerlCompilerException(parser.tokenIndex, "Dotted-decimal version must have at least 3 components", parser.ctx.errorUtil); } else { @@ -356,10 +356,10 @@ public static String parseOptionalPackageVersion(Parser parser) { } } - version.append(parser.consume().text); // consume '.' + version.append(TokenUtils.consume(parser).text); // consume '.' - if (parser.peek().type == LexerTokenType.NUMBER) { - version.append(parser.consume().text); // consume number + if (TokenUtils.peek(parser).type == LexerTokenType.NUMBER) { + version.append(TokenUtils.consume(parser).text); // consume number componentCount++; } else { throw new PerlCompilerException(parser.tokenIndex, "Invalid dotted-decimal format", parser.ctx.errorUtil); diff --git a/src/main/java/org/perlonjava/parser/StringParser.java b/src/main/java/org/perlonjava/parser/StringParser.java index 8dcc863..b4dccce 100644 --- a/src/main/java/org/perlonjava/parser/StringParser.java +++ b/src/main/java/org/perlonjava/parser/StringParser.java @@ -211,7 +211,7 @@ static Node parseDoubleQuotedString(EmitterContext ctx, ParsedString rawStr, boo ctx.logDebug("str sigil"); Node operand; boolean isArray = sigil.equals("@"); - if (parser.peek().text.equals("{")) { + if (TokenUtils.peek(parser).text.equals("{")) { // block-like // extract the string between brackets StringParser.ParsedString rawStr2 = StringParser.parseRawStrings(ctx, parser.tokens, parser.tokenIndex, 1); @@ -227,7 +227,7 @@ static Node parseDoubleQuotedString(EmitterContext ctx, ParsedString rawStr, boo if (identifier == null) { // parse $$$a @$$a int dollarCount = 0; - while (parser.peek().text.equals("$")) { + while (TokenUtils.peek(parser).text.equals("$")) { dollarCount++; parser.tokenIndex++; } @@ -314,17 +314,17 @@ private static void parseDoubleQuotedEscapes(EmitterContext ctx, List= 0 && chr.compareTo("7") <= 0) { - octalStr.append(parser.consumeChar()); - chr = parser.peekChar(); + octalStr.append(TokenUtils.consumeChar(parser)); + chr = TokenUtils.peekChar(parser); } ctx.logDebug("octalStr: " + octalStr); str.append((char) Integer.parseInt(octalStr.toString(), 8)); return; } - escape = parser.consumeChar(); + escape = TokenUtils.consumeChar(parser); switch (escape) { case "\\": case "\"": @@ -352,7 +352,7 @@ private static void parseDoubleQuotedEscapes(EmitterContext ctx, List attributes = new ArrayList<>(); // While there are attributes (denoted by a colon ':'), we keep parsing them. - while (parser.peek().text.equals(":")) { + while (TokenUtils.peek(parser).text.equals(":")) { // Consume the colon operator. - parser.consume(LexerTokenType.OPERATOR, ":"); + TokenUtils.consume(parser, LexerTokenType.OPERATOR, ":"); // Consume the attribute name (an identifier) and add it to the attributes list. - attributes.add(parser.consume(LexerTokenType.IDENTIFIER).text); + attributes.add(TokenUtils.consume(parser, LexerTokenType.IDENTIFIER).text); } // After parsing name, prototype, and attributes, we expect an opening curly brace '{' to denote the start of the subroutine block. - parser.consume(LexerTokenType.OPERATOR, "{"); + TokenUtils.consume(parser, LexerTokenType.OPERATOR, "{"); // Parse the block of the subroutine, which contains the actual code. Node block = parser.parseBlock(); // After the block, we expect a closing curly brace '}' to denote the end of the subroutine. - parser.consume(LexerTokenType.OPERATOR, "}"); + TokenUtils.consume(parser, LexerTokenType.OPERATOR, "}"); // Now we check if the next token is one of the illegal characters that cannot follow a subroutine. // These are '(', '{', and '['. If any of these follow, we throw a syntax error. - LexerToken token = parser.peek(); + LexerToken token = TokenUtils.peek(parser); if (token.text.equals("(") || token.text.equals("{") || token.text.equals("[")) { // Throw an exception indicating a syntax error. throw new PerlCompilerException(parser.tokenIndex, "Syntax error", parser.ctx.errorUtil); diff --git a/src/main/java/org/perlonjava/parser/TokenUtils.java b/src/main/java/org/perlonjava/parser/TokenUtils.java new file mode 100644 index 0000000..95f4245 --- /dev/null +++ b/src/main/java/org/perlonjava/parser/TokenUtils.java @@ -0,0 +1,78 @@ +package org.perlonjava.parser; + +import org.perlonjava.lexer.LexerToken; +import org.perlonjava.lexer.LexerTokenType; +import org.perlonjava.runtime.PerlCompilerException; + +public class TokenUtils { + public static LexerToken peek(Parser parser) { + parser.tokenIndex = Whitespace.skipWhitespace(parser.tokenIndex, parser.tokens); + if (parser.tokenIndex >= parser.tokens.size()) { + return new LexerToken(LexerTokenType.EOF, ""); + } + return parser.tokens.get(parser.tokenIndex); + } + + public static String consumeChar(Parser parser) { + String str; + if (parser.tokenIndex >= parser.tokens.size()) { + str = ""; + } else { + LexerToken token = parser.tokens.get(parser.tokenIndex); + if (token.type == LexerTokenType.EOF) { + str = ""; + } else if (token.text.length() == 1) { + str = token.text; + parser.tokenIndex++; + } else { + str = token.text.substring(0, 1); + token.text = token.text.substring(1); + } + } + return str; + } + + public static String peekChar(Parser parser) { + String str; + if (parser.tokenIndex >= parser.tokens.size()) { + str = ""; + } else { + LexerToken token = parser.tokens.get(parser.tokenIndex); + if (token.type == LexerTokenType.EOF) { + str = ""; + } else if (token.text.length() == 1) { + str = token.text; + } else { + str = token.text.substring(0, 1); + } + } + return str; + } + + public static LexerToken consume(Parser parser) { + parser.tokenIndex = Whitespace.skipWhitespace(parser.tokenIndex, parser.tokens); + if (parser.tokenIndex >= parser.tokens.size()) { + return new LexerToken(LexerTokenType.EOF, ""); + } + return parser.tokens.get(parser.tokenIndex++); + } + + public static LexerToken consume(Parser parser, LexerTokenType type) { + LexerToken token = consume(parser); + if (token.type != type) { + throw new PerlCompilerException( + parser.tokenIndex, "Expected token " + type + " but got " + token, parser.ctx.errorUtil); + } + return token; + } + + public static void consume(Parser parser, LexerTokenType type, String text) { + LexerToken token = consume(parser); + if (token.type != type || !token.text.equals(text)) { + throw new PerlCompilerException( + parser.tokenIndex, + "Expected token " + type + " with text " + text + " but got " + token, + parser.ctx.errorUtil); + } + } +}