Skip to content

Commit

Permalink
handle special quotes like qq()
Browse files Browse the repository at this point in the history
  • Loading branch information
fglock committed Aug 20, 2024
1 parent ca2252d commit 2dea0db
Show file tree
Hide file tree
Showing 2 changed files with 162 additions and 159 deletions.
161 changes: 2 additions & 159 deletions src/main/java/org/perlonjava/Parser.java
Original file line number Diff line number Diff line change
Expand Up @@ -506,172 +506,15 @@ private Node parseRawString(String operator) {
switch (operator) {
case "'":
case "q":
return parseSingleQuotedString(rawStr.buffer, rawStr.startDelim, rawStr.endDelim, rawStr.index);
return StringParser.parseSingleQuotedString(rawStr.buffer, rawStr.startDelim, rawStr.endDelim, rawStr.index);
case "\"":
case "qq":
return parseDoubleQuotedString(rawStr.buffer, rawStr.index);
return StringParser.parseDoubleQuotedString(rawStr.buffer, errorUtil, rawStr.index);
}

return new UnaryOperatorNode(operator, new StringNode(rawStr.buffer, rawStr.index), rawStr.index);
}

/**
 * Parses the body of a double-quoted (interpolating) string into an AST node.
 *
 * <p>Backslash escapes are decoded into the literal text, and {@code $name},
 * {@code ${name}}, {@code @name} and {@code @{name}} references are turned into
 * variable nodes. Array references are wrapped in a {@code join} on the {@code $"}
 * variable. The resulting pieces are chained together with the {@code .} operator.
 *
 * <p>Recognized escapes: {@code \\}, {@code \"}, {@code \$}, {@code \@}, {@code \n},
 * {@code \t}, {@code \r}, {@code \f}, {@code \b} and {@code \x{HEX}}. Any other escape
 * is kept verbatim, backslash included. A lone backslash at the very end of the input
 * contributes nothing to the result.
 *
 * @param input      the raw string contents, without the surrounding delimiters
 * @param tokenIndex token position recorded on every created node and used for error reporting
 * @return a {@code StringNode} when the input contains no interpolation, otherwise a
 *         {@code BinaryOperatorNode} concatenation that evaluates to the string
 * @throws PerlCompilerException if a {@code \x} escape or a variable reference is malformed
 */
private Node parseDoubleQuotedString(String input, int tokenIndex) {
    StringBuilder str = new StringBuilder(); // Literal text collected since the last interpolation
    List<Node> parts = new ArrayList<>();    // Literal and variable pieces, in source order
    char[] chars = input.toCharArray();
    int length = chars.length;
    int index = 0;

    while (index < length) {
        char ch = chars[index];
        if (ch == '\\') {
            index++; // Step onto the escaped character
            if (index < length) {
                char nextChar = chars[index];
                switch (nextChar) {
                    case '\\':
                    case '"':
                    case '$':
                    case '@':
                        // Escaped sigils must not start an interpolation
                        str.append(nextChar);
                        break;
                    case 'n':
                        str.append('\n');
                        break;
                    case 't':
                        str.append('\t');
                        break;
                    case 'r':
                        str.append('\r');
                        break;
                    case 'f':
                        str.append('\f');
                        break;
                    case 'b':
                        str.append('\b');
                        break;
                    case 'x': {
                        // Handle \x{...}: hexadecimal code point
                        index++;
                        if (index >= length || chars[index] != '{') {
                            throw new PerlCompilerException(tokenIndex, "Expected '{' after \\x", errorUtil);
                        }
                        index++; // Skip '{'
                        StringBuilder unicodeSeq = new StringBuilder();
                        while (index < length && chars[index] != '}') {
                            unicodeSeq.append(chars[index]);
                            index++;
                        }
                        if (index >= length) {
                            throw new PerlCompilerException(tokenIndex, "Expected '}' after \\x{", errorUtil);
                        }
                        try {
                            // appendCodePoint keeps supplementary characters intact,
                            // where a cast to char would silently truncate them
                            str.appendCodePoint(Integer.parseInt(unicodeSeq.toString(), 16));
                        } catch (IllegalArgumentException e) {
                            // Covers NumberFormatException (bad or empty hex) and
                            // out-of-range code points rejected by appendCodePoint
                            throw new PerlCompilerException(tokenIndex,
                                    "Invalid hexadecimal code point in \\x{" + unicodeSeq + "}", errorUtil);
                        }
                        // index now rests on the closing '}', consumed below
                        break;
                    }
                    default:
                        str.append('\\').append(nextChar); // Unknown escape: keep it verbatim
                        break;
                }
                // Consume the last character of the escape so that it is not
                // re-read as ordinary text on the next iteration
                index++;
            }
        } else if (ch == '$' || ch == '@') {
            boolean isArray = ch == '@';
            String sigil = String.valueOf(ch);
            Node operand;
            if (str.length() > 0) {
                parts.add(new StringNode(str.toString(), tokenIndex)); // Flush the text seen so far
                str = new StringBuilder();
            }
            index++; // Skip the sigil
            if (index < length && (chars[index] == '_' || chars[index] == '@' || Character.isDigit(chars[index]))) {
                // Single-character special variables like $@, $_, $1
                String specialVar = String.valueOf(chars[index]);
                index++;
                operand = new UnaryOperatorNode(sigil, new IdentifierNode(specialVar, tokenIndex), tokenIndex);
            } else if (index < length && Character.isJavaIdentifierStart(chars[index])) {
                StringBuilder identifier = new StringBuilder();
                while (index < length && Character.isJavaIdentifierPart(chars[index])) {
                    identifier.append(chars[index]);
                    index++;
                }
                operand = new UnaryOperatorNode(sigil, new IdentifierNode(identifier.toString(), tokenIndex), tokenIndex);
            } else if (index < length && chars[index] == '{') {
                index++; // Skip '{'
                StringBuilder varName = new StringBuilder();
                while (index < length && Character.isJavaIdentifierPart(chars[index])) {
                    varName.append(chars[index]);
                    index++;
                }
                if (index < length && chars[index] == '}') {
                    index++; // Consume the closing '}'
                    operand = new UnaryOperatorNode(sigil, new IdentifierNode(varName.toString(), tokenIndex), tokenIndex);
                } else {
                    throw new PerlCompilerException(tokenIndex, "Expected '}' after variable name", errorUtil);
                }
            } else {
                throw new PerlCompilerException(tokenIndex, "Invalid variable name after " + ch, errorUtil);
            }
            if (isArray) {
                // Arrays interpolate as: join($", @array)
                operand = new BinaryOperatorNode("join", new UnaryOperatorNode("$", new IdentifierNode("\"", tokenIndex), tokenIndex), operand, tokenIndex);
            }
            parts.add(operand);
        } else {
            str.append(ch);
            index++;
        }
    }

    if (str.length() > 0) {
        parts.add(new StringNode(str.toString(), tokenIndex)); // Trailing literal text
    }

    // Join the parts
    if (parts.isEmpty()) {
        return new StringNode("", tokenIndex);
    }
    Node result = parts.get(0);
    if (parts.size() == 1) {
        if (result instanceof StringNode) {
            return result;
        }
        // stringify using: "" . $a
        return new BinaryOperatorNode(".", new StringNode("", tokenIndex), result, tokenIndex);
    }
    for (int i = 1; i < parts.size(); i++) {
        result = new BinaryOperatorNode(".", result, parts.get(i), tokenIndex);
    }
    return result;
}

/**
 * Parses the body of a single-quoted (non-interpolating) string.
 *
 * <p>Only three escapes are decoded: a backslash followed by another backslash,
 * by {@code startDelim}, or by {@code endDelim} yields that second character alone.
 * Every other backslash pair is copied through unchanged, and a lone backslash at
 * the very end of the input contributes nothing.
 *
 * @param input      the raw string contents, without the surrounding delimiters
 * @param startDelim the opening delimiter of the quote construct
 * @param endDelim   the closing delimiter of the quote construct
 * @param tokenIndex token position recorded on the created node
 * @return a {@code StringNode} holding the decoded text
 */
private Node parseSingleQuotedString(String input, char startDelim, char endDelim, int tokenIndex) {
    final int end = input.length();
    StringBuilder decoded = new StringBuilder();

    for (int pos = 0; pos < end; pos++) {
        char current = input.charAt(pos);
        if (current != '\\') {
            decoded.append(current);
            continue;
        }

        pos++; // Look at the character following the backslash
        if (pos >= end) {
            break; // Nothing follows the backslash
        }

        char escaped = input.charAt(pos);
        boolean isEscapable = escaped == '\\' || escaped == startDelim || escaped == endDelim;
        if (!isEscapable) {
            decoded.append('\\'); // Not a recognized escape: the backslash stays
        }
        decoded.append(escaped);
    }

    return new StringNode(decoded.toString(), tokenIndex);
}

private Node parseNumber(LexerToken token) {
StringBuilder number = new StringBuilder(token.text);

Expand Down
160 changes: 160 additions & 0 deletions src/main/java/org/perlonjava/StringParser.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
package org.perlonjava;

import org.perlonjava.node.*;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -107,6 +110,163 @@ public static ParsedString parseRawStringWithDelimiter(List<LexerToken> tokens,
return new ParsedString(index, tokPos, buffer.toString(), startDelim, endDelim);
}

/**
 * Parses the body of a double-quoted (interpolating) string into an AST node.
 *
 * <p>Backslash escapes are decoded into the literal text, and {@code $name},
 * {@code ${name}}, {@code @name} and {@code @{name}} references are turned into
 * variable nodes. Array references are wrapped in a {@code join} on the {@code $"}
 * variable. The resulting pieces are chained together with the {@code .} operator.
 *
 * <p>Recognized escapes: {@code \\}, {@code \"}, {@code \$}, {@code \@}, {@code \n},
 * {@code \t}, {@code \r}, {@code \f}, {@code \b} and {@code \x{HEX}}. Any other escape
 * is kept verbatim, backslash included. A lone backslash at the very end of the input
 * contributes nothing to the result.
 *
 * @param input      the raw string contents, without the surrounding delimiters
 * @param errorUtil  helper passed to every {@code PerlCompilerException} raised here
 * @param tokenIndex token position recorded on every created node and used for error reporting
 * @return a {@code StringNode} when the input contains no interpolation, otherwise a
 *         {@code BinaryOperatorNode} concatenation that evaluates to the string
 * @throws PerlCompilerException if a {@code \x} escape or a variable reference is malformed
 */
static Node parseDoubleQuotedString(String input, ErrorMessageUtil errorUtil, int tokenIndex) {
    StringBuilder str = new StringBuilder(); // Literal text collected since the last interpolation
    List<Node> parts = new ArrayList<>();    // Literal and variable pieces, in source order
    char[] chars = input.toCharArray();
    int length = chars.length;
    int index = 0;

    while (index < length) {
        char ch = chars[index];
        if (ch == '\\') {
            index++; // Step onto the escaped character
            if (index < length) {
                char nextChar = chars[index];
                switch (nextChar) {
                    case '\\':
                    case '"':
                    case '$':
                    case '@':
                        // Escaped sigils must not start an interpolation
                        str.append(nextChar);
                        break;
                    case 'n':
                        str.append('\n');
                        break;
                    case 't':
                        str.append('\t');
                        break;
                    case 'r':
                        str.append('\r');
                        break;
                    case 'f':
                        str.append('\f');
                        break;
                    case 'b':
                        str.append('\b');
                        break;
                    case 'x': {
                        // Handle \x{...}: hexadecimal code point
                        index++;
                        if (index >= length || chars[index] != '{') {
                            throw new PerlCompilerException(tokenIndex, "Expected '{' after \\x", errorUtil);
                        }
                        index++; // Skip '{'
                        StringBuilder unicodeSeq = new StringBuilder();
                        while (index < length && chars[index] != '}') {
                            unicodeSeq.append(chars[index]);
                            index++;
                        }
                        if (index >= length) {
                            throw new PerlCompilerException(tokenIndex, "Expected '}' after \\x{", errorUtil);
                        }
                        try {
                            // appendCodePoint keeps supplementary characters intact,
                            // where a cast to char would silently truncate them
                            str.appendCodePoint(Integer.parseInt(unicodeSeq.toString(), 16));
                        } catch (IllegalArgumentException e) {
                            // Covers NumberFormatException (bad or empty hex) and
                            // out-of-range code points rejected by appendCodePoint
                            throw new PerlCompilerException(tokenIndex,
                                    "Invalid hexadecimal code point in \\x{" + unicodeSeq + "}", errorUtil);
                        }
                        // index now rests on the closing '}', consumed below
                        break;
                    }
                    default:
                        str.append('\\').append(nextChar); // Unknown escape: keep it verbatim
                        break;
                }
                // Consume the last character of the escape so that it is not
                // re-read as ordinary text on the next iteration
                index++;
            }
        } else if (ch == '$' || ch == '@') {
            boolean isArray = ch == '@';
            String sigil = String.valueOf(ch);
            Node operand;
            if (str.length() > 0) {
                parts.add(new StringNode(str.toString(), tokenIndex)); // Flush the text seen so far
                str = new StringBuilder();
            }
            index++; // Skip the sigil
            if (index < length && (chars[index] == '_' || chars[index] == '@' || Character.isDigit(chars[index]))) {
                // Single-character special variables like $@, $_, $1
                String specialVar = String.valueOf(chars[index]);
                index++;
                operand = new UnaryOperatorNode(sigil, new IdentifierNode(specialVar, tokenIndex), tokenIndex);
            } else if (index < length && Character.isJavaIdentifierStart(chars[index])) {
                StringBuilder identifier = new StringBuilder();
                while (index < length && Character.isJavaIdentifierPart(chars[index])) {
                    identifier.append(chars[index]);
                    index++;
                }
                operand = new UnaryOperatorNode(sigil, new IdentifierNode(identifier.toString(), tokenIndex), tokenIndex);
            } else if (index < length && chars[index] == '{') {
                index++; // Skip '{'
                StringBuilder varName = new StringBuilder();
                while (index < length && Character.isJavaIdentifierPart(chars[index])) {
                    varName.append(chars[index]);
                    index++;
                }
                if (index < length && chars[index] == '}') {
                    index++; // Consume the closing '}'
                    operand = new UnaryOperatorNode(sigil, new IdentifierNode(varName.toString(), tokenIndex), tokenIndex);
                } else {
                    throw new PerlCompilerException(tokenIndex, "Expected '}' after variable name", errorUtil);
                }
            } else {
                throw new PerlCompilerException(tokenIndex, "Invalid variable name after " + ch, errorUtil);
            }
            if (isArray) {
                // Arrays interpolate as: join($", @array)
                operand = new BinaryOperatorNode("join", new UnaryOperatorNode("$", new IdentifierNode("\"", tokenIndex), tokenIndex), operand, tokenIndex);
            }
            parts.add(operand);
        } else {
            str.append(ch);
            index++;
        }
    }

    if (str.length() > 0) {
        parts.add(new StringNode(str.toString(), tokenIndex)); // Trailing literal text
    }

    // Join the parts
    if (parts.isEmpty()) {
        return new StringNode("", tokenIndex);
    }
    Node result = parts.get(0);
    if (parts.size() == 1) {
        if (result instanceof StringNode) {
            return result;
        }
        // stringify using: "" . $a
        return new BinaryOperatorNode(".", new StringNode("", tokenIndex), result, tokenIndex);
    }
    for (int i = 1; i < parts.size(); i++) {
        result = new BinaryOperatorNode(".", result, parts.get(i), tokenIndex);
    }
    return result;
}

/**
 * Parses the body of a single-quoted (non-interpolating) string.
 *
 * <p>Only three escapes are decoded: a backslash followed by another backslash,
 * by {@code startDelim}, or by {@code endDelim} yields that second character alone.
 * Every other backslash pair is copied through unchanged, and a lone backslash at
 * the very end of the input contributes nothing.
 *
 * @param input      the raw string contents, without the surrounding delimiters
 * @param startDelim the opening delimiter of the quote construct
 * @param endDelim   the closing delimiter of the quote construct
 * @param tokenIndex token position recorded on the created node
 * @return a {@code StringNode} holding the decoded text
 */
static Node parseSingleQuotedString(String input, char startDelim, char endDelim, int tokenIndex) {
    final int end = input.length();
    StringBuilder decoded = new StringBuilder();

    for (int pos = 0; pos < end; pos++) {
        char current = input.charAt(pos);
        if (current != '\\') {
            decoded.append(current);
            continue;
        }

        pos++; // Look at the character following the backslash
        if (pos >= end) {
            break; // Nothing follows the backslash
        }

        char escaped = input.charAt(pos);
        boolean isEscapable = escaped == '\\' || escaped == startDelim || escaped == endDelim;
        if (!isEscapable) {
            decoded.append('\\'); // Not a recognized escape: the backslash stays
        }
        decoded.append(escaped);
    }

    return new StringNode(decoded.toString(), tokenIndex);
}

/**
* Class to represent the parsed string and its position in the tokens list.
*/
Expand Down

0 comments on commit 2dea0db

Please sign in to comment.