Skip to content

Commit

Permalink
Allow unescaped ] as leading char in char class (fixes #6)
Browse files Browse the repository at this point in the history
  • Loading branch information
slevithan committed Nov 26, 2024
1 parent df8a9ec commit 1c49b25
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 17 deletions.
13 changes: 9 additions & 4 deletions src/parse.js
Original file line number Diff line number Diff line change
Expand Up @@ -248,9 +248,10 @@ function parseCharacterClassHyphen(context, state) {

function parseCharacterClassOpen(context, state) {
const {token, tokens, verbose, walk} = context;
const firstClassToken = tokens[context.current];
let node = createCharacterClass({negate: token.negate});
const intersection = node.elements[0];
let nextToken = throwIfUnclosedCharacterClass(tokens[context.current]);
let nextToken = throwIfUnclosedCharacterClass(firstClassToken);
while (nextToken.type !== TokenTypes.CharacterClassClose) {
if (nextToken.type === TokenTypes.CharacterClassIntersector) {
intersection.classes.push(createCharacterClass({negate: false, baseOnly: true}));
Expand All @@ -260,7 +261,7 @@ function parseCharacterClassOpen(context, state) {
const cc = intersection.classes.at(-1);
cc.elements.push(walk(cc, state));
}
nextToken = throwIfUnclosedCharacterClass(tokens[context.current]);
nextToken = throwIfUnclosedCharacterClass(tokens[context.current], firstClassToken);
}
if (!verbose) {
optimizeCharacterClassIntersection(intersection);
Expand Down Expand Up @@ -699,8 +700,12 @@ function optimizeCharacterClassIntersection(intersection) {
}
}

function throwIfUnclosedCharacterClass(token) {
return throwIfNot(token, 'Unclosed character class');
/**
Checks that a token exists while walking a character class, delegating the check to `throwIfNot`.
The error message passed along reads "Empty character class" when the class's first token has the
value 93 (the code point of `]`), and "Unclosed character class" otherwise (including when
`firstClassToken` is not provided).
@param {*} token Token to check; forwarded unchanged to `throwIfNot`.
@param {{value?: number}} [firstClassToken] First token following the class opener.
@returns {*} Whatever `throwIfNot` returns.
*/
function throwIfUnclosedCharacterClass(token, firstClassToken) {
  // A leading `]` is read as a literal, so running out of tokens most likely means the user wrote
  // `[]`; "Empty" is the easier to understand description in that case
  const startsWithClosingBracket = firstClassToken?.value === 93;
  const problem = startsWithClosingBracket ? 'Empty' : 'Unclosed';
  return throwIfNot(token, `${problem} character class`);
}

function throwIfUnclosedGroup(token) {
Expand Down
25 changes: 12 additions & 13 deletions src/tokenize.js
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ const EscapeCharCodes = new Map([
['v', 11], // vertical tab
]);

const charClassOpenPattern = r`\[\^?\]?`;
const charClassOpenPattern = r`\[\^?`;
const sharedEscapesPattern = `${
// Control char
'c.? | C(?:-.?)?'
Expand Down Expand Up @@ -408,7 +408,6 @@ function getTokenWithDetails(context, pattern, m, lastIndex) {
}

function getAllTokensForCharClass(pattern, opener, lastIndex) {
assertNonEmptyCharClass(opener);
const tokens = [createToken(TokenTypes.CharacterClassOpen, opener, {
negate: opener[1] === '^',
})];
Expand All @@ -420,16 +419,22 @@ function getAllTokensForCharClass(pattern, opener, lastIndex) {
// Start of nested char class
// POSIX classes are handled as a single token; not as a nested char class
if (m[0] === '[' && m[1] !== ':') {
assertNonEmptyCharClass(m);
numCharClassesOpen++;
tokens.push(createToken(TokenTypes.CharacterClassOpen, m, {
negate: m[1] === '^',
}));
} else if (m === ']') {
numCharClassesOpen--;
tokens.push(createToken(TokenTypes.CharacterClassClose, m));
if (!numCharClassesOpen) {
break;
if (tokens.at(-1).type === TokenTypes.CharacterClassOpen) {
// Allow unescaped `]` as leading char
tokens.push(createToken(TokenTypes.Character, m, {
value: 93,
}));
} else {
numCharClassesOpen--;
tokens.push(createToken(TokenTypes.CharacterClassClose, m));
if (!numCharClassesOpen) {
break;
}
}
} else {
const result = createTokenForAnyTokenWithinCharClass(m);
Expand Down Expand Up @@ -739,12 +744,6 @@ function splitEscapedNumToken(token, numCaptures) {
return tokens;
}

/**
Throws if the raw text of a character class opener ends with `]`, i.e. the class is closed
immediately after being opened.
@param {string} raw Raw source text of the opener.
@throws {Error} If `raw` ends with `]`.
*/
function assertNonEmptyCharClass(raw) {
  const isClosedImmediately = raw.endsWith(']');
  if (!isClosedImmediately) {
    return;
  }
  throw new Error(`Empty character class "${raw}" unsupported in Oniguruma`);
}

function assertSingleCodePoint(raw) {
if ([...raw].length !== 1) {
throw new Error(`Expected "${raw}" to be a single code point`);
Expand Down

0 comments on commit 1c49b25

Please sign in to comment.