Skip to content

Commit

Permalink
Error for octal encoded byte > 177
Browse files Browse the repository at this point in the history
  • Loading branch information
slevithan committed Nov 11, 2024
1 parent 81d68d6 commit 9aa7683
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 8 deletions.
7 changes: 6 additions & 1 deletion spec/match-char-class.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,12 @@ describe('CharacterClass', () => {
expect('\u{1}').toExactlyMatch(r`[\01]`);
expect('\u{1}').toExactlyMatch(r`[\001]`);
expect(cp(0o17)).toExactlyMatch(r`[\17]`);
expect(cp(0o777)).toExactlyMatch(r`[\777]`);
expect(cp(0o177)).toExactlyMatch(r`[\177]`);
});

it(r`should throw for UTF-8 encoded byte sequence in octal (above \177)`, () => {
expect(() => toDetails(r`[\200]`)).toThrow();
expect(() => toDetails(r`[\777]`)).toThrow();
});

it('should match octals followed by literal digits', () => {
Expand Down
7 changes: 6 additions & 1 deletion spec/match-char.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,12 @@ describe('Character', () => {
expect('\u{1}').toExactlyMatch(r`\01`);
expect('\u{1}').toExactlyMatch(r`\001`);
expect(cp(0o17)).toExactlyMatch(r`\17`);
expect(cp(0o777)).toExactlyMatch(r`\777`);
expect(cp(0o177)).toExactlyMatch(r`\177`);
});

it(r`should throw for UTF-8 encoded byte sequence in octal (above \177)`, () => {
expect(() => toDetails(r`\200`)).toThrow();
expect(() => toDetails(r`\777`)).toThrow();
});

it('should match octals followed by literal digits', () => {
Expand Down
18 changes: 12 additions & 6 deletions src/tokenize.js
Original file line number Diff line number Diff line change
Expand Up @@ -676,9 +676,8 @@ function getValidatedHexCharCode(raw) {
/^\\x\{\s*(?<hex>\p{AHex}+)/u.exec(raw).groups.hex :
raw.slice(2);
const dec = parseInt(hex, 16);
if (dec > 0x7F && /^\\x\p{AHex}/u.test(raw)) {
throw new Error(r`\xNN above 7F unsupported in Oniguruma "${raw}"`);
} else if (dec > 0x13FFFF) {
// `\xNN` above 0x7F is handled elsewhere as a UTF-8 encoded byte sequence
if (dec > 0x13FFFF) {
throw new Error(`Invalid out of range "${raw}"`);
} else if (dec > 0x10FFFF) {
throw new Error(`Invalid out of range in JS "${raw}"`);
Expand Down Expand Up @@ -708,10 +707,17 @@ function splitEscapedNumToken(token, numCaptures) {
const matches = value.match(/^[0-7]+|\d/g);
for (let i = 0; i < matches.length; i++) {
const m = matches[i];
let value;
// Octal digits are 0-7
const value = (i === 0 && m !== '8' && m !== '9') ?
parseInt(m, 8) :
m.codePointAt(0);
if (i === 0 && m !== '8' && m !== '9') {
value = parseInt(m, 8);
if (value > 0o177) {
// UTF-8 encoded byte sequence in octal; unsupported
throw new Error(r`Octal encoded byte above 177 unsupported "${raw}"`);
}
} else {
value = m.codePointAt(0);
}
tokens.push(createToken(TokenTypes.Character, (i === 0 ? '\\' : '') + m, {
value,
}));
Expand Down

0 comments on commit 9aa7683

Please sign in to comment.