diff --git a/spec/match-char-class.spec.js b/spec/match-char-class.spec.js index 7c0cac7..4e606ae 100644 --- a/spec/match-char-class.spec.js +++ b/spec/match-char-class.spec.js @@ -38,7 +38,12 @@ describe('CharacterClass', () => { expect('\u{1}').toExactlyMatch(r`[\01]`); expect('\u{1}').toExactlyMatch(r`[\001]`); expect(cp(0o17)).toExactlyMatch(r`[\17]`); - expect(cp(0o777)).toExactlyMatch(r`[\777]`); + expect(cp(0o177)).toExactlyMatch(r`[\177]`); + }); + + it(r`should throw for UTF-8 encoded byte sequence in octal (above \177)`, () => { + expect(() => toDetails(r`[\200]`)).toThrow(); + expect(() => toDetails(r`[\777]`)).toThrow(); }); it('should match octals followed by literal digits', () => { diff --git a/spec/match-char.spec.js b/spec/match-char.spec.js index b769a15..386d188 100644 --- a/spec/match-char.spec.js +++ b/spec/match-char.spec.js @@ -133,7 +133,12 @@ describe('Character', () => { expect('\u{1}').toExactlyMatch(r`\01`); expect('\u{1}').toExactlyMatch(r`\001`); expect(cp(0o17)).toExactlyMatch(r`\17`); - expect(cp(0o777)).toExactlyMatch(r`\777`); + expect(cp(0o177)).toExactlyMatch(r`\177`); + }); + + it(r`should throw for UTF-8 encoded byte sequence in octal (above \177)`, () => { + expect(() => toDetails(r`\200`)).toThrow(); + expect(() => toDetails(r`\777`)).toThrow(); }); it('should match octals followed by literal digits', () => { diff --git a/src/tokenize.js b/src/tokenize.js index 1291bd6..8cd019f 100644 --- a/src/tokenize.js +++ b/src/tokenize.js @@ -676,9 +676,8 @@ function getValidatedHexCharCode(raw) { /^\\x\{\s*(?\p{AHex}+)/u.exec(raw).groups.hex : raw.slice(2); const dec = parseInt(hex, 16); - if (dec > 0x7F && /^\\x\p{AHex}/u.test(raw)) { - throw new Error(r`\xNN above 7F unsupported in Oniguruma "${raw}"`); - } else if (dec > 0x13FFFF) { + // `\xNN` above 0x7F is handled elsewhere as a UTF-8 encoded byte sequence + if (dec > 0x13FFFF) { throw new Error(`Invalid out of range "${raw}"`); } else if (dec > 0x10FFFF) { throw new Error(`Invalid out of range in JS "${raw}"`); @@ -708,10 +707,17 @@ function splitEscapedNumToken(token, numCaptures) { const matches = value.match(/^[0-7]+|\d/g); for (let i = 0; i < matches.length; i++) { const m = matches[i]; + let value; // Octal digits are 0-7 - const value = (i === 0 && m !== '8' && m !== '9') ? - parseInt(m, 8) : - m.codePointAt(0); + if (i === 0 && m !== '8' && m !== '9') { + value = parseInt(m, 8); + if (value > 0o177) { + // UTF-8 encoded byte sequence in octal; unsupported + throw new Error(r`Octal encoded byte above 177 unsupported "${raw}"`); + } + } else { + value = m.codePointAt(0); + } tokens.push(createToken(TokenTypes.Character, (i === 0 ? '\\' : '') + m, { value, }));