From 72e9b5ac42fcdfed011026fb50e2e1fb3a1a4eff Mon Sep 17 00:00:00 2001 From: Yosuke Ota Date: Wed, 26 Jul 2023 08:27:27 +0900 Subject: [PATCH] fix: wrong syntax error message when using `RegExpValidator#validatePattern` and `RegExpValidator#validateFlags`. (#113) --- src/regexp-syntax-error.ts | 23 +++-- src/validator.ts | 197 ++++++++++++++++++++++--------------- test/validate-error.ts | 176 +++++++++++++++++++++++++++++++++ 3 files changed, 305 insertions(+), 91 deletions(-) create mode 100644 test/validate-error.ts diff --git a/src/regexp-syntax-error.ts b/src/regexp-syntax-error.ts index 8e88a58..ccd51cf 100644 --- a/src/regexp-syntax-error.ts +++ b/src/regexp-syntax-error.ts @@ -1,22 +1,27 @@ +import type { RegExpValidatorSourceContext } from "./validator" + export class RegExpSyntaxError extends SyntaxError { public index: number public constructor( - source: string, + srcCtx: RegExpValidatorSourceContext, flags: { unicode: boolean; unicodeSets: boolean }, index: number, message: string, ) { - /*eslint-disable no-param-reassign */ - if (source) { - if (!source.startsWith("/")) { - source = `/${source}/${flags.unicode ? "u" : ""}${ - flags.unicodeSets ? "v" : "" - }` + let source = "" + if (srcCtx.kind === "literal") { + const literal = srcCtx.source.slice(srcCtx.start, srcCtx.end) + if (literal) { + source = `: ${literal}` } - source = `: ${source}` + } else if (srcCtx.kind === "pattern") { + const pattern = srcCtx.source.slice(srcCtx.start, srcCtx.end) + const flagsText = `${flags.unicode ? "u" : ""}${ + flags.unicodeSets ? "v" : "" + }` + source = `: /${pattern}/${flagsText}` } - /*eslint-enable no-param-reassign */ super(`Invalid regular expression${source}: ${message}`) this.index = index diff --git a/src/validator.ts b/src/validator.ts index 6ff44e3..2c1b684 100644 --- a/src/validator.ts +++ b/src/validator.ts @@ -213,6 +213,13 @@ function isUnicodePropertyValueCharacter(cp: number): boolean { return isUnicodePropertyNameCharacter(cp) || isDecimalDigit(cp) } +export type RegExpValidatorSourceContext = { + readonly source: string + readonly start: number + readonly end: number + readonly kind: "flags" | "literal" | "pattern" +} + export namespace RegExpValidator { /** * The options for RegExpValidator construction. @@ -628,6 +635,8 @@ export class RegExpValidator { private _backreferenceNames = new Set() + private _srcCtx: RegExpValidatorSourceContext | null = null + /** * Initialize this validator. * @param options The options of validator. @@ -647,6 +656,7 @@ export class RegExpValidator { start = 0, end: number = source.length, ): void { + this._srcCtx = { source, start, end, kind: "literal" } this._unicodeSetsMode = this._unicodeMode = this._nFlag = false this.reset(source, start, end) @@ -655,8 +665,8 @@ export class RegExpValidator { const flagStart = this.index const unicode = source.includes("u", flagStart) const unicodeSets = source.includes("v", flagStart) - this.validateFlags(source, flagStart, end) - this.validatePattern(source, start + 1, flagStart - 1, { + this.validateFlagsInternal(source, flagStart, end) + this.validatePatternInternal(source, start + 1, flagStart - 1, { unicode, unicodeSets, }) @@ -680,68 +690,8 @@ export class RegExpValidator { start = 0, end: number = source.length, ): void { - const existingFlags = new Set() - let global = false - let ignoreCase = false - let multiline = false - let sticky = false - let unicode = false - let dotAll = false - let hasIndices = false - let unicodeSets = false - for (let i = start; i < end; ++i) { - const flag = source.charCodeAt(i) - - if (existingFlags.has(flag)) { - this.raise(`Duplicated flag '${source[i]}'`) - } - existingFlags.add(flag) - - if (flag === LATIN_SMALL_LETTER_G) { - global = true - } else if (flag === LATIN_SMALL_LETTER_I) { - ignoreCase = true - } else if (flag === LATIN_SMALL_LETTER_M) { - multiline = true - } else if ( - flag === LATIN_SMALL_LETTER_U && - this.ecmaVersion >= 2015 - ) { - unicode = true - } else if ( - flag === LATIN_SMALL_LETTER_Y && - this.ecmaVersion >= 2015 - ) { - sticky = true - } else if ( - flag === LATIN_SMALL_LETTER_S && - this.ecmaVersion >= 2018 - ) { - dotAll = true - } else if ( - flag === LATIN_SMALL_LETTER_D && - this.ecmaVersion >= 2022 - ) { - hasIndices = true - } else if ( - flag === LATIN_SMALL_LETTER_V && - this.ecmaVersion >= 2024 - ) { - unicodeSets = true - } else { - this.raise(`Invalid flag '${source[i]}'`) - } - } - this.onRegExpFlags(start, end, { - global, - ignoreCase, - multiline, - unicode, - sticky, - dotAll, - hasIndices, - unicodeSets, - }) + this._srcCtx = { source, start, end, kind: "flags" } + this.validateFlagsInternal(source, start, end) } /** @@ -786,7 +736,23 @@ export class RegExpValidator { } | undefined = undefined, ): void { - const mode = this._parseFlagsOptionToMode(uFlagOrFlags, source, end) + this._srcCtx = { source, start, end, kind: "pattern" } + this.validatePatternInternal(source, start, end, uFlagOrFlags) + } + + private validatePatternInternal( + source: string, + start = 0, + end: number = source.length, + uFlagOrFlags: + | boolean // The unicode flag (backward compatibility). + | { + unicode?: boolean + unicodeSets?: boolean + } + | undefined = undefined, + ): void { + const mode = this._parseFlagsOptionToMode(uFlagOrFlags, end) this._unicodeMode = mode.unicodeMode this._nFlag = mode.nFlag @@ -805,6 +771,75 @@ export class RegExpValidator { } } + private validateFlagsInternal( + source: string, + start: number, + end: number, + ): void { + const existingFlags = new Set() + let global = false + let ignoreCase = false + let multiline = false + let sticky = false + let unicode = false + let dotAll = false + let hasIndices = false + let unicodeSets = false + for (let i = start; i < end; ++i) { + const flag = source.charCodeAt(i) + + if (existingFlags.has(flag)) { + this.raise(`Duplicated flag '${source[i]}'`, { index: start }) + } + existingFlags.add(flag) + + if (flag === LATIN_SMALL_LETTER_G) { + global = true + } else if (flag === LATIN_SMALL_LETTER_I) { + ignoreCase = true + } else if (flag === LATIN_SMALL_LETTER_M) { + multiline = true + } else if ( + flag === LATIN_SMALL_LETTER_U && + this.ecmaVersion >= 2015 + ) { + unicode = true + } else if ( + flag === LATIN_SMALL_LETTER_Y && + this.ecmaVersion >= 2015 + ) { + sticky = true + } else if ( + flag === LATIN_SMALL_LETTER_S && + this.ecmaVersion >= 2018 + ) { + dotAll = true + } else if ( + flag === LATIN_SMALL_LETTER_D && + this.ecmaVersion >= 2022 + ) { + hasIndices = true + } else if ( + flag === LATIN_SMALL_LETTER_V && + this.ecmaVersion >= 2024 + ) { + unicodeSets = true + } else { + this.raise(`Invalid flag '${source[i]}'`, { index: start }) + } + } + this.onRegExpFlags(start, end, { + global, + ignoreCase, + multiline, + unicode, + sticky, + dotAll, + hasIndices, + unicodeSets, + }) + } + private _parseFlagsOptionToMode( uFlagOrFlags: | boolean // The unicode flag (backward compatibility). @@ -813,7 +848,6 @@ export class RegExpValidator { unicodeSets?: boolean } | undefined, - source: string, sourceEnd: number, ): { unicodeMode: boolean @@ -837,12 +871,11 @@ export class RegExpValidator { if (unicode && unicodeSets) { // 1. If v is true and u is true, then // a. Let parseResult be a List containing one SyntaxError object. - throw new RegExpSyntaxError( - source, - { unicode, unicodeSets }, - sourceEnd + 1 /* `/` */, - "Invalid regular expression flags", - ) + this.raise("Invalid regular expression flags", { + index: sourceEnd + 1 /* `/` */, + unicode, + unicodeSets, + }) } const unicodeMode = unicode || unicodeSets @@ -856,7 +889,6 @@ export class RegExpValidator { return { unicodeMode, nFlag, unicodeSetsMode } } - // #region Delegate for Options private get strict() { @@ -1164,10 +1196,6 @@ export class RegExpValidator { // #region Delegate for Reader - private get source(): string { - return this._reader.source - } - private get index(): number { return this._reader.index } @@ -1214,14 +1242,19 @@ export class RegExpValidator { // #endregion - private raise(message: string): never { + private raise( + message: string, + context?: { index?: number; unicode?: boolean; unicodeSets?: boolean }, + ): never { throw new RegExpSyntaxError( - this.source, + this._srcCtx!, { - unicode: this._unicodeMode && !this._unicodeSetsMode, - unicodeSets: this._unicodeSetsMode, + unicode: + context?.unicode ?? + (this._unicodeMode && !this._unicodeSetsMode), + unicodeSets: context?.unicodeSets ?? this._unicodeSetsMode, }, - this.index, + context?.index ?? this.index, message, ) } diff --git a/test/validate-error.ts b/test/validate-error.ts new file mode 100644 index 0000000..4cf675b --- /dev/null +++ b/test/validate-error.ts @@ -0,0 +1,176 @@ +import assert from "assert" +import { RegExpValidator } from "../src/index" +import type { RegExpSyntaxError } from "../src/regexp-syntax-error" + +const validator = new RegExpValidator() + +function getErrorForPattern( + source: string, + start: number, + end: number, + flags: { + unicode?: boolean + unicodeSets?: boolean + }, +): RegExpSyntaxError { + try { + validator.validatePattern(source, start, end, flags) + } catch (err) { + const error = err as RegExpSyntaxError + return error + } + return assert.fail("Should fail, but succeeded.") +} + +function getErrorForFlags( + source: string, + start: number, + end: number, +): RegExpSyntaxError { + try { + validator.validateFlags(source, start, end) + } catch (err) { + const error = err as RegExpSyntaxError + return error + } + return assert.fail("Should fail, but succeeded.") +} + +function getErrorForLiteral( + source: string, + start: number, + end: number, +): RegExpSyntaxError { + try { + validator.validateLiteral(source, start, end) + } catch (err) { + const error = err as RegExpSyntaxError + return error + } + return assert.fail("Should fail, but succeeded.") +} + +describe("RegExpValidator#validatePattern error:", () => { + for (const test of [ + { + source: "abcd", + start: 0, + end: 2, + flags: { unicode: true, unicodeSets: true }, + error: { + message: + "Invalid regular expression: /ab/uv: Invalid regular expression flags", + index: 3, + }, + }, + { + source: "[A]", + start: 0, + end: 2, + flags: { unicode: true, unicodeSets: false }, + error: { + message: + "Invalid regular expression: /[A/u: Unterminated character class", + index: 2, + }, + }, + { + source: "[[A]]", + start: 0, + end: 4, + flags: { unicode: false, unicodeSets: true }, + error: { + message: + "Invalid regular expression: /[[A]/v: Unterminated character class", + index: 4, + }, + }, + { + source: " /[[A]/v ", + start: 2, + end: 6, + flags: { unicode: false, unicodeSets: true }, + error: { + message: + "Invalid regular expression: /[[A]/v: Unterminated character class", + index: 6, + }, + }, + ]) { + it(`${JSON.stringify(test)} should throw syntax error.`, () => { + const error = getErrorForPattern( + test.source, + test.start, + test.end, + test.flags, + ) + assert.deepStrictEqual( + { message: error.message, index: error.index }, + test.error, + ) + }) + } +}) + +describe("RegExpValidator#validateFlags error:", () => { + for (const test of [ + { + source: "abcd", + start: 0, + end: 2, + error: { + message: "Invalid regular expression: Invalid flag 'a'", + index: 0, + }, + }, + { + source: "dd", + start: 0, + end: 2, + error: { + message: "Invalid regular expression: Duplicated flag 'd'", + index: 0, + }, + }, + { + source: "/a/dd", + start: 3, + end: 5, + error: { + message: "Invalid regular expression: Duplicated flag 'd'", + index: 3, + }, + }, + ]) { + it(`${JSON.stringify(test)} should throw syntax error.`, () => { + const error = getErrorForFlags(test.source, test.start, test.end) + assert.deepStrictEqual( + { message: error.message, index: error.index }, + test.error, + ) + }) + } +}) + +describe("RegExpValidator#validateLiteral error:", () => { + for (const test of [ + { + source: " /[/ ", + start: 1, + end: 4, + error: { + message: + "Invalid regular expression: /[/: Unterminated character class", + index: 4, + }, + }, + ]) { + it(`${JSON.stringify(test)} should throw syntax error.`, () => { + const error = getErrorForLiteral(test.source, test.start, test.end) + assert.deepStrictEqual( + { message: error.message, index: error.index }, + test.error, + ) + }) + } +})