diff --git a/README.md b/README.md index a4534545..ccb65669 100644 --- a/README.md +++ b/README.md @@ -109,6 +109,7 @@ The rules with the following star :star: are included in the `plugin:regexp/reco | [regexp/no-lazy-ends](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-lazy-ends.html) | disallow lazy quantifiers at the end of an expression | | | [regexp/no-optional-assertion](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-optional-assertion.html) | disallow optional assertions | | | [regexp/no-potentially-useless-backreference](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-potentially-useless-backreference.html) | disallow backreferences that reference a group that might not be matched | | +| [regexp/no-super-linear-backtracking](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-super-linear-backtracking.html) | disallow exponential and polynomial backtracking | :wrench: | | [regexp/no-useless-assertions](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-useless-assertions.html) | disallow assertions that are known to always accept (or reject) | | | [regexp/no-useless-backreference](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-useless-backreference.html) | disallow useless backreferences in regular expressions | :star: | | [regexp/no-useless-dollar-replacements](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-useless-dollar-replacements.html) | disallow useless `$` replacements in replacement string | | diff --git a/docs/rules/README.md b/docs/rules/README.md index 53656e64..6bd65caf 100644 --- a/docs/rules/README.md +++ b/docs/rules/README.md @@ -23,6 +23,7 @@ The rules with the following star :star: are included in the `plugin:regexp/reco | [regexp/no-lazy-ends](./no-lazy-ends.md) | disallow lazy quantifiers at the end of an expression | | | [regexp/no-optional-assertion](./no-optional-assertion.md) | disallow optional assertions | | | 
[regexp/no-potentially-useless-backreference](./no-potentially-useless-backreference.md) | disallow backreferences that reference a group that might not be matched | | +| [regexp/no-super-linear-backtracking](./no-super-linear-backtracking.md) | disallow exponential and polynomial backtracking | :wrench: | | [regexp/no-useless-assertions](./no-useless-assertions.md) | disallow assertions that are known to always accept (or reject) | | | [regexp/no-useless-backreference](./no-useless-backreference.md) | disallow useless backreferences in regular expressions | :star: | | [regexp/no-useless-dollar-replacements](./no-useless-dollar-replacements.md) | disallow useless `$` replacements in replacement string | | diff --git a/docs/rules/no-super-linear-backtracking.md b/docs/rules/no-super-linear-backtracking.md new file mode 100644 index 00000000..d80edfac --- /dev/null +++ b/docs/rules/no-super-linear-backtracking.md @@ -0,0 +1,92 @@ +--- +pageClass: "rule-details" +sidebarDepth: 0 +title: "regexp/no-super-linear-backtracking" +description: "disallow exponential and polynomial backtracking" +--- +# regexp/no-super-linear-backtracking + +> disallow exponential and polynomial backtracking + +- :exclamation: ***This rule has not been released yet.*** +- :wrench: The `--fix` option on the [command line](https://eslint.org/docs/user-guide/command-line-interface#fixing-problems) can automatically fix some of the problems reported by this rule. + +## :book: Rule Details + +This rule reports cases of exponential and polynomial backtracking. + +These types of backtracking almost always cause an exponential or polynomial worst-case runtime. This super-linear worst-case runtime can be exploited by attackers in what is called [Regular expression Denial of Service - ReDoS][1]. 
+ + + +```js +/* eslint regexp/no-super-linear-backtracking: "error" */ + +/* ✓ GOOD */ +var foo = /a*b+a*$/; +var foo = /(?:a+)?/; + +/* ✗ BAD */ +var foo = /(?:a+)+$/; +var foo = /a*b?a*$/; +var foo = /(?:a|b|c+)*$/; +// not all cases can automatically be fixed +var foo = /\s*(.*?)(?=:)/; +var foo = /.+?(?=\s*=)/; +``` + + + +### Limitations + +The rule only implements a very simplistic detection method and can only detect very simple cases of super-linear backtracking right now. + +While the detection will improve in the future, this rule will never be able to perfectly detect all cases of super-linear backtracking. + + +## :wrench: Options + +```json +{ + "regexp/no-super-linear-backtracking": ["error", { + "report": "certain" + }] +} +``` + +### `report` + +Every input string that exploits super-linear worst-case runtime can be separated into 3 parts: + +1. A prefix that leads to the exploitable part of the regex. +2. A non-empty string that will be repeated to exploit the ambiguity. +3. A rejecting suffix that forces the regex engine to backtrack. + +For some regexes it is not possible to find a rejecting suffix even though the regex contains exploitable ambiguity (e.g. `/(?:a+)+/`). These regexes are safe as long as they are used as is. However, regexes can also be used as building blocks to create more complex regexes. In this case, the ambiguity might cause super-linear backtracking in the composite regex. + +This option controls whether ambiguity that might cause super-linear backtracking will be reported. + +- `report: "certain"` (_default_) + + Only certain cases of super-linear backtracking will be reported. + + This means that ambiguity will only be reported if this rule can prove that there exists a rejecting suffix. + +- `report: "potential"` + + All certain and potential cases of super-linear backtracking will be reported. + + Potential cases are ones where a rejecting suffix might be possible. 
Whether the reported potential cases are false positives or not has to be decided by the developer. + +## :books: Further reading + +- [Regular expression Denial of Service - ReDoS][1] +- [scslre] + +[1]: https://owasp.org/www-community/attacks/Regular_expression_Denial_of_Service_-_ReDoS +[scslre]: https://github.com/RunDevelopment/scslre + +## :mag: Implementation + +- [Rule source](https://github.com/ota-meshi/eslint-plugin-regexp/blob/master/lib/rules/no-super-linear-backtracking.ts) +- [Test source](https://github.com/ota-meshi/eslint-plugin-regexp/blob/master/tests/lib/rules/no-super-linear-backtracking.ts) diff --git a/lib/rules/no-super-linear-backtracking.ts b/lib/rules/no-super-linear-backtracking.ts new file mode 100644 index 00000000..a3ebbbd0 --- /dev/null +++ b/lib/rules/no-super-linear-backtracking.ts @@ -0,0 +1,166 @@ +import type { RegExpVisitor } from "regexpp/visitor" +import type { RegExpContext } from "../utils" +import { createRule, defineRegexpVisitor } from "../utils" +import { UsageOfPattern } from "../utils/get-usage-of-pattern" +import type { ParsedLiteral } from "scslre" +import { analyse } from "scslre" +import type { Position, SourceLocation } from "estree" + +/** + * Returns a new source location spanning both given locations (the earlier start and the later end). + */ +function unionLocations(a: SourceLocation, b: SourceLocation): SourceLocation { + /** Whether x comes strictly before y, comparing line first and then column. */ + function less(x: Position, y: Position): boolean { + if (x.line < y.line) { + return true + } else if (x.line > y.line) { + return false + } + return x.column < y.column + } + + return { + start: { ...(less(a.start, b.start) ? a.start : b.start) }, + end: { ...(less(a.end, b.end) ? b.end : a.end) }, + } +} + +/** + * Create a parsed literal object as required by the scslre library. Each flag is read from its own property of the context's flags and defaults to `false` when unset. + */ +function getParsedLiteral(context: RegExpContext): ParsedLiteral { + const { flags, flagsString, patternAst } = context + + return { + pattern: patternAst, + flags: { + type: "Flags", + raw: flagsString ?? 
"", + parent: null, + start: NaN, + end: NaN, + dotAll: flags.dotAll ?? false, + global: flags.global ?? false, + ignoreCase: flags.ignoreCase ?? false, + multiline: flags.multiline ?? false, + sticky: flags.sticky ?? false, + unicode: flags.unicode ?? false, + }, + } +} + +export default createRule("no-super-linear-backtracking", { + meta: { + docs: { + description: "disallow exponential and polynomial backtracking", + category: "Possible Errors", + // TODO Switch to recommended in the major version. + // recommended: true, + recommended: false, + }, + fixable: "code", + schema: [ + { + type: "object", + properties: { + report: { + enum: ["certain", "potential"], + }, + }, + additionalProperties: false, + }, + ], + messages: { + self: + "This quantifier can reach itself via the loop '{{parent}}'." + + " Using any string accepted by {{attack}}, this can be exploited to cause at least polynomial backtracking." + + "{{exp}}", + trade: + "The quantifier '{{start}}' can exchange characters with '{{end}}'." + + " Using any string accepted by {{attack}}, this can be exploited to cause at least polynomial backtracking." + + "{{exp}}", + }, + type: "problem", + }, + create(context) { + const reportUncertain = + (context.options[0]?.report ?? "certain") === "potential" + + /** + * Create visitor. The scslre analysis runs once on the whole pattern here and every result is reported immediately, so no per-node handlers are returned. + */ + function createVisitor( + regexpContext: RegExpContext, + ): RegExpVisitor.Handlers { + const { + node, + patternAst, + flags, + getRegexpLocation, + fixReplaceNode, + getUsageOfPattern, + } = regexpContext + + const result = analyse(getParsedLiteral(regexpContext), { + reportTypes: { Move: false }, + assumeRejectingSuffix: + reportUncertain && + getUsageOfPattern() !== UsageOfPattern.whole, + }) + + for (const report of result.reports) { + const exp = report.exponential + ? " This is going to cause exponential backtracking resulting in exponential worst-case runtime behavior." + : getUsageOfPattern() !== UsageOfPattern.whole + ? " This might cause exponential backtracking." 
+ : "" + + const attack = `/${report.character.literal.source}+/${ + flags.ignoreCase ? "i" : "" + }` + + const fix = fixReplaceNode( + patternAst, + () => report.fix()?.source ?? null, + ) + + if (report.type === "Self") { + context.report({ + node, + loc: getRegexpLocation(report.quant), + messageId: "self", + data: { + exp, + attack, + parent: report.parentQuant.raw, + }, + fix, + }) + } else if (report.type === "Trade") { + context.report({ + node, + loc: unionLocations( + getRegexpLocation(report.startQuant), + getRegexpLocation(report.endQuant), + ), + messageId: "trade", + data: { + exp, + attack, + start: report.startQuant.raw, + end: report.endQuant.raw, + }, + fix, + }) + } + } + + return {} + } + + return defineRegexpVisitor(context, { + createVisitor, + }) + }, +}) diff --git a/lib/utils/rules.ts b/lib/utils/rules.ts index a0841825..08e1a723 100644 --- a/lib/utils/rules.ts +++ b/lib/utils/rules.ts @@ -22,6 +22,7 @@ import noOctal from "../rules/no-octal" import noOptionalAssertion from "../rules/no-optional-assertion" import noPotentiallyUselessBackreference from "../rules/no-potentially-useless-backreference" import noStandaloneBackslash from "../rules/no-standalone-backslash" +import noSuperLinearBacktracking from "../rules/no-super-linear-backtracking" import noTriviallyNestedAssertion from "../rules/no-trivially-nested-assertion" import noTriviallyNestedQuantifier from "../rules/no-trivially-nested-quantifier" import noUnusedCapturingGroup from "../rules/no-unused-capturing-group" @@ -87,6 +88,7 @@ export const rules = [ noOptionalAssertion, noPotentiallyUselessBackreference, noStandaloneBackslash, + noSuperLinearBacktracking, noTriviallyNestedAssertion, noTriviallyNestedQuantifier, noUnusedCapturingGroup, diff --git a/package-lock.json b/package-lock.json index 080e8ba9..6cea41be 100644 --- a/package-lock.json +++ b/package-lock.json @@ -12574,6 +12574,16 @@ "ajv-keywords": "^3.5.2" } }, + "scslre": { + "version": "0.1.5", + "resolved": 
"https://registry.npmjs.org/scslre/-/scslre-0.1.5.tgz", + "integrity": "sha512-PUWMog0DhV8dYB9zWV/YDDS9AT8pBbR12cWbqqzwRrhFpOGwu0OOFafFpUFD0Iw0+ZY5D4EpU4VWJai0SGwOCQ==", + "requires": { + "refa": "^0.8.0", + "regexp-ast-analysis": "^0.2.2", + "regexpp": "^3.1.0" + } + }, "section-matter": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/section-matter/-/section-matter-1.0.0.tgz", diff --git a/package.json b/package.json index 129fc623..03f4e451 100644 --- a/package.json +++ b/package.json @@ -90,6 +90,7 @@ "jsdoctypeparser": "^9.0.0", "refa": "^0.8.0", "regexp-ast-analysis": "^0.2.2", - "regexpp": "^3.1.0" + "regexpp": "^3.1.0", + "scslre": "^0.1.5" } } diff --git a/tests/lib/rules/no-super-linear-backtracking.ts b/tests/lib/rules/no-super-linear-backtracking.ts new file mode 100644 index 00000000..5bdb8e8b --- /dev/null +++ b/tests/lib/rules/no-super-linear-backtracking.ts @@ -0,0 +1,57 @@ +import { RuleTester } from "eslint" +import rule from "../../../lib/rules/no-super-linear-backtracking" + +const tester = new RuleTester({ + parserOptions: { + ecmaVersion: 2020, + sourceType: "module", + }, +}) + +tester.run("no-super-linear-backtracking", rule as any, { + valid: [ + String.raw`/regexp/`, + String.raw`/a+b+a+b+/`, + String.raw`/\w+\b[\w-]+/`, + ], + invalid: [ + // self: a quantifier that can reach itself through an enclosing loop (NOTE(review): the second case in this group is reported with the "trade" message) + { + code: String.raw`/b(?:a+)+b/`, + output: String.raw`/ba+b/`, + errors: [ + "This quantifier can reach itself via the loop '(?:a+)+'. Using any string accepted by /a+/, this can be exploited to cause at least polynomial backtracking. This is going to cause exponential backtracking resulting in exponential worst-case runtime behavior.", + ], + }, + { + code: String.raw`/(?:ba+|a+b){2}/`, + output: null, + errors: [ + "The quantifier 'a+' can exchange characters with 'a+'. Using any string accepted by /a+/, this can be exploited to cause at least polynomial backtracking. 
This might cause exponential backtracking.", + ], + }, + + // trade: two quantifiers that can exchange characters with each other + { + code: String.raw`/\ba+a+$/`, + output: String.raw`/\ba{2,}$/`, + errors: [ + "The quantifier 'a+' can exchange characters with 'a+'. Using any string accepted by /a+/, this can be exploited to cause at least polynomial backtracking. This might cause exponential backtracking.", + ], + }, + { + code: String.raw`/\b\w+a\w+$/`, + output: String.raw`/\b\w[\dA-Z_b-z]*a\w+$/`, + errors: [ + "The quantifier '\\w+' can exchange characters with '\\w+'. Using any string accepted by /a+/, this can be exploited to cause at least polynomial backtracking. This might cause exponential backtracking.", + ], + }, + { + code: String.raw`/\b\w+a?b{4}\w+$/`, + output: null, + errors: [ + "The quantifier '\\w+' can exchange characters with '\\w+'. Using any string accepted by /b+/, this can be exploited to cause at least polynomial backtracking. This might cause exponential backtracking.", + ], + }, + ], +})