Skip to content

Shared: support quoted operands in access path components #13441

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@
*
* This file is used by the shared data flow library and by the JavaScript libraries
* (which does not use the shared data flow libraries).
*
* An access path consists of a `.`-separated list of components, which each consist
* of an identifier optionally followed by a `[]`-enclosed argument.
*
* The argument can be be enclosed in single quotes. In this case, any character is allowed in-between
* the single quotes, and two consecutive single quotes will be interpreted as an individual single quote.
*/

/**
Expand Down Expand Up @@ -116,7 +122,7 @@ private string getRawToken(AccessPath path, int n) {
// Avoid splitting by '.' since tokens may contain dots, e.g. `Field[foo.Bar.x]`.
// Instead use regexpFind to match valid tokens, and supplement with a final length
// check (in `AccessPath.hasSyntaxError`) to ensure all characters were included in a token.
result = path.regexpFind("\\w+(?:\\[[^\\]]*\\])?(?=\\.|$)", n, _)
result = path.regexpFind("\\w+(?:\\[(?:[^'\\]]*|'(?:[^']|'')*')\\])?(?=\\.|$)", n, _)
}

/**
Expand Down Expand Up @@ -152,7 +158,7 @@ class AccessPathToken extends string {
AccessPathToken() { this = getRawToken(_, _) }

private string getPart(int part) {
result = this.regexpCapture("([^\\[]+)(?:\\[([^\\]]*)\\])?", part)
result = this.regexpCapture("([^\\[]+)(?:\\[(?:([^'\\]]*)|'((?:[^']|'')*)')\\])?", part)
}

/** Gets the name of the token, such as `Member` from `Member[x]` */
Expand All @@ -162,10 +168,20 @@ class AccessPathToken extends string {
* Gets the argument list, such as `1,2` from `Member[1,2]`,
* or has no result if there are no arguments.
*/
string getArgumentList() { result = this.getPart(2) }
string getArgumentList() {
result = this.getPart(2) or result = this.getPart(3).regexpReplaceAll("''", "'")
}

/** Gets the `n`th argument to this token, such as `x` or `y` from `Member[x,y]`. */
string getArgument(int n) { result = this.getArgumentList().splitAt(",", n).trim() }
/**
* Gets the `n`th argument to this token, such as `x` or `y` from `Member[x,y]`, or
* `foo,bar` from `Member['foo,bar']`.
*/
pragma[nomagic]
string getArgument(int n) {
result = this.getPart(2).splitAt(",", n).trim()
or
result = this.getPart(3).replaceAll("''", "'") and n = 0
}

/** Gets the `n`th argument to this `name` token, such as `x` or `y` from `Member[x,y]`. */
pragma[nomagic]
Expand Down
26 changes: 21 additions & 5 deletions go/ql/lib/semmle/go/dataflow/internal/AccessPathSyntax.qll
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@
*
* This file is used by the shared data flow library and by the JavaScript libraries
* (which does not use the shared data flow libraries).
*
* An access path consists of a `.`-separated list of components, which each consist
* of an identifier optionally followed by a `[]`-enclosed argument.
*
* The argument can be be enclosed in single quotes. In this case, any character is allowed in-between
* the single quotes, and two consecutive single quotes will be interpreted as an individual single quote.
*/

/**
Expand Down Expand Up @@ -116,7 +122,7 @@ private string getRawToken(AccessPath path, int n) {
// Avoid splitting by '.' since tokens may contain dots, e.g. `Field[foo.Bar.x]`.
// Instead use regexpFind to match valid tokens, and supplement with a final length
// check (in `AccessPath.hasSyntaxError`) to ensure all characters were included in a token.
result = path.regexpFind("\\w+(?:\\[[^\\]]*\\])?(?=\\.|$)", n, _)
result = path.regexpFind("\\w+(?:\\[(?:[^'\\]]*|'(?:[^']|'')*')\\])?(?=\\.|$)", n, _)
}

/**
Expand Down Expand Up @@ -152,7 +158,7 @@ class AccessPathToken extends string {
AccessPathToken() { this = getRawToken(_, _) }

private string getPart(int part) {
result = this.regexpCapture("([^\\[]+)(?:\\[([^\\]]*)\\])?", part)
result = this.regexpCapture("([^\\[]+)(?:\\[(?:([^'\\]]*)|'((?:[^']|'')*)')\\])?", part)
}

/** Gets the name of the token, such as `Member` from `Member[x]` */
Expand All @@ -162,10 +168,20 @@ class AccessPathToken extends string {
* Gets the argument list, such as `1,2` from `Member[1,2]`,
* or has no result if there are no arguments.
*/
string getArgumentList() { result = this.getPart(2) }
string getArgumentList() {
result = this.getPart(2) or result = this.getPart(3).regexpReplaceAll("''", "'")
}

/** Gets the `n`th argument to this token, such as `x` or `y` from `Member[x,y]`. */
string getArgument(int n) { result = this.getArgumentList().splitAt(",", n).trim() }
/**
* Gets the `n`th argument to this token, such as `x` or `y` from `Member[x,y]`, or
* `foo,bar` from `Member['foo,bar']`.
*/
pragma[nomagic]
string getArgument(int n) {
result = this.getPart(2).splitAt(",", n).trim()
or
result = this.getPart(3).replaceAll("''", "'") and n = 0
}

/** Gets the `n`th argument to this `name` token, such as `x` or `y` from `Member[x,y]`. */
pragma[nomagic]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@
*
* This file is used by the shared data flow library and by the JavaScript libraries
* (which does not use the shared data flow libraries).
*
* An access path consists of a `.`-separated list of components, which each consist
* of an identifier optionally followed by a `[]`-enclosed argument.
*
* The argument can be be enclosed in single quotes. In this case, any character is allowed in-between
* the single quotes, and two consecutive single quotes will be interpreted as an individual single quote.
*/

/**
Expand Down Expand Up @@ -116,7 +122,7 @@ private string getRawToken(AccessPath path, int n) {
// Avoid splitting by '.' since tokens may contain dots, e.g. `Field[foo.Bar.x]`.
// Instead use regexpFind to match valid tokens, and supplement with a final length
// check (in `AccessPath.hasSyntaxError`) to ensure all characters were included in a token.
result = path.regexpFind("\\w+(?:\\[[^\\]]*\\])?(?=\\.|$)", n, _)
result = path.regexpFind("\\w+(?:\\[(?:[^'\\]]*|'(?:[^']|'')*')\\])?(?=\\.|$)", n, _)
}

/**
Expand Down Expand Up @@ -152,7 +158,7 @@ class AccessPathToken extends string {
AccessPathToken() { this = getRawToken(_, _) }

private string getPart(int part) {
result = this.regexpCapture("([^\\[]+)(?:\\[([^\\]]*)\\])?", part)
result = this.regexpCapture("([^\\[]+)(?:\\[(?:([^'\\]]*)|'((?:[^']|'')*)')\\])?", part)
}

/** Gets the name of the token, such as `Member` from `Member[x]` */
Expand All @@ -162,10 +168,20 @@ class AccessPathToken extends string {
* Gets the argument list, such as `1,2` from `Member[1,2]`,
* or has no result if there are no arguments.
*/
string getArgumentList() { result = this.getPart(2) }
string getArgumentList() {
result = this.getPart(2) or result = this.getPart(3).regexpReplaceAll("''", "'")
}

/** Gets the `n`th argument to this token, such as `x` or `y` from `Member[x,y]`. */
string getArgument(int n) { result = this.getArgumentList().splitAt(",", n).trim() }
/**
* Gets the `n`th argument to this token, such as `x` or `y` from `Member[x,y]`, or
* `foo,bar` from `Member['foo,bar']`.
*/
pragma[nomagic]
string getArgument(int n) {
result = this.getPart(2).splitAt(",", n).trim()
or
result = this.getPart(3).replaceAll("''", "'") and n = 0
}

/** Gets the `n`th argument to this `name` token, such as `x` or `y` from `Member[x,y]`. */
pragma[nomagic]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@
*
* This file is used by the shared data flow library and by the JavaScript libraries
* (which does not use the shared data flow libraries).
*
* An access path consists of a `.`-separated list of components, which each consist
* of an identifier optionally followed by a `[]`-enclosed argument.
*
* The argument can be be enclosed in single quotes. In this case, any character is allowed in-between
* the single quotes, and two consecutive single quotes will be interpreted as an individual single quote.
*/

/**
Expand Down Expand Up @@ -116,7 +122,7 @@ private string getRawToken(AccessPath path, int n) {
// Avoid splitting by '.' since tokens may contain dots, e.g. `Field[foo.Bar.x]`.
// Instead use regexpFind to match valid tokens, and supplement with a final length
// check (in `AccessPath.hasSyntaxError`) to ensure all characters were included in a token.
result = path.regexpFind("\\w+(?:\\[[^\\]]*\\])?(?=\\.|$)", n, _)
result = path.regexpFind("\\w+(?:\\[(?:[^'\\]]*|'(?:[^']|'')*')\\])?(?=\\.|$)", n, _)
}

/**
Expand Down Expand Up @@ -152,7 +158,7 @@ class AccessPathToken extends string {
AccessPathToken() { this = getRawToken(_, _) }

private string getPart(int part) {
result = this.regexpCapture("([^\\[]+)(?:\\[([^\\]]*)\\])?", part)
result = this.regexpCapture("([^\\[]+)(?:\\[(?:([^'\\]]*)|'((?:[^']|'')*)')\\])?", part)
}

/** Gets the name of the token, such as `Member` from `Member[x]` */
Expand All @@ -162,10 +168,20 @@ class AccessPathToken extends string {
* Gets the argument list, such as `1,2` from `Member[1,2]`,
* or has no result if there are no arguments.
*/
string getArgumentList() { result = this.getPart(2) }
string getArgumentList() {
result = this.getPart(2) or result = this.getPart(3).regexpReplaceAll("''", "'")
}

/** Gets the `n`th argument to this token, such as `x` or `y` from `Member[x,y]`. */
string getArgument(int n) { result = this.getArgumentList().splitAt(",", n).trim() }
/**
* Gets the `n`th argument to this token, such as `x` or `y` from `Member[x,y]`, or
* `foo,bar` from `Member['foo,bar']`.
*/
pragma[nomagic]
string getArgument(int n) {
result = this.getPart(2).splitAt(",", n).trim()
or
result = this.getPart(3).replaceAll("''", "'") and n = 0
}

/** Gets the `n`th argument to this `name` token, such as `x` or `y` from `Member[x,y]`. */
pragma[nomagic]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@
*
* This file is used by the shared data flow library and by the JavaScript libraries
* (which does not use the shared data flow libraries).
*
* An access path consists of a `.`-separated list of components, which each consist
* of an identifier optionally followed by a `[]`-enclosed argument.
*
* The argument can be be enclosed in single quotes. In this case, any character is allowed in-between
* the single quotes, and two consecutive single quotes will be interpreted as an individual single quote.
*/

/**
Expand Down Expand Up @@ -116,7 +122,7 @@ private string getRawToken(AccessPath path, int n) {
// Avoid splitting by '.' since tokens may contain dots, e.g. `Field[foo.Bar.x]`.
// Instead use regexpFind to match valid tokens, and supplement with a final length
// check (in `AccessPath.hasSyntaxError`) to ensure all characters were included in a token.
result = path.regexpFind("\\w+(?:\\[[^\\]]*\\])?(?=\\.|$)", n, _)
result = path.regexpFind("\\w+(?:\\[(?:[^'\\]]*|'(?:[^']|'')*')\\])?(?=\\.|$)", n, _)
}

/**
Expand Down Expand Up @@ -152,7 +158,7 @@ class AccessPathToken extends string {
AccessPathToken() { this = getRawToken(_, _) }

private string getPart(int part) {
result = this.regexpCapture("([^\\[]+)(?:\\[([^\\]]*)\\])?", part)
result = this.regexpCapture("([^\\[]+)(?:\\[(?:([^'\\]]*)|'((?:[^']|'')*)')\\])?", part)
}

/** Gets the name of the token, such as `Member` from `Member[x]` */
Expand All @@ -162,10 +168,20 @@ class AccessPathToken extends string {
* Gets the argument list, such as `1,2` from `Member[1,2]`,
* or has no result if there are no arguments.
*/
string getArgumentList() { result = this.getPart(2) }
string getArgumentList() {
result = this.getPart(2) or result = this.getPart(3).regexpReplaceAll("''", "'")
}

/** Gets the `n`th argument to this token, such as `x` or `y` from `Member[x,y]`. */
string getArgument(int n) { result = this.getArgumentList().splitAt(",", n).trim() }
/**
* Gets the `n`th argument to this token, such as `x` or `y` from `Member[x,y]`, or
* `foo,bar` from `Member['foo,bar']`.
*/
pragma[nomagic]
string getArgument(int n) {
result = this.getPart(2).splitAt(",", n).trim()
or
result = this.getPart(3).replaceAll("''", "'") and n = 0
}

/** Gets the `n`th argument to this `name` token, such as `x` or `y` from `Member[x,y]`. */
pragma[nomagic]
Expand Down
26 changes: 21 additions & 5 deletions ruby/ql/lib/codeql/ruby/dataflow/internal/AccessPathSyntax.qll
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@
*
* This file is used by the shared data flow library and by the JavaScript libraries
* (which does not use the shared data flow libraries).
*
* An access path consists of a `.`-separated list of components, which each consist
* of an identifier optionally followed by a `[]`-enclosed argument.
*
* The argument can be be enclosed in single quotes. In this case, any character is allowed in-between
* the single quotes, and two consecutive single quotes will be interpreted as an individual single quote.
*/

/**
Expand Down Expand Up @@ -116,7 +122,7 @@ private string getRawToken(AccessPath path, int n) {
// Avoid splitting by '.' since tokens may contain dots, e.g. `Field[foo.Bar.x]`.
// Instead use regexpFind to match valid tokens, and supplement with a final length
// check (in `AccessPath.hasSyntaxError`) to ensure all characters were included in a token.
result = path.regexpFind("\\w+(?:\\[[^\\]]*\\])?(?=\\.|$)", n, _)
result = path.regexpFind("\\w+(?:\\[(?:[^'\\]]*|'(?:[^']|'')*')\\])?(?=\\.|$)", n, _)
}

/**
Expand Down Expand Up @@ -152,7 +158,7 @@ class AccessPathToken extends string {
AccessPathToken() { this = getRawToken(_, _) }

private string getPart(int part) {
result = this.regexpCapture("([^\\[]+)(?:\\[([^\\]]*)\\])?", part)
result = this.regexpCapture("([^\\[]+)(?:\\[(?:([^'\\]]*)|'((?:[^']|'')*)')\\])?", part)
}

/** Gets the name of the token, such as `Member` from `Member[x]` */
Expand All @@ -162,10 +168,20 @@ class AccessPathToken extends string {
* Gets the argument list, such as `1,2` from `Member[1,2]`,
* or has no result if there are no arguments.
*/
string getArgumentList() { result = this.getPart(2) }
string getArgumentList() {
result = this.getPart(2) or result = this.getPart(3).regexpReplaceAll("''", "'")
}

/** Gets the `n`th argument to this token, such as `x` or `y` from `Member[x,y]`. */
string getArgument(int n) { result = this.getArgumentList().splitAt(",", n).trim() }
/**
* Gets the `n`th argument to this token, such as `x` or `y` from `Member[x,y]`, or
* `foo,bar` from `Member['foo,bar']`.
*/
pragma[nomagic]
string getArgument(int n) {
result = this.getPart(2).splitAt(",", n).trim()
or
result = this.getPart(3).replaceAll("''", "'") and n = 0
}

/** Gets the `n`th argument to this `name` token, such as `x` or `y` from `Member[x,y]`. */
pragma[nomagic]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ edges
| summaries.rb:1:1:1:7 | tainted | summaries.rb:147:16:147:22 | tainted |
| summaries.rb:1:1:1:7 | tainted | summaries.rb:150:39:150:45 | tainted |
| summaries.rb:1:1:1:7 | tainted | summaries.rb:150:39:150:45 | tainted |
| summaries.rb:1:1:1:7 | tainted | summaries.rb:154:5:154:11 | tainted |
| summaries.rb:1:1:1:7 | tainted | summaries.rb:154:5:154:11 | tainted |
| summaries.rb:1:1:1:7 | tainted | summaries.rb:155:10:155:16 | ... = ... |
| summaries.rb:1:1:1:7 | tainted | summaries.rb:155:10:155:16 | ... = ... |
| summaries.rb:1:11:1:36 | call to identity | summaries.rb:1:1:1:7 | tainted |
| summaries.rb:1:11:1:36 | call to identity | summaries.rb:1:1:1:7 | tainted |
| summaries.rb:1:20:1:36 | call to source | summaries.rb:1:11:1:36 | call to identity |
Expand Down Expand Up @@ -231,6 +235,8 @@ edges
| summaries.rb:122:16:122:22 | [post] tainted | summaries.rb:145:26:145:32 | tainted |
| summaries.rb:122:16:122:22 | [post] tainted | summaries.rb:147:16:147:22 | tainted |
| summaries.rb:122:16:122:22 | [post] tainted | summaries.rb:150:39:150:45 | tainted |
| summaries.rb:122:16:122:22 | [post] tainted | summaries.rb:154:5:154:11 | tainted |
| summaries.rb:122:16:122:22 | [post] tainted | summaries.rb:155:10:155:16 | ... = ... |
| summaries.rb:122:16:122:22 | tainted | summaries.rb:122:16:122:22 | [post] tainted |
| summaries.rb:122:16:122:22 | tainted | summaries.rb:122:25:122:25 | [post] y |
| summaries.rb:122:16:122:22 | tainted | summaries.rb:122:33:122:33 | [post] z |
Expand Down Expand Up @@ -474,6 +480,10 @@ nodes
| summaries.rb:147:16:147:22 | tainted | semmle.label | tainted |
| summaries.rb:150:39:150:45 | tainted | semmle.label | tainted |
| summaries.rb:150:39:150:45 | tainted | semmle.label | tainted |
| summaries.rb:154:5:154:11 | tainted | semmle.label | tainted |
| summaries.rb:154:5:154:11 | tainted | semmle.label | tainted |
| summaries.rb:155:10:155:16 | ... = ... | semmle.label | ... = ... |
| summaries.rb:155:10:155:16 | ... = ... | semmle.label | ... = ... |
subpaths
invalidSpecComponent
#select
Expand Down Expand Up @@ -573,6 +583,10 @@ invalidSpecComponent
| summaries.rb:147:16:147:22 | tainted | summaries.rb:1:20:1:36 | call to source | summaries.rb:147:16:147:22 | tainted | $@ | summaries.rb:1:20:1:36 | call to source | call to source |
| summaries.rb:150:39:150:45 | tainted | summaries.rb:1:20:1:36 | call to source | summaries.rb:150:39:150:45 | tainted | $@ | summaries.rb:1:20:1:36 | call to source | call to source |
| summaries.rb:150:39:150:45 | tainted | summaries.rb:1:20:1:36 | call to source | summaries.rb:150:39:150:45 | tainted | $@ | summaries.rb:1:20:1:36 | call to source | call to source |
| summaries.rb:154:5:154:11 | tainted | summaries.rb:1:20:1:36 | call to source | summaries.rb:154:5:154:11 | tainted | $@ | summaries.rb:1:20:1:36 | call to source | call to source |
| summaries.rb:154:5:154:11 | tainted | summaries.rb:1:20:1:36 | call to source | summaries.rb:154:5:154:11 | tainted | $@ | summaries.rb:1:20:1:36 | call to source | call to source |
| summaries.rb:155:10:155:16 | ... = ... | summaries.rb:1:20:1:36 | call to source | summaries.rb:155:10:155:16 | ... = ... | $@ | summaries.rb:1:20:1:36 | call to source | call to source |
| summaries.rb:155:10:155:16 | ... = ... | summaries.rb:1:20:1:36 | call to source | summaries.rb:155:10:155:16 | ... = ... | $@ | summaries.rb:1:20:1:36 | call to source | call to source |
warning
| CSV type row should have 3 columns but has 1: TooFewColumns |
| CSV type row should have 3 columns but has 6: TooManyColumns;;Member[Foo].Instance;too;many;columns |
Expand Down
2 changes: 2 additions & 0 deletions ruby/ql/test/library-tests/dataflow/summaries/Summaries.ql
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,8 @@ private class SinkFromModel extends ModelInput::SinkModelCsv {
"Foo!;Method[getSinks].ReturnValue.Element[any].Method[mySink].Argument[0];test-sink", //
"Foo!;Method[arraySink].Argument[0].Element[any];test-sink", //
"Foo!;Method[secondArrayElementIsSink].Argument[0].Element[1];test-sink", //
"Foo!;Method['[]'].Argument[0];test-sink", //
"Bar!;Method['[]='].Argument[1];test-sink", //
]
}
}
Expand Down
3 changes: 3 additions & 0 deletions ruby/ql/test/library-tests/dataflow/summaries/summaries.rb
Original file line number Diff line number Diff line change
Expand Up @@ -150,3 +150,6 @@ def userDefinedFunction(x, y)
Foo.secondArrayElementIsSink(["safe", tainted, "safe"]) # $ hasValueFlow=tainted
Foo.secondArrayElementIsSink(["safe", "safe", tainted])
Foo.secondArrayElementIsSink([tainted] * 10) # $ MISSING: hasValueFlow=tainted

Foo[tainted] # $ hasValueFlow=tainted
Bar[1] = tainted # $ hasValueFlow=tainted
Loading