Skip to content

JS: Generate flow summaries from summaryModels; only generate steps as a fallback #19445

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
May 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -264,3 +264,7 @@ module Stage {
cached
predicate backref() { optionalStep(_, _, _) }
}

predicate unsupportedCallable = Private::unsupportedCallable/1;

predicate unsupportedCallable = Private::unsupportedCallable/4;
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
private import javascript
private import internal.ApiGraphModels as Shared
private import internal.ApiGraphModelsSpecific as Specific
private import semmle.javascript.dataflow.internal.FlowSummaryPrivate
private import semmle.javascript.endpoints.EndpointNaming as EndpointNaming
import Shared::ModelInput as ModelInput
import Shared::ModelOutput as ModelOutput
Expand All @@ -45,12 +46,94 @@ private class ThreatModelSourceFromDataExtension extends ThreatModelSource::Rang
}
}

private class SummarizedCallableFromModel extends DataFlow::SummarizedCallable {
string type;
string path;

SummarizedCallableFromModel() {
ModelOutput::relevantSummaryModel(type, path, _, _, _, _) and
this = type + ";" + path
}

override DataFlow::InvokeNode getACall() { ModelOutput::resolvedSummaryBase(type, path, result) }

override predicate propagatesFlow(
string input, string output, boolean preservesValue, string model
) {
exists(string kind | ModelOutput::relevantSummaryModel(type, path, input, output, kind, model) |
kind = "value" and
preservesValue = true
or
kind = "taint" and
preservesValue = false
)
}

predicate hasTypeAndPath(string type_, string path_) { type = type_ and path = path_ }

predicate isUnsupportedByFlowSummaries() { unsupportedCallable(this) }
}

private predicate shouldInduceStepsFromSummary(string type, string path) {
exists(SummarizedCallableFromModel callable |
callable.isUnsupportedByFlowSummaries() and
callable.hasTypeAndPath(type, path)
)
}

/**
* Holds if `path` is an input or output spec for a summary with the given `base` node.
*/
pragma[nomagic]
private predicate relevantInputOutputPath(API::InvokeNode base, AccessPath inputOrOutput) {
exists(string type, string input, string output, string path |
// If the summary for 'callable' could not be handled as a flow summary, we need to evaluate
// its inputs and outputs to a set of nodes, so we can generate steps instead.
shouldInduceStepsFromSummary(type, path) and
ModelOutput::resolvedSummaryBase(type, path, base) and
ModelOutput::relevantSummaryModel(type, path, input, output, _, _) and
inputOrOutput = [input, output]
)
}

/**
* Gets the API node for the first `n` tokens of the given input/output path, evaluated relative to `baseNode`.
*/
private API::Node getNodeFromInputOutputPath(API::InvokeNode baseNode, AccessPath path, int n) {
relevantInputOutputPath(baseNode, path) and
(
n = 1 and
result = Shared::getSuccessorFromInvoke(baseNode, path.getToken(0))
or
result =
Shared::getSuccessorFromNode(getNodeFromInputOutputPath(baseNode, path, n - 1),
path.getToken(n - 1))
)
}

/**
* Gets the API node for the given input/output path, evaluated relative to `baseNode`.
*/
private API::Node getNodeFromInputOutputPath(API::InvokeNode baseNode, AccessPath path) {
result = getNodeFromInputOutputPath(baseNode, path, path.getNumToken())
}

private predicate summaryStep(API::Node pred, API::Node succ, string kind) {
exists(string type, string path, API::InvokeNode base, AccessPath input, AccessPath output |
shouldInduceStepsFromSummary(type, path) and
ModelOutput::relevantSummaryModel(type, path, input, output, kind, _) and
ModelOutput::resolvedSummaryBase(type, path, base) and
pred = getNodeFromInputOutputPath(base, input) and
succ = getNodeFromInputOutputPath(base, output)
)
}

/**
* Like `ModelOutput::summaryStep` but with API nodes mapped to data-flow nodes.
*/
private predicate summaryStepNodes(DataFlow::Node pred, DataFlow::Node succ, string kind) {
exists(API::Node predNode, API::Node succNode |
Specific::summaryStep(predNode, succNode, kind) and
summaryStep(predNode, succNode, kind) and
pred = predNode.asSink() and
succ = succNode.asSource()
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -272,51 +272,6 @@ predicate invocationMatchesExtraCallSiteFilter(API::InvokeNode invoke, AccessPat
)
}

/**
* Holds if `path` is an input or output spec for a summary with the given `base` node.
*/
pragma[nomagic]
private predicate relevantInputOutputPath(API::InvokeNode base, AccessPath inputOrOutput) {
exists(string type, string input, string output, string path |
ModelOutput::relevantSummaryModel(type, path, input, output, _, _) and
ModelOutput::resolvedSummaryBase(type, path, base) and
inputOrOutput = [input, output]
)
}

/**
* Gets the API node for the first `n` tokens of the given input/output path, evaluated relative to `baseNode`.
*/
private API::Node getNodeFromInputOutputPath(API::InvokeNode baseNode, AccessPath path, int n) {
relevantInputOutputPath(baseNode, path) and
(
n = 1 and
result = getSuccessorFromInvoke(baseNode, path.getToken(0))
or
result =
getSuccessorFromNode(getNodeFromInputOutputPath(baseNode, path, n - 1), path.getToken(n - 1))
)
}

/**
* Gets the API node for the given input/output path, evaluated relative to `baseNode`.
*/
private API::Node getNodeFromInputOutputPath(API::InvokeNode baseNode, AccessPath path) {
result = getNodeFromInputOutputPath(baseNode, path, path.getNumToken())
}

/**
* Holds if a CSV summary contributed the step `pred -> succ` of the given `kind`.
*/
predicate summaryStep(API::Node pred, API::Node succ, string kind) {
exists(string type, string path, API::InvokeNode base, AccessPath input, AccessPath output |
ModelOutput::relevantSummaryModel(type, path, input, output, kind, _) and
ModelOutput::resolvedSummaryBase(type, path, base) and
pred = getNodeFromInputOutputPath(base, input) and
succ = getNodeFromInputOutputPath(base, output)
)
}

class InvokeNode = API::InvokeNode;

/** Gets an `InvokeNode` corresponding to an invocation of `node`. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ function strToStr() {
}

function strToArray() {
sink(s.chop(source("s1"), 3)); // $ MISSING: hasTaintFlow=s1
sink(s.chop(source("s1"), 3)); // $ hasTaintFlow=s1
sink(s.chars(source("s2"))[0]); // $ hasTaintFlow=s2
sink(s.words(source("s3"))[0]); // $ hasTaintFlow=s3
sink(s.lines(source("s7"))[0]); // $ hasTaintFlow=s7
Expand Down Expand Up @@ -97,7 +97,7 @@ function multiSource() {

function chaining() {
sink(s(source("s1"))
.slugify().capitalize().decapitalize().clean().cleanDiacritics()
.slugify().capitalize().decapitalize().clean().cleanDiacritics()
.swapCase().escapeHTML().unescapeHTML().wrap().dedent()
.reverse().pred().succ().titleize().camelize().classify()
.underscored().dasherize().humanize().trim().ltrim().rtrim()
Expand All @@ -119,8 +119,8 @@ function chaining() {
.q(source("s17")).ljust(10, source("s18"))
.rjust(10, source("s19"))); // $ hasTaintFlow=s16 hasTaintFlow=s17 hasTaintFlow=s18 hasTaintFlow=s19

sink(s(source("s20")).tap(function(value) {
return value + source("s21");
sink(s(source("s20")).tap(function(value) {
return value + source("s21");
}).value()); // $ hasTaintFlow=s20 hasTaintFlow=s21
}

Expand Down
17 changes: 17 additions & 0 deletions javascript/ql/test/library-tests/frameworks/data/test.expected
Original file line number Diff line number Diff line change
@@ -1,4 +1,21 @@
legacyDataFlowDifference
| test.js:5:30:5:37 | source() | test.js:5:8:5:38 | testlib ... urce()) | only flow with NEW data flow library |
| test.js:6:22:6:29 | source() | test.js:6:8:6:30 | preserv ... urce()) | only flow with NEW data flow library |
| test.js:7:41:7:48 | source() | test.js:7:8:7:49 | require ... urce()) | only flow with NEW data flow library |
| test.js:11:38:11:45 | source() | test.js:11:8:11:55 | testlib ... , 1, 1) | only flow with NEW data flow library |
| test.js:13:44:13:51 | source() | test.js:13:8:13:55 | testlib ... e(), 1) | only flow with NEW data flow library |
| test.js:17:47:17:54 | source() | test.js:17:8:17:61 | testlib ... , 1, 1) | only flow with NEW data flow library |
| test.js:18:50:18:57 | source() | test.js:18:8:18:61 | testlib ... e(), 1) | only flow with NEW data flow library |
| test.js:19:53:19:60 | source() | test.js:19:8:19:61 | testlib ... urce()) | only flow with NEW data flow library |
| test.js:21:34:21:41 | source() | test.js:21:8:21:51 | testlib ... , 1, 1) | only flow with NEW data flow library |
| test.js:22:37:22:44 | source() | test.js:22:8:22:51 | testlib ... , 1, 1) | only flow with NEW data flow library |
| test.js:23:40:23:47 | source() | test.js:23:8:23:51 | testlib ... e(), 1) | only flow with NEW data flow library |
| test.js:24:43:24:50 | source() | test.js:24:8:24:51 | testlib ... urce()) | only flow with NEW data flow library |
| test.js:31:29:31:36 | source() | test.js:32:10:32:10 | y | only flow with NEW data flow library |
| test.js:37:29:37:36 | source() | test.js:38:10:38:10 | y | only flow with NEW data flow library |
| test.js:43:29:43:36 | source() | test.js:44:10:44:10 | y | only flow with NEW data flow library |
| test.js:47:33:47:40 | source() | test.js:49:10:49:13 | this | only flow with NEW data flow library |
| test.js:102:16:102:34 | testlib.getSource() | test.js:104:8:104:24 | source.continue() | only flow with NEW data flow library |
consistencyIssue
taintFlow
| guardedRouteHandler.js:6:10:6:28 | req.injectedReqData | guardedRouteHandler.js:6:10:6:28 | req.injectedReqData |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,9 @@ edges
| ReflectedXss.js:22:19:22:26 | req.body | ReflectedXss.js:22:12:22:27 | marked(req.body) | provenance | |
| ReflectedXss.js:29:7:32:4 | mytable | ReflectedXss.js:33:12:33:18 | mytable | provenance | |
| ReflectedXss.js:29:17:32:4 | table([ ... ce\\n ]) | ReflectedXss.js:29:7:32:4 | mytable | provenance | |
| ReflectedXss.js:31:14:31:21 | req.body | ReflectedXss.js:29:17:32:4 | table([ ... ce\\n ]) | provenance | |
| ReflectedXss.js:29:23:32:3 | [\\n [ ... rce\\n ] [1, 1] | ReflectedXss.js:29:17:32:4 | table([ ... ce\\n ]) | provenance | |
| ReflectedXss.js:31:5:31:22 | ['body', req.body] [1] | ReflectedXss.js:29:23:32:3 | [\\n [ ... rce\\n ] [1, 1] | provenance | |
| ReflectedXss.js:31:14:31:21 | req.body | ReflectedXss.js:31:5:31:22 | ['body', req.body] [1] | provenance | |
| ReflectedXss.js:41:31:41:38 | req.body | ReflectedXss.js:41:12:41:39 | convert ... q.body) | provenance | |
| ReflectedXss.js:63:14:63:21 | req.body | ReflectedXss.js:63:39:63:42 | file | provenance | |
| ReflectedXss.js:63:39:63:42 | file | ReflectedXss.js:64:16:64:19 | file | provenance | |
Expand Down Expand Up @@ -253,6 +255,8 @@ nodes
| ReflectedXss.js:28:12:28:19 | req.body | semmle.label | req.body |
| ReflectedXss.js:29:7:32:4 | mytable | semmle.label | mytable |
| ReflectedXss.js:29:17:32:4 | table([ ... ce\\n ]) | semmle.label | table([ ... ce\\n ]) |
| ReflectedXss.js:29:23:32:3 | [\\n [ ... rce\\n ] [1, 1] | semmle.label | [\\n [ ... rce\\n ] [1, 1] |
| ReflectedXss.js:31:5:31:22 | ['body', req.body] [1] | semmle.label | ['body', req.body] [1] |
| ReflectedXss.js:31:14:31:21 | req.body | semmle.label | req.body |
| ReflectedXss.js:33:12:33:18 | mytable | semmle.label | mytable |
| ReflectedXss.js:40:12:40:19 | req.body | semmle.label | req.body |
Expand Down
87 changes: 87 additions & 0 deletions shared/dataflow/codeql/dataflow/internal/FlowSummaryImpl.qll
Original file line number Diff line number Diff line change
Expand Up @@ -530,6 +530,93 @@ module Make<
}
}

private predicate isNonLocalSummaryComponent(SummaryComponent c) {
c instanceof TArgumentSummaryComponent or
c instanceof TParameterSummaryComponent or
c instanceof TReturnSummaryComponent
}

private predicate isLocalSummaryComponent(SummaryComponent c) {
not isNonLocalSummaryComponent(c)
}

/**
* Holds if `s` is a valid input stack, in the sense that we generate a data flow graph
* that faithfully represents this flow, and lambda-tracking can be expected to track
* lambdas to the relevant callbacks in practice.
*/
private predicate isSupportedInputStack(SummaryComponentStack s) {
// Argument[n].*
s.length() = 1 and
s.head() instanceof TArgumentSummaryComponent
or
// Argument[n].ReturnValue.*
s.length() = 2 and
s.head() instanceof TReturnSummaryComponent and
s.tail().head() instanceof TArgumentSummaryComponent
or
// Argument[n].Parameter[n].Content.*
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I hadn't considered the fact that we also support this case, a lambda passed in which has a side-effect on one of it's arguments.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a good thing we do. We rely on this to model the Promise constructor in JavaScript.

s.length() = 3 and
s.head() instanceof TContentSummaryComponent and
s.tail().head() instanceof TParameterSummaryComponent and
s.drop(2).head() instanceof TArgumentSummaryComponent
or
isSupportedInputStack(s.tail()) and
isLocalSummaryComponent(s.head())
}

private predicate isSupportedOutputStack1(SummaryComponentStack s) {
// ReturnValue.*
s.length() = 1 and
s.head() instanceof TReturnSummaryComponent
or
// Argument[n].Content.*
s.length() = 2 and
s.head() instanceof TContentSummaryComponent and
s.tail().head() instanceof TArgumentSummaryComponent
Comment on lines +573 to +576
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We actually support just Argument[n] as well, for example one might model a string builder append method as input = Argument[0] and output = Argument[self] and preservesValue = false`.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good call. I pushed a commit that recognises this, please take a look.

or
// Argument[n].Parameter[n].*
s.length() = 2 and
s.head() instanceof TParameterSummaryComponent and
s.tail().head() instanceof TArgumentSummaryComponent
or
isSupportedOutputStack1(s.tail()) and
isLocalSummaryComponent(s.head())
}

/** Like `isSupportedInputStack` but for output stacks. */
private predicate isSupportedOutputStack(SummaryComponentStack s) {
isSupportedOutputStack1(s)
or
// `Argument[n]` not followed by anything. Needs to be outside the recursion.
s.length() = 1 and
s.head() instanceof TArgumentSummaryComponent
}

/**
* Holds if `callable` has an unsupported flow `input -> output`.
*
* `whichOne` indicates if the `input` or `output` contains the unsupported sequence.
*/
predicate unsupportedCallable(
SummarizedCallableImpl callable, SummaryComponentStack input, SummaryComponentStack output,
string whichOne
) {
callable.propagatesFlow(input, output, _, _) and
(
not isSupportedInputStack(input) and whichOne = "input"
or
not isSupportedOutputStack(output) and whichOne = "output"
)
}

/**
* Holds if `callable` has an unsupported flow.
*/
predicate unsupportedCallable(SummarizedCallableImpl callable) {
unsupportedCallable(callable, _, _, _)
}

private predicate summarySpec(string spec) {
exists(SummarizedCallable c |
c.propagatesFlow(spec, _, _, _)
Expand Down
Loading