From ca5f8b0c1d1872d6a9b38d91139a1e6b82659385 Mon Sep 17 00:00:00 2001 From: Asger F Date: Thu, 1 May 2025 15:17:07 +0200 Subject: [PATCH 1/6] JS: Move some code into ModelsAsData.qll --- .../frameworks/data/ModelsAsData.qll | 45 ++++++++++++++++++- .../data/internal/ApiGraphModelsSpecific.qll | 45 ------------------- 2 files changed, 44 insertions(+), 46 deletions(-) diff --git a/javascript/ql/lib/semmle/javascript/frameworks/data/ModelsAsData.qll b/javascript/ql/lib/semmle/javascript/frameworks/data/ModelsAsData.qll index 856a61276a0a..9f143a9713ad 100644 --- a/javascript/ql/lib/semmle/javascript/frameworks/data/ModelsAsData.qll +++ b/javascript/ql/lib/semmle/javascript/frameworks/data/ModelsAsData.qll @@ -45,12 +45,55 @@ private class ThreatModelSourceFromDataExtension extends ThreatModelSource::Rang } } +/** + * Holds if `path` is an input or output spec for a summary with the given `base` node. + */ +pragma[nomagic] +private predicate relevantInputOutputPath(API::InvokeNode base, AccessPath inputOrOutput) { + exists(string type, string input, string output, string path | + ModelOutput::resolvedSummaryBase(type, path, base) and + ModelOutput::relevantSummaryModel(type, path, input, output, _, _) and + inputOrOutput = [input, output] + ) +} + +/** + * Gets the API node for the first `n` tokens of the given input/output path, evaluated relative to `baseNode`. + */ +private API::Node getNodeFromInputOutputPath(API::InvokeNode baseNode, AccessPath path, int n) { + relevantInputOutputPath(baseNode, path) and + ( + n = 1 and + result = Shared::getSuccessorFromInvoke(baseNode, path.getToken(0)) + or + result = + Shared::getSuccessorFromNode(getNodeFromInputOutputPath(baseNode, path, n - 1), + path.getToken(n - 1)) + ) +} + +/** + * Gets the API node for the given input/output path, evaluated relative to `baseNode`. + */ +private API::Node getNodeFromInputOutputPath(API::InvokeNode baseNode, AccessPath path) { + result = getNodeFromInputOutputPath(baseNode, path, path.getNumToken()) +} + +private predicate summaryStep(API::Node pred, API::Node succ, string kind) { + exists(string type, string path, API::InvokeNode base, AccessPath input, AccessPath output | + ModelOutput::relevantSummaryModel(type, path, input, output, kind, _) and + ModelOutput::resolvedSummaryBase(type, path, base) and + pred = getNodeFromInputOutputPath(base, input) and + succ = getNodeFromInputOutputPath(base, output) + ) +} + /** * Like `ModelOutput::summaryStep` but with API nodes mapped to data-flow nodes. */ private predicate summaryStepNodes(DataFlow::Node pred, DataFlow::Node succ, string kind) { exists(API::Node predNode, API::Node succNode | - Specific::summaryStep(predNode, succNode, kind) and + summaryStep(predNode, succNode, kind) and pred = predNode.asSink() and succ = succNode.asSource() ) diff --git a/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModelsSpecific.qll b/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModelsSpecific.qll index 1f51af3efda0..473dfe46527b 100644 --- a/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModelsSpecific.qll +++ b/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModelsSpecific.qll @@ -272,51 +272,6 @@ predicate invocationMatchesExtraCallSiteFilter(API::InvokeNode invoke, AccessPat ) } -/** - * Holds if `path` is an input or output spec for a summary with the given `base` node. - */ -pragma[nomagic] -private predicate relevantInputOutputPath(API::InvokeNode base, AccessPath inputOrOutput) { - exists(string type, string input, string output, string path | - ModelOutput::relevantSummaryModel(type, path, input, output, _, _) and - ModelOutput::resolvedSummaryBase(type, path, base) and - inputOrOutput = [input, output] - ) -} - -/** - * Gets the API node for the first `n` tokens of the given input/output path, evaluated relative to `baseNode`. - */ -private API::Node getNodeFromInputOutputPath(API::InvokeNode baseNode, AccessPath path, int n) { - relevantInputOutputPath(baseNode, path) and - ( - n = 1 and - result = getSuccessorFromInvoke(baseNode, path.getToken(0)) - or - result = - getSuccessorFromNode(getNodeFromInputOutputPath(baseNode, path, n - 1), path.getToken(n - 1)) - ) -} - -/** - * Gets the API node for the given input/output path, evaluated relative to `baseNode`. - */ -private API::Node getNodeFromInputOutputPath(API::InvokeNode baseNode, AccessPath path) { - result = getNodeFromInputOutputPath(baseNode, path, path.getNumToken()) -} - -/** - * Holds if a CSV summary contributed the step `pred -> succ` of the given `kind`. - */ -predicate summaryStep(API::Node pred, API::Node succ, string kind) { - exists(string type, string path, API::InvokeNode base, AccessPath input, AccessPath output | - ModelOutput::relevantSummaryModel(type, path, input, output, kind, _) and - ModelOutput::resolvedSummaryBase(type, path, base) and - pred = getNodeFromInputOutputPath(base, input) and - succ = getNodeFromInputOutputPath(base, output) - ) -} - class InvokeNode = API::InvokeNode; /** Gets an `InvokeNode` corresponding to an invocation of `node`. */ From 0fc1ae272e8d08ab90b9d62b277c9772cc79d76e Mon Sep 17 00:00:00 2001 From: Asger F Date: Thu, 1 May 2025 15:22:12 +0200 Subject: [PATCH 2/6] DataFlow: expose from FlowSummaries whether a summary is supported --- .../dataflow/internal/FlowSummaryImpl.qll | 79 +++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/shared/dataflow/codeql/dataflow/internal/FlowSummaryImpl.qll b/shared/dataflow/codeql/dataflow/internal/FlowSummaryImpl.qll index 7fa38a327f16..9e6c4ff8b817 100644 --- a/shared/dataflow/codeql/dataflow/internal/FlowSummaryImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/FlowSummaryImpl.qll @@ -530,6 +530,85 @@ module Make< } } + private predicate isNonLocalSummaryComponent(SummaryComponent c) { + c instanceof TArgumentSummaryComponent or + c instanceof TParameterSummaryComponent or + c instanceof TReturnSummaryComponent + } + + private predicate isLocalSummaryComponent(SummaryComponent c) { + not isNonLocalSummaryComponent(c) + } + + /** + * Holds if `s` is a valid input stack, in the sense that we generate data flow graph + * that faithfully represents this flow, and lambda-tracking can be expected to track + * lambdas to the relevant callbacks in practice. + */ + private predicate isSupportedInputStack(SummaryComponentStack s) { + // Argument[n].* + s.length() = 1 and + s.head() instanceof TArgumentSummaryComponent + or + // Argument[n].ReturnValue.* + s.length() = 2 and + s.head() instanceof TReturnSummaryComponent and + s.tail().head() instanceof TArgumentSummaryComponent + or + // Argument[n].Parameter[n].Content.* + s.length() = 3 and + s.head() instanceof TContentSummaryComponent and + s.tail().head() instanceof TParameterSummaryComponent and + s.drop(2).head() instanceof TArgumentSummaryComponent + or + isSupportedInputStack(s.tail()) and + isLocalSummaryComponent(s.head()) + } + + /** Like `isSupportedInputStack` but for output stacks. */ + private predicate isSupportedOutputStack(SummaryComponentStack s) { + // ReturnValue.* + s.length() = 1 and + s.head() instanceof TReturnSummaryComponent + or + // Argument[n].Content.* + s.length() = 2 and + s.head() instanceof TContentSummaryComponent and + s.tail().head() instanceof TArgumentSummaryComponent + or + // Argument[n].Parameter[n].* + s.length() = 2 and + s.head() instanceof TParameterSummaryComponent and + s.tail().head() instanceof TArgumentSummaryComponent + or + isSupportedOutputStack(s.tail()) and + isLocalSummaryComponent(s.head()) + } + + /** + * Holds if `callable` has an unsupported flow `input -> output`. + * + * `whichOne` indicates if the `input` or `output` contains the unsupported sequence. + */ + predicate unsupportedCallable( + SummarizedCallableImpl callable, SummaryComponentStack input, SummaryComponentStack output, + string whichOne + ) { + callable.propagatesFlow(input, output, _, _) and + ( + not isSupportedInputStack(input) and whichOne = "input" + or + not isSupportedOutputStack(output) and whichOne = "output" + ) + } + + /** + * Holds if `callable` has an unsupported flow. + */ + predicate unsupportedCallable(SummarizedCallableImpl callable) { + unsupportedCallable(callable, _, _, _) + } + private predicate summarySpec(string spec) { exists(SummarizedCallable c | c.propagatesFlow(spec, _, _, _) From a44bdf3be231dcfc083279ca0e49ddbede93debe Mon Sep 17 00:00:00 2001 From: Asger F Date: Thu, 1 May 2025 15:22:38 +0200 Subject: [PATCH 3/6] JS: Generate summaries from summaryModel, and only generate steps as a fallback --- .../dataflow/internal/FlowSummaryPrivate.qll | 4 ++ .../frameworks/data/ModelsAsData.qll | 40 +++++++++++++++++++ .../TripleDot/underscore.string.js | 8 ++-- 3 files changed, 48 insertions(+), 4 deletions(-) diff --git a/javascript/ql/lib/semmle/javascript/dataflow/internal/FlowSummaryPrivate.qll b/javascript/ql/lib/semmle/javascript/dataflow/internal/FlowSummaryPrivate.qll index 0f25c694f61c..31f5f16bbfb1 100644 --- a/javascript/ql/lib/semmle/javascript/dataflow/internal/FlowSummaryPrivate.qll +++ b/javascript/ql/lib/semmle/javascript/dataflow/internal/FlowSummaryPrivate.qll @@ -264,3 +264,7 @@ module Stage { cached predicate backref() { optionalStep(_, _, _) } } + +predicate unsupportedCallable = Private::unsupportedCallable/1; + +predicate unsupportedCallable = Private::unsupportedCallable/4; diff --git a/javascript/ql/lib/semmle/javascript/frameworks/data/ModelsAsData.qll b/javascript/ql/lib/semmle/javascript/frameworks/data/ModelsAsData.qll index 9f143a9713ad..0e19e84b666f 100644 --- a/javascript/ql/lib/semmle/javascript/frameworks/data/ModelsAsData.qll +++ b/javascript/ql/lib/semmle/javascript/frameworks/data/ModelsAsData.qll @@ -19,6 +19,7 @@ private import javascript private import internal.ApiGraphModels as Shared private import internal.ApiGraphModelsSpecific as Specific +private import semmle.javascript.dataflow.internal.FlowSummaryPrivate private import semmle.javascript.endpoints.EndpointNaming as EndpointNaming import Shared::ModelInput as ModelInput import Shared::ModelOutput as ModelOutput @@ -45,12 +46,50 @@ private class ThreatModelSourceFromDataExtension extends ThreatModelSource::Rang } } +private class SummarizedCallableFromModel extends DataFlow::SummarizedCallable { + string type; + string path; + + SummarizedCallableFromModel() { + ModelOutput::relevantSummaryModel(type, path, _, _, _, _) and + this = type + ";" + path + } + + override DataFlow::InvokeNode getACall() { ModelOutput::resolvedSummaryBase(type, path, result) } + + override predicate propagatesFlow( + string input, string output, boolean preservesValue, string model + ) { + exists(string kind | ModelOutput::relevantSummaryModel(type, path, input, output, kind, model) | + kind = "value" and + preservesValue = true + or + kind = "taint" and + preservesValue = false + ) + } + + predicate hasTypeAndPath(string type_, string path_) { type = type_ and path = path_ } + + predicate isUnsupportedByFlowSummaries() { unsupportedCallable(this) } +} + +private predicate shouldInduceStepsFromSummary(string type, string path) { + exists(SummarizedCallableFromModel callable | + callable.isUnsupportedByFlowSummaries() and + callable.hasTypeAndPath(type, path) + ) +} + /** * Holds if `path` is an input or output spec for a summary with the given `base` node. */ pragma[nomagic] private predicate relevantInputOutputPath(API::InvokeNode base, AccessPath inputOrOutput) { exists(string type, string input, string output, string path | + // If the summary for 'callable' could not be handled as a flow summary, we need to evaluate + // its inputs and outputs to a set of nodes, so we can generate steps instead. + shouldInduceStepsFromSummary(type, path) and ModelOutput::resolvedSummaryBase(type, path, base) and ModelOutput::relevantSummaryModel(type, path, input, output, _, _) and inputOrOutput = [input, output] @@ -81,6 +120,7 @@ private API::Node getNodeFromInputOutputPath(API::InvokeNode baseNode, AccessPat private predicate summaryStep(API::Node pred, API::Node succ, string kind) { exists(string type, string path, API::InvokeNode base, AccessPath input, AccessPath output | + shouldInduceStepsFromSummary(type, path) and ModelOutput::relevantSummaryModel(type, path, input, output, kind, _) and ModelOutput::resolvedSummaryBase(type, path, base) and pred = getNodeFromInputOutputPath(base, input) and diff --git a/javascript/ql/test/library-tests/TripleDot/underscore.string.js b/javascript/ql/test/library-tests/TripleDot/underscore.string.js index 07f186343ce2..dd904cd78c3b 100644 --- a/javascript/ql/test/library-tests/TripleDot/underscore.string.js +++ b/javascript/ql/test/library-tests/TripleDot/underscore.string.js @@ -39,7 +39,7 @@ function strToStr() { } function strToArray() { - sink(s.chop(source("s1"), 3)); // $ MISSING: hasTaintFlow=s1 + sink(s.chop(source("s1"), 3)); // $ hasTaintFlow=s1 sink(s.chars(source("s2"))[0]); // $ hasTaintFlow=s2 sink(s.words(source("s3"))[0]); // $ hasTaintFlow=s3 sink(s.lines(source("s7"))[0]); // $ hasTaintFlow=s7 @@ -97,7 +97,7 @@ function multiSource() { function chaining() { sink(s(source("s1")) - .slugify().capitalize().decapitalize().clean().cleanDiacritics() + .slugify().capitalize().decapitalize().clean().cleanDiacritics() .swapCase().escapeHTML().unescapeHTML().wrap().dedent() .reverse().pred().succ().titleize().camelize().classify() .underscored().dasherize().humanize().trim().ltrim().rtrim() @@ -119,8 +119,8 @@ function chaining() { .q(source("s17")).ljust(10, source("s18")) .rjust(10, source("s19"))); // $ hasTaintFlow=s16 hasTaintFlow=s17 hasTaintFlow=s18 hasTaintFlow=s19 - sink(s(source("s20")).tap(function(value) { - return value + source("s21"); + sink(s(source("s20")).tap(function(value) { + return value + source("s21"); }).value()); // $ hasTaintFlow=s20 hasTaintFlow=s21 } From 16fc8c3d9e46143d2872dcb91b7aa0bd01f5bbef Mon Sep 17 00:00:00 2001 From: Asger F Date: Fri, 2 May 2025 11:09:19 +0200 Subject: [PATCH 4/6] JS: Benign test updates --- .../library-tests/frameworks/data/test.expected | 17 +++++++++++++++++ .../CWE-079/ReflectedXss/ReflectedXss.expected | 6 +++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/javascript/ql/test/library-tests/frameworks/data/test.expected b/javascript/ql/test/library-tests/frameworks/data/test.expected index 0bc1b6b6ee07..875b0189d619 100644 --- a/javascript/ql/test/library-tests/frameworks/data/test.expected +++ b/javascript/ql/test/library-tests/frameworks/data/test.expected @@ -1,4 +1,21 @@ legacyDataFlowDifference +| test.js:5:30:5:37 | source() | test.js:5:8:5:38 | testlib ... urce()) | only flow with NEW data flow library | +| test.js:6:22:6:29 | source() | test.js:6:8:6:30 | preserv ... urce()) | only flow with NEW data flow library | +| test.js:7:41:7:48 | source() | test.js:7:8:7:49 | require ... urce()) | only flow with NEW data flow library | +| test.js:11:38:11:45 | source() | test.js:11:8:11:55 | testlib ... , 1, 1) | only flow with NEW data flow library | +| test.js:13:44:13:51 | source() | test.js:13:8:13:55 | testlib ... e(), 1) | only flow with NEW data flow library | +| test.js:17:47:17:54 | source() | test.js:17:8:17:61 | testlib ... , 1, 1) | only flow with NEW data flow library | +| test.js:18:50:18:57 | source() | test.js:18:8:18:61 | testlib ... e(), 1) | only flow with NEW data flow library | +| test.js:19:53:19:60 | source() | test.js:19:8:19:61 | testlib ... urce()) | only flow with NEW data flow library | +| test.js:21:34:21:41 | source() | test.js:21:8:21:51 | testlib ... , 1, 1) | only flow with NEW data flow library | +| test.js:22:37:22:44 | source() | test.js:22:8:22:51 | testlib ... , 1, 1) | only flow with NEW data flow library | +| test.js:23:40:23:47 | source() | test.js:23:8:23:51 | testlib ... e(), 1) | only flow with NEW data flow library | +| test.js:24:43:24:50 | source() | test.js:24:8:24:51 | testlib ... urce()) | only flow with NEW data flow library | +| test.js:31:29:31:36 | source() | test.js:32:10:32:10 | y | only flow with NEW data flow library | +| test.js:37:29:37:36 | source() | test.js:38:10:38:10 | y | only flow with NEW data flow library | +| test.js:43:29:43:36 | source() | test.js:44:10:44:10 | y | only flow with NEW data flow library | +| test.js:47:33:47:40 | source() | test.js:49:10:49:13 | this | only flow with NEW data flow library | +| test.js:102:16:102:34 | testlib.getSource() | test.js:104:8:104:24 | source.continue() | only flow with NEW data flow library | consistencyIssue taintFlow | guardedRouteHandler.js:6:10:6:28 | req.injectedReqData | guardedRouteHandler.js:6:10:6:28 | req.injectedReqData | diff --git a/javascript/ql/test/query-tests/Security/CWE-079/ReflectedXss/ReflectedXss.expected b/javascript/ql/test/query-tests/Security/CWE-079/ReflectedXss/ReflectedXss.expected index 5681ae99aa85..09874ecef104 100644 --- a/javascript/ql/test/query-tests/Security/CWE-079/ReflectedXss/ReflectedXss.expected +++ b/javascript/ql/test/query-tests/Security/CWE-079/ReflectedXss/ReflectedXss.expected @@ -78,7 +78,9 @@ edges | ReflectedXss.js:22:19:22:26 | req.body | ReflectedXss.js:22:12:22:27 | marked(req.body) | provenance | | | ReflectedXss.js:29:7:32:4 | mytable | ReflectedXss.js:33:12:33:18 | mytable | provenance | | | ReflectedXss.js:29:17:32:4 | table([ ... ce\\n ]) | ReflectedXss.js:29:7:32:4 | mytable | provenance | | -| ReflectedXss.js:31:14:31:21 | req.body | ReflectedXss.js:29:17:32:4 | table([ ... ce\\n ]) | provenance | | +| ReflectedXss.js:29:23:32:3 | [\\n [ ... rce\\n ] [1, 1] | ReflectedXss.js:29:17:32:4 | table([ ... ce\\n ]) | provenance | | +| ReflectedXss.js:31:5:31:22 | ['body', req.body] [1] | ReflectedXss.js:29:23:32:3 | [\\n [ ... rce\\n ] [1, 1] | provenance | | +| ReflectedXss.js:31:14:31:21 | req.body | ReflectedXss.js:31:5:31:22 | ['body', req.body] [1] | provenance | | | ReflectedXss.js:41:31:41:38 | req.body | ReflectedXss.js:41:12:41:39 | convert ... q.body) | provenance | | | ReflectedXss.js:63:14:63:21 | req.body | ReflectedXss.js:63:39:63:42 | file | provenance | | | ReflectedXss.js:63:39:63:42 | file | ReflectedXss.js:64:16:64:19 | file | provenance | | @@ -253,6 +255,8 @@ nodes | ReflectedXss.js:28:12:28:19 | req.body | semmle.label | req.body | | ReflectedXss.js:29:7:32:4 | mytable | semmle.label | mytable | | ReflectedXss.js:29:17:32:4 | table([ ... ce\\n ]) | semmle.label | table([ ... ce\\n ]) | +| ReflectedXss.js:29:23:32:3 | [\\n [ ... rce\\n ] [1, 1] | semmle.label | [\\n [ ... rce\\n ] [1, 1] | +| ReflectedXss.js:31:5:31:22 | ['body', req.body] [1] | semmle.label | ['body', req.body] [1] | | ReflectedXss.js:31:14:31:21 | req.body | semmle.label | req.body | | ReflectedXss.js:33:12:33:18 | mytable | semmle.label | mytable | | ReflectedXss.js:40:12:40:19 | req.body | semmle.label | req.body | From 8fab235d666c3de13897f88ec5b4464d120e9a42 Mon Sep 17 00:00:00 2001 From: Asger F Date: Mon, 12 May 2025 09:41:49 +0200 Subject: [PATCH 5/6] DataFlow: Fix typo in a comment --- shared/dataflow/codeql/dataflow/internal/FlowSummaryImpl.qll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shared/dataflow/codeql/dataflow/internal/FlowSummaryImpl.qll b/shared/dataflow/codeql/dataflow/internal/FlowSummaryImpl.qll index 9e6c4ff8b817..794fd340bcf8 100644 --- a/shared/dataflow/codeql/dataflow/internal/FlowSummaryImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/FlowSummaryImpl.qll @@ -541,7 +541,7 @@ module Make< } /** - * Holds if `s` is a valid input stack, in the sense that we generate data flow graph + * Holds if `s` is a valid input stack, in the sense that we generate a data flow graph * that faithfully represents this flow, and lambda-tracking can be expected to track * lambdas to the relevant callbacks in practice. */ From 891b2b8335cd2b785b987ded03d4ae37378410e2 Mon Sep 17 00:00:00 2001 From: Asger F Date: Mon, 12 May 2025 09:42:45 +0200 Subject: [PATCH 6/6] DataFlow: Support a bare Argument[n] as a valid output stack --- .../codeql/dataflow/internal/FlowSummaryImpl.qll | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/shared/dataflow/codeql/dataflow/internal/FlowSummaryImpl.qll b/shared/dataflow/codeql/dataflow/internal/FlowSummaryImpl.qll index 794fd340bcf8..95d29153f47e 100644 --- a/shared/dataflow/codeql/dataflow/internal/FlowSummaryImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/FlowSummaryImpl.qll @@ -565,8 +565,7 @@ module Make< isLocalSummaryComponent(s.head()) } - /** Like `isSupportedInputStack` but for output stacks. */ - private predicate isSupportedOutputStack(SummaryComponentStack s) { + private predicate isSupportedOutputStack1(SummaryComponentStack s) { // ReturnValue.* s.length() = 1 and s.head() instanceof TReturnSummaryComponent @@ -581,10 +580,19 @@ module Make< s.head() instanceof TParameterSummaryComponent and s.tail().head() instanceof TArgumentSummaryComponent or - isSupportedOutputStack(s.tail()) and + isSupportedOutputStack1(s.tail()) and isLocalSummaryComponent(s.head()) } + /** Like `isSupportedInputStack` but for output stacks. */ + private predicate isSupportedOutputStack(SummaryComponentStack s) { + isSupportedOutputStack1(s) + or + // `Argument[n]` not followed by anything. Needs to be outside the recursion. + s.length() = 1 and + s.head() instanceof TArgumentSummaryComponent + } + /** * Holds if `callable` has an unsupported flow `input -> output`. *