diff --git a/javascript/ql/lib/semmle/javascript/dataflow/internal/FlowSummaryPrivate.qll b/javascript/ql/lib/semmle/javascript/dataflow/internal/FlowSummaryPrivate.qll index 0f25c694f61c..31f5f16bbfb1 100644 --- a/javascript/ql/lib/semmle/javascript/dataflow/internal/FlowSummaryPrivate.qll +++ b/javascript/ql/lib/semmle/javascript/dataflow/internal/FlowSummaryPrivate.qll @@ -264,3 +264,7 @@ module Stage { cached predicate backref() { optionalStep(_, _, _) } } + +predicate unsupportedCallable = Private::unsupportedCallable/1; + +predicate unsupportedCallable = Private::unsupportedCallable/4; diff --git a/javascript/ql/lib/semmle/javascript/frameworks/data/ModelsAsData.qll b/javascript/ql/lib/semmle/javascript/frameworks/data/ModelsAsData.qll index 856a61276a0a..0e19e84b666f 100644 --- a/javascript/ql/lib/semmle/javascript/frameworks/data/ModelsAsData.qll +++ b/javascript/ql/lib/semmle/javascript/frameworks/data/ModelsAsData.qll @@ -19,6 +19,7 @@ private import javascript private import internal.ApiGraphModels as Shared private import internal.ApiGraphModelsSpecific as Specific +private import semmle.javascript.dataflow.internal.FlowSummaryPrivate private import semmle.javascript.endpoints.EndpointNaming as EndpointNaming import Shared::ModelInput as ModelInput import Shared::ModelOutput as ModelOutput @@ -45,12 +46,94 @@ private class ThreatModelSourceFromDataExtension extends ThreatModelSource::Rang } } +private class SummarizedCallableFromModel extends DataFlow::SummarizedCallable { + string type; + string path; + + SummarizedCallableFromModel() { + ModelOutput::relevantSummaryModel(type, path, _, _, _, _) and + this = type + ";" + path + } + + override DataFlow::InvokeNode getACall() { ModelOutput::resolvedSummaryBase(type, path, result) } + + override predicate propagatesFlow( + string input, string output, boolean preservesValue, string model + ) { + exists(string kind | ModelOutput::relevantSummaryModel(type, path, input, output, kind, model) | + kind = "value" and + preservesValue = true + or + kind = "taint" and + preservesValue = false + ) + } + + predicate hasTypeAndPath(string type_, string path_) { type = type_ and path = path_ } + + predicate isUnsupportedByFlowSummaries() { unsupportedCallable(this) } +} + +private predicate shouldInduceStepsFromSummary(string type, string path) { + exists(SummarizedCallableFromModel callable | + callable.isUnsupportedByFlowSummaries() and + callable.hasTypeAndPath(type, path) + ) +} + +/** + * Holds if `path` is an input or output spec for a summary with the given `base` node. + */ +pragma[nomagic] +private predicate relevantInputOutputPath(API::InvokeNode base, AccessPath inputOrOutput) { + exists(string type, string input, string output, string path | + // If the summary for 'callable' could not be handled as a flow summary, we need to evaluate + // its inputs and outputs to a set of nodes, so we can generate steps instead. + shouldInduceStepsFromSummary(type, path) and + ModelOutput::resolvedSummaryBase(type, path, base) and + ModelOutput::relevantSummaryModel(type, path, input, output, _, _) and + inputOrOutput = [input, output] + ) +} + +/** + * Gets the API node for the first `n` tokens of the given input/output path, evaluated relative to `baseNode`. + */ +private API::Node getNodeFromInputOutputPath(API::InvokeNode baseNode, AccessPath path, int n) { + relevantInputOutputPath(baseNode, path) and + ( + n = 1 and + result = Shared::getSuccessorFromInvoke(baseNode, path.getToken(0)) + or + result = + Shared::getSuccessorFromNode(getNodeFromInputOutputPath(baseNode, path, n - 1), + path.getToken(n - 1)) + ) +} + +/** + * Gets the API node for the given input/output path, evaluated relative to `baseNode`. + */ +private API::Node getNodeFromInputOutputPath(API::InvokeNode baseNode, AccessPath path) { + result = getNodeFromInputOutputPath(baseNode, path, path.getNumToken()) +} + +private predicate summaryStep(API::Node pred, API::Node succ, string kind) { + exists(string type, string path, API::InvokeNode base, AccessPath input, AccessPath output | + shouldInduceStepsFromSummary(type, path) and + ModelOutput::relevantSummaryModel(type, path, input, output, kind, _) and + ModelOutput::resolvedSummaryBase(type, path, base) and + pred = getNodeFromInputOutputPath(base, input) and + succ = getNodeFromInputOutputPath(base, output) + ) +} + /** * Like `ModelOutput::summaryStep` but with API nodes mapped to data-flow nodes. */ private predicate summaryStepNodes(DataFlow::Node pred, DataFlow::Node succ, string kind) { exists(API::Node predNode, API::Node succNode | - Specific::summaryStep(predNode, succNode, kind) and + summaryStep(predNode, succNode, kind) and pred = predNode.asSink() and succ = succNode.asSource() ) diff --git a/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModelsSpecific.qll b/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModelsSpecific.qll index 1f51af3efda0..473dfe46527b 100644 --- a/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModelsSpecific.qll +++ b/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModelsSpecific.qll @@ -272,51 +272,6 @@ predicate invocationMatchesExtraCallSiteFilter(API::InvokeNode invoke, AccessPat ) } -/** - * Holds if `path` is an input or output spec for a summary with the given `base` node. - */ -pragma[nomagic] -private predicate relevantInputOutputPath(API::InvokeNode base, AccessPath inputOrOutput) { - exists(string type, string input, string output, string path | - ModelOutput::relevantSummaryModel(type, path, input, output, _, _) and - ModelOutput::resolvedSummaryBase(type, path, base) and - inputOrOutput = [input, output] - ) -} - -/** - * Gets the API node for the first `n` tokens of the given input/output path, evaluated relative to `baseNode`. - */ -private API::Node getNodeFromInputOutputPath(API::InvokeNode baseNode, AccessPath path, int n) { - relevantInputOutputPath(baseNode, path) and - ( - n = 1 and - result = getSuccessorFromInvoke(baseNode, path.getToken(0)) - or - result = - getSuccessorFromNode(getNodeFromInputOutputPath(baseNode, path, n - 1), path.getToken(n - 1)) - ) -} - -/** - * Gets the API node for the given input/output path, evaluated relative to `baseNode`. - */ -private API::Node getNodeFromInputOutputPath(API::InvokeNode baseNode, AccessPath path) { - result = getNodeFromInputOutputPath(baseNode, path, path.getNumToken()) -} - -/** - * Holds if a CSV summary contributed the step `pred -> succ` of the given `kind`. - */ -predicate summaryStep(API::Node pred, API::Node succ, string kind) { - exists(string type, string path, API::InvokeNode base, AccessPath input, AccessPath output | - ModelOutput::relevantSummaryModel(type, path, input, output, kind, _) and - ModelOutput::resolvedSummaryBase(type, path, base) and - pred = getNodeFromInputOutputPath(base, input) and - succ = getNodeFromInputOutputPath(base, output) - ) -} - class InvokeNode = API::InvokeNode; /** Gets an `InvokeNode` corresponding to an invocation of `node`. */ diff --git a/javascript/ql/test/library-tests/TripleDot/underscore.string.js b/javascript/ql/test/library-tests/TripleDot/underscore.string.js index 07f186343ce2..dd904cd78c3b 100644 --- a/javascript/ql/test/library-tests/TripleDot/underscore.string.js +++ b/javascript/ql/test/library-tests/TripleDot/underscore.string.js @@ -39,7 +39,7 @@ function strToStr() { } function strToArray() { - sink(s.chop(source("s1"), 3)); // $ MISSING: hasTaintFlow=s1 + sink(s.chop(source("s1"), 3)); // $ hasTaintFlow=s1 sink(s.chars(source("s2"))[0]); // $ hasTaintFlow=s2 sink(s.words(source("s3"))[0]); // $ hasTaintFlow=s3 sink(s.lines(source("s7"))[0]); // $ hasTaintFlow=s7 @@ -97,7 +97,7 @@ function multiSource() { function chaining() { sink(s(source("s1")) - .slugify().capitalize().decapitalize().clean().cleanDiacritics() + .slugify().capitalize().decapitalize().clean().cleanDiacritics() .swapCase().escapeHTML().unescapeHTML().wrap().dedent() .reverse().pred().succ().titleize().camelize().classify() .underscored().dasherize().humanize().trim().ltrim().rtrim() @@ -119,8 +119,8 @@ function chaining() { .q(source("s17")).ljust(10, source("s18")) .rjust(10, source("s19"))); // $ hasTaintFlow=s16 hasTaintFlow=s17 hasTaintFlow=s18 hasTaintFlow=s19 - sink(s(source("s20")).tap(function(value) { - return value + source("s21"); + sink(s(source("s20")).tap(function(value) { + return value + source("s21"); }).value()); // $ hasTaintFlow=s20 hasTaintFlow=s21 } diff --git a/javascript/ql/test/library-tests/frameworks/data/test.expected b/javascript/ql/test/library-tests/frameworks/data/test.expected index 0bc1b6b6ee07..875b0189d619 100644 --- a/javascript/ql/test/library-tests/frameworks/data/test.expected +++ b/javascript/ql/test/library-tests/frameworks/data/test.expected @@ -1,4 +1,21 @@ legacyDataFlowDifference +| test.js:5:30:5:37 | source() | test.js:5:8:5:38 | testlib ... urce()) | only flow with NEW data flow library | +| test.js:6:22:6:29 | source() | test.js:6:8:6:30 | preserv ... urce()) | only flow with NEW data flow library | +| test.js:7:41:7:48 | source() | test.js:7:8:7:49 | require ... urce()) | only flow with NEW data flow library | +| test.js:11:38:11:45 | source() | test.js:11:8:11:55 | testlib ... , 1, 1) | only flow with NEW data flow library | +| test.js:13:44:13:51 | source() | test.js:13:8:13:55 | testlib ... e(), 1) | only flow with NEW data flow library | +| test.js:17:47:17:54 | source() | test.js:17:8:17:61 | testlib ... , 1, 1) | only flow with NEW data flow library | +| test.js:18:50:18:57 | source() | test.js:18:8:18:61 | testlib ... e(), 1) | only flow with NEW data flow library | +| test.js:19:53:19:60 | source() | test.js:19:8:19:61 | testlib ... urce()) | only flow with NEW data flow library | +| test.js:21:34:21:41 | source() | test.js:21:8:21:51 | testlib ... , 1, 1) | only flow with NEW data flow library | +| test.js:22:37:22:44 | source() | test.js:22:8:22:51 | testlib ... , 1, 1) | only flow with NEW data flow library | +| test.js:23:40:23:47 | source() | test.js:23:8:23:51 | testlib ... e(), 1) | only flow with NEW data flow library | +| test.js:24:43:24:50 | source() | test.js:24:8:24:51 | testlib ... urce()) | only flow with NEW data flow library | +| test.js:31:29:31:36 | source() | test.js:32:10:32:10 | y | only flow with NEW data flow library | +| test.js:37:29:37:36 | source() | test.js:38:10:38:10 | y | only flow with NEW data flow library | +| test.js:43:29:43:36 | source() | test.js:44:10:44:10 | y | only flow with NEW data flow library | +| test.js:47:33:47:40 | source() | test.js:49:10:49:13 | this | only flow with NEW data flow library | +| test.js:102:16:102:34 | testlib.getSource() | test.js:104:8:104:24 | source.continue() | only flow with NEW data flow library | consistencyIssue taintFlow | guardedRouteHandler.js:6:10:6:28 | req.injectedReqData | guardedRouteHandler.js:6:10:6:28 | req.injectedReqData | diff --git a/javascript/ql/test/query-tests/Security/CWE-079/ReflectedXss/ReflectedXss.expected b/javascript/ql/test/query-tests/Security/CWE-079/ReflectedXss/ReflectedXss.expected index 5681ae99aa85..09874ecef104 100644 --- a/javascript/ql/test/query-tests/Security/CWE-079/ReflectedXss/ReflectedXss.expected +++ b/javascript/ql/test/query-tests/Security/CWE-079/ReflectedXss/ReflectedXss.expected @@ -78,7 +78,9 @@ edges | ReflectedXss.js:22:19:22:26 | req.body | ReflectedXss.js:22:12:22:27 | marked(req.body) | provenance | | | ReflectedXss.js:29:7:32:4 | mytable | ReflectedXss.js:33:12:33:18 | mytable | provenance | | | ReflectedXss.js:29:17:32:4 | table([ ... ce\\n ]) | ReflectedXss.js:29:7:32:4 | mytable | provenance | | -| ReflectedXss.js:31:14:31:21 | req.body | ReflectedXss.js:29:17:32:4 | table([ ... ce\\n ]) | provenance | | +| ReflectedXss.js:29:23:32:3 | [\\n [ ... rce\\n ] [1, 1] | ReflectedXss.js:29:17:32:4 | table([ ... ce\\n ]) | provenance | | +| ReflectedXss.js:31:5:31:22 | ['body', req.body] [1] | ReflectedXss.js:29:23:32:3 | [\\n [ ... rce\\n ] [1, 1] | provenance | | +| ReflectedXss.js:31:14:31:21 | req.body | ReflectedXss.js:31:5:31:22 | ['body', req.body] [1] | provenance | | | ReflectedXss.js:41:31:41:38 | req.body | ReflectedXss.js:41:12:41:39 | convert ... q.body) | provenance | | | ReflectedXss.js:63:14:63:21 | req.body | ReflectedXss.js:63:39:63:42 | file | provenance | | | ReflectedXss.js:63:39:63:42 | file | ReflectedXss.js:64:16:64:19 | file | provenance | | @@ -253,6 +255,8 @@ nodes | ReflectedXss.js:28:12:28:19 | req.body | semmle.label | req.body | | ReflectedXss.js:29:7:32:4 | mytable | semmle.label | mytable | | ReflectedXss.js:29:17:32:4 | table([ ... ce\\n ]) | semmle.label | table([ ... ce\\n ]) | +| ReflectedXss.js:29:23:32:3 | [\\n [ ... rce\\n ] [1, 1] | semmle.label | [\\n [ ... rce\\n ] [1, 1] | +| ReflectedXss.js:31:5:31:22 | ['body', req.body] [1] | semmle.label | ['body', req.body] [1] | | ReflectedXss.js:31:14:31:21 | req.body | semmle.label | req.body | | ReflectedXss.js:33:12:33:18 | mytable | semmle.label | mytable | | ReflectedXss.js:40:12:40:19 | req.body | semmle.label | req.body | diff --git a/shared/dataflow/codeql/dataflow/internal/FlowSummaryImpl.qll b/shared/dataflow/codeql/dataflow/internal/FlowSummaryImpl.qll index 7fa38a327f16..95d29153f47e 100644 --- a/shared/dataflow/codeql/dataflow/internal/FlowSummaryImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/FlowSummaryImpl.qll @@ -530,6 +530,93 @@ module Make< } } + private predicate isNonLocalSummaryComponent(SummaryComponent c) { + c instanceof TArgumentSummaryComponent or + c instanceof TParameterSummaryComponent or + c instanceof TReturnSummaryComponent + } + + private predicate isLocalSummaryComponent(SummaryComponent c) { + not isNonLocalSummaryComponent(c) + } + + /** + * Holds if `s` is a valid input stack, in the sense that we generate a data flow graph + * that faithfully represents this flow, and lambda-tracking can be expected to track + * lambdas to the relevant callbacks in practice. + */ + private predicate isSupportedInputStack(SummaryComponentStack s) { + // Argument[n].* + s.length() = 1 and + s.head() instanceof TArgumentSummaryComponent + or + // Argument[n].ReturnValue.* + s.length() = 2 and + s.head() instanceof TReturnSummaryComponent and + s.tail().head() instanceof TArgumentSummaryComponent + or + // Argument[n].Parameter[n].Content.* + s.length() = 3 and + s.head() instanceof TContentSummaryComponent and + s.tail().head() instanceof TParameterSummaryComponent and + s.drop(2).head() instanceof TArgumentSummaryComponent + or + isSupportedInputStack(s.tail()) and + isLocalSummaryComponent(s.head()) + } + + private predicate isSupportedOutputStack1(SummaryComponentStack s) { + // ReturnValue.* + s.length() = 1 and + s.head() instanceof TReturnSummaryComponent + or + // Argument[n].Content.* + s.length() = 2 and + s.head() instanceof TContentSummaryComponent and + s.tail().head() instanceof TArgumentSummaryComponent + or + // Argument[n].Parameter[n].* + s.length() = 2 and + s.head() instanceof TParameterSummaryComponent and + s.tail().head() instanceof TArgumentSummaryComponent + or + isSupportedOutputStack1(s.tail()) and + isLocalSummaryComponent(s.head()) + } + + /** Like `isSupportedInputStack` but for output stacks. */ + private predicate isSupportedOutputStack(SummaryComponentStack s) { + isSupportedOutputStack1(s) + or + // `Argument[n]` not followed by anything. Needs to be outside the recursion. + s.length() = 1 and + s.head() instanceof TArgumentSummaryComponent + } + + /** + * Holds if `callable` has an unsupported flow `input -> output`. + * + * `whichOne` indicates if the `input` or `output` contains the unsupported sequence. + */ + predicate unsupportedCallable( + SummarizedCallableImpl callable, SummaryComponentStack input, SummaryComponentStack output, + string whichOne + ) { + callable.propagatesFlow(input, output, _, _) and + ( + not isSupportedInputStack(input) and whichOne = "input" + or + not isSupportedOutputStack(output) and whichOne = "output" + ) + } + + /** + * Holds if `callable` has an unsupported flow. + */ + predicate unsupportedCallable(SummarizedCallableImpl callable) { + unsupportedCallable(callable, _, _, _) + } + private predicate summarySpec(string spec) { exists(SummarizedCallable c | c.propagatesFlow(spec, _, _, _)