From 90a1534bb7154e0f875c13e630d21bc2806ca95b Mon Sep 17 00:00:00 2001 From: Tobias Specht Date: Wed, 20 Nov 2024 23:25:12 +0100 Subject: [PATCH 01/10] Fix overload operator-> for CallExpression (#1842) * Handle overloaded operator-> for CallExpression * Add test case for handle overloaded operator-> for CallExpression --- .../aisec/cpg/passes/SymbolResolver.kt | 53 ++++++++++++------- .../cpg/frontends/cxx/CXXDeclarationTest.kt | 44 +++++++++++++++ .../cxx/operators/call_expression.cpp | 27 ++++++++++ 3 files changed, 105 insertions(+), 19 deletions(-) create mode 100644 cpg-language-cxx/src/test/resources/cxx/operators/call_expression.cpp diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt index 10c18881a7..42722b74f7 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt @@ -322,6 +322,35 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { } } + /** + * This function resolves a possible overloaded -> (arrow) operator, for languages which support + * operator overloading. The implicit call to the overloaded operator function is inserted as + * base for the MemberExpression. This can be the case for a [MemberExpression] or + * [MemberCallExpression] + */ + private fun resolveOverloadedArrowOperator(ex: Expression): Type? { + var type: Type? 
= null + if ( + ex.language is HasOperatorOverloading && + ex is MemberExpression && + ex.operatorCode == "->" && + ex.base.type !is PointerType + ) { + val result = resolveOperator(ex) + val op = result?.bestViable?.singleOrNull() + if (result?.success == SUCCESSFUL && op is OperatorDeclaration) { + type = op.returnTypes.singleOrNull()?.root ?: unknownType() + + // We need to insert a new operator call expression in between + val call = operatorCallFromDeclaration(op, ex) + + // Make the call our new base + ex.base = call + } + } + return type + } + protected fun resolveMember( containingClass: ObjectType, reference: Reference @@ -334,25 +363,8 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { var member: ValueDeclaration? = null var type: Type = containingClass - // Check for a possible overloaded operator-> - if ( - reference.language is HasOperatorOverloading && - reference is MemberExpression && - reference.operatorCode == "->" && - reference.base.type !is PointerType - ) { - val result = resolveOperator(reference) - val op = result?.bestViable?.singleOrNull() - if (result?.success == SUCCESSFUL && op is OperatorDeclaration) { - type = op.returnTypes.singleOrNull()?.root ?: unknownType() - - // We need to insert a new operator call expression in between - val call = operatorCallFromDeclaration(op, reference) - - // Make the call our new base - reference.base = call - } - } + // Handle a possible overloaded operator-> + type = resolveOverloadedArrowOperator(reference) ?: type val record = type.recordDeclaration if (record != null) { @@ -398,6 +410,9 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { val callee = call.callee val language = call.language + // Handle a possible overloaded operator-> + resolveOverloadedArrowOperator(callee) + // Dynamic function invokes (such as function pointers) are handled by extra pass, so we are // not resolving them here. 
// diff --git a/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXDeclarationTest.kt b/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXDeclarationTest.kt index b874b2a2ea..b0ade57095 100644 --- a/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXDeclarationTest.kt +++ b/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXDeclarationTest.kt @@ -306,4 +306,48 @@ class CXXDeclarationTest { assertEquals(p, opCall.base) assertInvokes(opCall, op) } + + @Test + fun testCallExpressionOperator() { + val file = File("src/test/resources/cxx/operators/call_expression.cpp") + val result = + analyze(listOf(file), file.parentFile.toPath(), true) { + it.registerLanguage() + } + assertNotNull(result) + + var proxy = result.records["Proxy"] + assertNotNull(proxy) + + var funcBar = proxy.functions["bar"] + assertNotNull(funcBar) + + var op = proxy.operators["operator->"] + assertNotNull(op) + + var data = result.records["Data"] + assertNotNull(data) + + var funcFoo = data.functions["foo"] + assertNotNull(funcFoo) + + val p = result.refs["p"] + assertNotNull(p) + assertEquals(proxy.toType(), p.type) + + var funcFooRef = result.memberExpressions["foo"] + assertNotNull(funcFooRef) + assertRefersTo(funcFooRef, funcFoo) + + var funcBarRef = result.memberExpressions["bar"] + assertNotNull(funcBarRef) + assertRefersTo(funcBarRef, funcBar) + + // we should now have an implicit call to our operator in-between "p" and "foo" + val opCall = funcFooRef.base + assertNotNull(opCall) + assertIs(opCall) + assertEquals(p, opCall.base) + assertInvokes(opCall, op) + } } diff --git a/cpg-language-cxx/src/test/resources/cxx/operators/call_expression.cpp b/cpg-language-cxx/src/test/resources/cxx/operators/call_expression.cpp new file mode 100644 index 0000000000..232d1df32a --- /dev/null +++ b/cpg-language-cxx/src/test/resources/cxx/operators/call_expression.cpp @@ -0,0 +1,27 @@ +struct Data { + int foo() { + 
return 1; + } +}; + +struct Proxy { + Data *data; + Proxy() { + data = new Data; + } + Data* operator->() { + return data; + } + int bar() { + return 1; + } +}; + +int main() { + Proxy p; + + int i = p->foo(); + int j = p.bar(); + return 1; +} + From 37987af069c74dbf9b9bd8d2083b8c15dd421d71 Mon Sep 17 00:00:00 2001 From: KuechA <31155350+KuechA@users.noreply.github.com> Date: Thu, 21 Nov 2024 14:57:28 +0100 Subject: [PATCH 02/10] Add more extensions to follow edges until the end (#1853) * Add more extensions to follow edges * Fix analysis --- .../de/fraunhofer/aisec/cpg/query/Query.kt | 4 +- .../fraunhofer/aisec/cpg/graph/Extensions.kt | 195 +++++++++++++++++- 2 files changed, 188 insertions(+), 11 deletions(-) diff --git a/cpg-analysis/src/main/kotlin/de/fraunhofer/aisec/cpg/query/Query.kt b/cpg-analysis/src/main/kotlin/de/fraunhofer/aisec/cpg/query/Query.kt index 0f4fd3a68e..12598f020c 100644 --- a/cpg-analysis/src/main/kotlin/de/fraunhofer/aisec/cpg/query/Query.kt +++ b/cpg-analysis/src/main/kotlin/de/fraunhofer/aisec/cpg/query/Query.kt @@ -249,7 +249,7 @@ fun executionPath(from: Node, to: Node): QueryTree { * requirement specified in [predicate]. */ fun executionPath(from: Node, predicate: (Node) -> Boolean): QueryTree { - val evalRes = from.followNextEOGEdgesUntilHit(predicate) + val evalRes = from.followNextEOGEdgesUntilHit(predicate = predicate) val allPaths = evalRes.fulfilled.map { QueryTree(it) }.toMutableList() allPaths.addAll(evalRes.failed.map { QueryTree(it) }) return QueryTree( @@ -264,7 +264,7 @@ fun executionPath(from: Node, predicate: (Node) -> Boolean): QueryTree * requirement specified in [predicate]. 
*/ fun executionPathBackwards(to: Node, predicate: (Node) -> Boolean): QueryTree { - val evalRes = to.followPrevEOGEdgesUntilHit(predicate) + val evalRes = to.followPrevEOGEdgesUntilHit(predicate = predicate) val allPaths = evalRes.fulfilled.map { QueryTree(it) }.toMutableList() allPaths.addAll(evalRes.failed.map { QueryTree(it) }) return QueryTree( diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/Extensions.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/Extensions.kt index a5400e768e..86d04ce274 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/Extensions.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/Extensions.kt @@ -199,15 +199,117 @@ class FulfilledAndFailedPaths(val fulfilled: List>, val failed: List< * Hence, if "fulfilled" is a non-empty list, a data flow from [this] to such a node is **possible * but not mandatory**. If the list "failed" is empty, the data flow is mandatory. */ -fun Node.followPrevFullDFGEdgesUntilHit(predicate: (Node) -> Boolean): FulfilledAndFailedPaths { +fun Node.followPrevFullDFGEdgesUntilHit( + collectFailedPaths: Boolean = true, + findAllPossiblePaths: Boolean = true, + predicate: (Node) -> Boolean +): FulfilledAndFailedPaths { return followXUntilHit( x = { currentNode -> currentNode.prevFullDFG }, - collectFailedPaths = true, - findAllPossiblePaths = true, + collectFailedPaths = collectFailedPaths, + findAllPossiblePaths = findAllPossiblePaths, predicate = predicate ) } +/** + * Iterates the prev full DFG edges until there are no more edges available (or until a loop is + * detected). Returns a list of possible paths (each path is represented by a list of nodes). + */ +fun Node.collectAllPrevFullDFGPaths(): List> { + // We make everything fail to reach the end of the DFG. 
Then, we use the stuff collected in the + // failed paths (everything) + return this.followPrevFullDFGEdgesUntilHit( + collectFailedPaths = true, + findAllPossiblePaths = true + ) { + false + } + .failed +} + +/** + * Iterates the next full DFG edges until there are no more edges available (or until a loop is + * detected). Returns a list of possible paths (each path is represented by a list of nodes). + */ +fun Node.collectAllNextFullDFGPaths(): List> { + // We make everything fail to reach the end of the DFG. Then, we use the stuff collected in the + // failed paths (everything) + return this.followNextFullDFGEdgesUntilHit( + collectFailedPaths = true, + findAllPossiblePaths = true, + ) { + false + } + .failed +} + +/** + * Iterates the next EOG edges until there are no more edges available (or until a loop is + * detected). Returns a list of possible paths (each path is represented by a list of nodes). + */ +fun Node.collectAllNextEOGPaths(): List> { + // We make everything fail to reach the end of the EOG. Then, we use the stuff collected in the + // failed paths (everything) + return this.followNextEOGEdgesUntilHit( + collectFailedPaths = true, + findAllPossiblePaths = true, + ) { + false + } + .failed +} + +/** + * Iterates the prev EOG edges until there are no more edges available (or until a loop is + * detected). Returns a list of possible paths (each path is represented by a list of nodes). + */ +fun Node.collectAllPrevEOGPaths(interproceduralAnalysis: Boolean): List> { + // We make everything fail to reach the end of the EOG. Then, we use the stuff collected in the + // failed paths (everything) + return this.followPrevEOGEdgesUntilHit(collectFailedPaths = true, findAllPossiblePaths = true) { + false + } + .failed +} + +/** + * Iterates the next PDG edges until there are no more edges available (or until a loop is + * detected). Returns a list of possible paths (each path is represented by a list of nodes). 
+ */ +fun Node.collectAllNextPDGGPaths(): List> { + // We make everything fail to reach the end of the PDG. Then, we use the stuff collected in the + // failed paths (everything) + return this.followNextPDGUntilHit( + collectFailedPaths = true, + findAllPossiblePaths = true, + ) { + false + } + .failed +} + +/** + * Iterates the prev PDG edges until there are no more edges available (or until a loop is + * detected). Returns a list of possible paths (each path is represented by a list of nodes). + */ +fun Node.collectAllPrevPDGPaths(interproceduralAnalysis: Boolean): List> { + // We make everything fail to reach the end of the PDG. Then, we use the stuff collected in the + // failed paths (everything) + return this.followPrevPDGUntilHit( + collectFailedPaths = true, + findAllPossiblePaths = true, + interproceduralAnalysis = interproceduralAnalysis + ) { + false + } + .failed +} + +/** + * Iterates the prev CDG edges until there are no more edges available (or until a loop is + * detected). Returns a list of possible paths (each path is represented by a list of nodes). + */ fun Node.collectAllPrevCDGPaths(interproceduralAnalysis: Boolean): List> { // We make everything fail to reach the end of the CDG. Then, we use the stuff collected in the // failed paths (everything) @@ -221,6 +323,10 @@ fun Node.collectAllPrevCDGPaths(interproceduralAnalysis: Boolean): List> { // We make everything fail to reach the end of the CDG. Then, we use the stuff collected in the // failed paths (everything) @@ -234,6 +340,35 @@ fun Node.collectAllNextCDGPaths(interproceduralAnalysis: Boolean): List Boolean +): FulfilledAndFailedPaths { + return followXUntilHit( + x = { currentNode -> + val nextNodes = currentNode.nextPDG.toMutableList() + if (interproceduralAnalysis) { + nextNodes.addAll((currentNode as? 
CallExpression)?.calls ?: listOf()) + } + nextNodes + }, + collectFailedPaths = collectFailedPaths, + findAllPossiblePaths = findAllPossiblePaths, + predicate = predicate + ) +} + /** * Returns an instance of [FulfilledAndFailedPaths] where [FulfilledAndFailedPaths.fulfilled] * contains all possible shortest data flow paths (with [ControlDependence]) between the starting @@ -263,6 +398,40 @@ fun Node.followNextCDGUntilHit( ) } +/** + * Returns an instance of [FulfilledAndFailedPaths] where [FulfilledAndFailedPaths.fulfilled] + * contains all possible shortest data flow paths (with [ProgramDependences]) between the starting + * node [this] and the end node fulfilling [predicate] (backwards analysis). The paths are + * represented as lists of nodes. Paths which do not end at such a node are included in + * [FulfilledAndFailedPaths.failed]. + * + * Hence, if "fulfilled" is a non-empty list, a PDG path from [this] to such a node is **possible + * but not mandatory**. If the list "failed" is empty, the data flow is mandatory. + */ +fun Node.followPrevPDGUntilHit( + collectFailedPaths: Boolean = true, + findAllPossiblePaths: Boolean = true, + interproceduralAnalysis: Boolean = false, + predicate: (Node) -> Boolean +): FulfilledAndFailedPaths { + return followXUntilHit( + x = { currentNode -> + val nextNodes = currentNode.prevPDG.toMutableList() + if (interproceduralAnalysis) { + nextNodes.addAll( + (currentNode as? FunctionDeclaration)?.usages?.mapNotNull { + it.astParent as? CallExpression + } ?: listOf() + ) + } + nextNodes + }, + collectFailedPaths = collectFailedPaths, + findAllPossiblePaths = findAllPossiblePaths, + predicate = predicate + ) +} + /** * Returns an instance of [FulfilledAndFailedPaths] where [FulfilledAndFailedPaths.fulfilled] * contains all possible shortest data flow paths (with [ControlDependence]) between the starting @@ -393,13 +562,17 @@ fun Node.followNextFullDFGEdgesUntilHit( * is possible after executing [this] **possible but not mandatory**. 
If the list "failed" is empty, * such a statement is always executed. */ -fun Node.followNextEOGEdgesUntilHit(predicate: (Node) -> Boolean): FulfilledAndFailedPaths { +fun Node.followNextEOGEdgesUntilHit( + collectFailedPaths: Boolean = true, + findAllPossiblePaths: Boolean = true, + predicate: (Node) -> Boolean +): FulfilledAndFailedPaths { return followXUntilHit( x = { currentNode -> currentNode.nextEOGEdges.filter { it.unreachable != true }.map { it.end } }, - collectFailedPaths = true, - findAllPossiblePaths = true, + collectFailedPaths = collectFailedPaths, + findAllPossiblePaths = findAllPossiblePaths, predicate = predicate ) } @@ -414,13 +587,17 @@ fun Node.followNextEOGEdgesUntilHit(predicate: (Node) -> Boolean): FulfilledAndF * is possible after executing [this] **possible but not mandatory**. If the list "failed" is empty, * such a statement is always executed. */ -fun Node.followPrevEOGEdgesUntilHit(predicate: (Node) -> Boolean): FulfilledAndFailedPaths { +fun Node.followPrevEOGEdgesUntilHit( + collectFailedPaths: Boolean = true, + findAllPossiblePaths: Boolean = true, + predicate: (Node) -> Boolean +): FulfilledAndFailedPaths { return followXUntilHit( x = { currentNode -> currentNode.prevEOGEdges.filter { it.unreachable != true }.map { it.start } }, - collectFailedPaths = true, - findAllPossiblePaths = true, + collectFailedPaths = collectFailedPaths, + findAllPossiblePaths = findAllPossiblePaths, predicate = predicate ) } From 2b0474a0fcf70b3c0c04f1bcd8aa02556d90f898 Mon Sep 17 00:00:00 2001 From: KuechA <31155350+KuechA@users.noreply.github.com> Date: Thu, 21 Nov 2024 15:13:04 +0100 Subject: [PATCH 03/10] Start with python `match` statement (#1801) * Start with python match statement * fix bug, add test * More testing * Add implicit break * Review feedback * nullable MatchSingleton: comment and handling --- .../cpg/graph/statements/SwitchStatement.kt | 4 +- .../cpg/frontends/python/ExpressionHandler.kt | 41 ++- .../aisec/cpg/frontends/python/Python.kt 
| 7 +- .../cpg/frontends/python/StatementHandler.kt | 123 +++++++- .../python/statementHandler/MatchTest.kt | 295 ++++++++++++++++++ .../src/test/resources/python/match.py | 54 ++++ 6 files changed, 498 insertions(+), 26 deletions(-) create mode 100644 cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/statementHandler/MatchTest.kt create mode 100644 cpg-language-python/src/test/resources/python/match.py diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/SwitchStatement.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/SwitchStatement.kt index e677cc8911..9d955a5057 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/SwitchStatement.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/SwitchStatement.kt @@ -36,7 +36,7 @@ import org.neo4j.ogm.annotation.Relationship /** * Represents a Java or C++ switch statement of the `switch (selector) {...}` that can include case - * and default statements. Break statements break out of the switch and labeled breaks in JAva are + * and default statements. Break statements break out of the switch and labeled breaks in Java are * handled properly. 
*/ class SwitchStatement : Statement(), BranchingNode { @@ -51,7 +51,7 @@ class SwitchStatement : Statement(), BranchingNode { @Relationship(value = "SELECTOR_DECLARATION") var selectorDeclarationEdge = astOptionalEdgeOf() - /** C++ allows to use a declaration instead of a expression as selector */ + /** C++ allows to use a declaration instead of an expression as selector */ var selectorDeclaration by unwrapping(SwitchStatement::selectorDeclarationEdge) @Relationship(value = "STATEMENT") var statementEdge = astOptionalEdgeOf() diff --git a/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/ExpressionHandler.kt b/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/ExpressionHandler.kt index ac844775a2..62778bc208 100644 --- a/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/ExpressionHandler.kt +++ b/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/ExpressionHandler.kt @@ -237,19 +237,24 @@ class ExpressionHandler(frontend: PythonLanguageFrontend) : * where the first element in [nodes] is the lhs of the root of the tree of binary operators. * The last operands are further down the tree. */ - private fun joinListWithBinOp( + internal fun joinListWithBinOp( operatorCode: String, nodes: List, - rawNode: Python.AST.AST? = null + rawNode: Python.AST.AST? 
= null, + isImplicit: Boolean = true ): BinaryOperator { - val lastTwo = newBinaryOperator(operatorCode, rawNode = rawNode) - lastTwo.rhs = nodes.last() - lastTwo.lhs = nodes[nodes.size - 2] + val lastTwo = + newBinaryOperator(operatorCode = operatorCode, rawNode = rawNode).apply { + rhs = nodes.last() + lhs = nodes[nodes.size - 2] + this.isImplicit = isImplicit + } return nodes.subList(0, nodes.size - 2).foldRight(lastTwo) { newVal, start -> - val nextValue = newBinaryOperator(operatorCode) - nextValue.rhs = start - nextValue.lhs = newVal - nextValue + newBinaryOperator(operatorCode = operatorCode, rawNode = rawNode).apply { + rhs = start + lhs = newVal + this.isImplicit = isImplicit + } } } @@ -297,18 +302,12 @@ class ExpressionHandler(frontend: PythonLanguageFrontend) : rawNode = node ) } else { - // Start with the last two operands, then keep prepending the previous ones until the - // list is finished. - val lastTwo = newBinaryOperator(op, rawNode = node) - lastTwo.rhs = handle(node.values.last()) - lastTwo.lhs = handle(node.values[node.values.size - 2]) - return node.values.subList(0, node.values.size - 2).foldRight(lastTwo) { newVal, start - -> - val nextValue = newBinaryOperator(op, rawNode = node) - nextValue.rhs = start - nextValue.lhs = handle(newVal) - nextValue - } + joinListWithBinOp( + operatorCode = op, + nodes = node.values.map(::handle), + rawNode = node, + isImplicit = true + ) } } diff --git a/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/Python.kt b/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/Python.kt index 774bd91c0b..93eafe9a1e 100644 --- a/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/Python.kt +++ b/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/Python.kt @@ -1153,7 +1153,12 @@ interface Python { * ``` */ class MatchSingleton(pyObject: PyObject) : BasePattern(pyObject) { - val value: Any by lazy { "value" of pyObject 
} + /** + * [value] is not optional. We have to make it nullable though because the value will be + * set to `null` if the case matches on `None`. This is known behavior of jep (similar + * to literals/constants). + */ + val value: Any? by lazy { "value" of pyObject } } /** diff --git a/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/StatementHandler.kt b/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/StatementHandler.kt index cdb66e7103..dfb16d943b 100644 --- a/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/StatementHandler.kt +++ b/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/python/StatementHandler.kt @@ -77,15 +77,134 @@ class StatementHandler(frontend: PythonLanguageFrontend) : is Python.AST.Global -> handleGlobal(node) is Python.AST.Nonlocal -> handleNonLocal(node) is Python.AST.Raise -> handleRaise(node) - is Python.AST.Match, + is Python.AST.Match -> handleMatch(node) is Python.AST.TryStar -> newProblemExpression( - "The statement of class ${node.javaClass} is not supported yet", + problem = "The statement of class ${node.javaClass} is not supported yet", rawNode = node ) } } + /** + * Translates a pattern which can be used by a `match_case`. There are various options available + * and all of them are translated to traditional comparisons and logical expressions which could + * also be seen in the condition of an if-statement. 
+ */ + private fun handlePattern(node: Python.AST.BasePattern, subject: String): Expression { + return when (node) { + is Python.AST.MatchValue -> + newBinaryOperator(operatorCode = "==", rawNode = node).implicit().apply { + this.lhs = newReference(name = subject) + this.rhs = frontend.expressionHandler.handle(ctx = node.value) + } + is Python.AST.MatchSingleton -> + newBinaryOperator(operatorCode = "===", rawNode = node).implicit().apply { + this.lhs = newReference(name = subject) + this.rhs = + when (val value = node.value) { + is Python.AST.BaseExpr -> frontend.expressionHandler.handle(ctx = value) + null -> newLiteral(value = null, rawNode = node) + else -> + newProblemExpression( + problem = + "Can't handle ${value::class} in value of Python.AST.MatchSingleton yet" + ) + } + } + is Python.AST.MatchOr -> + frontend.expressionHandler.joinListWithBinOp( + operatorCode = "or", + nodes = node.patterns.map { handlePattern(node = it, subject = subject) }, + rawNode = node, + isImplicit = false + ) + is Python.AST.MatchSequence, + is Python.AST.MatchMapping, + is Python.AST.MatchClass, + is Python.AST.MatchStar, + is Python.AST.MatchAs -> + newProblemExpression( + problem = "Cannot handle of type ${node::class} yet", + rawNode = node + ) + else -> + newProblemExpression( + problem = "Cannot handle of type ${node::class} yet", + rawNode = node + ) + } + } + + /** + * Translates a [`match_case`](https://docs.python.org/3/library/ast.html#ast.match_case) to a + * [Block] which holds the [CaseStatement] and then all other statements of the + * [Python.AST.match_case.body]. + * + * The [CaseStatement] is generated by the [Python.AST.match_case.pattern] and, if available, + * [Python.AST.match_case.guard]. A `guard` is modeled with an `AND` BinaryOperator in the + * [CaseStatement.caseExpression]. Its `lhs` is the normal pattern and the `rhs` is the guard. + * This is in line with [PEP 634](https://peps.python.org/pep-0634/). 
+ */ + private fun handleMatchCase(node: Python.AST.match_case, subject: String): List { + val statements = mutableListOf() + // First, we add the CaseStatement. A `MatchAs` without a `pattern` implies + // it's a default statement. + // We have to handle this here since we do not want to generate the CaseStatement in this + // case. + val pattern = node.pattern + val guard = node.guard + statements += + if (pattern is Python.AST.MatchAs && pattern.pattern == null) { + newDefaultStatement(rawNode = pattern) + } else if (guard != null) { + newCaseStatement(rawNode = node).apply { + this.caseExpression = + newBinaryOperator(operatorCode = "and") + .implicit( + code = frontend.codeOf(astNode = node), + location = frontend.locationOf(astNode = node) + ) + .apply { + this.lhs = handlePattern(node = node.pattern, subject = subject) + this.rhs = frontend.expressionHandler.handle(ctx = guard) + } + } + } else { + newCaseStatement(rawNode = node).apply { + this.caseExpression = handlePattern(node = node.pattern, subject = subject) + } + } + // Now, we add the remaining body. + statements += node.body.map(::handle) + // Currently, the EOG pass requires a break statement to work as expected. For this reason, + // we insert an implicit break statement at the end of the block. + statements += + newBreakStatement() + .implicit( + code = frontend.codeOf(astNode = node), + location = frontend.locationOf(astNode = node) + ) + return statements + } + + /** + * Translates a Python [`Match`](https://docs.python.org/3/library/ast.html#ast.Match) into a + * [SwitchStatement]. 
+ */ + private fun handleMatch(node: Python.AST.Match): SwitchStatement = + newSwitchStatement(rawNode = node).apply { + val subject = frontend.expressionHandler.handle(ctx = node.subject) + this.selector = subject + + this.statement = + node.cases.fold(initial = newBlock().implicit()) { block, case -> + block.statements += + handleMatchCase(node = case, subject = subject.name.localName) + block + } + } + /** * Translates a Python [`Raise`](https://docs.python.org/3/library/ast.html#ast.Raise) into a * [ThrowExpression]. diff --git a/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/statementHandler/MatchTest.kt b/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/statementHandler/MatchTest.kt new file mode 100644 index 0000000000..aca1ef4b65 --- /dev/null +++ b/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/statementHandler/MatchTest.kt @@ -0,0 +1,295 @@ +/* + * Copyright (c) 2024, Fraunhofer AISEC. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * $$$$$$\ $$$$$$$\ $$$$$$\ + * $$ __$$\ $$ __$$\ $$ __$$\ + * $$ / \__|$$ | $$ |$$ / \__| + * $$ | $$$$$$$ |$$ |$$$$\ + * $$ | $$ ____/ $$ |\_$$ | + * $$ | $$\ $$ | $$ | $$ | + * \$$$$$ |$$ | \$$$$$ | + * \______/ \__| \______/ + * + */ +package de.fraunhofer.aisec.cpg.frontends.python.statementHandler + +import de.fraunhofer.aisec.cpg.TranslationResult +import de.fraunhofer.aisec.cpg.frontends.python.PythonLanguage +import de.fraunhofer.aisec.cpg.graph.functions +import de.fraunhofer.aisec.cpg.graph.get +import de.fraunhofer.aisec.cpg.graph.statements.BreakStatement +import de.fraunhofer.aisec.cpg.graph.statements.CaseStatement +import de.fraunhofer.aisec.cpg.graph.statements.DefaultStatement +import de.fraunhofer.aisec.cpg.graph.statements.expressions.BinaryOperator +import de.fraunhofer.aisec.cpg.graph.statements.expressions.Block +import de.fraunhofer.aisec.cpg.graph.statements.expressions.CallExpression +import de.fraunhofer.aisec.cpg.graph.statements.expressions.Literal +import de.fraunhofer.aisec.cpg.graph.statements.expressions.ProblemExpression +import de.fraunhofer.aisec.cpg.graph.statements.expressions.Reference +import de.fraunhofer.aisec.cpg.graph.switches +import de.fraunhofer.aisec.cpg.test.analyze +import de.fraunhofer.aisec.cpg.test.assertLiteralValue +import de.fraunhofer.aisec.cpg.test.assertLocalName +import de.fraunhofer.aisec.cpg.test.assertRefersTo +import java.nio.file.Path +import kotlin.test.assertEquals +import kotlin.test.assertIs +import kotlin.test.assertNotNull +import kotlin.test.assertNull +import org.junit.jupiter.api.BeforeAll +import org.junit.jupiter.api.Test +import org.junit.jupiter.api.TestInstance + +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +class MatchTest { + private lateinit var topLevel: Path + private lateinit var result: TranslationResult + + @BeforeAll + fun setup() { + topLevel = Path.of("src", "test", "resources", "python") + result = + analyze(listOf(topLevel.resolve("match.py").toFile()), topLevel, 
true) { + it.registerLanguage() + } + assertNotNull(result) + } + + @Test + fun testMatchSingleton() { + val func = result.functions["matchSingleton"] + assertNotNull(func) + + val switchStatement = func.switches.singleOrNull() + assertNotNull(switchStatement) + + assertLocalName("x", switchStatement.selector) + assertIs(switchStatement.selector) + val paramX = func.parameters.singleOrNull() + assertNotNull(paramX) + assertRefersTo(switchStatement.selector, paramX) + + val statementBlock = switchStatement.statement + assertIs(statementBlock) + assertEquals(3, statementBlock.statements.size) + val caseSingleton = statementBlock[0] + assertIs(caseSingleton) + val singletonCheck = caseSingleton.caseExpression + assertIs(singletonCheck) + assertEquals("===", singletonCheck.operatorCode) + assertRefersTo(singletonCheck.lhs, paramX) + val singletonRhs = singletonCheck.rhs + assertIs>(singletonRhs) + assertNull(singletonRhs.value) + assertIs(statementBlock[2]) + } + + @Test + fun testMatchValue() { + val func = result.functions["matchValue"] + assertNotNull(func) + + val switchStatement = func.switches.singleOrNull() + assertNotNull(switchStatement) + + assertLocalName("x", switchStatement.selector) + assertIs(switchStatement.selector) + val paramX = func.parameters.singleOrNull() + assertNotNull(paramX) + assertRefersTo(switchStatement.selector, paramX) + + val statementBlock = switchStatement.statement + assertIs(statementBlock) + assertEquals(3, statementBlock.statements.size) + val caseValue = statementBlock[0] + assertIs(caseValue) + val valueCheck = caseValue.caseExpression + assertIs(valueCheck) + assertEquals("==", valueCheck.operatorCode) + assertRefersTo(valueCheck.lhs, paramX) + assertLiteralValue("value", valueCheck.rhs) + assertIs(statementBlock[2]) + } + + @Test + fun testMatchOr() { + val func = result.functions["matchOr"] + assertNotNull(func) + + val switchStatement = func.switches.singleOrNull() + assertNotNull(switchStatement) + + assertLocalName("x", 
switchStatement.selector) + assertIs(switchStatement.selector) + val paramX = func.parameters.singleOrNull() + assertNotNull(paramX) + assertRefersTo(switchStatement.selector, paramX) + + val statementBlock = switchStatement.statement + assertIs(statementBlock) + assertEquals(3, statementBlock.statements.size) + val caseOr = statementBlock[0] + assertIs(caseOr) + val orExpr = caseOr.caseExpression + assertIs(orExpr) + assertEquals("or", orExpr.operatorCode) + assertIs(orExpr.lhs) + assertIs(orExpr.rhs) + assertIs(statementBlock[2]) + } + + @Test + fun testMatchDefault() { + val func = result.functions["matchDefault"] + assertNotNull(func) + + val switchStatement = func.switches.singleOrNull() + assertNotNull(switchStatement) + + assertLocalName("x", switchStatement.selector) + assertIs(switchStatement.selector) + val paramX = func.parameters.singleOrNull() + assertNotNull(paramX) + assertRefersTo(switchStatement.selector, paramX) + + val statementBlock = switchStatement.statement + assertIs(statementBlock) + assertEquals(3, statementBlock.statements.size) + val caseDefault = statementBlock[0] + assertIs(caseDefault) + assertIs(statementBlock[2]) + } + + @Test + fun testMatchGuard() { + val func = result.functions["matchAnd"] + assertNotNull(func) + + val switchStatement = func.switches.singleOrNull() + assertNotNull(switchStatement) + + assertLocalName("x", switchStatement.selector) + assertIs(switchStatement.selector) + val paramX = func.parameters.singleOrNull() + assertNotNull(paramX) + assertRefersTo(switchStatement.selector, paramX) + + val statementBlock = switchStatement.statement + assertIs(statementBlock) + val caseAnd = statementBlock[0] + assertIs(caseAnd) + val andExpr = caseAnd.caseExpression + assertIs(andExpr) + assertEquals("and", andExpr.operatorCode) + val andRhs = andExpr.rhs + assertIs(andRhs) + assertEquals(">", andRhs.operatorCode) + assertRefersTo(andRhs.lhs, paramX) + assertLiteralValue(0L, andRhs.rhs) + assertIs(statementBlock[2]) + } + + 
@Test + fun testMatchCombined() { + val func = result.functions["matcher"] + assertNotNull(func) + + val switchStatement = func.switches.singleOrNull() + assertNotNull(switchStatement) + + assertLocalName("x", switchStatement.selector) + assertIs(switchStatement.selector) + val paramX = func.parameters.singleOrNull() + assertNotNull(paramX) + assertRefersTo(switchStatement.selector, paramX) + + val statementBlock = switchStatement.statement + assertIs(statementBlock) + val caseSingleton = statementBlock[0] + assertIs(caseSingleton) + val singletonCheck = caseSingleton.caseExpression + assertIs(singletonCheck) + assertEquals("===", singletonCheck.operatorCode) + assertRefersTo(singletonCheck.lhs, paramX) + val singletonRhs = singletonCheck.rhs + assertIs>(singletonRhs) + assertNull(singletonRhs.value) + assertIs(statementBlock[2]) + + val caseValue = statementBlock[3] + assertIs(caseValue) + val valueCheck = caseValue.caseExpression + assertIs(valueCheck) + assertEquals("==", valueCheck.operatorCode) + assertRefersTo(valueCheck.lhs, paramX) + assertLiteralValue("value", valueCheck.rhs) + assertIs(statementBlock[5]) + + val caseAnd = statementBlock[6] + assertIs(caseAnd) + val andExpr = caseAnd.caseExpression + assertIs(andExpr) + assertEquals("and", andExpr.operatorCode) + val andRhs = andExpr.rhs + assertIs(andRhs) + assertEquals(">", andRhs.operatorCode) + assertRefersTo(andRhs.lhs, paramX) + assertLiteralValue(0L, andRhs.rhs) + assertIs(statementBlock[8]) + + assertIs(statementBlock[9]) + assertIs(statementBlock[11]) + assertIs(statementBlock[12]) + assertIs(statementBlock[14]) + assertIs(statementBlock[15]) + assertIs(statementBlock[17]) + assertIs(statementBlock[18]) + assertIs(statementBlock[20]) + assertIs(statementBlock[21]) + assertIs(statementBlock[23]) + assertIs(statementBlock[24]) + assertIs(statementBlock[26]) + + val caseOr = statementBlock[27] + assertIs(caseOr) + val orExpr = caseOr.caseExpression + assertIs(orExpr) + assertEquals("or", 
orExpr.operatorCode) + assertIs(orExpr.lhs) + assertIs(orExpr.rhs) + assertIs(statementBlock[29]) + + val caseDefault = statementBlock[30] + assertIs(caseDefault) + assertIs(statementBlock[32]) + } + + @Test + fun testMatch2() { + val func = result.functions["match_weird"] + assertNotNull(func) + + val switchStatement = func.switches.singleOrNull() + assertNotNull(switchStatement) + + assertIs(switchStatement.selector) + + val statementBlock = switchStatement.statement + assertIs(statementBlock) + val case = statementBlock[0] + assertIs(case) + assertIs(case.caseExpression) + } +} diff --git a/cpg-language-python/src/test/resources/python/match.py b/cpg-language-python/src/test/resources/python/match.py new file mode 100644 index 0000000000..21e7884ba1 --- /dev/null +++ b/cpg-language-python/src/test/resources/python/match.py @@ -0,0 +1,54 @@ +def matcher(x): + match x: + case None: + print("singleton" + x) + case "value": + print("value" + x) + case [x] if x>0: + print(x) + case [1, 2]: + print("sequence" + x) + case [1, 2, *rest]: + print("star" + x) + case [*_]: + print("star2" + x) + case {1: _, 2: _}: + print("mapping" + x) + case Point2D(0, 0): + print("class" + x) + case [x] as y: + print("as" + y) + case "xyz" | "abc": + print("or" + x) + case _: + print("Default match") + +def matchSingleton(x): + match x: + case None: + print("singleton" + x) + +def matchValue(x): + match x: + case "value": + print("value" + x) + +def matchOr(x): + match x: + case "xyz" | "abc": + print("or" + x) + +def matchAnd(x): + match x: + case [x] if x>0: + print(x) + +def matchDefault(x): + match x: + case _: + print("Default match") + +def match_weird(): + match command.split(): + case ["go", ("north" | "south" | "east" | "west") as direction]: + current_room = current_room.neighbor(direction) \ No newline at end of file From 477a7de900988591105bf019268293e83205ec2e Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sun, 24 Nov 
2024 23:20:53 +0100 Subject: [PATCH 04/10] Update dependency typescript to v5.7.2 (#1865) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- cpg-language-typescript/src/main/nodejs/package-lock.json | 8 ++++---- cpg-language-typescript/src/main/nodejs/package.json | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cpg-language-typescript/src/main/nodejs/package-lock.json b/cpg-language-typescript/src/main/nodejs/package-lock.json index cf5c47fa98..ba44910374 100644 --- a/cpg-language-typescript/src/main/nodejs/package-lock.json +++ b/cpg-language-typescript/src/main/nodejs/package-lock.json @@ -7,7 +7,7 @@ "license": "Apache-2.0", "dependencies": { "@types/node": "^22.0.0", - "typescript": "5.6.2" + "typescript": "5.7.2" }, "devDependencies": { "@rollup/plugin-commonjs": "^28.0.0", @@ -631,9 +631,9 @@ "license": "0BSD" }, "node_modules/typescript": { - "version": "5.6.2", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.6.2.tgz", - "integrity": "sha512-NW8ByodCSNCwZeghjN3o+JX5OFH0Ojg6sadjEKY4huZ52TqbJTJnDo5+Tw98lSy63NZvi4n+ez5m2u5d4PkZyw==", + "version": "5.7.2", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.7.2.tgz", + "integrity": "sha512-i5t66RHxDvVN40HfDd1PsEThGNnlMCMT3jMUuoh9/0TaqWevNontacunWyN02LA9/fIbEWlcHZcgTKb9QoaLfg==", "license": "Apache-2.0", "bin": { "tsc": "bin/tsc", diff --git a/cpg-language-typescript/src/main/nodejs/package.json b/cpg-language-typescript/src/main/nodejs/package.json index f380846c4e..ad5fedfff1 100644 --- a/cpg-language-typescript/src/main/nodejs/package.json +++ b/cpg-language-typescript/src/main/nodejs/package.json @@ -6,7 +6,7 @@ }, "dependencies": { "@types/node": "^22.0.0", - "typescript": "5.6.2" + "typescript": "5.7.2" }, "license": "Apache-2.0", "devDependencies": { From d2c7f2852d98fffaa724648485875d8193a967d3 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Wed, 27 Nov 2024 
14:57:57 +0100 Subject: [PATCH 05/10] Update dependency @types/node to v22.10.0 (#1866) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- .../src/main/nodejs/package-lock.json | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cpg-language-typescript/src/main/nodejs/package-lock.json b/cpg-language-typescript/src/main/nodejs/package-lock.json index ba44910374..d18afa2b33 100644 --- a/cpg-language-typescript/src/main/nodejs/package-lock.json +++ b/cpg-language-typescript/src/main/nodejs/package-lock.json @@ -411,12 +411,12 @@ "license": "MIT" }, "node_modules/@types/node": { - "version": "22.9.0", - "resolved": "https://registry.npmjs.org/@types/node/-/node-22.9.0.tgz", - "integrity": "sha512-vuyHg81vvWA1Z1ELfvLko2c8f34gyA0zaic0+Rllc5lbCnbSyuvb2Oxpm6TAUAC/2xZN3QGqxBNggD1nNR2AfQ==", + "version": "22.10.0", + "resolved": "https://registry.npmjs.org/@types/node/-/node-22.10.0.tgz", + "integrity": "sha512-XC70cRZVElFHfIUB40FgZOBbgJYFKKMa5nb9lxcwYstFG/Mi+/Y0bGS+rs6Dmhmkpq4pnNiLiuZAbc02YCOnmA==", "license": "MIT", "dependencies": { - "undici-types": "~6.19.8" + "undici-types": "~6.20.0" } }, "node_modules/@types/resolve": { @@ -644,9 +644,9 @@ } }, "node_modules/undici-types": { - "version": "6.19.8", - "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.19.8.tgz", - "integrity": "sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==", + "version": "6.20.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.20.0.tgz", + "integrity": "sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg==", "license": "MIT" } } From 07c1dd5f77fabda66b608ec1031ef0a87c6659e2 Mon Sep 17 00:00:00 2001 From: Christian Banse Date: Thu, 28 Nov 2024 15:02:00 +0100 Subject: [PATCH 06/10] Improvements to function and record inference (#1586) --- .../aisec/cpg/InferenceConfiguration.kt | 10 ++ 
.../de/fraunhofer/aisec/cpg/ScopeManager.kt | 28 +--- .../aisec/cpg/frontends/LanguageTraits.kt | 8 + .../aisec/cpg/passes/inference/Inference.kt | 122 ++++++++++++-- .../aisec/cpg/passes/inference/PassHelper.kt | 157 ++++++++++++++---- .../de/fraunhofer/aisec/cpg/GraphExamples.kt | 100 +++++++++++ .../aisec/cpg/enhancements/InferenceTest.kt | 84 ++++++++++ .../aisec/cpg/frontends/TestLanguage.kt | 2 +- .../de/fraunhofer/aisec/cpg/test/TestUtils.kt | 3 +- .../aisec/cpg/frontends/cxx/CLanguage.kt | 3 +- .../cpg/frontends/cxx/CXXInferenceTest.kt | 113 ++++++++++++- .../resources/cxx/inference/construct.cpp | 7 + .../test/resources/cxx/inference/construct.h | 4 + .../cxx/{ => inference}/inference.cpp | 0 .../resources/cxx/{ => inference}/inference.h | 0 .../resources/cxx/inference/superclass.cpp | 19 +++ .../test/resources/cxx/inference/superclass.h | 13 ++ .../cxx/{ => inference}/tricky_inference.cpp | 13 +- .../cxx/inference/tricky_inference.h | 31 ++++ .../src/test/resources/cxx/tricky_inference.h | 12 -- 20 files changed, 640 insertions(+), 89 deletions(-) create mode 100644 cpg-language-cxx/src/test/resources/cxx/inference/construct.cpp create mode 100644 cpg-language-cxx/src/test/resources/cxx/inference/construct.h rename cpg-language-cxx/src/test/resources/cxx/{ => inference}/inference.cpp (100%) rename cpg-language-cxx/src/test/resources/cxx/{ => inference}/inference.h (100%) create mode 100644 cpg-language-cxx/src/test/resources/cxx/inference/superclass.cpp create mode 100644 cpg-language-cxx/src/test/resources/cxx/inference/superclass.h rename cpg-language-cxx/src/test/resources/cxx/{ => inference}/tricky_inference.cpp (81%) create mode 100644 cpg-language-cxx/src/test/resources/cxx/inference/tricky_inference.h delete mode 100644 cpg-language-cxx/src/test/resources/cxx/tricky_inference.h diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/InferenceConfiguration.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/InferenceConfiguration.kt index 
3f005411b0..08c9633064 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/InferenceConfiguration.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/InferenceConfiguration.kt @@ -49,6 +49,12 @@ private constructor( /** Enables the inference of variables, such as global variables. */ val inferVariables: Boolean, + /** + * A very EXPERIMENTAL feature. If this is enabled, we will try to infer return types of + * functions based on the context of the call it originated out of. This is disabled by default. + */ + val inferReturnTypes: Boolean, + /** * Uses heuristics to add DFG edges for call expressions to unresolved functions (i.e., * functions not implemented in the given source code). @@ -61,6 +67,7 @@ private constructor( private var inferRecords: Boolean = true, private var inferFunctions: Boolean = true, private var inferVariables: Boolean = true, + private var inferReturnTypes: Boolean = false, private var inferDfgForUnresolvedCalls: Boolean = true ) { fun enabled(infer: Boolean) = apply { this.enabled = infer } @@ -73,6 +80,8 @@ private constructor( fun inferVariables(infer: Boolean) = apply { this.inferVariables = infer } + fun inferReturnTypes(infer: Boolean) = apply { this.inferReturnTypes = infer } + fun inferDfgForUnresolvedCalls(infer: Boolean) = apply { this.inferDfgForUnresolvedCalls = infer } @@ -84,6 +93,7 @@ private constructor( inferRecords, inferFunctions, inferVariables, + inferReturnTypes, inferDfgForUnresolvedCalls ) } diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/ScopeManager.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/ScopeManager.kt index ced0e7f63d..266ac9604f 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/ScopeManager.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/ScopeManager.kt @@ -111,9 +111,6 @@ class ScopeManager : ScopeProvider { val currentRecord: RecordDeclaration? get() = this.firstScopeIsInstanceOrNull()?.astNode as? 
RecordDeclaration - val currentTypedefs: Collection - get() = this.getCurrentTypedefs(currentScope) - val currentNamespace: Name? get() { val namedScope = this.firstScopeIsInstanceOrNull() @@ -237,7 +234,7 @@ class ScopeManager : ScopeProvider { is Block -> BlockScope(nodeToScope) is WhileStatement, is DoStatement, - is AssertStatement -> LoopScope(nodeToScope as Statement) + is AssertStatement -> LoopScope(nodeToScope) is ForStatement, is ForEachStatement -> LoopScope(nodeToScope as Statement) is SwitchStatement -> SwitchScope(nodeToScope) @@ -508,29 +505,6 @@ class ScopeManager : ScopeProvider { scope?.addTypedef(typedef) } - private fun getCurrentTypedefs(searchScope: Scope?): Collection { - val typedefs = mutableMapOf() - - val path = mutableListOf() - var current = searchScope - - // We need to build a path from the current scope to the top most one - while (current != null) { - if (current is ValueDeclarationScope) { - path += current - } - current = current.parent - } - - // And then follow the path in reverse. This ensures us that a local definition - // overwrites / shadows one that was there on a higher scope. - for (scope in path.reversed()) { - typedefs.putAll(scope.typedefs) - } - - return typedefs.values - } - /** * Resolves only references to Values in the current scope, static references to other visible * records are not resolved over the ScopeManager. 
diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/LanguageTraits.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/LanguageTraits.kt index dee23cc697..b701827023 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/LanguageTraits.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/LanguageTraits.kt @@ -204,6 +204,14 @@ interface HasAnonymousIdentifier : LanguageTrait { */ interface HasGlobalVariables : LanguageTrait +/** + * A language trait, that specifies that this language has global functions directly in the + * [GlobalScope], i.e., not within a namespace, but directly contained in a + * [TranslationUnitDeclaration]. For example, C++ has global functions, Java and Go do not (as every + * function is either in a class or a namespace). + */ +interface HasGlobalFunctions : LanguageTrait + /** * A common super-class for all language traits that arise because they are an ambiguity of a * function call, e.g., function-style casts. 
This means that we cannot differentiate between a diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/inference/Inference.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/inference/Inference.kt index ce3e2e3f21..2073b108af 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/inference/Inference.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/inference/Inference.kt @@ -25,7 +25,6 @@ */ package de.fraunhofer.aisec.cpg.passes.inference -import de.fraunhofer.aisec.cpg.InferenceConfiguration import de.fraunhofer.aisec.cpg.ScopeManager import de.fraunhofer.aisec.cpg.TranslationContext import de.fraunhofer.aisec.cpg.TypeManager @@ -34,11 +33,14 @@ import de.fraunhofer.aisec.cpg.frontends.Language import de.fraunhofer.aisec.cpg.graph.* import de.fraunhofer.aisec.cpg.graph.declarations.* import de.fraunhofer.aisec.cpg.graph.scopes.Scope +import de.fraunhofer.aisec.cpg.graph.statements.ReturnStatement +import de.fraunhofer.aisec.cpg.graph.statements.expressions.BinaryOperator import de.fraunhofer.aisec.cpg.graph.statements.expressions.CallExpression +import de.fraunhofer.aisec.cpg.graph.statements.expressions.ConstructExpression import de.fraunhofer.aisec.cpg.graph.statements.expressions.Expression -import de.fraunhofer.aisec.cpg.graph.statements.expressions.MemberExpression import de.fraunhofer.aisec.cpg.graph.statements.expressions.Reference import de.fraunhofer.aisec.cpg.graph.statements.expressions.TypeExpression +import de.fraunhofer.aisec.cpg.graph.statements.expressions.UnaryOperator import de.fraunhofer.aisec.cpg.graph.types.* import de.fraunhofer.aisec.cpg.helpers.Util.debugWithFileLocation import de.fraunhofer.aisec.cpg.helpers.Util.errorWithFileLocation @@ -80,7 +82,7 @@ class Inference internal constructor(val start: Node, override val ctx: Translat code: String?, isStatic: Boolean, signature: List, - returnType: Type?, + incomingReturnType: Type?, hint: CallExpression? = null ): FunctionDeclaration? 
{ if (!ctx.config.inferenceConfiguration.inferFunctions) { @@ -108,15 +110,6 @@ class Inference internal constructor(val start: Node, override val ctx: Translat } inferred.code = code - debugWithFileLocation( - hint, - log, - "Inferred a new {} declaration {} with parameter types {} in $it", - if (inferred is MethodDeclaration) "method" else "function", - inferred.name, - signature.map { it?.name } - ) - // Create parameter declarations and receiver (only for methods). if (inferred is MethodDeclaration) { createInferredReceiver(inferred, record) @@ -124,9 +117,36 @@ class Inference internal constructor(val start: Node, override val ctx: Translat createInferredParameters(inferred, signature) // Set the type and return type(s) - returnType?.let { inferred.returnTypes = listOf(it) } + var returnType = + if ( + ctx.config.inferenceConfiguration.inferReturnTypes && + incomingReturnType is UnknownType && + hint != null + ) { + inferReturnType(hint) ?: unknownType() + } else { + incomingReturnType + } + + if (returnType is TupleType) { + inferred.returnTypes = returnType.types + } else if (returnType != null) { + inferred.returnTypes = listOf(returnType) + } + inferred.type = FunctionType.computeType(inferred) + debugWithFileLocation( + hint, + log, + "Inferred a new {} declaration {} with parameter types {} and return types {} in {}", + if (inferred is MethodDeclaration) "method" else "function", + inferred.name, + signature.map { it?.name }, + inferred.returnTypes.map { it.name }, + it + ) + // Add it to the scope scopeManager.addDeclaration(inferred) @@ -528,6 +548,73 @@ class Inference internal constructor(val start: Node, override val ctx: Translat this.scopeManager = ctx.scopeManager this.typeManager = ctx.typeManager } + + /** + * This function tries to infer a return type for an inferred [FunctionDeclaration] based the + * original [CallExpression] (as the [hint]) parameter that was used to infer the function. 
+ */ + fun inferReturnType(hint: CallExpression): Type? { + // Try to find out, if the supplied hint is part of an assignment. If yes, we can use their + // type as the return type of the function + var targetType = + ctx.currentComponent.assignments.singleOrNull { it.value == hint }?.target?.type + if (targetType != null && targetType !is UnknownType) { + return targetType + } + + // Look for an "argument holder". These can be different kind of nodes + val holder = + ctx.currentComponent.allChildren { it.hasArgument(hint) }.singleOrNull() + when (holder) { + is UnaryOperator -> { + // If it's a boolean operator, the return type is probably a boolean + if (holder.operatorCode == "!") { + return hint.language?.builtInTypes?.values?.firstOrNull { it is BooleanType } + } + // If it's a numeric operator, return the largest numeric type that we have; we + // prefer integers to floats + if (holder.operatorCode in listOf("+", "-", "++", "--")) { + val numericTypes = + hint.language + ?.builtInTypes + ?.values + ?.filterIsInstance() + ?.sortedWith( + compareBy { it.bitWidth } + .then { a, b -> preferIntegerType(a, b) } + ) + + return numericTypes?.lastOrNull() + } + } + is ConstructExpression -> { + return holder.type + } + is BinaryOperator -> { + // If it is on the right side, it's probably the same as on the left-side (and + // vice versa) + if (hint == holder.rhs) { + return holder.lhs.type + } else if (hint == holder.lhs) { + return holder.rhs.type + } + } + is ReturnStatement -> { + // If this is part of a return statement, we can take the return type + val func = + hint.firstParentOrNull { it is FunctionDeclaration } as? FunctionDeclaration + val returnTypes = func?.returnTypes + + return if (returnTypes != null && returnTypes.size > 1) { + TupleType(returnTypes) + } else { + returnTypes?.singleOrNull() + } + } + } + + return null + } } /** Provides information about the inference status of a node. 
*/ @@ -605,3 +692,12 @@ fun RecordDeclaration.inferMethod( call ) as? MethodDeclaration } + +/** A small helper function that prefers [IntegerType] when comparing two [NumericType] types. */ +fun preferIntegerType(a: NumericType, b: NumericType): Int { + return when { + a is IntegerType && b is IntegerType -> 0 + a is IntegerType && b !is IntegerType -> 1 + else -> -1 + } +} diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/inference/PassHelper.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/inference/PassHelper.kt index da7184e8f5..f88a8fc3dc 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/inference/PassHelper.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/inference/PassHelper.kt @@ -27,18 +27,24 @@ package de.fraunhofer.aisec.cpg.passes.inference import de.fraunhofer.aisec.cpg.CallResolutionResult import de.fraunhofer.aisec.cpg.InferenceConfiguration +import de.fraunhofer.aisec.cpg.ancestors +import de.fraunhofer.aisec.cpg.frontends.HasGlobalFunctions import de.fraunhofer.aisec.cpg.frontends.HasGlobalVariables import de.fraunhofer.aisec.cpg.frontends.HasImplicitReceiver import de.fraunhofer.aisec.cpg.frontends.HasStructs import de.fraunhofer.aisec.cpg.frontends.Language import de.fraunhofer.aisec.cpg.graph.Name import de.fraunhofer.aisec.cpg.graph.Node +import de.fraunhofer.aisec.cpg.graph.calls import de.fraunhofer.aisec.cpg.graph.declarations.* +import de.fraunhofer.aisec.cpg.graph.invoke +import de.fraunhofer.aisec.cpg.graph.methods import de.fraunhofer.aisec.cpg.graph.newFieldDeclaration import de.fraunhofer.aisec.cpg.graph.scopes.GlobalScope import de.fraunhofer.aisec.cpg.graph.scopes.NameScope import de.fraunhofer.aisec.cpg.graph.scopes.RecordScope import de.fraunhofer.aisec.cpg.graph.statements.expressions.CallExpression +import de.fraunhofer.aisec.cpg.graph.statements.expressions.MemberCallExpression import de.fraunhofer.aisec.cpg.graph.statements.expressions.MemberExpression import 
de.fraunhofer.aisec.cpg.graph.statements.expressions.Reference import de.fraunhofer.aisec.cpg.graph.types.ObjectType @@ -56,10 +62,25 @@ import kotlin.collections.forEach * Tries to infer a [NamespaceDeclaration] from a [Name]. This will return `null`, if inference was * not possible, or if it was turned off in the [InferenceConfiguration]. */ -internal fun Pass<*>.tryNamespaceInference(name: Name, locationHint: Node?): NamespaceDeclaration? { - return scopeManager.globalScope - ?.astNode - ?.startInference(this.ctx) +fun Pass<*>.tryNamespaceInference(name: Name, locationHint: Node?): NamespaceDeclaration? { + // Determine the scope where we want to start our inference + var (scope, _) = scopeManager.extractScope(name, location = locationHint?.location) + + if (scope !is NameScope) { + scope = null + } + + var holder = scope?.astNode + + // If we could not find a scope, but we have an FQN, we can try to infer a namespace (or a + // parent record) + var parentName = name.parent + if (scope == null && parentName != null) { + holder = tryScopeInference(parentName, locationHint) + } + + return (holder ?: scopeManager.globalScope?.astNode) + ?.startInference(ctx) ?.inferNamespaceDeclaration(name, null, locationHint) } @@ -69,7 +90,7 @@ internal fun Pass<*>.tryNamespaceInference(name: Name, locationHint: Node?): Nam */ internal fun Pass<*>.tryRecordInference( type: Type, - locationHint: Node? = null, + locationHint: Node? = null ): RecordDeclaration? { val kind = if (type.language is HasStructs) { @@ -78,10 +99,20 @@ internal fun Pass<*>.tryRecordInference( "class" } // Determine the scope where we want to start our inference - var (scope, _) = scopeManager.extractScope(type) + var (scope, _) = scopeManager.extractScope(type, scope = type.scope) if (scope !is NameScope) { scope = null + } else if (scope is RecordScope) { + // We are asked to infer a record inside another record. 
While this is not unusual + // per-se, it is far more likely that the "correct" way to place our record is in a + // parent namespace or even the global scope. This is especially true if we did NOT + // infer the parent record, because in this case we can somewhat assume that the + // parent's records declaration (e.g. in a C++ header file) is somewhat complete. + if (scope.astNode?.isInferred == false) { + // It is therefore a better choice to infer it in the parent namespace instead + scope = scopeManager.firstScopeOrNull(scope) { it is NameScope && it !is RecordScope } + } } var holder = scope?.astNode @@ -90,21 +121,12 @@ internal fun Pass<*>.tryRecordInference( // parent record) var parentName = type.name.parent if (scope == null && parentName != null) { - // At this point, we need to check whether we have any type reference to our parent - // name. If we have (e.g. it is used in a function parameter, variable, etc.), then we - // have a high chance that this is actually a parent record and not a namespace - var parentType = typeManager.lookupResolvedType(parentName) - holder = - if (parentType != null) { - tryRecordInference(parentType, locationHint = locationHint) - } else { - tryNamespaceInference(parentName, locationHint = locationHint) - } + holder = tryScopeInference(parentName, locationHint) } val record = - (holder ?: this.scopeManager.globalScope?.astNode) - ?.startInference(this.ctx) + (holder ?: scopeManager.globalScope?.astNode) + ?.startInference(ctx) ?.inferRecordDeclaration(type, kind, locationHint) // Update the type's record. Because types are only unique per scope, we potentially need to @@ -294,23 +316,63 @@ internal fun Pass<*>.tryFunctionInference( } /** - * Tries to infer a [MethodDeclaration] from a [CallExpression]. This will return an empty list, if - * inference was not possible, or if it was turned off in the [InferenceConfiguration]. 
+ * Creates an inferred [FunctionDeclaration] for each suitable [Type] (which points to a + * [RecordDeclaration]). * - * Since potentially multiple suitable bases exist for the inference of methods (specified in - * [possibleContainingTypes]), we infer a method for all of them and return a list. + * There is a big challenge in this inference: We can not be 100 % sure, whether we really need to + * infer a [MethodDeclaration] inside the [RecordDeclaration] or if this is a call to a global + * function (if [call] is a simple [CallExpression] and not a [MemberCallExpression]). The reason + * behind that is that most languages allow to omit `this` when calling methods in the current + * class. So a call to `foo()` inside record `Bar` could either be a call to a global function `foo` + * or a call to `Bar::foo`. * - * Should we encounter that none of our types in [possibleContainingTypes] have a resolved - * declaration, we are inferring one (using [bestGuess]). This should normally not happen as missing - * type declarations are already inferred in the [TypeResolver]. However, there is a special - * corner-case involving types in [Language.builtInTypes] (see [tryFieldInference] for more - * details), + * We need to decide whether we want to infer a global function or not; the heuristic is based on a + * multitude of factors such as: + * - Whether the language even allows for [HasGlobalFunctions]. + * - Whether we have multiple calls to the same function `func()` from multiple locations, everytime + * without an explicit receiver. */ internal fun Pass<*>.tryMethodInference( call: CallExpression, possibleContainingTypes: Set, bestGuess: Type?, ): List { + // We need to decide whether we want to infer a global function or not. We do this with a + // simple heuristic. This will of course not be 100 % error-free, but this is the burden of + // inference. 
+ // 1a) If the language does not even support functions at a global level, it's easy + // 1b) If this is a member call expression, it's also easy + var inferGlobalFunction = + if (call.language !is HasGlobalFunctions || call is MemberCallExpression) { + false + } else if (bestGuess is ObjectType && methodExists(bestGuess, call.name.localName)) { + // 2) We do a quick check, whether we would have a method with our name in the "best + // guess" class. Because if we do, we most likely ended up here because of an + // argument type mismatch. Once we use the new call resolution also for member + // calls, we have this information more easily available + false + } else { + // 3) Lastly, if we are still undecided, we do a quick check on the current + // component, + // if we have multiple calls to the same function from across different locations. + // This is a bit more expensive, so we leave this as a last resort. + // If we encounter "others", there is a high chance this is a global function. Of + // course, we could run into a scenario where we have multiple calls to `init()` in + // several classes and in all occasions the `this` was left out; but this seems + // unlikely + var others = + ctx.currentComponent.calls { + it != call && it.name == call.name && call !is MemberCallExpression + } + others.isNotEmpty() + } + + if (inferGlobalFunction) { + var currentTU = + scopeManager.currentScope?.globalScope?.astNode as? TranslationUnitDeclaration + return listOfNotNull(currentTU?.inferFunction(call, ctx = ctx)) + } + var records = possibleContainingTypes.mapNotNull { val root = it.root as? ObjectType @@ -328,5 +390,44 @@ internal fun Pass<*>.tryMethodInference( } records = records.distinct() - return records.mapNotNull { record -> record.inferMethod(call, ctx = this.ctx) } + return records.mapNotNull { record -> record.inferMethod(call, ctx = ctx) } +} + +/** + * This functions tries to infer a "scope" that should exist under [scopeName], but does not. 
+ * + * A common use-case for this is the creation of nested namespaces, e.g., when inferring classes + * such as `java.lang.System`. At first, we check whether the scope `java` exists, if not, this + * function makes sure that a [NamespaceDeclaration] `java` will be created. Afterward, the same + * check will be repeated for `java.lang`, until we are finally ready to infer the + * [RecordDeclaration] `java.lang.System`. + */ +internal fun Pass<*>.tryScopeInference(scopeName: Name, locationHint: Node?): Declaration? { + // At this point, we need to check whether we have any type reference to our scope + // name. If we have (e.g. it is used in a function parameter, variable, etc.), then we + // have a high chance that this is actually a parent record and not a namespace + var parentType = typeManager.lookupResolvedType(scopeName) + return if (parentType != null) { + tryRecordInference(parentType, locationHint = locationHint) + } else { + tryNamespaceInference(scopeName, locationHint = locationHint) + } +} + +/** + * This function is a necessary evil until we completely switch over member call resolution to the + * new call resolver. We need a way to find out if a method with a given name (independently of + * their arguments) exists in [type] or in one of their [Type.superTypes]. Because in the new call + * resolver we will get a [CallResolutionResult], which contains all candidate and not just the + * matching ones. + * + * This function should solely be used in [tryMethodInference]. 
+ */ +private fun methodExists( + type: ObjectType, + name: String, +): Boolean { + var types = type.ancestors.map { it.type } + var methods = types.map { it.recordDeclaration }.flatMap { it.methods } + return methods.any { it.name.localName == name } } diff --git a/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/GraphExamples.kt b/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/GraphExamples.kt index 4ccb5af4f0..74b1cbf3c6 100644 --- a/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/GraphExamples.kt +++ b/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/GraphExamples.kt @@ -339,6 +339,106 @@ class GraphExamples { } } + fun getInferenceBinaryOperatorReturnType( + config: TranslationConfiguration = + TranslationConfiguration.builder() + .defaultPasses() + .registerLanguage(StructTestLanguage(".")) + .inferenceConfiguration( + InferenceConfiguration.builder() + .inferRecords(true) + .inferReturnTypes(true) + .build() + ) + .build() + ) = + testFrontend(config).build { + translationResult { + translationUnit("test.python") { + function("foo", t("int")) { + body { + declare { variable("a") } + declare { variable("b") } + ref("a") assign { call("bar") + literal(2, t("int")) } + ref("b") assign { literal(2L, t("long")) + call("baz") } + } + } + } + } + } + + fun getInferenceTupleReturnType( + config: TranslationConfiguration = + TranslationConfiguration.builder() + .defaultPasses() + .registerLanguage(StructTestLanguage(".")) + .inferenceConfiguration( + InferenceConfiguration.builder() + .inferRecords(true) + .inferReturnTypes(true) + .build() + ) + .build() + ) = + testFrontend(config).build { + translationResult { + translationUnit("test.python") { + function("foo", returnTypes = listOf(t("Foo"), t("Bar"))) { + body { returnStmt { call("bar") } } + } + } + } + } + + fun getInferenceUnaryOperatorReturnType( + config: TranslationConfiguration = + TranslationConfiguration.builder() + .defaultPasses() + .registerLanguage(StructTestLanguage(".")) + 
.inferenceConfiguration( + InferenceConfiguration.builder() + .inferRecords(true) + .inferReturnTypes(true) + .build() + ) + .build() + ) = + testFrontend(config).build { + translationResult { + translationUnit("Test.java") { + record("Test") { method("foo") { body { returnStmt { -call("bar") } } } } + } + } + } + + fun getInferenceNestedNamespace( + config: TranslationConfiguration = + TranslationConfiguration.builder() + .defaultPasses() + .registerLanguage(StructTestLanguage(".")) + .inferenceConfiguration( + InferenceConfiguration.builder() + .inferRecords(true) + .inferNamespaces(true) + .build() + ) + .build() + ) = + testFrontend(config).build { + translationResult { + translationUnit("Test.java") { + record("Test") { + method("foo") { + body { + declare { variable("node", t("java.lang.String")) } + returnStmt { isImplicit = true } + } + } + } + } + } + } + fun getVariables( config: TranslationConfiguration = TranslationConfiguration.builder() diff --git a/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/enhancements/InferenceTest.kt b/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/enhancements/InferenceTest.kt index 9454a0a1ab..43f867a105 100644 --- a/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/enhancements/InferenceTest.kt +++ b/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/enhancements/InferenceTest.kt @@ -95,4 +95,88 @@ class InferenceTest { assertNotNull(nextField) assertLocalName("T*", nextField.type) } + + @Test + fun testUnaryOperatorReturnType() { + val tu = + GraphExamples.getInferenceUnaryOperatorReturnType() + .components + .firstOrNull() + ?.translationUnits + ?.firstOrNull() + assertNotNull(tu) + with(tu) { + val longType = assertResolvedType("long") + + val bar = tu.functions["bar"] + assertNotNull(bar) + + assertEquals(longType, bar.returnTypes.singleOrNull()) + } + } + + @Test + fun testTupleTypeReturnType() { + val tu = + GraphExamples.getInferenceTupleReturnType() + .components + .firstOrNull() + ?.translationUnits + 
?.firstOrNull() + assertNotNull(tu) + with(tu) { + val fooType = assertResolvedType("Foo") + val barType = assertResolvedType("Bar") + + val bar = tu.functions["bar"] + assertNotNull(bar) + + assertEquals(listOf(fooType, barType), bar.returnTypes) + } + } + + @Test + fun testBinaryOperatorReturnType() { + val tu = + GraphExamples.getInferenceBinaryOperatorReturnType() + .components + .firstOrNull() + ?.translationUnits + ?.firstOrNull() + assertNotNull(tu) + with(tu) { + val intType = assertResolvedType("int") + val longType = assertResolvedType("long") + + val bar = tu.functions["bar"] + assertNotNull(bar) + assertEquals(intType, bar.returnTypes.singleOrNull()) + + val baz = tu.functions["baz"] + assertNotNull(baz) + assertEquals(longType, baz.returnTypes.singleOrNull()) + } + } + + @Test + fun testNestedNamespace() { + val result = GraphExamples.getInferenceNestedNamespace() + with(result) { + val java = result.namespaces["java"] + assertNotNull(java) + assertLocalName("java", java) + + val javaLang = result.namespaces["java.lang"] + assertNotNull(javaLang) + assertLocalName("lang", javaLang) + // should exist in the scope of "java" + assertEquals(java, javaLang.scope?.astNode) + + val javaLangString = result.records["java.lang.String"] + assertNotNull(javaLangString) + assertLocalName("String", javaLangString) + // should exist in the scope of "java.lang" + assertEquals(javaLang, javaLangString.scope?.astNode) + } + } } diff --git a/cpg-core/src/testFixtures/kotlin/de/fraunhofer/aisec/cpg/frontends/TestLanguage.kt b/cpg-core/src/testFixtures/kotlin/de/fraunhofer/aisec/cpg/frontends/TestLanguage.kt index 859e58adec..7c94a3431a 100644 --- a/cpg-core/src/testFixtures/kotlin/de/fraunhofer/aisec/cpg/frontends/TestLanguage.kt +++ b/cpg-core/src/testFixtures/kotlin/de/fraunhofer/aisec/cpg/frontends/TestLanguage.kt @@ -50,7 +50,7 @@ open class TestLanguage(final override var namespaceDelimiter: String = "::") : override val builtInTypes: Map = mapOf( - "boolean" to 
IntegerType("boolean", 1, this, NumericType.Modifier.SIGNED), + "boolean" to BooleanType("boolean", 1, this, NumericType.Modifier.SIGNED), "char" to IntegerType("char", 8, this, NumericType.Modifier.NOT_APPLICABLE), "byte" to IntegerType("byte", 8, this, NumericType.Modifier.SIGNED), "short" to IntegerType("short", 16, this, NumericType.Modifier.SIGNED), diff --git a/cpg-core/src/testFixtures/kotlin/de/fraunhofer/aisec/cpg/test/TestUtils.kt b/cpg-core/src/testFixtures/kotlin/de/fraunhofer/aisec/cpg/test/TestUtils.kt index 8be47cdcd3..92987f60f1 100644 --- a/cpg-core/src/testFixtures/kotlin/de/fraunhofer/aisec/cpg/test/TestUtils.kt +++ b/cpg-core/src/testFixtures/kotlin/de/fraunhofer/aisec/cpg/test/TestUtils.kt @@ -319,6 +319,7 @@ fun assertLiteralValue(expected: T, expr: Expression?, message: Strin } fun ContextProvider.assertResolvedType(fqn: String, generics: List? = null): Type { - var type = ctx?.typeManager?.lookupResolvedType(fqn, generics) + var type = + ctx?.typeManager?.lookupResolvedType(fqn, generics, (this as? 
LanguageProvider)?.language) return assertNotNull(type) } diff --git a/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CLanguage.kt b/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CLanguage.kt index ad24b11905..4fb4823edd 100644 --- a/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CLanguage.kt +++ b/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CLanguage.kt @@ -41,7 +41,8 @@ open class CLanguage : HasQualifier, HasElaboratedTypeSpecifier, HasShortCircuitOperators, - HasGlobalVariables { + HasGlobalVariables, + HasGlobalFunctions { override val fileExtensions = listOf("c", "h") override val namespaceDelimiter = "::" @Transient override val frontend: KClass = CXXLanguageFrontend::class diff --git a/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXInferenceTest.kt b/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXInferenceTest.kt index b104e30113..e5e991e2f7 100644 --- a/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXInferenceTest.kt +++ b/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXInferenceTest.kt @@ -25,18 +25,25 @@ */ package de.fraunhofer.aisec.cpg.frontends.cxx +import de.fraunhofer.aisec.cpg.InferenceConfiguration import de.fraunhofer.aisec.cpg.graph.* +import de.fraunhofer.aisec.cpg.graph.declarations.MethodDeclaration +import de.fraunhofer.aisec.cpg.graph.scopes.GlobalScope +import de.fraunhofer.aisec.cpg.graph.types.BooleanType import de.fraunhofer.aisec.cpg.test.* import java.io.File import kotlin.test.Test import kotlin.test.assertContains +import kotlin.test.assertEquals +import kotlin.test.assertIs +import kotlin.test.assertIsNot import kotlin.test.assertNotNull import kotlin.test.assertTrue class CXXInferenceTest { @Test fun testGlobals() { - val file = File("src/test/resources/cxx/inference.cpp") + val file = 
File("src/test/resources/cxx/inference/inference.cpp") val tu = analyzeAndGetFirstTU(listOf(file), file.parentFile.toPath(), true) { it.registerLanguage() @@ -53,7 +60,7 @@ class CXXInferenceTest { @Test fun testInferClassInNamespace() { - val file = File("src/test/resources/cxx/inference.cpp") + val file = File("src/test/resources/cxx/inference/inference.cpp") val tu = analyzeAndGetFirstTU(listOf(file), file.parentFile.toPath(), true) { it.registerLanguage() @@ -71,12 +78,15 @@ class CXXInferenceTest { @Test fun testTrickyInference() { - val file = File("src/test/resources/cxx/tricky_inference.cpp") + val file = File("src/test/resources/cxx/inference/tricky_inference.cpp") val tu = analyzeAndGetFirstTU(listOf(file), file.parentFile.toPath(), true) { it.registerLanguage() it.loadIncludes(false) it.addIncludesToGraph(false) + it.inferenceConfiguration( + InferenceConfiguration.builder().inferReturnTypes(true).build() + ) } assertNotNull(tu) @@ -88,8 +98,105 @@ class CXXInferenceTest { assertNotNull(json) assertTrue(json.isInferred) + val begin = json.methods["begin"] + assertNotNull(begin) + assertTrue(begin.isInferred) + assertLocalName("iterator*", begin.returnTypes.singleOrNull()) + + val end = json.methods["end"] + assertNotNull(end) + assertTrue(end.isInferred) + assertLocalName("iterator*", end.returnTypes.singleOrNull()) + + val size = json.methods["size"] + assertNotNull(size) + assertTrue(size.isInferred) + assertLocalName("int", size.returnTypes.singleOrNull()) + val iterator = json.records["iterator"] assertNotNull(iterator) assertTrue(iterator.isInferred) + + val next = iterator.methods["next"] + assertNotNull(next) + assertTrue(next.isInferred) + assertLocalName("iterator*", next.returnTypes.singleOrNull()) + + val isValid = iterator.methods["isValid"] + assertNotNull(isValid) + assertTrue(isValid.isInferred) + assertIs(isValid.returnTypes.singleOrNull()) + + val log = tu.functions["log"] + assertNotNull(log) + assertIsNot(log) + assertIs(log.scope) + } 
+ + @Test + fun testSuperClass() { + val file = File("src/test/resources/cxx/inference/superclass.cpp") + val result = + analyze(listOf(file), file.parentFile.toPath(), true) { + it.registerLanguage() + it.loadIncludes(false) + it.addIncludesToGraph(false) + it.inferenceConfiguration( + InferenceConfiguration.builder().inferReturnTypes(true).build() + ) + } + assertNotNull(result) + + val a = result.records["A"] + assertNotNull(a) + assertTrue(a.isInferred) + + val n = result.namespaces["N"] + assertNotNull(n) + assertTrue(n.isInferred) + + val b = n.records["N::B"] + assertNotNull(b) + assertTrue(b.isInferred) + + val m = result.namespaces["M"] + assertNotNull(m) + assertTrue(m.isInferred) + + val c = m.namespaces["M::C"] + assertNotNull(c) + assertTrue(c.isInferred) + + val d = c.records["M::C::D"] + assertNotNull(d) + assertTrue(d.isInferred) + + val e = result.records["E"] + assertNotNull(e) + assertTrue(e.isInferred) + } + + @Test + fun testConstruct() { + val file = File("src/test/resources/cxx/inference/construct.cpp") + val result = + analyze(listOf(file), file.parentFile.toPath(), true) { + it.registerLanguage() + it.loadIncludes(false) + it.addIncludesToGraph(false) + it.inferenceConfiguration( + InferenceConfiguration.builder().inferReturnTypes(true).build() + ) + } + assertNotNull(result) + with(result) { + val pairType = assertResolvedType("Pair") + assertNotNull(pairType) + + val pair = result.functions["Pair"] + assertNotNull(pair) + assertTrue(pair.isInferred) + assertEquals(pairType, pair.returnTypes.singleOrNull()) + } } } diff --git a/cpg-language-cxx/src/test/resources/cxx/inference/construct.cpp b/cpg-language-cxx/src/test/resources/cxx/inference/construct.cpp new file mode 100644 index 0000000000..e525ac18f8 --- /dev/null +++ b/cpg-language-cxx/src/test/resources/cxx/inference/construct.cpp @@ -0,0 +1,7 @@ +// The headers are just there to make it compile with clang, but we will not parse headers. 
+// You can use `clang++ -std=c++20 construct.cpp` to check if it will compile. +#include "construct.h" + +Pair doPair() { + return Pair(1, 0); +} \ No newline at end of file diff --git a/cpg-language-cxx/src/test/resources/cxx/inference/construct.h b/cpg-language-cxx/src/test/resources/cxx/inference/construct.h new file mode 100644 index 0000000000..ffe7855bce --- /dev/null +++ b/cpg-language-cxx/src/test/resources/cxx/inference/construct.h @@ -0,0 +1,4 @@ +class Pair { +public: + Pair(int a, int b); +}; \ No newline at end of file diff --git a/cpg-language-cxx/src/test/resources/cxx/inference.cpp b/cpg-language-cxx/src/test/resources/cxx/inference/inference.cpp similarity index 100% rename from cpg-language-cxx/src/test/resources/cxx/inference.cpp rename to cpg-language-cxx/src/test/resources/cxx/inference/inference.cpp diff --git a/cpg-language-cxx/src/test/resources/cxx/inference.h b/cpg-language-cxx/src/test/resources/cxx/inference/inference.h similarity index 100% rename from cpg-language-cxx/src/test/resources/cxx/inference.h rename to cpg-language-cxx/src/test/resources/cxx/inference/inference.h diff --git a/cpg-language-cxx/src/test/resources/cxx/inference/superclass.cpp b/cpg-language-cxx/src/test/resources/cxx/inference/superclass.cpp new file mode 100644 index 0000000000..69fe3f9075 --- /dev/null +++ b/cpg-language-cxx/src/test/resources/cxx/inference/superclass.cpp @@ -0,0 +1,19 @@ +// The headers are just there to make it compile with clang, but we will not parse headers. +// You can use `clang++ -std=c++20 superclass.cpp` to check if it will compile.
+#include "superclass.h" + +class F : A { + +}; + +class G : N::B { + +}; + +namespace O { + class H : E { + class I : M::C::D { + }; + }; + +} diff --git a/cpg-language-cxx/src/test/resources/cxx/inference/superclass.h b/cpg-language-cxx/src/test/resources/cxx/inference/superclass.h new file mode 100644 index 0000000000..69d41c5910 --- /dev/null +++ b/cpg-language-cxx/src/test/resources/cxx/inference/superclass.h @@ -0,0 +1,13 @@ +class A {}; + +namespace N { + class B {}; +}; + +namespace M { + namespace C { + class D {}; + } +} + +class E {}; \ No newline at end of file diff --git a/cpg-language-cxx/src/test/resources/cxx/tricky_inference.cpp b/cpg-language-cxx/src/test/resources/cxx/inference/tricky_inference.cpp similarity index 81% rename from cpg-language-cxx/src/test/resources/cxx/tricky_inference.cpp rename to cpg-language-cxx/src/test/resources/cxx/inference/tricky_inference.cpp index 6031949d9d..045ea13179 100644 --- a/cpg-language-cxx/src/test/resources/cxx/tricky_inference.cpp +++ b/cpg-language-cxx/src/test/resources/cxx/inference/tricky_inference.cpp @@ -16,6 +16,8 @@ using json = some::json; class wrapper { public: json* get() { + log("get"); + int i(j.size()); return &j; } @@ -24,14 +26,19 @@ class wrapper { }; // For some more complexity, let's refer to a sub-class of it -void iterator(json::iterator& it) { - if (!it.hasNext()) { - return; +void loop(json* j) { + log("loop"); + + for(json::iterator* it = j->begin(); it != j->end(); it = it->next()) { + if(!it->isValid()) { + // do something + } } } // And lastly, finally call a method on it, so we can know it's // a class. 
void* get_data(json* j) { + log("get_data"); return j->data; } \ No newline at end of file diff --git a/cpg-language-cxx/src/test/resources/cxx/inference/tricky_inference.h b/cpg-language-cxx/src/test/resources/cxx/inference/tricky_inference.h new file mode 100644 index 0000000000..2885f9373c --- /dev/null +++ b/cpg-language-cxx/src/test/resources/cxx/inference/tricky_inference.h @@ -0,0 +1,31 @@ +namespace some { + class json { +public: + class iterator { +public: + bool isValid() { + return false; + } + + json::iterator* next() { + return nullptr; + } + }; + + int size() { + return 1; + } + + json::iterator* begin() { + return nullptr; + } + + json::iterator* end() { + return nullptr; + } + + void* data; + }; +} + +void log(const char* msg); \ No newline at end of file diff --git a/cpg-language-cxx/src/test/resources/cxx/tricky_inference.h b/cpg-language-cxx/src/test/resources/cxx/tricky_inference.h deleted file mode 100644 index 543a7d1b11..0000000000 --- a/cpg-language-cxx/src/test/resources/cxx/tricky_inference.h +++ /dev/null @@ -1,12 +0,0 @@ -namespace some { - class json { -public: - class iterator { -public: - bool hasNext() { - return false; - } - }; - void* data; - }; -} \ No newline at end of file From 5155fff0e85f33ab5a134f7dd6af381425447b6f Mon Sep 17 00:00:00 2001 From: Maximilian Kaul Date: Fri, 29 Nov 2024 08:15:06 +0100 Subject: [PATCH 07/10] C: add names to gotos and labels (#1871) * fix goto / label missing names * test++ * test++ --- .../cpg/graph/statements/GotoStatement.kt | 11 +++++++- .../cpg/frontends/cxx/StatementHandler.kt | 6 +++- .../frontends/cxx/CXXLanguageFrontendTest.kt | 28 +++++++++++++++++-- cpg-language-cxx/src/test/resources/c/goto.c | 5 ++++ 4 files changed, 45 insertions(+), 5 deletions(-) create mode 100644 cpg-language-cxx/src/test/resources/c/goto.c diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/GotoStatement.kt 
b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/GotoStatement.kt index 813e59742a..0d0a653146 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/GotoStatement.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/statements/GotoStatement.kt @@ -25,7 +25,8 @@ */ package de.fraunhofer.aisec.cpg.graph.statements -import java.util.Objects +import java.util.* +import org.apache.commons.lang3.builder.ToStringBuilder class GotoStatement : Statement() { var labelName: String = "" @@ -42,4 +43,12 @@ class GotoStatement : Statement() { } override fun hashCode() = Objects.hash(super.hashCode(), labelName, targetLabel) + + override fun toString(): String { + return ToStringBuilder(this, TO_STRING_STYLE) + .append("labelName", labelName) + .append("targetName", targetLabel) + .append("location", location) + .toString() + } } diff --git a/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/StatementHandler.kt b/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/StatementHandler.kt index 2dfc0f60b9..3c507437de 100644 --- a/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/StatementHandler.kt +++ b/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/StatementHandler.kt @@ -160,6 +160,7 @@ class StatementHandler(lang: CXXLanguageFrontend) : val statement = newLabelStatement(rawNode = ctx) statement.subStatement = handle(ctx.nestedStatement) statement.label = ctx.name.toString() + statement.name = newName(name = ctx.name.toString()) return statement } @@ -167,12 +168,15 @@ class StatementHandler(lang: CXXLanguageFrontend) : val statement = newGotoStatement(rawNode = ctx) val assigneeTargetLabel = BiConsumer { _: Any, to: Node -> statement.targetLabel = to as LabelStatement + to.label?.let { + statement.labelName = it + statement.name = newName(it) + } } val b: IBinding? 
try { b = ctx.name.resolveBinding() if (b is ILabel) { - b.labelStatement // If the bound AST node is/or was transformed into a CPG node the cpg node is bound // to the CPG goto statement frontend.registerObjectListener(b.labelStatement, assigneeTargetLabel) diff --git a/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXLanguageFrontendTest.kt b/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXLanguageFrontendTest.kt index 3e1b661e95..4e6243fd57 100644 --- a/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXLanguageFrontendTest.kt +++ b/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXLanguageFrontendTest.kt @@ -25,8 +25,8 @@ */ package de.fraunhofer.aisec.cpg.frontends.cxx -import de.fraunhofer.aisec.cpg.* import de.fraunhofer.aisec.cpg.InferenceConfiguration.Companion.builder +import de.fraunhofer.aisec.cpg.TranslationConfiguration import de.fraunhofer.aisec.cpg.graph.* import de.fraunhofer.aisec.cpg.graph.declarations.* import de.fraunhofer.aisec.cpg.graph.statements.* @@ -41,9 +41,8 @@ import de.fraunhofer.aisec.cpg.sarif.Region import de.fraunhofer.aisec.cpg.test.* import java.io.File import java.nio.file.Path -import java.util.* import java.util.function.Consumer -import kotlin.collections.set +import kotlin.Throws import kotlin.test.* internal class CXXLanguageFrontendTest : BaseTest() { @@ -1758,4 +1757,27 @@ internal class CXXLanguageFrontendTest : BaseTest() { assertIs(cast) assertLocalName("mytype", cast.castType) } + + @Test + fun testGoto() { + val file = File("src/test/resources/c/goto.c") + val tu = + analyzeAndGetFirstTU(listOf(file), file.parentFile.toPath(), true) { + it.registerLanguage() + } + assertNotNull(tu) + + val labelCName = "LAB_123" + + val goto = tu.allChildren().firstOrNull() + assertIs(goto) + assertEquals(labelCName, goto.labelName) + assertLocalName(labelCName, goto) + + val label = tu.labels[labelCName] + assertIs(label) + 
assertLocalName(labelCName, label) + + assertEquals(label, goto.targetLabel) + } } diff --git a/cpg-language-cxx/src/test/resources/c/goto.c b/cpg-language-cxx/src/test/resources/c/goto.c new file mode 100644 index 0000000000..61c41e0bfb --- /dev/null +++ b/cpg-language-cxx/src/test/resources/c/goto.c @@ -0,0 +1,5 @@ +void foo() { + goto LAB_123; +LAB_123: + return; +} From 35e17041a87e25e31eeac8726daaee6cc389209b Mon Sep 17 00:00:00 2001 From: Maximilian Kaul Date: Fri, 29 Nov 2024 11:32:34 +0100 Subject: [PATCH 08/10] INI file frontend (#1858) This PR implements support for INI files by providing a new `Language` and `LanguageFrontend`. --- .github/CODEOWNERS | 2 + build.gradle.kts | 6 + ...frontend-dependency-conventions.gradle.kts | 5 + configure_frontends.sh | 2 + cpg-language-ini/build.gradle.kts | 48 ++++++ .../frontend/configfiles/IniFileFrontend.kt | 163 ++++++++++++++++++ .../frontend/configfiles/IniFileLanguage.kt | 50 ++++++ .../cpg/frontend/configfiles/IniFileTest.kt | 92 ++++++++++ .../src/test/resources/config.ini | 10 ++ gradle.properties.example | 1 + gradle/libs.versions.toml | 1 + settings.gradle.kts | 5 + 12 files changed, 385 insertions(+) create mode 100644 cpg-language-ini/build.gradle.kts create mode 100644 cpg-language-ini/src/main/kotlin/de/fraunhofer/aisec/cpg/frontend/configfiles/IniFileFrontend.kt create mode 100644 cpg-language-ini/src/main/kotlin/de/fraunhofer/aisec/cpg/frontend/configfiles/IniFileLanguage.kt create mode 100644 cpg-language-ini/src/test/kotlin/de/fraunhofer/aisec/cpg/frontend/configfiles/IniFileTest.kt create mode 100644 cpg-language-ini/src/test/resources/config.ini diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 59cb7556af..5a84f06b67 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -28,3 +28,5 @@ cpg-neo4j @peckto build.gradle.kts @oxisto .github @oxisto + +cpg-language-ini @maximiliankaul diff --git a/build.gradle.kts b/build.gradle.kts index 368ea647b4..67e352fed8 100644 --- 
a/build.gradle.kts +++ b/build.gradle.kts @@ -143,3 +143,9 @@ val enableJVMFrontend: Boolean by extra { enableJVMFrontend.toBoolean() } project.logger.lifecycle("JVM frontend is ${if (enableJVMFrontend) "enabled" else "disabled"}") + +val enableINIFrontend: Boolean by extra { + val enableINIFrontend: String? by project + enableINIFrontend.toBoolean() +} +project.logger.lifecycle("INI frontend is ${if (enableINIFrontend) "enabled" else "disabled"}") diff --git a/buildSrc/src/main/kotlin/cpg.frontend-dependency-conventions.gradle.kts b/buildSrc/src/main/kotlin/cpg.frontend-dependency-conventions.gradle.kts index 10fef182f3..f482eecd48 100644 --- a/buildSrc/src/main/kotlin/cpg.frontend-dependency-conventions.gradle.kts +++ b/buildSrc/src/main/kotlin/cpg.frontend-dependency-conventions.gradle.kts @@ -12,6 +12,7 @@ val enableLLVMFrontend: Boolean by rootProject.extra val enableTypeScriptFrontend: Boolean by rootProject.extra val enableRubyFrontend: Boolean by rootProject.extra val enableJVMFrontend: Boolean by rootProject.extra +val enableINIFrontend: Boolean by rootProject.extra dependencies { if (enableJavaFrontend) { @@ -46,4 +47,8 @@ dependencies { api(project(":cpg-language-ruby")) kover(project(":cpg-language-ruby")) } + if (enableINIFrontend) { + api(project(":cpg-language-ini")) + kover(project(":cpg-language-ini")) + } } diff --git a/configure_frontends.sh b/configure_frontends.sh index 49e3233752..3fd8e946a7 100755 --- a/configure_frontends.sh +++ b/configure_frontends.sh @@ -60,3 +60,5 @@ answerRuby=$(ask "Do you want to enable the Ruby frontend? (currently $(getPrope setProperty "enableRubyFrontend" $answerRuby answerJVM=$(ask "Do you want to enable the JVM frontend? (currently $(getProperty "enableJVMFrontend"))") setProperty "enableJVMFrontend" $answerJVM +answerINI=$(ask "Do you want to enable the INI frontend? 
(currently $(getProperty "enableINIFrontend"))") +setProperty "enableINIFrontend" $answerINI diff --git a/cpg-language-ini/build.gradle.kts b/cpg-language-ini/build.gradle.kts new file mode 100644 index 0000000000..928bb3d0ec --- /dev/null +++ b/cpg-language-ini/build.gradle.kts @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2021, Fraunhofer AISEC. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * $$$$$$\ $$$$$$$\ $$$$$$\ + * $$ __$$\ $$ __$$\ $$ __$$\ + * $$ / \__|$$ | $$ |$$ / \__| + * $$ | $$$$$$$ |$$ |$$$$\ + * $$ | $$ ____/ $$ |\_$$ | + * $$ | $$\ $$ | $$ | $$ | + * \$$$$$ |$$ | \$$$$$ | + * \______/ \__| \______/ + * + */ +plugins { + id("cpg.frontend-conventions") +} + +publishing { + publications { + named("cpg-language-ini") { + pom { + artifactId = "cpg-language-ini" + name.set("Code Property Graph - INI Frontend") + description.set("An INI configuration file frontend for the CPG") + } + } + } +} + +dependencies { + // ini4j for parsing ini files + implementation(libs.ini4j) + + // to evaluate some test cases + testImplementation(project(":cpg-analysis")) +} diff --git a/cpg-language-ini/src/main/kotlin/de/fraunhofer/aisec/cpg/frontend/configfiles/IniFileFrontend.kt b/cpg-language-ini/src/main/kotlin/de/fraunhofer/aisec/cpg/frontend/configfiles/IniFileFrontend.kt new file mode 100644 index 0000000000..340f839806 --- /dev/null +++ b/cpg-language-ini/src/main/kotlin/de/fraunhofer/aisec/cpg/frontend/configfiles/IniFileFrontend.kt 
@@ -0,0 +1,163 @@ +/* + * Copyright (c) 2024, Fraunhofer AISEC. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * $$$$$$\ $$$$$$$\ $$$$$$\ + * $$ __$$\ $$ __$$\ $$ __$$\ + * $$ / \__|$$ | $$ |$$ / \__| + * $$ | $$$$$$$ |$$ |$$$$\ + * $$ | $$ ____/ $$ |\_$$ | + * $$ | $$\ $$ | $$ | $$ | + * \$$$$$ |$$ | \$$$$$ | + * \______/ \__| \______/ + * + */ +package de.fraunhofer.aisec.cpg.frontend.configfiles + +import de.fraunhofer.aisec.cpg.TranslationContext +import de.fraunhofer.aisec.cpg.frontends.Language +import de.fraunhofer.aisec.cpg.frontends.LanguageFrontend +import de.fraunhofer.aisec.cpg.frontends.TranslationException +import de.fraunhofer.aisec.cpg.graph.* +import de.fraunhofer.aisec.cpg.graph.declarations.RecordDeclaration +import de.fraunhofer.aisec.cpg.graph.declarations.TranslationUnitDeclaration +import de.fraunhofer.aisec.cpg.graph.types.Type +import de.fraunhofer.aisec.cpg.sarif.PhysicalLocation +import de.fraunhofer.aisec.cpg.sarif.Region +import java.io.File +import java.io.FileInputStream +import java.net.URI +import org.ini4j.Ini +import org.ini4j.Profile + +/** + * The INI file frontend. This frontend utilizes the [ini4j library](https://ini4j.sourceforge.net/) + * to parse the config file. 
The result consists of + * - a [TranslationUnitDeclaration] wrapping the entire result + * - a [de.fraunhofer.aisec.cpg.graph.declarations.NamespaceDeclaration] wrapping the INI file and + * thus preventing collisions with other symbols which might have the same name + * - a [RecordDeclaration] per `Section` (a section refers to a block of INI values marked with a + * line `[SectionName]`) + * - a [de.fraunhofer.aisec.cpg.graph.declarations.FieldDeclaration] per entry in a section. The + * [de.fraunhofer.aisec.cpg.graph.declarations.FieldDeclaration.name] matches the `entry`s `name` + * field and the [de.fraunhofer.aisec.cpg.graph.declarations.FieldDeclaration.initializer] is set + * to a [statements.expressions.Literal] with the corresponding `entry`s `value`. + * + * Note: + * - the "ini4j" library does not provide any super type for all nodes. Thus, the frontend accepts + * `Any` + * - [typeOf] has to be implemented, but as there are no types always returns the builtin `string` + * type + * - [codeOf] has to accept `Any` (because of the limitations stated above) and simply returns + * `.toString()` + * - [locationOf] returns the whole file as the location, as the "ini4j" library does not provide + * any means of getting a more precise location given a node + * - [setComment] is not implemented as this is not used (no + * [de.fraunhofer.aisec.cpg.frontends.Handler] pattern implemented) + * - Comments in general are not supported.
+ */ +class IniFileFrontend(language: Language, ctx: TranslationContext) : + LanguageFrontend(language, ctx) { + + private lateinit var uri: URI + private lateinit var region: Region + + override fun parse(file: File): TranslationUnitDeclaration { + uri = file.toURI() + region = Region() + + val ini = Ini() + try { + FileInputStream(file).use { ini.load(it) } // close the stream even if loading fails + } catch (ex: Exception) { + throw TranslationException("Parsing failed with exception: $ex") + } + + /* + * build a namespace name relative to the configured + * [de.fraunhofer.aisec.cpg.TranslationConfiguration.topLevel] using + * [Language.namespaceDelimiter] as a separator + */ + val topLevel = config.topLevel?.let { file.relativeToOrNull(it) } ?: file + val parentDir = topLevel.parent + + val namespace = + if (parentDir != null) { + val pathSegments = parentDir.toString().split(File.separator) + (pathSegments + file.nameWithoutExtension).joinToString(language.namespaceDelimiter) + } else { + file.nameWithoutExtension + } + + val tud = newTranslationUnitDeclaration(name = file.name, rawNode = ini) + scopeManager.resetToGlobal(tud) + val nsd = newNamespaceDeclaration(name = namespace, rawNode = ini) + scopeManager.addDeclaration(nsd) + scopeManager.enterScope(nsd) + + ini.values.forEach { handleSection(it) } + + scopeManager.leaveScope(nsd) // balance the enterScope(nsd) above + return tud + } + + /** + * Translates a `Section` into a [RecordDeclaration] and handles all `entries` using + * [handleEntry]. + */ + private fun handleSection(section: Profile.Section) { + val record = newRecordDeclaration(name = section.name, kind = "section", rawNode = section) + scopeManager.addDeclaration(record) + scopeManager.enterScope(record) + section.entries.forEach { handleEntry(it) } + scopeManager.leaveScope(record) + } + + /** + * Translates an `MutableEntry` to a new + * [de.fraunhofer.aisec.cpg.graph.declarations.FieldDeclaration] with the + * [de.fraunhofer.aisec.cpg.graph.declarations.FieldDeclaration.initializer] being set to the + * `entry`s value.
+ */ + private fun handleEntry(entry: MutableMap.MutableEntry) { + val field = + newFieldDeclaration(name = entry.key, type = primitiveType("string"), rawNode = entry) + .apply { initializer = newLiteral(value = entry.value, rawNode = entry) } + scopeManager.addDeclaration(field) + } + + override fun typeOf(type: Any?): Type { + return primitiveType("string") + } + + override fun codeOf(astNode: Any): String? { + return astNode.toString() + } + + /** + * Return the entire file as the location of any node. The parsing library in use does not + * provide more fine granular access to a node's location. + */ + override fun locationOf(astNode: Any): PhysicalLocation? { + return PhysicalLocation( + uri, + region + ) // currently, the line number / column cannot be accessed given an Ini object -> we only + // provide a precise uri + } + + override fun setComment(node: Node, astNode: Any) { + return // not used as this function does not implement [Handler] + } +} diff --git a/cpg-language-ini/src/main/kotlin/de/fraunhofer/aisec/cpg/frontend/configfiles/IniFileLanguage.kt b/cpg-language-ini/src/main/kotlin/de/fraunhofer/aisec/cpg/frontend/configfiles/IniFileLanguage.kt new file mode 100644 index 0000000000..3d55cb93df --- /dev/null +++ b/cpg-language-ini/src/main/kotlin/de/fraunhofer/aisec/cpg/frontend/configfiles/IniFileLanguage.kt @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2024, Fraunhofer AISEC. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *
+ * $$$$$$\ $$$$$$$\ $$$$$$\
+ * $$ __$$\ $$ __$$\ $$ __$$\
+ * $$ / \__|$$ | $$ |$$ / \__|
+ * $$ | $$$$$$$ |$$ |$$$$\
+ * $$ | $$ ____/ $$ |\_$$ |
+ * $$ | $$\ $$ | $$ | $$ |
+ * \$$$$$ |$$ | \$$$$$ |
+ * \______/ \__| \______/
+ *
+ */
+package de.fraunhofer.aisec.cpg.frontend.configfiles
+
+import de.fraunhofer.aisec.cpg.frontends.Language
+import de.fraunhofer.aisec.cpg.graph.types.StringType
+import de.fraunhofer.aisec.cpg.graph.types.Type
+import kotlin.reflect.KClass
+
+/**
+ * A simple language representing classical [INI files](https://en.wikipedia.org/wiki/INI_file). As
+ * there are conflicting definitions of an INI file, we go with:
+ * - the file extension is `.ini` or `.conf`
+ * - all entries live in a unique `section`
+ * - all `key`s are unique per section
+ * - the file is accepted by the [ini4j library](https://ini4j.sourceforge.net/)
+ */
+class IniFileLanguage : Language<IniFileFrontend>() {
+    override val fileExtensions = listOf("ini", "conf")
+    override val namespaceDelimiter: String = "." // joins path segments into the namespace name
+
+    @Transient override val frontend: KClass<out IniFileFrontend> = IniFileFrontend::class
+    override val builtInTypes: Map<String, Type> =
+        mapOf("string" to StringType("string", language = this)) // everything is a string
+
+    override val compoundAssignmentOperators: Set<String> = emptySet() // no such thing
+}
diff --git a/cpg-language-ini/src/test/kotlin/de/fraunhofer/aisec/cpg/frontend/configfiles/IniFileTest.kt b/cpg-language-ini/src/test/kotlin/de/fraunhofer/aisec/cpg/frontend/configfiles/IniFileTest.kt
new file mode 100644
index 0000000000..b0dee09da7
--- /dev/null
+++ b/cpg-language-ini/src/test/kotlin/de/fraunhofer/aisec/cpg/frontend/configfiles/IniFileTest.kt
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2024, Fraunhofer AISEC. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * $$$$$$\ $$$$$$$\ $$$$$$\
+ * $$ __$$\ $$ __$$\ $$ __$$\
+ * $$ / \__|$$ | $$ |$$ / \__|
+ * $$ | $$$$$$$ |$$ |$$$$\
+ * $$ | $$ ____/ $$ |\_$$ |
+ * $$ | $$\ $$ | $$ | $$ |
+ * \$$$$$ |$$ | \$$$$$ |
+ * \______/ \__| \______/
+ *
+ */
+package de.fraunhofer.aisec.cpg.frontend.configfiles
+
+import de.fraunhofer.aisec.cpg.graph.declarations.FieldDeclaration
+import de.fraunhofer.aisec.cpg.graph.declarations.RecordDeclaration
+import de.fraunhofer.aisec.cpg.graph.declarations.TranslationUnitDeclaration
+import de.fraunhofer.aisec.cpg.graph.get
+import de.fraunhofer.aisec.cpg.graph.records
+import de.fraunhofer.aisec.cpg.test.BaseTest
+import de.fraunhofer.aisec.cpg.test.analyzeAndGetFirstTU
+import de.fraunhofer.aisec.cpg.test.assertFullName
+import de.fraunhofer.aisec.cpg.test.assertLiteralValue
+import java.nio.file.Path
+import kotlin.test.Test
+import kotlin.test.assertEquals
+import kotlin.test.assertIs
+import kotlin.test.assertNotNull
+
+class IniFileTest : BaseTest() {
+
+    @Test
+    fun testSimpleINIFile() {
+        val topLevel = Path.of("src", "test", "resources")
+        val tu =
+            analyzeAndGetFirstTU(listOf(topLevel.resolve("config.ini").toFile()), topLevel, true) {
+                it.registerLanguage<IniFileLanguage>()
+            }
+        assertIs<TranslationUnitDeclaration>(tu)
+
+        val namespace = tu.namespaces.firstOrNull()
+        assertNotNull(namespace)
+        assertFullName(
+            "config",
+            namespace,
+            "Namespace name mismatch."
+        ) // analyzeAndGetFirstTU does not provide the full path
+
+        assertEquals(2, tu.records.size, "Expected two records")
+
+        val sectionA = tu.records["SectionA"]
+        assertIs<RecordDeclaration>(sectionA)
+        assertEquals(2, sectionA.fields.size, "Expected two fields")
+
+        val sectionAEntry1 = sectionA.fields["key1"]
+        assertIs<FieldDeclaration>(sectionAEntry1)
+        assertLiteralValue("value1", sectionAEntry1.initializer)
+
+        val sectionAEntry2 = sectionA.fields["key2"]
+        assertIs<FieldDeclaration>(sectionAEntry2)
+        assertLiteralValue("value2", sectionAEntry2.initializer)
+
+        val sectionB = tu.records["SectionB"]
+        assertIs<RecordDeclaration>(sectionB)
+        assertEquals(3, sectionB.fields.size, "Expected three fields")
+
+        val sectionBEntry1 = sectionB.fields["key1"]
+        assertIs<FieldDeclaration>(sectionBEntry1)
+        assertLiteralValue("123", sectionBEntry1.initializer)
+
+        val sectionBEntry2 = sectionB.fields["key2"]
+        assertIs<FieldDeclaration>(sectionBEntry2)
+        assertLiteralValue("1.2.3.4", sectionBEntry2.initializer)
+
+        val sectionBEntry3 = sectionB.fields["key3"]
+        assertIs<FieldDeclaration>(sectionBEntry3)
+        assertLiteralValue("\"abc\"", sectionBEntry3.initializer)
+    }
+}
diff --git a/cpg-language-ini/src/test/resources/config.ini b/cpg-language-ini/src/test/resources/config.ini
new file mode 100644
index 0000000000..4e2a323b5b
--- /dev/null
+++ b/cpg-language-ini/src/test/resources/config.ini
@@ -0,0 +1,10 @@
+; An example INI file
+
+[SectionA]
+key1 = value1
+key2 = value2
+
+[SectionB]
+key1 = 123
+key2 = 1.2.3.4
+key3 = "abc"
\ No newline at end of file
diff --git a/gradle.properties.example b/gradle.properties.example
index f956f34730..34749a17ce 100644
--- a/gradle.properties.example
+++ b/gradle.properties.example
@@ -9,3 +9,4 @@ enableLLVMFrontend=true
 enableTypeScriptFrontend=true
 enableRubyFrontend=true
 enableJVMFrontend=true
+enableINIFrontend=true
diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml
index 4a8a2e5a44..bb1dda77cc 100644
--- a/gradle/libs.versions.toml
+++ b/gradle/libs.versions.toml
@@ -42,6 +42,7 @@ llvm = { module = "org.bytedeco:llvm-platform", version =
"16.0.4-1.5.9"} jruby = { module = "org.jruby:jruby-core", version = "9.4.3.0" } jline = { module = "org.jline:jline", version = "3.27.0" } antlr-runtime = { module = "org.antlr:antlr4-runtime", version = "4.8-1" } # we cannot upgrade until ki-shell upgrades this! +ini4j = { module = "org.ini4j:ini4j", version = "0.5.4" } # test junit-params = { module = "org.junit.jupiter:junit-jupiter-params", version = "5.11.0"} diff --git a/settings.gradle.kts b/settings.gradle.kts index afe927cdee..78d929ce31 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -41,6 +41,10 @@ val enableJVMFrontend: Boolean by extra { val enableJVMFrontend: String? by settings enableJVMFrontend.toBoolean() } +val enableINIFrontend: Boolean by extra { + val enableINIFrontend: String? by settings + enableINIFrontend.toBoolean() +} if (enableJavaFrontend) include(":cpg-language-java") if (enableCXXFrontend) include(":cpg-language-cxx") @@ -50,3 +54,4 @@ if (enablePythonFrontend) include(":cpg-language-python") if (enableTypeScriptFrontend) include(":cpg-language-typescript") if (enableRubyFrontend) include(":cpg-language-ruby") if (enableJVMFrontend) include(":cpg-language-jvm") +if (enableINIFrontend) include(":cpg-language-ini") From 71fc658070d743156029ef49b2da3419bb9ca31b Mon Sep 17 00:00:00 2001 From: Maximilian Kaul Date: Fri, 29 Nov 2024 13:37:49 +0100 Subject: [PATCH 09/10] add the INI frontend to README.md (#1873) --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 126102c450..3a9ef5911a 100644 --- a/README.md +++ b/README.md @@ -134,6 +134,7 @@ The current state of languages is: | C++ | cpg-language-cxx | [main](https://github.com/Fraunhofer-AISEC/cpg) | `maintained` | | Python | cpg-language-python | [main](https://github.com/Fraunhofer-AISEC/cpg) | `maintained` | | Go | cpg-language-go | [main](https://github.com/Fraunhofer-AISEC/cpg) | `maintained` | +| INI | cpg-language-ini | [main](https://github.com/Fraunhofer-AISEC/cpg) | 
`maintained` | | JVM (Bytecode) | cpg-language-jvm | [main](https://github.com/Fraunhofer-AISEC/cpg) | `incubating` | | LLVM | cpg-language-llvm | [main](https://github.com/Fraunhofer-AISEC/cpg) | `incubating` | | TypeScript/JavaScript | cpg-language-typescript | [main](https://github.com/Fraunhofer-AISEC/cpg) | `experimental` | From 897955d3062a70de3d121a456782df66271b1f19 Mon Sep 17 00:00:00 2001 From: Leutrim Shala <83644358+lshala@users.noreply.github.com> Date: Fri, 29 Nov 2024 21:16:28 +0100 Subject: [PATCH 10/10] Add exclusion filter for directories (#1828) * Add exclusion filter for directories * Revert changes from other branch * Refactor filter * Add separate props for exclusion by regex and strings * Overload functions * Changed method parameter types to vararg * Added tests --------- Co-authored-by: Christian Banse Co-authored-by: Christian Banse --- .../aisec/cpg/TranslationConfiguration.kt | 36 ++++- .../aisec/cpg/TranslationManager.kt | 10 ++ .../de/fraunhofer/aisec/cpg/ExclusionTest.kt | 149 ++++++++++++++++++ cpg-core/src/test/resources/exclusion/a.file | 0 .../test/resources/exclusion/tests/test.file | 0 .../aisec/cpg/frontends/TestLanguage.kt | 4 - .../de/fraunhofer/aisec/cpg/test/TestUtils.kt | 5 +- 7 files changed, 196 insertions(+), 8 deletions(-) create mode 100644 cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/ExclusionTest.kt create mode 100644 cpg-core/src/test/resources/exclusion/a.file create mode 100644 cpg-core/src/test/resources/exclusion/tests/test.file diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationConfiguration.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationConfiguration.kt index 73d7a20d22..3c4b1906f3 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationConfiguration.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationConfiguration.kt @@ -117,6 +117,10 @@ private constructor( matchCommentsToNodes: Boolean, addIncludesToGraph: Boolean, 
passConfigurations: Map>, PassConfiguration>,
+    /** Plain strings; a walked file is excluded if its absolute path contains one of them. */
+    val exclusionPatternsByString: List<String>,
+    /** Regular expressions; a walked file is excluded if one matches in its absolute path. */
+    val exclusionPatternsByRegex: List<Regex>
 ) {
     /** This list contains all languages which we want to translate. */
     val languages: List>
@@ -257,6 +261,8 @@ private constructor(
         private var useDefaultPasses = false
         private var passConfigurations: MutableMap>, PassConfiguration> =
             mutableMapOf()
+        private val exclusionPatternsByRegex = mutableListOf<Regex>()
+        private val exclusionPatternsByString = mutableListOf<String>()
 
         fun symbols(symbols: Map): Builder {
             this.symbols = symbols
@@ -453,6 +459,32 @@ private constructor(
             return this.configurePass(T::class, config)
         }
 
+        /**
+         * Adds regex exclusion patterns: when source directories are walked, a file is skipped if
+         * a pattern matches anywhere in its absolute path.
+         * @param patterns Exclusion patterns. Example:
+         * ```
+         * exclusionPatterns(Regex(".*test(s)?"))
+         * ```
+         */
+        fun exclusionPatterns(vararg patterns: Regex): Builder {
+            exclusionPatternsByRegex.addAll(patterns)
+            return this
+        }
+
+        /**
+         * Adds string exclusion patterns: when source directories are walked, a file is skipped if
+         * its absolute path contains one of the given strings.
+         * @param patterns Exclusion patterns. Example:
+         * ```
+         * exclusionPatterns("tests")
+         * ```
+         */
+        fun exclusionPatterns(vararg patterns: String): Builder {
+            exclusionPatternsByString.addAll(patterns)
+            return this
+        }
+
         /**
          * Loads and registers an additional [Language] based on a fully qualified class name (FQN).
*/ @@ -647,7 +679,9 @@ private constructor( compilationDatabase, matchCommentsToNodes, addIncludesToGraph, - passConfigurations + passConfigurations, + exclusionPatternsByString, + exclusionPatternsByRegex ) } diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationManager.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationManager.kt index 199f03fe91..ef1e37b3f5 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationManager.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TranslationManager.kt @@ -156,6 +156,16 @@ private constructor( .walkTopDown() .onEnter { !it.name.startsWith(".") } .filter { it.isFile && !it.name.startsWith(".") } + .filter { + ctx.config.exclusionPatternsByString.none { pattern -> + it.absolutePath.contains(pattern) + } + } + .filter { + ctx.config.exclusionPatternsByRegex.none { pattern -> + pattern.containsMatchIn(it.absolutePath) + } + } .toList() files } else { diff --git a/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/ExclusionTest.kt b/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/ExclusionTest.kt new file mode 100644 index 0000000000..c94d0f9e40 --- /dev/null +++ b/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/ExclusionTest.kt @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2024, Fraunhofer AISEC. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *
+ * $$$$$$\ $$$$$$$\ $$$$$$\
+ * $$ __$$\ $$ __$$\ $$ __$$\
+ * $$ / \__|$$ | $$ |$$ / \__|
+ * $$ | $$$$$$$ |$$ |$$$$\
+ * $$ | $$ ____/ $$ |\_$$ |
+ * $$ | $$\ $$ | $$ | $$ |
+ * \$$$$$ |$$ | \$$$$$ |
+ * \______/ \__| \______/
+ *
+ */
+package de.fraunhofer.aisec.cpg
+
+import de.fraunhofer.aisec.cpg.frontends.Language
+import de.fraunhofer.aisec.cpg.frontends.TestLanguage
+import de.fraunhofer.aisec.cpg.frontends.TestLanguageFrontend
+import de.fraunhofer.aisec.cpg.graph.Node
+import de.fraunhofer.aisec.cpg.graph.declarations.TranslationUnitDeclaration
+import de.fraunhofer.aisec.cpg.graph.newTranslationUnitDeclaration
+import de.fraunhofer.aisec.cpg.graph.types.Type
+import de.fraunhofer.aisec.cpg.graph.unknownType
+import de.fraunhofer.aisec.cpg.sarif.PhysicalLocation
+import java.io.File
+import kotlin.reflect.KClass
+import kotlin.test.Test
+import kotlin.test.assertEquals
+import kotlin.test.assertNotNull
+
+class TestFileLanguage : TestLanguage() {
+    override val fileExtensions: List
+        get() = listOf("file")
+
+    override val frontend: KClass
+        get() = TestFileLanguageFrontend::class
+}
+
+/** Test frontend that parses nothing and returns an empty [TranslationUnitDeclaration]. */
+class TestFileLanguageFrontend(
+    language: Language = TestFileLanguage(),
+    ctx: TranslationContext =
+        TranslationContext(
+            TranslationConfiguration.builder().build(),
+            ScopeManager(),
+            TypeManager()
+        ),
+) : TestLanguageFrontend("::", language, ctx) {
+    override fun parse(file: File): TranslationUnitDeclaration {
+        return newTranslationUnitDeclaration(file.name)
+    }
+
+    override fun typeOf(type: Any): Type {
+        return unknownType()
+    }
+
+    override fun codeOf(astNode: Any): String? {
+        return null
+    }
+
+    override fun locationOf(astNode: Any): PhysicalLocation? {
+        return null
+    }
+
+    override fun setComment(node: Node, astNode: Any) {}
+}
+
+class ExclusionTest {
+    @Test
+    fun testExclusionPatternStringDirectory() {
+        val topLevel = File("src/test/resources/exclusion")
+        val result =
+            TranslationManager.builder()
+                .config(
+                    TranslationConfiguration.builder()
+                        .topLevel(topLevel)
+                        .sourceLocations(topLevel)
+                        .defaultPasses()
+                        .exclusionPatterns("tests")
+                        .registerLanguage()
+                        .build()
+                )
+                .build()
+                .analyze()
+                .get()
+
+        val tus = result.translationUnits
+        assertNotNull(tus)
+        assertEquals(1, tus.size) // tests/test.file is excluded, only a.file remains
+    }
+
+    @Test
+    fun testExclusionPatternStringFile() {
+        val topLevel = File("src/test/resources/exclusion")
+        val result =
+            TranslationManager.builder()
+                .config(
+                    TranslationConfiguration.builder()
+                        .topLevel(topLevel)
+                        .sourceLocations(topLevel)
+                        .defaultPasses()
+                        .exclusionPatterns("test.file")
+                        .registerLanguage()
+                        .build()
+                )
+                .build()
+                .analyze()
+                .get()
+
+        val tus = result.translationUnits
+        assertNotNull(tus)
+        assertEquals(1, tus.size) // tests/test.file is excluded, only a.file remains
+    }
+
+    @Test
+    fun testExclusionPatternRegex() {
+        val topLevel = File("src/test/resources/exclusion")
+        val result =
+            TranslationManager.builder()
+                .config(
+                    TranslationConfiguration.builder()
+                        .topLevel(topLevel)
+                        .sourceLocations(topLevel)
+                        .defaultPasses()
+                        .exclusionPatterns("""(.*)est.file""".toRegex())
+                        .registerLanguage()
+                        .build()
+                )
+                .build()
+                .analyze()
+                .get()
+
+        val tus = result.translationUnits
+        assertNotNull(tus)
+        assertEquals(1, tus.size) // tests/test.file is excluded, only a.file remains
+    }
+}
diff --git a/cpg-core/src/test/resources/exclusion/a.file b/cpg-core/src/test/resources/exclusion/a.file
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/cpg-core/src/test/resources/exclusion/tests/test.file b/cpg-core/src/test/resources/exclusion/tests/test.file
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/cpg-core/src/testFixtures/kotlin/de/fraunhofer/aisec/cpg/frontends/TestLanguage.kt
b/cpg-core/src/testFixtures/kotlin/de/fraunhofer/aisec/cpg/frontends/TestLanguage.kt index 7c94a3431a..9c0036b1b2 100644 --- a/cpg-core/src/testFixtures/kotlin/de/fraunhofer/aisec/cpg/frontends/TestLanguage.kt +++ b/cpg-core/src/testFixtures/kotlin/de/fraunhofer/aisec/cpg/frontends/TestLanguage.kt @@ -60,10 +60,6 @@ open class TestLanguage(final override var namespaceDelimiter: String = "::") : "double" to FloatingPointType("double", 64, this, NumericType.Modifier.SIGNED), "string" to StringType("string", this), ) - - override fun newFrontend(ctx: TranslationContext): TestLanguageFrontend { - return TestLanguageFrontend(language = this, ctx = ctx) - } } class StructTestLanguage(namespaceDelimiter: String = "::") : diff --git a/cpg-core/src/testFixtures/kotlin/de/fraunhofer/aisec/cpg/test/TestUtils.kt b/cpg-core/src/testFixtures/kotlin/de/fraunhofer/aisec/cpg/test/TestUtils.kt index 92987f60f1..04410153f9 100644 --- a/cpg-core/src/testFixtures/kotlin/de/fraunhofer/aisec/cpg/test/TestUtils.kt +++ b/cpg-core/src/testFixtures/kotlin/de/fraunhofer/aisec/cpg/test/TestUtils.kt @@ -113,9 +113,8 @@ fun analyze( ): TranslationResult { val files = Files.walk(topLevel, Int.MAX_VALUE) - .map(Path::toFile) - .filter { it.isFile } - .filter { it.name.endsWith(fileExtension!!) } + .map { it.toFile() } + .filter { it.isFile && (fileExtension == null || it.name.endsWith(fileExtension)) } .sorted() .collect(Collectors.toList()) return analyze(files, topLevel, usePasses, configModifier)