diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/InferenceConfiguration.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/InferenceConfiguration.kt index 3f005411b0..08c9633064 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/InferenceConfiguration.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/InferenceConfiguration.kt @@ -49,6 +49,12 @@ private constructor( /** Enables the inference of variables, such as global variables. */ val inferVariables: Boolean, + /** + * A very EXPERIMENTAL feature. If this is enabled, we will try to infer return types of + * functions based on the context of the call it originated out of. This is disabled by default. + */ + val inferReturnTypes: Boolean, + /** * Uses heuristics to add DFG edges for call expressions to unresolved functions (i.e., * functions not implemented in the given source code). @@ -61,6 +67,7 @@ private constructor( private var inferRecords: Boolean = true, private var inferFunctions: Boolean = true, private var inferVariables: Boolean = true, + private var inferReturnTypes: Boolean = false, private var inferDfgForUnresolvedCalls: Boolean = true ) { fun enabled(infer: Boolean) = apply { this.enabled = infer } @@ -73,6 +80,8 @@ private constructor( fun inferVariables(infer: Boolean) = apply { this.inferVariables = infer } + fun inferReturnTypes(infer: Boolean) = apply { this.inferReturnTypes = infer } + fun inferDfgForUnresolvedCalls(infer: Boolean) = apply { this.inferDfgForUnresolvedCalls = infer } @@ -84,6 +93,7 @@ private constructor( inferRecords, inferFunctions, inferVariables, + inferReturnTypes, inferDfgForUnresolvedCalls ) } diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/ScopeManager.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/ScopeManager.kt index ced0e7f63d..266ac9604f 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/ScopeManager.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/ScopeManager.kt @@ -111,9 +111,6 @@ class 
ScopeManager : ScopeProvider { val currentRecord: RecordDeclaration? get() = this.firstScopeIsInstanceOrNull()?.astNode as? RecordDeclaration - val currentTypedefs: Collection - get() = this.getCurrentTypedefs(currentScope) - val currentNamespace: Name? get() { val namedScope = this.firstScopeIsInstanceOrNull() @@ -237,7 +234,7 @@ class ScopeManager : ScopeProvider { is Block -> BlockScope(nodeToScope) is WhileStatement, is DoStatement, - is AssertStatement -> LoopScope(nodeToScope as Statement) + is AssertStatement -> LoopScope(nodeToScope) is ForStatement, is ForEachStatement -> LoopScope(nodeToScope as Statement) is SwitchStatement -> SwitchScope(nodeToScope) @@ -508,29 +505,6 @@ class ScopeManager : ScopeProvider { scope?.addTypedef(typedef) } - private fun getCurrentTypedefs(searchScope: Scope?): Collection { - val typedefs = mutableMapOf() - - val path = mutableListOf() - var current = searchScope - - // We need to build a path from the current scope to the top most one - while (current != null) { - if (current is ValueDeclarationScope) { - path += current - } - current = current.parent - } - - // And then follow the path in reverse. This ensures us that a local definition - // overwrites / shadows one that was there on a higher scope. - for (scope in path.reversed()) { - typedefs.putAll(scope.typedefs) - } - - return typedefs.values - } - /** * Resolves only references to Values in the current scope, static references to other visible * records are not resolved over the ScopeManager. 
diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/LanguageTraits.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/LanguageTraits.kt index dee23cc697..b701827023 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/LanguageTraits.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/LanguageTraits.kt @@ -204,6 +204,14 @@ interface HasAnonymousIdentifier : LanguageTrait { */ interface HasGlobalVariables : LanguageTrait +/** + * A language trait, that specifies that this language has global functions directly in the + * [GlobalScope], i.e., not within a namespace, but directly contained in a + * [TranslationUnitDeclaration]. For example, C++ has global functions, Java and Go do not (as every + * function is either in a class or a namespace). + */ +interface HasGlobalFunctions : LanguageTrait + /** * A common super-class for all language traits that arise because they are an ambiguity of a * function call, e.g., function-style casts. 
This means that we cannot differentiate between a diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/inference/Inference.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/inference/Inference.kt index ce3e2e3f21..2073b108af 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/inference/Inference.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/inference/Inference.kt @@ -25,7 +25,6 @@ */ package de.fraunhofer.aisec.cpg.passes.inference -import de.fraunhofer.aisec.cpg.InferenceConfiguration import de.fraunhofer.aisec.cpg.ScopeManager import de.fraunhofer.aisec.cpg.TranslationContext import de.fraunhofer.aisec.cpg.TypeManager @@ -34,11 +33,14 @@ import de.fraunhofer.aisec.cpg.frontends.Language import de.fraunhofer.aisec.cpg.graph.* import de.fraunhofer.aisec.cpg.graph.declarations.* import de.fraunhofer.aisec.cpg.graph.scopes.Scope +import de.fraunhofer.aisec.cpg.graph.statements.ReturnStatement +import de.fraunhofer.aisec.cpg.graph.statements.expressions.BinaryOperator import de.fraunhofer.aisec.cpg.graph.statements.expressions.CallExpression +import de.fraunhofer.aisec.cpg.graph.statements.expressions.ConstructExpression import de.fraunhofer.aisec.cpg.graph.statements.expressions.Expression -import de.fraunhofer.aisec.cpg.graph.statements.expressions.MemberExpression import de.fraunhofer.aisec.cpg.graph.statements.expressions.Reference import de.fraunhofer.aisec.cpg.graph.statements.expressions.TypeExpression +import de.fraunhofer.aisec.cpg.graph.statements.expressions.UnaryOperator import de.fraunhofer.aisec.cpg.graph.types.* import de.fraunhofer.aisec.cpg.helpers.Util.debugWithFileLocation import de.fraunhofer.aisec.cpg.helpers.Util.errorWithFileLocation @@ -80,7 +82,7 @@ class Inference internal constructor(val start: Node, override val ctx: Translat code: String?, isStatic: Boolean, signature: List, - returnType: Type?, + incomingReturnType: Type?, hint: CallExpression? = null ): FunctionDeclaration? 
{ if (!ctx.config.inferenceConfiguration.inferFunctions) { @@ -108,15 +110,6 @@ class Inference internal constructor(val start: Node, override val ctx: Translat } inferred.code = code - debugWithFileLocation( - hint, - log, - "Inferred a new {} declaration {} with parameter types {} in $it", - if (inferred is MethodDeclaration) "method" else "function", - inferred.name, - signature.map { it?.name } - ) - // Create parameter declarations and receiver (only for methods). if (inferred is MethodDeclaration) { createInferredReceiver(inferred, record) @@ -124,9 +117,36 @@ class Inference internal constructor(val start: Node, override val ctx: Translat createInferredParameters(inferred, signature) // Set the type and return type(s) - returnType?.let { inferred.returnTypes = listOf(it) } + var returnType = + if ( + ctx.config.inferenceConfiguration.inferReturnTypes && + incomingReturnType is UnknownType && + hint != null + ) { + inferReturnType(hint) ?: unknownType() + } else { + incomingReturnType + } + + if (returnType is TupleType) { + inferred.returnTypes = returnType.types + } else if (returnType != null) { + inferred.returnTypes = listOf(returnType) + } + inferred.type = FunctionType.computeType(inferred) + debugWithFileLocation( + hint, + log, + "Inferred a new {} declaration {} with parameter types {} and return types {} in {}", + if (inferred is MethodDeclaration) "method" else "function", + inferred.name, + signature.map { it?.name }, + inferred.returnTypes.map { it.name }, + it + ) + // Add it to the scope scopeManager.addDeclaration(inferred) @@ -528,6 +548,73 @@ class Inference internal constructor(val start: Node, override val ctx: Translat this.scopeManager = ctx.scopeManager this.typeManager = ctx.typeManager } + + /** + * This function tries to infer a return type for an inferred [FunctionDeclaration] based the + * original [CallExpression] (as the [hint]) parameter that was used to infer the function. 
+ */ + fun inferReturnType(hint: CallExpression): Type? { + // Try to find out, if the supplied hint is part of an assignment. If yes, we can use their + // type as the return type of the function + var targetType = + ctx.currentComponent.assignments.singleOrNull { it.value == hint }?.target?.type + if (targetType != null && targetType !is UnknownType) { + return targetType + } + + // Look for an "argument holder". These can be different kind of nodes + val holder = + ctx.currentComponent.allChildren { it.hasArgument(hint) }.singleOrNull() + when (holder) { + is UnaryOperator -> { + // If it's a boolean operator, the return type is probably a boolean + if (holder.operatorCode == "!") { + return hint.language?.builtInTypes?.values?.firstOrNull { it is BooleanType } + } + // If it's a numeric operator, return the largest numeric type that we have; we + // prefer integers to floats + if (holder.operatorCode in listOf("+", "-", "++", "--")) { + val numericTypes = + hint.language + ?.builtInTypes + ?.values + ?.filterIsInstance() + ?.sortedWith( + compareBy { it.bitWidth } + .then { a, b -> preferIntegerType(a, b) } + ) + + return numericTypes?.lastOrNull() + } + } + is ConstructExpression -> { + return holder.type + } + is BinaryOperator -> { + // If it is on the right side, it's probably the same as on the left-side (and + // vice versa) + if (hint == holder.rhs) { + return holder.lhs.type + } else if (hint == holder.lhs) { + return holder.rhs.type + } + } + is ReturnStatement -> { + // If this is part of a return statement, we can take the return type + val func = + hint.firstParentOrNull { it is FunctionDeclaration } as? FunctionDeclaration + val returnTypes = func?.returnTypes + + return if (returnTypes != null && returnTypes.size > 1) { + TupleType(returnTypes) + } else { + returnTypes?.singleOrNull() + } + } + } + + return null + } } /** Provides information about the inference status of a node. 
*/ @@ -605,3 +692,12 @@ fun RecordDeclaration.inferMethod( call ) as? MethodDeclaration } + +/** A small helper comparator function that prefers [IntegerType] when comparing two [NumericType] types: it returns 0 if both or neither of them are an [IntegerType], 1 if only [a] is one and -1 if only [b] is one, so that swapping the arguments always flips the sign. */ +fun preferIntegerType(a: NumericType, b: NumericType): Int { + return when { + (a is IntegerType) == (b is IntegerType) -> 0 + a is IntegerType -> 1 + else -> -1 + } +} diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/inference/PassHelper.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/inference/PassHelper.kt index da7184e8f5..f88a8fc3dc 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/inference/PassHelper.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/inference/PassHelper.kt @@ -27,18 +27,24 @@ package de.fraunhofer.aisec.cpg.passes.inference import de.fraunhofer.aisec.cpg.CallResolutionResult import de.fraunhofer.aisec.cpg.InferenceConfiguration +import de.fraunhofer.aisec.cpg.ancestors +import de.fraunhofer.aisec.cpg.frontends.HasGlobalFunctions import de.fraunhofer.aisec.cpg.frontends.HasGlobalVariables import de.fraunhofer.aisec.cpg.frontends.HasImplicitReceiver import de.fraunhofer.aisec.cpg.frontends.HasStructs import de.fraunhofer.aisec.cpg.frontends.Language import de.fraunhofer.aisec.cpg.graph.Name import de.fraunhofer.aisec.cpg.graph.Node +import de.fraunhofer.aisec.cpg.graph.calls import de.fraunhofer.aisec.cpg.graph.declarations.* +import de.fraunhofer.aisec.cpg.graph.invoke +import de.fraunhofer.aisec.cpg.graph.methods import de.fraunhofer.aisec.cpg.graph.newFieldDeclaration import de.fraunhofer.aisec.cpg.graph.scopes.GlobalScope import de.fraunhofer.aisec.cpg.graph.scopes.NameScope import de.fraunhofer.aisec.cpg.graph.scopes.RecordScope import de.fraunhofer.aisec.cpg.graph.statements.expressions.CallExpression +import de.fraunhofer.aisec.cpg.graph.statements.expressions.MemberCallExpression import de.fraunhofer.aisec.cpg.graph.statements.expressions.MemberExpression import
de.fraunhofer.aisec.cpg.graph.statements.expressions.Reference import de.fraunhofer.aisec.cpg.graph.types.ObjectType @@ -56,10 +62,25 @@ import kotlin.collections.forEach * Tries to infer a [NamespaceDeclaration] from a [Name]. This will return `null`, if inference was * not possible, or if it was turned off in the [InferenceConfiguration]. */ -internal fun Pass<*>.tryNamespaceInference(name: Name, locationHint: Node?): NamespaceDeclaration? { - return scopeManager.globalScope - ?.astNode - ?.startInference(this.ctx) +fun Pass<*>.tryNamespaceInference(name: Name, locationHint: Node?): NamespaceDeclaration? { + // Determine the scope where we want to start our inference + var (scope, _) = scopeManager.extractScope(name, location = locationHint?.location) + + if (scope !is NameScope) { + scope = null + } + + var holder = scope?.astNode + + // If we could not find a scope, but we have an FQN, we can try to infer a namespace (or a + // parent record) + var parentName = name.parent + if (scope == null && parentName != null) { + holder = tryScopeInference(parentName, locationHint) + } + + return (holder ?: scopeManager.globalScope?.astNode) + ?.startInference(ctx) ?.inferNamespaceDeclaration(name, null, locationHint) } @@ -69,7 +90,7 @@ internal fun Pass<*>.tryNamespaceInference(name: Name, locationHint: Node?): Nam */ internal fun Pass<*>.tryRecordInference( type: Type, - locationHint: Node? = null, + locationHint: Node? = null ): RecordDeclaration? { val kind = if (type.language is HasStructs) { @@ -78,10 +99,20 @@ internal fun Pass<*>.tryRecordInference( "class" } // Determine the scope where we want to start our inference - var (scope, _) = scopeManager.extractScope(type) + var (scope, _) = scopeManager.extractScope(type, scope = type.scope) if (scope !is NameScope) { scope = null + } else if (scope is RecordScope) { + // We are asked to infer a record inside another record. 
While this is not unusual + // per-se, it is far more likely that the "correct" way to place our record is in a + // parent namespace or even the global scope. This is especially true if we did NOT + // infer the parent record, because in this case we can somewhat assume that the + // parent's records declaration (e.g. in a C++ header file) is somewhat complete. + if (scope.astNode?.isInferred == false) { + // It is therefore a better choice to infer it in the parent namespace instead + scope = scopeManager.firstScopeOrNull(scope) { it is NameScope && it !is RecordScope } + } } var holder = scope?.astNode @@ -90,21 +121,12 @@ internal fun Pass<*>.tryRecordInference( // parent record) var parentName = type.name.parent if (scope == null && parentName != null) { - // At this point, we need to check whether we have any type reference to our parent - // name. If we have (e.g. it is used in a function parameter, variable, etc.), then we - // have a high chance that this is actually a parent record and not a namespace - var parentType = typeManager.lookupResolvedType(parentName) - holder = - if (parentType != null) { - tryRecordInference(parentType, locationHint = locationHint) - } else { - tryNamespaceInference(parentName, locationHint = locationHint) - } + holder = tryScopeInference(parentName, locationHint) } val record = - (holder ?: this.scopeManager.globalScope?.astNode) - ?.startInference(this.ctx) + (holder ?: scopeManager.globalScope?.astNode) + ?.startInference(ctx) ?.inferRecordDeclaration(type, kind, locationHint) // Update the type's record. Because types are only unique per scope, we potentially need to @@ -294,23 +316,63 @@ internal fun Pass<*>.tryFunctionInference( } /** - * Tries to infer a [MethodDeclaration] from a [CallExpression]. This will return an empty list, if - * inference was not possible, or if it was turned off in the [InferenceConfiguration]. 
+ * Creates an inferred [FunctionDeclaration] for each suitable [Type] (which points to a + * [RecordDeclaration]). * - * Since potentially multiple suitable bases exist for the inference of methods (specified in - * [possibleContainingTypes]), we infer a method for all of them and return a list. + * There is a big challenge in this inference: We can not be 100 % sure, whether we really need to + * infer a [MethodDeclaration] inside the [RecordDeclaration] or if this is a call to a global + * function (if [call] is a simple [CallExpression] and not a [MemberCallExpression]). The reason + * behind that is that most languages allow to omit `this` when calling methods in the current + * class. So a call to `foo()` inside record `Bar` could either be a call to a global function `foo` + * or a call to `Bar::foo`. * - * Should we encounter that none of our types in [possibleContainingTypes] have a resolved - * declaration, we are inferring one (using [bestGuess]). This should normally not happen as missing - * type declarations are already inferred in the [TypeResolver]. However, there is a special - * corner-case involving types in [Language.builtInTypes] (see [tryFieldInference] for more - * details), + * We need to decide whether we want to infer a global function or not; the heuristic is based on a + * multitude of factors such as: + * - Whether the language even allows for [HasGlobalFunctions]. + * - Whether we have multiple calls to the same function `func()` from multiple locations, everytime + * without an explicit receiver. */ internal fun Pass<*>.tryMethodInference( call: CallExpression, possibleContainingTypes: Set, bestGuess: Type?, ): List { + // We need to decide whether we want to infer a global function or not. We do this with a + // simple heuristic. This will of course not be 100 % error-free, but this is the burden of + // inference. 
+ // 1a) If the language does not even support functions at a global level, it's easy + // 1b) If this is a member call expression, it's also easy + val inferGlobalFunction = + if (call.language !is HasGlobalFunctions || call is MemberCallExpression) { + false + } else if (bestGuess is ObjectType && methodExists(bestGuess, call.name.localName)) { + // 2) We do a quick check, whether we would have a method with our name in the "best + // guess" class. Because if we do, we most likely ended up here because of an + // argument type mismatch. Once we use the new call resolution also for member + // calls, we have this information more easily available + false + } else { + // 3) Lastly, if we are still undecided, we do a quick check on the current + // component, + // if we have multiple calls to the same function from across different locations. + // This is a bit more expensive, so we leave this as a last resort. + // If we encounter "others", there is a high chance this is a global function. Of + // course, we could run into a scenario where we have multiple calls to `init()` in + // several classes and in all occasions the `this` was left out; but this seems + // unlikely + val others = + ctx.currentComponent.calls { + it != call && it.name == call.name && it !is MemberCallExpression + } + others.isNotEmpty() + } + + if (inferGlobalFunction) { + val currentTU = + scopeManager.currentScope?.globalScope?.astNode as? TranslationUnitDeclaration + return listOfNotNull(currentTU?.inferFunction(call, ctx = ctx)) + } + var records = possibleContainingTypes.mapNotNull { val root = it.root as? ObjectType @@ -328,5 +390,44 @@ internal fun Pass<*>.tryMethodInference( } records = records.distinct() - return records.mapNotNull { record -> record.inferMethod(call, ctx = this.ctx) } + return records.mapNotNull { record -> record.inferMethod(call, ctx = ctx) } +} + +/** + * This function tries to infer a "scope" that should exist under [scopeName], but does not.
+ * + * A common use-case for this is the creation of nested namespaces, e.g., when inferring classes + * such as `java.lang.System`. At first, we check whether the scope `java` exists, if not, this + * function makes sure that a [NamespaceDeclaration] `java` will be created. Afterward, the same + * check will be repeated for `java.lang`, until we are finally ready to infer the + * [RecordDeclaration] `java.lang.System`. + */ +internal fun Pass<*>.tryScopeInference(scopeName: Name, locationHint: Node?): Declaration? { + // At this point, we need to check whether we have any type reference to our scope + // name. If we have (e.g. it is used in a function parameter, variable, etc.), then we + // have a high chance that this is actually a parent record and not a namespace + var parentType = typeManager.lookupResolvedType(scopeName) + return if (parentType != null) { + tryRecordInference(parentType, locationHint = locationHint) + } else { + tryNamespaceInference(scopeName, locationHint = locationHint) + } +} + +/** + * This function is a necessary evil until we completely switch over member call resolution to the + * new call resolver. We need a way to find out if a method with a given name (independently of + * their arguments) exists in [type] or in one of their [Type.superTypes]. Because in the new call + * resolver we will get a [CallResolutionResult], which contains all candidate and not just the + * matching ones. + * + * This function should solely be used in [tryMethodInference]. 
+ */ +private fun methodExists( + type: ObjectType, + name: String, +): Boolean { + var types = type.ancestors.map { it.type } + var methods = types.map { it.recordDeclaration }.flatMap { it.methods } + return methods.any { it.name.localName == name } } diff --git a/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/GraphExamples.kt b/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/GraphExamples.kt index 4ccb5af4f0..74b1cbf3c6 100644 --- a/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/GraphExamples.kt +++ b/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/GraphExamples.kt @@ -339,6 +339,106 @@ class GraphExamples { } } + fun getInferenceBinaryOperatorReturnType( + config: TranslationConfiguration = + TranslationConfiguration.builder() + .defaultPasses() + .registerLanguage(StructTestLanguage(".")) + .inferenceConfiguration( + InferenceConfiguration.builder() + .inferRecords(true) + .inferReturnTypes(true) + .build() + ) + .build() + ) = + testFrontend(config).build { + translationResult { + translationUnit("test.python") { + function("foo", t("int")) { + body { + declare { variable("a") } + declare { variable("b") } + ref("a") assign { call("bar") + literal(2, t("int")) } + ref("b") assign { literal(2L, t("long")) + call("baz") } + } + } + } + } + } + + fun getInferenceTupleReturnType( + config: TranslationConfiguration = + TranslationConfiguration.builder() + .defaultPasses() + .registerLanguage(StructTestLanguage(".")) + .inferenceConfiguration( + InferenceConfiguration.builder() + .inferRecords(true) + .inferReturnTypes(true) + .build() + ) + .build() + ) = + testFrontend(config).build { + translationResult { + translationUnit("test.python") { + function("foo", returnTypes = listOf(t("Foo"), t("Bar"))) { + body { returnStmt { call("bar") } } + } + } + } + } + + fun getInferenceUnaryOperatorReturnType( + config: TranslationConfiguration = + TranslationConfiguration.builder() + .defaultPasses() + .registerLanguage(StructTestLanguage(".")) + 
.inferenceConfiguration( + InferenceConfiguration.builder() + .inferRecords(true) + .inferReturnTypes(true) + .build() + ) + .build() + ) = + testFrontend(config).build { + translationResult { + translationUnit("Test.java") { + record("Test") { method("foo") { body { returnStmt { -call("bar") } } } } + } + } + } + + fun getInferenceNestedNamespace( + config: TranslationConfiguration = + TranslationConfiguration.builder() + .defaultPasses() + .registerLanguage(StructTestLanguage(".")) + .inferenceConfiguration( + InferenceConfiguration.builder() + .inferRecords(true) + .inferNamespaces(true) + .build() + ) + .build() + ) = + testFrontend(config).build { + translationResult { + translationUnit("Test.java") { + record("Test") { + method("foo") { + body { + declare { variable("node", t("java.lang.String")) } + returnStmt { isImplicit = true } + } + } + } + } + } + } + fun getVariables( config: TranslationConfiguration = TranslationConfiguration.builder() diff --git a/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/enhancements/InferenceTest.kt b/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/enhancements/InferenceTest.kt index 9454a0a1ab..43f867a105 100644 --- a/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/enhancements/InferenceTest.kt +++ b/cpg-core/src/test/kotlin/de/fraunhofer/aisec/cpg/enhancements/InferenceTest.kt @@ -95,4 +95,88 @@ class InferenceTest { assertNotNull(nextField) assertLocalName("T*", nextField.type) } + + @Test + fun testUnaryOperatorReturnType() { + val tu = + GraphExamples.getInferenceUnaryOperatorReturnType() + .components + .firstOrNull() + ?.translationUnits + ?.firstOrNull() + assertNotNull(tu) + with(tu) { + val longType = assertResolvedType("long") + + val bar = tu.functions["bar"] + assertNotNull(bar) + + assertEquals(longType, bar.returnTypes.singleOrNull()) + } + } + + @Test + fun testTupleTypeReturnType() { + val tu = + GraphExamples.getInferenceTupleReturnType() + .components + .firstOrNull() + ?.translationUnits + 
?.firstOrNull() + assertNotNull(tu) + with(tu) { + val fooType = assertResolvedType("Foo") + val barType = assertResolvedType("Bar") + + val bar = tu.functions["bar"] + assertNotNull(bar) + + assertEquals(listOf(fooType, barType), bar.returnTypes) + } + } + + @Test + fun testBinaryOperatorReturnType() { + val tu = + GraphExamples.getInferenceBinaryOperatorReturnType() + .components + .firstOrNull() + ?.translationUnits + ?.firstOrNull() + assertNotNull(tu) + with(tu) { + val intType = assertResolvedType("int") + val longType = assertResolvedType("long") + + val bar = tu.functions["bar"] + assertNotNull(bar) + assertEquals(intType, bar.returnTypes.singleOrNull()) + + val baz = tu.functions["baz"] + assertNotNull(baz) + assertEquals(longType, baz.returnTypes.singleOrNull()) + } + } + + @Test + fun testNestedNamespace() { + val result = GraphExamples.getInferenceNestedNamespace() + with(result) { + val java = result.namespaces["java"] + assertNotNull(java) + assertLocalName("java", java) + + val javaLang = result.namespaces["java.lang"] + assertNotNull(javaLang) + assertLocalName("lang", javaLang) + // should exist in the scope of "java" + assertEquals(java, javaLang.scope?.astNode) + + val javaLangString = result.records["java.lang.String"] + assertNotNull(javaLangString) + assertLocalName("String", javaLangString) + // should exist in the scope of "java.lang" + assertEquals(javaLang, javaLangString.scope?.astNode) + } + } } diff --git a/cpg-core/src/testFixtures/kotlin/de/fraunhofer/aisec/cpg/frontends/TestLanguage.kt b/cpg-core/src/testFixtures/kotlin/de/fraunhofer/aisec/cpg/frontends/TestLanguage.kt index 859e58adec..7c94a3431a 100644 --- a/cpg-core/src/testFixtures/kotlin/de/fraunhofer/aisec/cpg/frontends/TestLanguage.kt +++ b/cpg-core/src/testFixtures/kotlin/de/fraunhofer/aisec/cpg/frontends/TestLanguage.kt @@ -50,7 +50,7 @@ open class TestLanguage(final override var namespaceDelimiter: String = "::") : override val builtInTypes: Map = mapOf( - "boolean" to 
IntegerType("boolean", 1, this, NumericType.Modifier.SIGNED), + "boolean" to BooleanType("boolean", 1, this, NumericType.Modifier.SIGNED), "char" to IntegerType("char", 8, this, NumericType.Modifier.NOT_APPLICABLE), "byte" to IntegerType("byte", 8, this, NumericType.Modifier.SIGNED), "short" to IntegerType("short", 16, this, NumericType.Modifier.SIGNED), diff --git a/cpg-core/src/testFixtures/kotlin/de/fraunhofer/aisec/cpg/test/TestUtils.kt b/cpg-core/src/testFixtures/kotlin/de/fraunhofer/aisec/cpg/test/TestUtils.kt index 8be47cdcd3..92987f60f1 100644 --- a/cpg-core/src/testFixtures/kotlin/de/fraunhofer/aisec/cpg/test/TestUtils.kt +++ b/cpg-core/src/testFixtures/kotlin/de/fraunhofer/aisec/cpg/test/TestUtils.kt @@ -319,6 +319,7 @@ fun assertLiteralValue(expected: T, expr: Expression?, message: Strin } fun ContextProvider.assertResolvedType(fqn: String, generics: List? = null): Type { - var type = ctx?.typeManager?.lookupResolvedType(fqn, generics) + var type = + ctx?.typeManager?.lookupResolvedType(fqn, generics, (this as? 
LanguageProvider)?.language) return assertNotNull(type) } diff --git a/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CLanguage.kt b/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CLanguage.kt index ad24b11905..4fb4823edd 100644 --- a/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CLanguage.kt +++ b/cpg-language-cxx/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CLanguage.kt @@ -41,7 +41,8 @@ open class CLanguage : HasQualifier, HasElaboratedTypeSpecifier, HasShortCircuitOperators, - HasGlobalVariables { + HasGlobalVariables, + HasGlobalFunctions { override val fileExtensions = listOf("c", "h") override val namespaceDelimiter = "::" @Transient override val frontend: KClass = CXXLanguageFrontend::class diff --git a/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXInferenceTest.kt b/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXInferenceTest.kt index b104e30113..e5e991e2f7 100644 --- a/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXInferenceTest.kt +++ b/cpg-language-cxx/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/cxx/CXXInferenceTest.kt @@ -25,18 +25,25 @@ */ package de.fraunhofer.aisec.cpg.frontends.cxx +import de.fraunhofer.aisec.cpg.InferenceConfiguration import de.fraunhofer.aisec.cpg.graph.* +import de.fraunhofer.aisec.cpg.graph.declarations.MethodDeclaration +import de.fraunhofer.aisec.cpg.graph.scopes.GlobalScope +import de.fraunhofer.aisec.cpg.graph.types.BooleanType import de.fraunhofer.aisec.cpg.test.* import java.io.File import kotlin.test.Test import kotlin.test.assertContains +import kotlin.test.assertEquals +import kotlin.test.assertIs +import kotlin.test.assertIsNot import kotlin.test.assertNotNull import kotlin.test.assertTrue class CXXInferenceTest { @Test fun testGlobals() { - val file = File("src/test/resources/cxx/inference.cpp") + val file = 
File("src/test/resources/cxx/inference/inference.cpp") val tu = analyzeAndGetFirstTU(listOf(file), file.parentFile.toPath(), true) { it.registerLanguage() @@ -53,7 +60,7 @@ class CXXInferenceTest { @Test fun testInferClassInNamespace() { - val file = File("src/test/resources/cxx/inference.cpp") + val file = File("src/test/resources/cxx/inference/inference.cpp") val tu = analyzeAndGetFirstTU(listOf(file), file.parentFile.toPath(), true) { it.registerLanguage() @@ -71,12 +78,15 @@ class CXXInferenceTest { @Test fun testTrickyInference() { - val file = File("src/test/resources/cxx/tricky_inference.cpp") + val file = File("src/test/resources/cxx/inference/tricky_inference.cpp") val tu = analyzeAndGetFirstTU(listOf(file), file.parentFile.toPath(), true) { it.registerLanguage() it.loadIncludes(false) it.addIncludesToGraph(false) + it.inferenceConfiguration( + InferenceConfiguration.builder().inferReturnTypes(true).build() + ) } assertNotNull(tu) @@ -88,8 +98,105 @@ class CXXInferenceTest { assertNotNull(json) assertTrue(json.isInferred) + val begin = json.methods["begin"] + assertNotNull(begin) + assertTrue(begin.isInferred) + assertLocalName("iterator*", begin.returnTypes.singleOrNull()) + + val end = json.methods["end"] + assertNotNull(end) + assertTrue(end.isInferred) + assertLocalName("iterator*", end.returnTypes.singleOrNull()) + + val size = json.methods["size"] + assertNotNull(size) + assertTrue(size.isInferred) + assertLocalName("int", size.returnTypes.singleOrNull()) + val iterator = json.records["iterator"] assertNotNull(iterator) assertTrue(iterator.isInferred) + + val next = iterator.methods["next"] + assertNotNull(next) + assertTrue(next.isInferred) + assertLocalName("iterator*", next.returnTypes.singleOrNull()) + + val isValid = iterator.methods["isValid"] + assertNotNull(isValid) + assertTrue(isValid.isInferred) + assertIs(isValid.returnTypes.singleOrNull()) + + val log = tu.functions["log"] + assertNotNull(log) + assertIsNot(log) + assertIs(log.scope) + } 
+ + @Test + fun testSuperClass() { + val file = File("src/test/resources/cxx/inference/superclass.cpp") + val result = + analyze(listOf(file), file.parentFile.toPath(), true) { + it.registerLanguage() + it.loadIncludes(false) + it.addIncludesToGraph(false) + it.inferenceConfiguration( + InferenceConfiguration.builder().inferReturnTypes(true).build() + ) + } + assertNotNull(result) + + val a = result.records["A"] + assertNotNull(a) + assertTrue(a.isInferred) + + val n = result.namespaces["N"] + assertNotNull(n) + assertTrue(n.isInferred) + + val b = n.records["N::B"] + assertNotNull(b) + assertTrue(b.isInferred) + + val m = result.namespaces["M"] + assertNotNull(m) + assertTrue(m.isInferred) + + val c = m.namespaces["M::C"] + assertNotNull(c) + assertTrue(c.isInferred) + + val d = c.records["M::C::D"] + assertNotNull(d) + assertTrue(d.isInferred) + + val e = result.records["E"] + assertNotNull(e) + assertTrue(e.isInferred) + } + + @Test + fun testConstruct() { + val file = File("src/test/resources/cxx/inference/construct.cpp") + val result = + analyze(listOf(file), file.parentFile.toPath(), true) { + it.registerLanguage() + it.loadIncludes(false) + it.addIncludesToGraph(false) + it.inferenceConfiguration( + InferenceConfiguration.builder().inferReturnTypes(true).build() + ) + } + assertNotNull(result) + with(result) { + val pairType = assertResolvedType("Pair") + assertNotNull(pairType) + + val pair = result.functions["Pair"] + assertNotNull(pair) + assertTrue(pair.isInferred) + assertEquals(pairType, pair.returnTypes.singleOrNull()) + } } } diff --git a/cpg-language-cxx/src/test/resources/cxx/inference/construct.cpp b/cpg-language-cxx/src/test/resources/cxx/inference/construct.cpp new file mode 100644 index 0000000000..e525ac18f8 --- /dev/null +++ b/cpg-language-cxx/src/test/resources/cxx/inference/construct.cpp @@ -0,0 +1,7 @@ +// The headers are just there to make it compile with clang, but we will not parse headers. 
+// You can use `clang++ -std=c++20 construct.cpp` to check if it will compile. +#include "construct.h" + +Pair doPair() { + return Pair(1, 0); +} \ No newline at end of file diff --git a/cpg-language-cxx/src/test/resources/cxx/inference/construct.h b/cpg-language-cxx/src/test/resources/cxx/inference/construct.h new file mode 100644 index 0000000000..ffe7855bce --- /dev/null +++ b/cpg-language-cxx/src/test/resources/cxx/inference/construct.h @@ -0,0 +1,4 @@ +class Pair { +public: + Pair(int a, int b); +}; \ No newline at end of file diff --git a/cpg-language-cxx/src/test/resources/cxx/inference.cpp b/cpg-language-cxx/src/test/resources/cxx/inference/inference.cpp similarity index 100% rename from cpg-language-cxx/src/test/resources/cxx/inference.cpp rename to cpg-language-cxx/src/test/resources/cxx/inference/inference.cpp diff --git a/cpg-language-cxx/src/test/resources/cxx/inference.h b/cpg-language-cxx/src/test/resources/cxx/inference/inference.h similarity index 100% rename from cpg-language-cxx/src/test/resources/cxx/inference.h rename to cpg-language-cxx/src/test/resources/cxx/inference/inference.h diff --git a/cpg-language-cxx/src/test/resources/cxx/inference/superclass.cpp b/cpg-language-cxx/src/test/resources/cxx/inference/superclass.cpp new file mode 100644 index 0000000000..69fe3f9075 --- /dev/null +++ b/cpg-language-cxx/src/test/resources/cxx/inference/superclass.cpp @@ -0,0 +1,19 @@ +// The headers are just there to make it compile with clang, but we will not parse headers. +// You can use `clang++ -std=c++20 superclass.cpp` to check if it will compile. 
+#include "superclass.h" + +class F : A { + +}; + +class G : N::B { + +}; + +namespace O { + class H : E { + class I : M::C::D { + }; + }; + +} diff --git a/cpg-language-cxx/src/test/resources/cxx/inference/superclass.h b/cpg-language-cxx/src/test/resources/cxx/inference/superclass.h new file mode 100644 index 0000000000..69d41c5910 --- /dev/null +++ b/cpg-language-cxx/src/test/resources/cxx/inference/superclass.h @@ -0,0 +1,13 @@ +class A {}; + +namespace N { + class B {}; +}; + +namespace M { + namespace C { + class D {}; + } +} + +class E {}; \ No newline at end of file diff --git a/cpg-language-cxx/src/test/resources/cxx/tricky_inference.cpp b/cpg-language-cxx/src/test/resources/cxx/inference/tricky_inference.cpp similarity index 81% rename from cpg-language-cxx/src/test/resources/cxx/tricky_inference.cpp rename to cpg-language-cxx/src/test/resources/cxx/inference/tricky_inference.cpp index 6031949d9d..045ea13179 100644 --- a/cpg-language-cxx/src/test/resources/cxx/tricky_inference.cpp +++ b/cpg-language-cxx/src/test/resources/cxx/inference/tricky_inference.cpp @@ -16,6 +16,8 @@ using json = some::json; class wrapper { public: json* get() { + log("get"); + int i(j.size()); return &j; } @@ -24,14 +26,19 @@ class wrapper { }; // For some more complexity, let's refer to a sub-class of it -void iterator(json::iterator& it) { - if (!it.hasNext()) { - return; +void loop(json* j) { + log("loop"); + + for(json::iterator* it = j->begin(); it != j->end(); it = it->next()) { + if(!it->isValid()) { + // do something + } } } // And lastly, finally call a method on it, so we can know it's // a class. 
void* get_data(json* j) { + log("get_data"); return j->data; } \ No newline at end of file diff --git a/cpg-language-cxx/src/test/resources/cxx/inference/tricky_inference.h b/cpg-language-cxx/src/test/resources/cxx/inference/tricky_inference.h new file mode 100644 index 0000000000..2885f9373c --- /dev/null +++ b/cpg-language-cxx/src/test/resources/cxx/inference/tricky_inference.h @@ -0,0 +1,31 @@ +namespace some { + class json { +public: + class iterator { +public: + bool isValid() { + return false; + } + + json::iterator* next() { + return nullptr; + } + }; + + int size() { + return 1; + } + + json::iterator* begin() { + return nullptr; + } + + json::iterator* end() { + return nullptr; + } + + void* data; + }; +} + +void log(const char* msg); \ No newline at end of file diff --git a/cpg-language-cxx/src/test/resources/cxx/tricky_inference.h b/cpg-language-cxx/src/test/resources/cxx/tricky_inference.h deleted file mode 100644 index 543a7d1b11..0000000000 --- a/cpg-language-cxx/src/test/resources/cxx/tricky_inference.h +++ /dev/null @@ -1,12 +0,0 @@ -namespace some { - class json { -public: - class iterator { -public: - bool hasNext() { - return false; - } - }; - void* data; - }; -} \ No newline at end of file