Skip to content

Commit

Permalink
[pysrc2cpg] Rework Import Resolution (#3812)
Browse files Browse the repository at this point in the history
* [pysrc2cpg] Rework Import Resolution

The issue with the import handling is that paths that resolve to internal entities, and those to external dependencies underwent the same heuristic-driven path resolution.

The problem here is that internal entities have simple guarantees that allow easier and faster traversals with little to no heuristics required other than handling Python 2/3 differences. The rest can then be bundled accurately as unresolved.

### Main changes
* Moved changes for Python's import resolver `codeRoot` to the base class
* Separated handling of internal entities with external entities:
  - Internal entities that are importable, are given associated Pythonic import paths in the `moduleCache` map.
  - Any import paths that don't get a hit in this map then undergo some heuristic-based path building to make sensible looking types
* Split the `ResolvedImport` classes into `UnresolvedImport` and `ResolvedImport` with `EvaluatedImport` as the high-level trait. This separates imports found and those that have not been, but have undergone some heuristic processing.

### Misc
Renamed frontends' `ImportPass` to be prepended with the language name for easier navigation.

### Follow-up
Python models entities imported as `import x.y` as a field access, `FieldAccess(x).fieldIdentifier(y)`. However, `x` may not have an associated type declaration for the module since it is simply a directory holding various modules. This means that `y` may not always be resolved as it's interpreted as a member instead of a standalone module.

* Removed redundant code

* Escape backslash on windows

* Fixed compilation issue in Ruby

* Ignore function type refs
DavidBakerEffendi authored Nov 11, 2023

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
1 parent 8e1481f commit 951b0f3
Showing 15 changed files with 340 additions and 225 deletions.
Original file line number Diff line number Diff line change
@@ -28,7 +28,7 @@ case class PythonSrcCpgGenerator(config: FrontendConfig, rootPath: Path) extends

override def applyPostProcessingPasses(cpg: Cpg): Cpg = {
new ImportsPass(cpg).createAndApply()
new ImportResolverPass(cpg).createAndApply()
new PythonImportResolverPass(cpg).createAndApply()
new DynamicTypeHintFullNamePass(cpg).createAndApply()
new PythonInheritanceNamePass(cpg).createAndApply()
val typeRecoveryConfig = pyConfig match
Original file line number Diff line number Diff line change
@@ -63,7 +63,7 @@ object JsSrc2Cpg {
List(
new JavaScriptInheritanceNamePass(cpg),
new ConstClosurePass(cpg),
new ImportResolverPass(cpg),
new JavaScriptImportResolverPass(cpg),
new JavaScriptTypeRecoveryPass(cpg, typeRecoveryConfig),
new JavaScriptTypeHintCallLinker(cpg),
new NaiveCallLinker(cpg)
Original file line number Diff line number Diff line change
@@ -11,7 +11,7 @@ import java.io.File as JFile
import java.util.regex.{Matcher, Pattern}
import scala.util.{Failure, Success, Try}

class ImportResolverPass(cpg: Cpg) extends XImportResolverPass(cpg) {
class JavaScriptImportResolverPass(cpg: Cpg) extends XImportResolverPass(cpg) {

private val pathPattern = Pattern.compile("[\"']([\\w/.]+)[\"']")

@@ -27,7 +27,7 @@ class ImportResolverPass(cpg: Cpg) extends XImportResolverPass(cpg) {
val alias = importedAs
val matcher = pathPattern.matcher(rawEntity)
val sep = Matcher.quoteReplacement(JFile.separator)
val root = s"$codeRoot${JFile.separator}"
val root = s"$codeRootDir${JFile.separator}"
val currentFile = s"$root$fileName"
// We want to know if the import is local since if an external name is used to match internal methods we may have
// false paths.
@@ -114,12 +114,12 @@ class ImportResolverPass(cpg: Cpg) extends XImportResolverPass(cpg) {
// Exported closure with a method ref within the AST of the RHS
y.ast.isMethodRef.map(mRef => ResolvedMethod(mRef.methodFullName, alias, Option("this"))).toSet
case _ =>
Set.empty[ResolvedImport]
Set.empty[EvaluatedImport]
}
}.toSet
} else {
Set(UnknownMethod(entity, alias, Option("this")), UnknownTypeDecl(entity))
}).foreach(x => resolvedImportToTag(x, importCall, diffGraph))
}).foreach(x => evaluatedImportToTag(x, importCall, diffGraph))
}

}
Original file line number Diff line number Diff line change
@@ -57,7 +57,7 @@ class TypeRecoveryPassTests extends DataFlowCodeToCpgSuite {

"resolve correct imports via tag nodes" in {
val List(a: UnknownMethod, b: UnknownTypeDecl, x: UnknownMethod, y: UnknownTypeDecl) =
cpg.call.where(_.referencedImports).tag.toResolvedImport.toList: @unchecked
cpg.call.where(_.referencedImports).tag.toEvaluatedImport.toList: @unchecked
a.fullName shouldBe "slack_sdk:WebClient"
b.fullName shouldBe "slack_sdk:WebClient"
x.fullName shouldBe "sendgrid:SendGridAPIClient"
@@ -141,7 +141,7 @@ class TypeRecoveryPassTests extends DataFlowCodeToCpgSuite {

"resolve correct imports via tag nodes" in {
val List(a: ResolvedMember, b: ResolvedMember, c: ResolvedMember, d: UnknownMethod, e: UnknownTypeDecl) =
cpg.call.where(_.referencedImports).tag.toResolvedImport.toList: @unchecked
cpg.call.where(_.referencedImports).tag.toEvaluatedImport.toList: @unchecked
a.basePath shouldBe "Foo.ts::program"
a.memberName shouldBe "x"
b.basePath shouldBe "Foo.ts::program"
@@ -229,7 +229,7 @@ class TypeRecoveryPassTests extends DataFlowCodeToCpgSuite {
)

"resolve correct imports via tag nodes" in {
val List(x: ResolvedMethod) = cpg.call.where(_.referencedImports).tag.toResolvedImport.toList: @unchecked
val List(x: ResolvedMethod) = cpg.call.where(_.referencedImports).tag.toEvaluatedImport.toList: @unchecked
x.fullName shouldBe "util.js::program:getIncrementalInteger"
}

@@ -258,7 +258,7 @@ class TypeRecoveryPassTests extends DataFlowCodeToCpgSuite {

"resolve correct imports via tag nodes" in {
val List(x: UnknownMethod, y: UnknownTypeDecl) =
cpg.call.where(_.referencedImports).tag.toResolvedImport.toList: @unchecked
cpg.call.where(_.referencedImports).tag.toEvaluatedImport.toList: @unchecked
x.fullName shouldBe "googleapis"
y.fullName shouldBe "googleapis"
}
@@ -280,7 +280,7 @@ class TypeRecoveryPassTests extends DataFlowCodeToCpgSuite {

"resolve correct imports via tag nodes" in {
val List(x: UnknownMethod, y: UnknownTypeDecl, z: UnknownMethod) =
cpg.call.where(_.referencedImports).tag.toResolvedImport.toList: @unchecked
cpg.call.where(_.referencedImports).tag.toEvaluatedImport.toList: @unchecked
x.fullName shouldBe "googleapis"
y.fullName shouldBe "googleapis"
z.fullName shouldBe "googleapis"
@@ -381,7 +381,7 @@ class TypeRecoveryPassTests extends DataFlowCodeToCpgSuite {

"resolve correct imports via tag nodes" in {
val List(a: ResolvedTypeDecl, b: ResolvedMethod, c: ResolvedMethod, d: UnknownMethod, e: UnknownTypeDecl) =
cpg.call.where(_.referencedImports).tag.toResolvedImport.toList: @unchecked
cpg.call.where(_.referencedImports).tag.toEvaluatedImport.toList: @unchecked
a.fullName shouldBe "foo.js::program"
b.fullName shouldBe "foo.js::program:literalFunction"
c.fullName shouldBe "foo.js::program:get"

This file was deleted.

Loading

0 comments on commit 951b0f3

Please sign in to comment.