From 33fc4be738bbcdab7b2dcdb2b567b05d64c890bc Mon Sep 17 00:00:00 2001 From: Keith Zantow Date: Thu, 19 Sep 2024 11:37:13 -0400 Subject: [PATCH 1/2] wip: move maven resolver to internal/maven Signed-off-by: Keith Zantow --- internal/task/executor.go | 2 +- syft/pkg/cataloger/java/archive_parser.go | 104 +++++-- .../pkg/cataloger/java/archive_parser_test.go | 21 +- syft/pkg/cataloger/java/config.go | 32 ++- .../maven/config.go} | 40 ++- .../maven/config_test.go} | 4 +- .../java/internal/maven/pom_parser.go | 67 +++++ .../java/internal/maven/pom_parser_test.go | 93 +++++++ .../maven/resolver.go} | 258 +++++++++++------- .../maven/resolver_test.go} | 167 ++++-------- .../declared-iso-8859-encoded-pom.xml.base64 | 0 .../.m2/settings.xml | 0 .../test-fixtures}/local/child-1/pom.xml | 0 .../test-fixtures}/local/child-2/pom.xml | 0 .../test-fixtures}/local/child-3/pom.xml | 0 .../local/contains-child-1/pom.xml | 0 .../test-fixtures}/local/parent-1/pom.xml | 0 .../test-fixtures}/local/parent-2/pom.xml | 0 .../test-fixtures}/local/parent-3/pom.xml | 0 .../org/child-one/1.3.6/child-one-1.3.6.pom | 0 .../org/child-two/2.1.90/child-two-2.1.90.pom | 0 .../parent-one/3.11.0/parent-one-3.11.0.pom | 0 .../parent-two/13.7.8/parent-two-13.7.8.pom | 0 .../parent/7.11.2/parent-7.11.2.pom | 0 .../commons-parent/54/commons-parent-54.pom | 0 .../junit/junit-bom/5.9.0/junit-bom-5.9.0.pom | 0 .../junit/junit-bom/5.9.1/junit-bom-5.9.1.pom | 0 .../3.4.6/opensaml-parent-3.4.6.pom | 0 ...undeclared-iso-8859-encoded-pom.xml.base64 | 0 .../java/internal/maven/test/mock_repo.go | 65 +++++ syft/pkg/cataloger/java/parse_pom_xml.go | 162 +++++------ syft/pkg/cataloger/java/parse_pom_xml_test.go | 133 ++------- .../{local => }/commons-text-1.10.0/pom.xml | 0 .../example-java-app-maven/pom.xml | 0 34 files changed, 693 insertions(+), 455 deletions(-) rename syft/pkg/cataloger/java/{maven_utils.go => internal/maven/config.go} (56%) rename syft/pkg/cataloger/java/{maven_utils_test.go => internal/maven/config_test.go} (96%) create mode 100644 syft/pkg/cataloger/java/internal/maven/pom_parser.go create mode 100644 syft/pkg/cataloger/java/internal/maven/pom_parser_test.go rename syft/pkg/cataloger/java/{maven_resolver.go => internal/maven/resolver.go} (64%) rename syft/pkg/cataloger/java/{maven_resolver_test.go => internal/maven/resolver_test.go} (59%) rename syft/pkg/cataloger/java/{test-fixtures/pom => internal/maven/test-fixtures}/declared-iso-8859-encoded-pom.xml.base64 (100%) rename syft/pkg/cataloger/java/{ => internal/maven}/test-fixtures/local-repository-settings/.m2/settings.xml (100%) rename syft/pkg/cataloger/java/{test-fixtures/pom => internal/maven/test-fixtures}/local/child-1/pom.xml (100%) rename syft/pkg/cataloger/java/{test-fixtures/pom => internal/maven/test-fixtures}/local/child-2/pom.xml (100%) rename syft/pkg/cataloger/java/{test-fixtures/pom => internal/maven/test-fixtures}/local/child-3/pom.xml (100%) rename syft/pkg/cataloger/java/{test-fixtures/pom => internal/maven/test-fixtures}/local/contains-child-1/pom.xml (100%) rename syft/pkg/cataloger/java/{test-fixtures/pom => internal/maven/test-fixtures}/local/parent-1/pom.xml (100%) rename syft/pkg/cataloger/java/{test-fixtures/pom => internal/maven/test-fixtures}/local/parent-2/pom.xml (100%) rename syft/pkg/cataloger/java/{test-fixtures/pom => internal/maven/test-fixtures}/local/parent-3/pom.xml (100%) rename syft/pkg/cataloger/java/{test-fixtures/pom => internal/maven/test-fixtures}/maven-repo/my/org/child-one/1.3.6/child-one-1.3.6.pom (100%) rename syft/pkg/cataloger/java/{test-fixtures/pom => internal/maven/test-fixtures}/maven-repo/my/org/child-two/2.1.90/child-two-2.1.90.pom (100%) rename syft/pkg/cataloger/java/{test-fixtures/pom => internal/maven/test-fixtures}/maven-repo/my/org/parent-one/3.11.0/parent-one-3.11.0.pom (100%) rename syft/pkg/cataloger/java/{test-fixtures/pom => internal/maven/test-fixtures}/maven-repo/my/org/parent-two/13.7.8/parent-two-13.7.8.pom (100%) rename syft/pkg/cataloger/java/{test-fixtures/pom => internal/maven/test-fixtures}/maven-repo/net/shibboleth/parent/7.11.2/parent-7.11.2.pom (100%) rename syft/pkg/cataloger/java/{test-fixtures/pom => internal/maven/test-fixtures}/maven-repo/org/apache/commons/commons-parent/54/commons-parent-54.pom (100%) rename syft/pkg/cataloger/java/{test-fixtures/pom => internal/maven/test-fixtures}/maven-repo/org/junit/junit-bom/5.9.0/junit-bom-5.9.0.pom (100%) rename syft/pkg/cataloger/java/{test-fixtures/pom => internal/maven/test-fixtures}/maven-repo/org/junit/junit-bom/5.9.1/junit-bom-5.9.1.pom (100%) rename syft/pkg/cataloger/java/{test-fixtures/pom => internal/maven/test-fixtures}/maven-repo/org/opensaml/opensaml-parent/3.4.6/opensaml-parent-3.4.6.pom (100%) rename syft/pkg/cataloger/java/{test-fixtures/pom => internal/maven/test-fixtures}/undeclared-iso-8859-encoded-pom.xml.base64 (100%) create mode 100644 syft/pkg/cataloger/java/internal/maven/test/mock_repo.go rename syft/pkg/cataloger/java/test-fixtures/pom/{local => }/commons-text-1.10.0/pom.xml (100%) rename syft/pkg/cataloger/java/test-fixtures/pom/{local => }/example-java-app-maven/pom.xml (100%) diff --git a/internal/task/executor.go b/internal/task/executor.go index 2935f61b12c..899796424be 100644 --- a/internal/task/executor.go +++ b/internal/task/executor.go @@ -7,9 +7,9 @@ import ( "sync" "time" - "github.com/anchore/syft/internal/log" "github.com/hashicorp/go-multierror" + "github.com/anchore/syft/internal/log" "github.com/anchore/syft/internal/sbomsync" "github.com/anchore/syft/syft/event/monitor" "github.com/anchore/syft/syft/file" diff --git a/syft/pkg/cataloger/java/archive_parser.go b/syft/pkg/cataloger/java/archive_parser.go index 740ad0071a4..e5ea00bf2d6 100644 --- a/syft/pkg/cataloger/java/archive_parser.go +++ b/syft/pkg/cataloger/java/archive_parser.go @@ -9,7 +9,6 @@ import ( "slices" "strings" - "github.com/vifraa/gopom" "golang.org/x/exp/maps" "github.com/anchore/syft/internal" @@ -20,6 +19,7 @@ import ( "github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg/cataloger/generic" + "github.com/anchore/syft/syft/pkg/cataloger/java/internal/maven" ) var archiveFormatGlobs = []string{ @@ -55,7 +55,7 @@ type archiveParser struct { fileInfo archiveFilename detectNested bool cfg ArchiveCatalogerConfig - maven *mavenResolver + maven *maven.Resolver } type genericArchiveParserAdapter struct { @@ -110,7 +110,7 @@ func newJavaArchiveParser(reader file.LocationReadCloser, detectNested bool, cfg fileInfo: newJavaArchiveFilename(currentFilepath), detectNested: detectNested, cfg: cfg, - maven: newMavenResolver(nil, cfg), + maven: maven.NewResolver(nil, cfg.mavenConfig()), }, cleanupFn, nil } @@ -279,18 +279,18 @@ func (j *archiveParser) findLicenseFromJavaMetadata(ctx context.Context, groupID } var err error - var pomLicenses []gopom.License + var pomLicenses []maven.License if parsedPom != nil { - pomLicenses, err = j.maven.resolveLicenses(ctx, parsedPom.project) + pomLicenses, err = j.maven.ResolveLicenses(ctx, parsedPom.project) if err != nil { - log.WithFields("error", err, "mavenID", j.maven.getMavenID(ctx, parsedPom.project)).Debug("error attempting to resolve pom licenses") + log.WithFields("error", err, "maven.ID", j.maven.ResolveID(ctx, parsedPom.project)).Debug("error attempting to resolve pom licenses") } } if err == nil && len(pomLicenses) == 0 { - pomLicenses, err = j.maven.findLicenses(ctx, groupID, artifactID, version) + pomLicenses, err = j.maven.FindLicenses(ctx, groupID, artifactID, version) if err != nil { - log.WithFields("error", err, "mavenID", mavenID{groupID, artifactID, version}).Debug("error attempting to find licenses") + log.WithFields("error", err, "maven.ID", maven.NewID(groupID, artifactID, version)).Debug("error attempting to find licenses") } } @@ -298,26 +298,37 @@ func (j *archiveParser) findLicenseFromJavaMetadata(ctx context.Context, groupID // Try removing the last part of the groupId, as sometimes it duplicates the artifactId packages := strings.Split(groupID, ".") groupID = strings.Join(packages[:len(packages)-1], ".") - pomLicenses, err = j.maven.findLicenses(ctx, groupID, artifactID, version) + pomLicenses, err = j.maven.FindLicenses(ctx, groupID, artifactID, version) if err != nil { - log.WithFields("error", err, "mavenID", mavenID{groupID, artifactID, version}).Debug("error attempting to find sub-group licenses") + log.WithFields("error", err, "maven.ID", maven.NewID(groupID, artifactID, version)).Debug("error attempting to find sub-group licenses") } } return toPkgLicenses(&j.location, pomLicenses) } -func toPkgLicenses(location *file.Location, licenses []gopom.License) []pkg.License { +func toPkgLicenses(location *file.Location, licenses []maven.License) []pkg.License { var out []pkg.License for _, license := range licenses { - out = append(out, pkg.NewLicenseFromFields(deref(license.Name), deref(license.URL), location)) + name := "" + if license.Name != nil { + name = *license.Name + } + url := "" + if license.URL != nil { + url = *license.URL + } + if name == "" && url == "" { + continue + } + out = append(out, pkg.NewLicenseFromFields(name, url, location)) } return out } type parsedPomProject struct { path string - project *gopom.Project + project *maven.Project } // discoverMainPackageFromPomInfo attempts to resolve maven groupId, artifactId, version and other info from found pom information @@ -352,7 +363,7 @@ func (j *archiveParser) discoverMainPackageFromPomInfo(ctx context.Context) (gro version = pomProperties.Version if parsedPom != nil && parsedPom.project != nil { - id := j.maven.getMavenID(ctx, parsedPom.project) + id := j.maven.ResolveID(ctx, parsedPom.project) if group == "" { group = id.GroupID } @@ -576,7 +587,7 @@ func pomProjectByParentPath(archivePath string, location file.Location, extractP projectByParentPath := make(map[string]*parsedPomProject) for filePath, fileContents := range contentsOfMavenProjectFiles { // TODO: when we support locations of paths within archives we should start passing the specific pom.xml location object instead of the top jar - pom, err := decodePomXML(strings.NewReader(fileContents)) + pom, err := maven.ParsePomXML(strings.NewReader(fileContents)) if err != nil { log.WithFields("contents-path", filePath, "location", location.Path()).Warnf("failed to parse pom.xml: %+v", err) continue @@ -593,9 +604,57 @@ func pomProjectByParentPath(archivePath string, location file.Location, extractP return projectByParentPath, nil } +// newPackageFromMavenPom processes a single Maven POM for a given parent package, returning only the main package from the pom +func newPackageFromMavenPom(ctx context.Context, r *maven.Resolver, pom *maven.Project, location file.Location) *pkg.Package { + id := r.ResolveID(ctx, pom) + parent, err := r.ResolveParent(ctx, pom) + if err != nil { + // this is expected in many cases, there will be no network access and the maven resolver is unable to + // look up information, so we can continue with what little information we have + log.Trace("unable to resolve parent due to: %v", err) + } + + var javaPomParent *pkg.JavaPomParent + if parent != nil { + parentID := r.ResolveID(ctx, parent) + javaPomParent = &pkg.JavaPomParent{ + GroupID: parentID.GroupID, + ArtifactID: parentID.ArtifactID, + Version: parentID.Version, + } + } + + pomLicenses, err := r.ResolveLicenses(ctx, pom) + if err != nil { + log.Tracef("error resolving licenses: %v", err) + } + licenses := toPkgLicenses(&location, pomLicenses) + + p := pkg.Package{ + Name: id.ArtifactID, + Version: id.Version, + Locations: file.NewLocationSet( + location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), + ), + Licenses: pkg.NewLicenseSet(licenses...), + Language: pkg.Java, + Type: pkg.JavaPkg, // FIXME this is not necessarily accurate + Metadata: pkg.JavaPomProject{ + Parent: javaPomParent, + GroupID: id.GroupID, + ArtifactID: id.ArtifactID, + Version: id.Version, + Name: r.ResolveProperty(ctx, pom.Name, pom), + Description: r.ResolveProperty(ctx, pom.Description, pom), + URL: r.ResolveProperty(ctx, pom.URL, pom), + }, + } + return &p +} + // newPackageFromMavenData processes a single Maven POM properties for a given parent package, returning all listed Java packages found and // associating each discovered package to the given parent package. Note the pom.xml is optional, the pom.properties is not. -func newPackageFromMavenData(ctx context.Context, r *mavenResolver, pomProperties pkg.JavaPomProperties, parsedPom *parsedPomProject, parentPkg *pkg.Package, location file.Location) *pkg.Package { +func newPackageFromMavenData(ctx context.Context, r *maven.Resolver, pomProperties pkg.JavaPomProperties, parsedPom *parsedPomProject, parentPkg *pkg.Package, location file.Location) *pkg.Package { // keep the artifact name within the virtual path if this package does not match the parent package vPathSuffix := "" groupID := "" @@ -620,23 +679,20 @@ func newPackageFromMavenData(ctx context.Context, r *mavenResolver, pomPropertie var pkgPomProject *pkg.JavaPomProject var err error - var pomLicenses []gopom.License + var pomLicenses []maven.License if parsedPom == nil { // If we have no pom.xml, check maven central using pom.properties - pomLicenses, err = r.findLicenses(ctx, pomProperties.GroupID, pomProperties.ArtifactID, pomProperties.Version) + pomLicenses, err = r.FindLicenses(ctx, pomProperties.GroupID, pomProperties.ArtifactID, pomProperties.Version) } else { pkgPomProject = newPomProject(ctx, r, parsedPom.path, parsedPom.project) - pomLicenses, err = r.resolveLicenses(ctx, parsedPom.project) + pomLicenses, err = r.ResolveLicenses(ctx, parsedPom.project) } if err != nil { - log.WithFields("error", err, "mavenID", mavenID{pomProperties.GroupID, pomProperties.ArtifactID, pomProperties.Version}).Debug("error attempting to resolve licenses") + log.WithFields("error", err, "maven.ID", maven.NewID(pomProperties.GroupID, pomProperties.ArtifactID, pomProperties.Version)).Debug("error attempting to resolve licenses") } - licenses := make([]pkg.License, 0) - for _, license := range pomLicenses { - licenses = append(licenses, pkg.NewLicenseFromFields(deref(license.Name), deref(license.URL), &location)) - } + licenses := toPkgLicenses(&location, pomLicenses) p := pkg.Package{ Name: pomProperties.ArtifactID, diff --git a/syft/pkg/cataloger/java/archive_parser_test.go b/syft/pkg/cataloger/java/archive_parser_test.go index 2faf003d288..6bdbacdb565 100644 --- a/syft/pkg/cataloger/java/archive_parser_test.go +++ b/syft/pkg/cataloger/java/archive_parser_test.go @@ -18,17 +18,18 @@ import ( "github.com/scylladb/go-set/strset" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "github.com/vifraa/gopom" "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/license" "github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest" + "github.com/anchore/syft/syft/pkg/cataloger/java/internal/maven" + maventest "github.com/anchore/syft/syft/pkg/cataloger/java/internal/maven/test" ) func TestSearchMavenForLicenses(t *testing.T) { - url := mockMavenRepo(t) + url := maventest.MockRepo(t, "internal/maven/test-fixtures/maven-repo") tests := []struct { name string @@ -79,7 +80,7 @@ func TestSearchMavenForLicenses(t *testing.T) { // assert licenses are discovered from upstream _, _, _, parsedPom := ap.discoverMainPackageFromPomInfo(context.Background()) - licenses, _ := ap.maven.resolveLicenses(context.Background(), parsedPom.project) + licenses, _ := ap.maven.GetLicenses(context.Background(), parsedPom.project) assert.Equal(t, tc.expectedLicenses, toPkgLicenses(nil, licenses)) }) } @@ -762,8 +763,8 @@ func Test_newPackageFromMavenData(t *testing.T) { Version: "1.0", }, project: &parsedPomProject{ - project: &gopom.Project{ - Parent: &gopom.Parent{ + project: &maven.Project{ + Parent: &maven.Parent{ GroupID: ptr("some-parent-group-id"), ArtifactID: ptr("some-parent-artifact-id"), Version: ptr("1.0-parent"), @@ -774,7 +775,7 @@ func Test_newPackageFromMavenData(t *testing.T) { Version: ptr("1.0"), Description: ptr("desc"), URL: ptr("aweso.me"), - Licenses: &[]gopom.License{ + Licenses: &[]maven.License{ { Name: ptr("MIT"), URL: ptr("https://opensource.org/licenses/MIT"), @@ -1038,7 +1039,7 @@ func Test_newPackageFromMavenData(t *testing.T) { } test.expectedParent.Locations = locations - r := newMavenResolver(nil, DefaultArchiveCatalogerConfig()) + r := maven.NewResolver(nil, maven.DefaultConfig()) actualPackage := newPackageFromMavenData(context.Background(), r, test.props, test.project, test.parent, file.NewLocation(virtualPath)) if test.expectedPackage == nil { require.Nil(t, actualPackage) @@ -1348,11 +1349,11 @@ func Test_parseJavaArchive_regressions(t *testing.T) { func Test_deterministicMatchingPomProperties(t *testing.T) { tests := []struct { fixture string - expected mavenID + expected maven.ID }{ { fixture: "multiple-matching-2.11.5", - expected: mavenID{"org.multiple", "multiple-matching-1", "2.11.5"}, + expected: maven.ID{"org.multiple", "multiple-matching-1", "2.11.5"}, }, } @@ -1373,7 +1374,7 @@ func Test_deterministicMatchingPomProperties(t *testing.T) { require.NoError(t, err) groupID, artifactID, version, _ := parser.discoverMainPackageFromPomInfo(context.TODO()) - require.Equal(t, test.expected, mavenID{groupID, artifactID, version}) + require.Equal(t, test.expected, maven.ID{groupID, artifactID, version}) }() } }) diff --git a/syft/pkg/cataloger/java/config.go b/syft/pkg/cataloger/java/config.go index 29096d59bff..35b383c0c21 100644 --- a/syft/pkg/cataloger/java/config.go +++ b/syft/pkg/cataloger/java/config.go @@ -1,8 +1,11 @@ package java -import "github.com/anchore/syft/syft/cataloging" +import ( + "strings" -const mavenBaseURL = "https://repo1.maven.org/maven2" + "github.com/anchore/syft/syft/cataloging" + "github.com/anchore/syft/syft/pkg/cataloger/java/internal/maven" +) type ArchiveCatalogerConfig struct { cataloging.ArchiveSearchConfig `yaml:",inline" json:"" mapstructure:",squash"` @@ -11,16 +14,19 @@ type ArchiveCatalogerConfig struct { MavenLocalRepositoryDir string `yaml:"maven-localrepository-dir" json:"maven-localrepository-dir" mapstructure:"maven-localrepository-dir"` MavenBaseURL string `yaml:"maven-base-url" json:"maven-base-url" mapstructure:"maven-base-url"` MaxParentRecursiveDepth int `yaml:"max-parent-recursive-depth" json:"max-parent-recursive-depth" mapstructure:"max-parent-recursive-depth"` + IncludeTransitiveDependencies bool `yaml:"include-transitive-dependencies" json:"include-transitive-dependencies" mapstructure:"include-transitive-dependencies"` } func DefaultArchiveCatalogerConfig() ArchiveCatalogerConfig { + mavenCfg := maven.DefaultConfig() return ArchiveCatalogerConfig{ - ArchiveSearchConfig: cataloging.DefaultArchiveSearchConfig(), - UseNetwork: false, - UseMavenLocalRepository: false, - MavenLocalRepositoryDir: defaultMavenLocalRepoDir(), - MavenBaseURL: mavenBaseURL, - MaxParentRecursiveDepth: 0, // unlimited + ArchiveSearchConfig: cataloging.DefaultArchiveSearchConfig(), + UseNetwork: mavenCfg.UseNetwork, + UseMavenLocalRepository: mavenCfg.UseLocalRepository, + MavenLocalRepositoryDir: mavenCfg.LocalRepositoryDir, + MavenBaseURL: strings.Join(mavenCfg.Repositories, ","), + MaxParentRecursiveDepth: mavenCfg.MaxParentRecursiveDepth, + IncludeTransitiveDependencies: false, } } @@ -51,3 +57,13 @@ func (j ArchiveCatalogerConfig) WithArchiveTraversal(search cataloging.ArchiveSe j.ArchiveSearchConfig = search return j } + +func (j ArchiveCatalogerConfig) mavenConfig() maven.Config { + return maven.Config{ + UseNetwork: j.UseNetwork, + UseLocalRepository: j.UseMavenLocalRepository, + LocalRepositoryDir: j.MavenLocalRepositoryDir, + Repositories: strings.Split(j.MavenBaseURL, ","), + MaxParentRecursiveDepth: j.MaxParentRecursiveDepth, + } +} diff --git a/syft/pkg/cataloger/java/maven_utils.go b/syft/pkg/cataloger/java/internal/maven/config.go similarity index 56% rename from syft/pkg/cataloger/java/maven_utils.go rename to syft/pkg/cataloger/java/internal/maven/config.go index 9d365e151d6..1bdcb711951 100644 --- a/syft/pkg/cataloger/java/maven_utils.go +++ b/syft/pkg/cataloger/java/internal/maven/config.go @@ -1,4 +1,4 @@ -package java +package maven import ( "encoding/xml" @@ -15,6 +15,35 @@ import ( "github.com/anchore/syft/internal/log" ) +const mavenBaseURL = "https://repo1.maven.org/maven2" + +type Config struct { + // UseNetwork instructs the maven resolver to use network operations to resolve maven artifacts + UseNetwork bool `yaml:"use-network" json:"use-network" mapstructure:"use-network"` + + // Repositories are the set of remote repositories the network resolution should use + Repositories []string `yaml:"maven-repositories" json:"maven-repositories" mapstructure:"maven-repositories"` + + // UseLocalRepository instructs the maven resolver to look in the host maven cache, usually ~/.m2/repository + UseLocalRepository bool `yaml:"use-maven-local-repository" json:"use-maven-local-repository" mapstructure:"use-maven-local-repository"` + + // LocalRepositoryDir is an alternate directory to use to look up the local repository + LocalRepositoryDir string `yaml:"maven-local-repository-dir" json:"maven-local-repository-dir" mapstructure:"maven-local-repository-dir"` + + // MaxParentRecursiveDepth allows for a maximum depth to use when recursively resolving parent poms and other information, 0 disables any maximum + MaxParentRecursiveDepth int `yaml:"max-parent-recursive-depth" json:"max-parent-recursive-depth" mapstructure:"max-parent-recursive-depth"` +} + +func DefaultConfig() Config { + return Config{ + UseNetwork: false, + Repositories: []string{mavenBaseURL}, + UseLocalRepository: false, + LocalRepositoryDir: defaultMavenLocalRepoDir(), + MaxParentRecursiveDepth: 0, // unlimited + } +} + // defaultMavenLocalRepoDir gets default location of the Maven local repository, generally at /.m2/repository func defaultMavenLocalRepoDir() string { homeDir, err := homedir.Dir() @@ -49,15 +78,6 @@ func getSettingsXMLLocalRepository(settingsXML io.Reader) string { return s.LocalRepository } -// deref dereferences ptr if not nil, or returns the type default value if ptr is nil -func deref[T any](ptr *T) T { - if ptr == nil { - var t T - return t - } - return *ptr -} - // remotePomURL returns a URL to download a POM from a remote repository func remotePomURL(repoURL, groupID, artifactID, version string) (requestURL string, err error) { // groupID needs to go from maven.org -> maven/org diff --git a/syft/pkg/cataloger/java/maven_utils_test.go b/syft/pkg/cataloger/java/internal/maven/config_test.go similarity index 96% rename from syft/pkg/cataloger/java/maven_utils_test.go rename to syft/pkg/cataloger/java/internal/maven/config_test.go index 8d599b501af..8c39ea9030e 100644 --- a/syft/pkg/cataloger/java/maven_utils_test.go +++ b/syft/pkg/cataloger/java/internal/maven/config_test.go @@ -1,4 +1,4 @@ -package java +package maven import ( "os" @@ -85,7 +85,7 @@ func Test_remotePomURL(t *testing.T) { expected string }{ { - name: "formatMavenURL correctly assembles the pom URL", + name: "remotePomURL correctly assembles the pom URL", groupID: "org.springframework.boot", artifactID: "spring-boot-starter-test", version: "3.1.5", diff --git a/syft/pkg/cataloger/java/internal/maven/pom_parser.go b/syft/pkg/cataloger/java/internal/maven/pom_parser.go new file mode 100644 index 00000000000..ed0637e1c27 --- /dev/null +++ b/syft/pkg/cataloger/java/internal/maven/pom_parser.go @@ -0,0 +1,67 @@ +package maven + +import ( + "bytes" + "encoding/xml" + "fmt" + "io" + "strings" + + "github.com/saintfish/chardet" + "github.com/vifraa/gopom" + "golang.org/x/net/html/charset" +) + +type ( + Project = gopom.Project + Properties = gopom.Properties + Parent = gopom.Parent + Dependency = gopom.Dependency + License = gopom.License +) + +// ParsePomXML decodes a pom XML file, detecting and converting non-UTF-8 charsets. this DOES NOT perform any logic to resolve properties such as groupID, artifactID, and version +func ParsePomXML(content io.Reader) (project *Project, err error) { + inputReader, err := getUtf8Reader(content) + if err != nil { + return nil, fmt.Errorf("unable to read pom.xml: %w", err) + } + + decoder := xml.NewDecoder(inputReader) + // when an xml file has a character set declaration (e.g. '') read that and use the correct decoder + decoder.CharsetReader = charset.NewReaderLabel + + project = &Project{} + if err := decoder.Decode(project); err != nil { + return nil, fmt.Errorf("unable to unmarshal pom.xml: %w", err) + } + + return project, nil +} + +func getUtf8Reader(content io.Reader) (io.Reader, error) { + pomContents, err := io.ReadAll(content) + if err != nil { + return nil, err + } + + detector := chardet.NewTextDetector() + detection, err := detector.DetectBest(pomContents) + + var inputReader io.Reader + if err == nil && detection != nil { + if detection.Charset == "UTF-8" { + inputReader = bytes.NewReader(pomContents) + } else { + inputReader, err = charset.NewReaderLabel(detection.Charset, bytes.NewReader(pomContents)) + if err != nil { + return nil, fmt.Errorf("unable to get encoding: %w", err) + } + } + } else { + // we could not detect the encoding, but we want a valid file to read. Replace unreadable + // characters with the UTF-8 replacement character. + inputReader = strings.NewReader(strings.ToValidUTF8(string(pomContents), "�")) + } + return inputReader, nil +} diff --git a/syft/pkg/cataloger/java/internal/maven/pom_parser_test.go b/syft/pkg/cataloger/java/internal/maven/pom_parser_test.go new file mode 100644 index 00000000000..c9f8463a43d --- /dev/null +++ b/syft/pkg/cataloger/java/internal/maven/pom_parser_test.go @@ -0,0 +1,93 @@ +package maven + +import ( + "encoding/base64" + "io" + "os" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/anchore/syft/internal" +) + +func Test_getUtf8Reader(t *testing.T) { + tests := []struct { + name string + contents string + }{ + { + name: "unknown encoding", + // random binary contents + contents: "BkiJz02JyEWE0nXR6TH///9NicpJweEETIucJIgAAABJicxPjQwhTY1JCE05WQh0BU2J0eunTYshTIusJIAAAAAPHwBNOeV1BUUx2+tWTIlUJDhMiUwkSEyJRCQgSIl8JFBMiQ==", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + decoder := base64.NewDecoder(base64.StdEncoding, strings.NewReader(tt.contents)) + + got, err := getUtf8Reader(decoder) + require.NoError(t, err) + gotBytes, err := io.ReadAll(got) + require.NoError(t, err) + // if we couldn't decode the section as UTF-8, we should get a replacement character + assert.Contains(t, string(gotBytes), "�") + }) + } +} + +func Test_decodePomXML_surviveNonUtf8Encoding(t *testing.T) { + // regression for https://github.com/anchore/syft/issues/2044 + + // we are storing the base64 contents of the pom.xml file. We are doing this to prevent accidental changes to the + // file, which is extremely important for this test. + + // for instance, even changing a single character in the file and saving in an IntelliJ IDE will automatically + // convert the file to UTF-8, which will break this test: + + // xxd with the original pom.xml + // 00000780: 6964 3e0d 0a20 2020 2020 2020 2020 2020 id>.. + // 00000790: 203c 6e61 6d65 3e4a e972 f46d 6520 4d69 J.r.me Mi + // 000007a0: 7263 3c2f 6e61 6d65 3e0d 0a20 2020 2020 rc.. + + // xxd with the pom.xml converted to UTF-8 (from a simple change with IntelliJ) + // 00000780: 6964 3e0d 0a20 2020 2020 2020 2020 2020 id>.. + // 00000790: 203c 6e61 6d65 3e4a efbf bd72 efbf bd6d J...r...m + // 000007a0: 6520 4d69 7263 3c2f 6e61 6d65 3e0d 0a20 e Mirc.. + + // Note that the name "Jérôme Mirc" was originally interpreted as "J.r.me Mi" and after the save + // is now encoded as "J...r...m" which is not what we want (note the extra bytes for each non UTF-8 character. + // The original 0xe9 byte (é) was converted to 0xefbfbd (�) which is the UTF-8 replacement character. + // This is quite silly on the part of IntelliJ, but it is what it is. + + cases := []struct { + name string + fixture string + }{ + { + name: "undeclared encoding", + fixture: "test-fixtures/undeclared-iso-8859-encoded-pom.xml.base64", + }, + { + name: "declared encoding", + fixture: "test-fixtures/declared-iso-8859-encoded-pom.xml.base64", + }, + } + + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + fh, err := os.Open(c.fixture) + require.NoError(t, err) + defer internal.CloseAndLogError(fh, c.fixture) + + decoder := base64.NewDecoder(base64.StdEncoding, fh) + + proj, err := ParsePomXML(decoder) + + require.NoError(t, err) + require.NotEmpty(t, proj.Developers) + }) + } +} diff --git a/syft/pkg/cataloger/java/maven_resolver.go b/syft/pkg/cataloger/java/internal/maven/resolver.go similarity index 64% rename from syft/pkg/cataloger/java/maven_resolver.go rename to syft/pkg/cataloger/java/internal/maven/resolver.go index f9d375b8ea3..e4398cc5ab5 100644 --- a/syft/pkg/cataloger/java/maven_resolver.go +++ b/syft/pkg/cataloger/java/internal/maven/resolver.go @@ -1,4 +1,4 @@ -package java +package maven import ( "bytes" @@ -24,56 +24,64 @@ import ( "github.com/anchore/syft/syft/file" ) -// mavenID is the unique identifier for a package in Maven -type mavenID struct { +// ID is the unique identifier for a package in Maven +type ID struct { GroupID string ArtifactID string Version string } -func (m mavenID) String() string { +func NewID(groupID, artifactID, version string) ID { + return ID{ + GroupID: groupID, + ArtifactID: artifactID, + Version: version, + } +} + +func (m ID) String() string { return fmt.Sprintf("(groupId: %s artifactId: %s version: %s)", m.GroupID, m.ArtifactID, m.Version) } var expressionMatcher = regexp.MustCompile("[$][{][^}]+[}]") -// mavenResolver is a short-lived utility to resolve maven poms from multiple sources, including: +// Resolver is a short-lived utility to resolve maven poms from multiple sources, including: // the scanned filesystem, local maven cache directories, remote maven repositories, and the syft cache -type mavenResolver struct { - cfg ArchiveCatalogerConfig +type Resolver struct { + cfg Config cache cache.Cache - resolved map[mavenID]*gopom.Project + resolved map[ID]*Project remoteRequestTimeout time.Duration checkedLocalRepo bool // fileResolver and pomLocations are used to resolve parent poms by relativePath fileResolver file.Resolver - pomLocations map[*gopom.Project]file.Location + pomLocations map[*Project]file.Location } -// newMavenResolver constructs a new mavenResolver with the given configuration. +// NewResolver constructs a new Resolver with the given configuration. // NOTE: the fileResolver is optional and if provided will be used to resolve parent poms by relative path -func newMavenResolver(fileResolver file.Resolver, cfg ArchiveCatalogerConfig) *mavenResolver { - return &mavenResolver{ +func NewResolver(fileResolver file.Resolver, cfg Config) *Resolver { + return &Resolver{ cfg: cfg, cache: cache.GetManager().GetCache("java/maven/repo", "v1"), - resolved: map[mavenID]*gopom.Project{}, + resolved: map[ID]*Project{}, remoteRequestTimeout: time.Second * 10, fileResolver: fileResolver, - pomLocations: map[*gopom.Project]file.Location{}, + pomLocations: map[*Project]file.Location{}, } } -// getPropertyValue gets property values by emulating maven property resolution logic, looking in the project's variables +// ResolveProperty gets property values by emulating maven property resolution logic, looking in the project's variables // as well as supporting the project expressions like ${project.parent.groupId}. // Properties which are not resolved result in empty string "" -func (r *mavenResolver) getPropertyValue(ctx context.Context, propertyValue *string, resolutionContext ...*gopom.Project) string { - return r.resolvePropertyValue(ctx, propertyValue, nil, resolutionContext...) +func (r *Resolver) ResolveProperty(ctx context.Context, propertyValue *string, pom *Project) string { + return r.resolvePropertyValue(ctx, propertyValue, nil, pom) } // resolvePropertyValue resolves property values by emulating maven property resolution logic, looking in the project's variables // as well as supporting the project expressions like ${project.parent.groupId}. // Properties which are not resolved result in empty string "" -func (r *mavenResolver) resolvePropertyValue(ctx context.Context, propertyValue *string, resolvingProperties []string, resolutionContext ...*gopom.Project) string { +func (r *Resolver) resolvePropertyValue(ctx context.Context, propertyValue *string, resolvingProperties []string, resolutionContext ...*Project) string { if propertyValue == nil { return "" } @@ -86,7 +94,7 @@ func (r *mavenResolver) resolvePropertyValue(ctx context.Context, propertyValue } // resolveExpression resolves an expression, which may be a plain string or a string with ${ property.references } -func (r *mavenResolver) resolveExpression(ctx context.Context, resolutionContext []*gopom.Project, expression string, resolvingProperties []string) (string, error) { +func (r *Resolver) resolveExpression(ctx context.Context, resolutionContext []*Project, expression string, resolvingProperties []string) (string, error) { log.Tracef("resolving expression: '%v' in context: %v", expression, resolutionContext) var errs error @@ -103,7 +111,7 @@ func (r *mavenResolver) resolveExpression(ctx context.Context, resolutionContext } // resolveProperty resolves properties recursively from the root project -func (r *mavenResolver) resolveProperty(ctx context.Context, resolutionContext []*gopom.Project, propertyExpression string, resolvingProperties []string) (string, error) { +func (r *Resolver) resolveProperty(ctx context.Context, resolutionContext []*Project, propertyExpression string, resolvingProperties []string) (string, error) { // prevent cycles if slices.Contains(resolvingProperties, propertyExpression) { return "", fmt.Errorf("cycle detected resolving: %s", propertyExpression) @@ -146,7 +154,7 @@ func (r *mavenResolver) resolveProperty(ctx context.Context, resolutionContext [ // resolveProjectProperty resolves properties on the project // //nolint:gocognit -func (r *mavenResolver) resolveProjectProperty(ctx context.Context, resolutionContext []*gopom.Project, pom *gopom.Project, propertyExpression string, resolving []string) (string, error) { +func (r *Resolver) resolveProjectProperty(ctx context.Context, resolutionContext []*Project, pom *Project, propertyExpression string, resolving []string) (string, error) { // see if we have a project.x expression and process this based // on the xml tags in gopom parts := strings.Split(propertyExpression, ".") @@ -210,19 +218,45 @@ func (r *mavenResolver) resolveProjectProperty(ctx context.Context, resolutionCo return "", nil } -// getMavenID creates a new mavenID from a pom, resolving parent information as necessary -func (r *mavenResolver) getMavenID(ctx context.Context, resolutionContext ...*gopom.Project) mavenID { - return r.resolveMavenID(ctx, nil, resolutionContext...) +// ResolveParent resolves the parent definition, and returns a POM for the parent, which is possibly incomplete, or nil +func (r *Resolver) ResolveParent(ctx context.Context, pom *Project) (*Project, error) { + if pom == nil || pom.Parent == nil { + return nil, nil + } + + groupID := r.ResolveProperty(ctx, pom.Parent.GroupID, pom) + artifactID := r.ResolveProperty(ctx, pom.Parent.ArtifactID, pom) + version := r.ResolveProperty(ctx, pom.Parent.Version, pom) + + parent, err := r.FindPom(ctx, groupID, artifactID, version) + if parent != nil { + return parent, err + } + + if artifactID != "" && version != "" { + return &Project{ + GroupID: &groupID, + Name: &artifactID, + Version: &version, + }, nil + } + + return nil, fmt.Errorf("unsufficient information to create a parent pom project, id: %s", NewID(groupID, artifactID, version)) +} + +// ResolveID creates an ID from a pom, resolving parent information as necessary +func (r *Resolver) ResolveID(ctx context.Context, pom *Project) ID { + return r.resolveID(ctx, nil, pom) } -// resolveMavenID creates a new mavenID from a pom, resolving parent information as necessary -func (r *mavenResolver) resolveMavenID(ctx context.Context, resolvingProperties []string, resolutionContext ...*gopom.Project) mavenID { +// resolveID creates a new ID from a pom, resolving parent information as necessary +func (r *Resolver) resolveID(ctx context.Context, resolvingProperties []string, resolutionContext ...*Project) ID { if len(resolutionContext) == 0 || resolutionContext[0] == nil { - return mavenID{} + return ID{} } pom := resolutionContext[len(resolutionContext)-1] // get topmost pom if pom == nil { - return mavenID{} + return ID{} } groupID := r.resolvePropertyValue(ctx, pom.GroupID, resolvingProperties, resolutionContext...) @@ -239,40 +273,40 @@ func (r *mavenResolver) resolveMavenID(ctx context.Context, resolvingProperties version = r.resolvePropertyValue(ctx, pom.Parent.Version, resolvingProperties, resolutionContext...) } } - return mavenID{groupID, artifactID, version} + return ID{groupID, artifactID, version} } -// resolveDependencyID creates a new mavenID from a dependency element in a pom, resolving information as necessary -func (r *mavenResolver) resolveDependencyID(ctx context.Context, pom *gopom.Project, dep gopom.Dependency) mavenID { +// ResolveDependencyID creates an ID from a dependency element in a pom, resolving information as necessary +func (r *Resolver) ResolveDependencyID(ctx context.Context, pom *Project, dep Dependency) ID { if pom == nil { - return mavenID{} + return ID{} } - groupID := r.getPropertyValue(ctx, dep.GroupID, pom) - artifactID := r.getPropertyValue(ctx, dep.ArtifactID, pom) - version := r.getPropertyValue(ctx, dep.Version, pom) + groupID := r.resolvePropertyValue(ctx, dep.GroupID, nil, pom) + artifactID := r.resolvePropertyValue(ctx, dep.ArtifactID, nil, pom) + version := r.resolvePropertyValue(ctx, dep.Version, nil, pom) var err error if version == "" { - version, err = r.findInheritedVersion(ctx, pom, groupID, artifactID) + version, err = r.resolveInheritedVersion(ctx, pom, groupID, artifactID) } - depID := mavenID{groupID, artifactID, version} + depID := ID{groupID, artifactID, version} if err != nil { - log.WithFields("error", err, "mavenID", r.getMavenID(ctx, pom), "dependencyID", depID) + log.WithFields("error", err, "ID", r.ResolveID(ctx, pom), "dependencyID", depID) } return depID } -// findPom gets a pom from cache, local repository, or from a remote Maven repository depending on configuration -func (r *mavenResolver) findPom(ctx context.Context, groupID, artifactID, version string) (*gopom.Project, error) { +// FindPom gets a pom from cache, local repository, or from a remote Maven repository depending on configuration +func (r *Resolver) FindPom(ctx context.Context, groupID, artifactID, version string) (*Project, error) { if groupID == "" || artifactID == "" || version == "" { return nil, fmt.Errorf("invalid maven pom specification, require non-empty values for groupID: '%s', artifactID: '%s', version: '%s'", groupID, artifactID, version) } - id := mavenID{groupID, artifactID, version} + id := ID{groupID, artifactID, version} pom := r.resolved[id] if pom != nil { @@ -282,7 +316,7 @@ func (r *mavenResolver) findPom(ctx context.Context, groupID, artifactID, versio var errs error // try to resolve first from local maven repo - if r.cfg.UseMavenLocalRepository { + if r.cfg.UseLocalRepository { pom, err := r.findPomInLocalRepository(groupID, artifactID, version) if pom != nil { r.resolved[id] = pom @@ -293,7 +327,7 @@ func (r *mavenResolver) findPom(ctx context.Context, groupID, artifactID, versio // resolve via network maven repository if pom == nil && r.cfg.UseNetwork { - pom, err := r.findPomInRemoteRepository(ctx, groupID, artifactID, version) + pom, err := r.findPomInRemotes(ctx, groupID, artifactID, version) if pom != nil { r.resolved[id] = pom return pom, nil @@ -305,35 +339,50 @@ func (r *mavenResolver) findPom(ctx context.Context, groupID, artifactID, versio } // findPomInLocalRepository attempts to get the POM from the users local maven repository -func (r *mavenResolver) findPomInLocalRepository(groupID, artifactID, version string) (*gopom.Project, error) { +func (r *Resolver) findPomInLocalRepository(groupID, artifactID, version string) (*Project, error) { groupPath := filepath.Join(strings.Split(groupID, ".")...) - pomFilePath := filepath.Join(r.cfg.MavenLocalRepositoryDir, groupPath, artifactID, version, artifactID+"-"+version+".pom") + pomFilePath := filepath.Join(r.cfg.LocalRepositoryDir, groupPath, artifactID, version, artifactID+"-"+version+".pom") pomFile, err := os.Open(pomFilePath) if err != nil { if !r.checkedLocalRepo && errors.Is(err, os.ErrNotExist) { r.checkedLocalRepo = true // check if the directory exists at all, and if not just stop trying to resolve local maven files - fi, err := os.Stat(r.cfg.MavenLocalRepositoryDir) + fi, err := os.Stat(r.cfg.LocalRepositoryDir) if errors.Is(err, os.ErrNotExist) || !fi.IsDir() { - log.WithFields("error", err, "repositoryDir", r.cfg.MavenLocalRepositoryDir). + log.WithFields("error", err, "repositoryDir", r.cfg.LocalRepositoryDir). Info("local maven repository is not a readable directory, stopping local resolution") - r.cfg.UseMavenLocalRepository = false + r.cfg.UseLocalRepository = false } } return nil, err } defer internal.CloseAndLogError(pomFile, pomFilePath) - return decodePomXML(pomFile) + return ParsePomXML(pomFile) +} + +// findPomInRemotes download the pom file from all configured Maven repositories over HTTP +func (r *Resolver) findPomInRemotes(ctx context.Context, groupID, artifactID, version string) (*Project, error) { + var errs error + for _, repo := range r.cfg.Repositories { + pom, err := r.findPomInRemoteRepository(ctx, repo, groupID, artifactID, version) + if err != nil { + errs = errors.Join(errs, err) + } + if pom != nil { + return pom, err + } + } + return nil, fmt.Errorf("pom for %v not found in any remote repository: %w", ID{groupID, artifactID, version}, errs) } // findPomInRemoteRepository download the pom file from a (remote) Maven repository over HTTP -func (r *mavenResolver) findPomInRemoteRepository(ctx context.Context, groupID, artifactID, version string) (*gopom.Project, error) { +func (r *Resolver) findPomInRemoteRepository(ctx context.Context, repo string, groupID, artifactID, version string) (*Project, error) { if groupID == "" || artifactID == "" || version == "" { return nil, fmt.Errorf("missing/incomplete maven artifact coordinates -- groupId: '%s' artifactId: '%s', version: '%s'", groupID, artifactID, version) } - requestURL, err := remotePomURL(r.cfg.MavenBaseURL, groupID, artifactID, version) + requestURL, err := remotePomURL(repo, groupID, artifactID, version) if err != nil { return nil, fmt.Errorf("unable to find pom in remote due to: %w", err) } @@ -377,7 +426,7 @@ func (r *mavenResolver) findPomInRemoteRepository(ctx context.Context, groupID, if reader, ok := reader.(io.Closer); ok { defer internal.CloseAndLogError(reader, requestURL) } - pom, err := decodePomXML(reader) + pom, err := ParsePomXML(reader) if err != nil { return nil, fmt.Errorf("unable to parse pom from Maven repository url %v: %w", requestURL, err) } @@ -388,7 +437,7 @@ func (r *mavenResolver) findPomInRemoteRepository(ctx context.Context, groupID, // this function is guaranteed to return an unread reader for the correct contents. // NOTE: this could be promoted to the internal cache package as a specialized version of the cache.Resolver // if there are more users of this functionality -func (r *mavenResolver) cacheResolveReader(key string, resolve func() (io.ReadCloser, error)) (io.Reader, error) { +func (r *Resolver) cacheResolveReader(key string, resolve func() (io.ReadCloser, error)) (io.Reader, error) { reader, err := r.cache.Read(key) if err == nil && reader != nil { return reader, err @@ -410,7 +459,7 @@ func (r *mavenResolver) cacheResolveReader(key string, resolve func() (io.ReadCl } // resolveParent attempts to resolve the parent for the given pom -func (r *mavenResolver) resolveParent(ctx context.Context, pom *gopom.Project, resolvingProperties ...string) (*gopom.Project, error) { +func (r *Resolver) resolveParent(ctx context.Context, pom *Project, resolvingProperties ...string) (*Project, error) { if pom == nil || pom.Parent == nil { return nil, nil } @@ -422,7 +471,7 @@ func (r *mavenResolver) resolveParent(ctx context.Context, pom *gopom.Project, r version := r.resolvePropertyValue(ctx, parent.Version, resolvingProperties, &pomWithoutParent) // check cache before resolving - parentID := mavenID{groupID, artifactID, version} + parentID := ID{groupID, artifactID, version} if resolvedParent, ok := r.resolved[parentID]; ok { return resolvedParent, nil } @@ -434,21 +483,21 @@ func (r *mavenResolver) resolveParent(ctx context.Context, pom *gopom.Project, r } // find POM normally - return r.findPom(ctx, groupID, artifactID, version) + return r.FindPom(ctx, groupID, artifactID, version) } -// findInheritedVersion attempts to find the version of a dependency (groupID, artifactID) by searching all parent poms and imported managed dependencies +// resolveInheritedVersion attempts to find the version of a dependency (groupID, artifactID) by searching all parent poms and imported managed dependencies // //nolint:gocognit,funlen -func (r *mavenResolver) findInheritedVersion(ctx context.Context, pom *gopom.Project, groupID, artifactID string, resolutionContext ...*gopom.Project) (string, error) { +func (r *Resolver) resolveInheritedVersion(ctx context.Context, pom *Project, groupID, artifactID string, resolutionContext ...*Project) (string, error) { if pom == nil { return "", fmt.Errorf("nil pom provided to findInheritedVersion") } if r.cfg.MaxParentRecursiveDepth > 0 && len(resolutionContext) > r.cfg.MaxParentRecursiveDepth { - return "", fmt.Errorf("maximum depth reached attempting to resolve version for: %s:%s at: %v", groupID, artifactID, r.getMavenID(ctx, pom)) + return "", fmt.Errorf("maximum depth reached attempting to resolve version for: %s:%s at: %v", groupID, artifactID, r.ResolveID(ctx, pom)) } if slices.Contains(resolutionContext, pom) { - return "", fmt.Errorf("cycle detected attempting to resolve version for: %s:%s at: %v", groupID, artifactID, r.getMavenID(ctx, pom)) + return "", fmt.Errorf("cycle detected attempting to resolve version for: %s:%s at: %v", groupID, artifactID, r.ResolveID(ctx, pom)) } resolutionContext = append(resolutionContext, pom) @@ -457,10 +506,10 @@ func (r *mavenResolver) findInheritedVersion(ctx context.Context, pom *gopom.Pro // check for entries in dependencyManagement first for _, dep := range pomManagedDependencies(pom) { - depGroupID := r.getPropertyValue(ctx, dep.GroupID, resolutionContext...) - depArtifactID := r.getPropertyValue(ctx, dep.ArtifactID, resolutionContext...) + depGroupID := r.resolvePropertyValue(ctx, dep.GroupID, nil, resolutionContext...) + depArtifactID := r.resolvePropertyValue(ctx, dep.ArtifactID, nil, resolutionContext...) if depGroupID == groupID && depArtifactID == artifactID { - version = r.getPropertyValue(ctx, dep.Version, resolutionContext...) + version = r.resolvePropertyValue(ctx, dep.Version, nil, resolutionContext...) if version != "" { return version, nil } @@ -468,17 +517,17 @@ func (r *mavenResolver) findInheritedVersion(ctx context.Context, pom *gopom.Pro // imported pom files should be treated just like parent poms, they are used to define versions of dependencies if deref(dep.Type) == "pom" && deref(dep.Scope) == "import" { - depVersion := r.getPropertyValue(ctx, dep.Version, resolutionContext...) + depVersion := r.resolvePropertyValue(ctx, dep.Version, nil, resolutionContext...) - depPom, err := r.findPom(ctx, depGroupID, depArtifactID, depVersion) + depPom, err := r.FindPom(ctx, depGroupID, depArtifactID, depVersion) if err != nil || depPom == nil { - log.WithFields("error", err, "mavenID", r.getMavenID(ctx, pom), "dependencyID", mavenID{depGroupID, depArtifactID, depVersion}). + log.WithFields("error", err, "ID", r.ResolveID(ctx, pom), "dependencyID", ID{depGroupID, depArtifactID, depVersion}). Debug("unable to find imported pom looking for managed dependencies") continue } - version, err = r.findInheritedVersion(ctx, depPom, groupID, artifactID, resolutionContext...) + version, err = r.resolveInheritedVersion(ctx, depPom, groupID, artifactID, resolutionContext...) if err != nil { - log.WithFields("error", err, "mavenID", r.getMavenID(ctx, pom), "dependencyID", mavenID{depGroupID, depArtifactID, depVersion}). + log.WithFields("error", err, "ID", r.ResolveID(ctx, pom), "dependencyID", ID{depGroupID, depArtifactID, depVersion}). Debug("error during findInheritedVersion") } if version != "" { @@ -493,7 +542,7 @@ func (r *mavenResolver) findInheritedVersion(ctx context.Context, pom *gopom.Pro return "", err } if parent != nil { - version, err = r.findInheritedVersion(ctx, parent, groupID, artifactID, resolutionContext...) + version, err = r.resolveInheritedVersion(ctx, parent, groupID, artifactID, resolutionContext...) if err != nil { return "", err } @@ -503,11 +552,11 @@ func (r *mavenResolver) findInheritedVersion(ctx context.Context, pom *gopom.Pro } // check for inherited dependencies - for _, dep := range pomDependencies(pom) { - depGroupID := r.getPropertyValue(ctx, dep.GroupID, resolutionContext...) - depArtifactID := r.getPropertyValue(ctx, dep.ArtifactID, resolutionContext...) + for _, dep := range DirectPomDependencies(pom) { + depGroupID := r.resolvePropertyValue(ctx, dep.GroupID, nil, resolutionContext...) + depArtifactID := r.resolvePropertyValue(ctx, dep.ArtifactID, nil, resolutionContext...) if depGroupID == groupID && depArtifactID == artifactID { - version = r.getPropertyValue(ctx, dep.Version, resolutionContext...) + version = r.resolvePropertyValue(ctx, dep.Version, nil, resolutionContext...) if version != "" { return version, nil } @@ -517,18 +566,24 @@ func (r *mavenResolver) findInheritedVersion(ctx context.Context, pom *gopom.Pro return "", nil } -// findLicenses search pom for license, traversing parent poms if needed -func (r *mavenResolver) findLicenses(ctx context.Context, groupID, artifactID, version string) ([]gopom.License, error) { - pom, err := r.findPom(ctx, groupID, artifactID, version) +// FindLicenses attempts to find a pom, and once found attempts to resolve licenses traversing +// parent poms as necessary +func (r *Resolver) FindLicenses(ctx context.Context, groupID, artifactID, version string) ([]gopom.License, error) { + pom, err := r.FindPom(ctx, groupID, artifactID, version) if pom == nil || err != nil { return nil, err } return r.resolveLicenses(ctx, pom) } +// ResolveLicenses searches the pom for license, resolving and traversing parent poms if needed +func (r *Resolver) ResolveLicenses(ctx context.Context, pom *Project) ([]License, error) { + return r.resolveLicenses(ctx, pom) +} + // resolveLicenses searches the pom for license, traversing parent poms if needed -func (r *mavenResolver) resolveLicenses(ctx context.Context, pom *gopom.Project, processing ...mavenID) ([]gopom.License, error) { - id := r.getMavenID(ctx, pom) +func (r *Resolver) resolveLicenses(ctx context.Context, pom *Project, processing ...ID) ([]License, error) { + id := r.ResolveID(ctx, pom) if slices.Contains(processing, id) { return nil, fmt.Errorf("cycle detected resolving licenses for: %v", id) } @@ -552,12 +607,12 @@ func (r *mavenResolver) resolveLicenses(ctx context.Context, pom *gopom.Project, } // pomLicenses appends the directly specified licenses with non-empty name or url -func (r *mavenResolver) pomLicenses(ctx context.Context, pom *gopom.Project) []gopom.License { - var out []gopom.License +func (r *Resolver) pomLicenses(ctx context.Context, pom *Project) []License { + var out []License for _, license := range deref(pom.Licenses) { // if we find non-empty licenses, return them - name := r.getPropertyValue(ctx, license.Name, pom) - url := r.getPropertyValue(ctx, license.URL, pom) + name := r.resolvePropertyValue(ctx, license.Name, nil, pom) + url := r.resolvePropertyValue(ctx, license.URL, nil, pom) if name != "" || url != "" { out = append(out, license) } @@ -565,7 +620,7 @@ func (r *mavenResolver) pomLicenses(ctx context.Context, pom *gopom.Project) []g return out } -func (r *mavenResolver) findParentPomByRelativePath(ctx context.Context, pom *gopom.Project, parentID mavenID, resolvingProperties []string) *gopom.Project { +func (r *Resolver) findParentPomByRelativePath(ctx context.Context, pom *Project, parentID ID, resolvingProperties []string) *Project { // don't resolve if no resolver if r.fileResolver == nil { return nil @@ -588,7 +643,7 @@ func (r *mavenResolver) findParentPomByRelativePath(ctx context.Context, pom *go } parentLocations, err := r.fileResolver.FilesByPath(p) if err != nil || len(parentLocations) == 0 { - log.WithFields("error", err, "mavenID", r.resolveMavenID(ctx, resolvingProperties, pom), "parentID", parentID, "relativePath", relativePath). + log.WithFields("error", err, "mavenID", r.resolveID(ctx, resolvingProperties, pom), "parentID", parentID, "relativePath", relativePath). Trace("parent pom not found by relative path") return nil } @@ -596,21 +651,21 @@ func (r *mavenResolver) findParentPomByRelativePath(ctx context.Context, pom *go parentContents, err := r.fileResolver.FileContentsByLocation(parentLocation) if err != nil || parentContents == nil { - log.WithFields("error", err, "mavenID", r.resolveMavenID(ctx, resolvingProperties, pom), "parentID", parentID, "parentLocation", parentLocation). + log.WithFields("error", err, "mavenID", r.resolveID(ctx, resolvingProperties, pom), "parentID", parentID, "parentLocation", parentLocation). Debug("unable to get contents of parent pom by relative path") return nil } defer internal.CloseAndLogError(parentContents, parentLocation.RealPath) - parentPom, err := decodePomXML(parentContents) + parentPom, err := ParsePomXML(parentContents) if err != nil || parentPom == nil { - log.WithFields("error", err, "mavenID", r.resolveMavenID(ctx, resolvingProperties, pom), "parentID", parentID, "parentLocation", parentLocation). + log.WithFields("error", err, "mavenID", r.resolveID(ctx, resolvingProperties, pom), "parentID", parentID, "parentLocation", parentLocation). Debug("unable to parse parent pom") return nil } // ensure parent matches - newParentID := r.resolveMavenID(ctx, resolvingProperties, parentPom) + newParentID := r.resolveID(ctx, resolvingProperties, parentPom) if newParentID.ArtifactID != parentID.ArtifactID { - log.WithFields("newParentID", newParentID, "mavenID", r.resolveMavenID(ctx, resolvingProperties, pom), "parentID", parentID, "parentLocation", parentLocation). + log.WithFields("newParentID", newParentID, "mavenID", r.resolveID(ctx, resolvingProperties, pom), "parentID", parentID, "parentLocation", parentLocation). Debug("parent IDs do not match resolving parent by relative path") return nil } @@ -621,9 +676,15 @@ func (r *mavenResolver) findParentPomByRelativePath(ctx context.Context, pom *go return parentPom } -// pomDependencies returns all dependencies directly defined in a project, including all defined in profiles. -// does not resolve parent dependencies -func pomDependencies(pom *gopom.Project) []gopom.Dependency { +// AddPom allows for adding known pom files with locations within the file resolver, these locations may be used +// while resolving parent poms by relative path +func (r *Resolver) AddPom(pom *Project, location file.Location) { + r.pomLocations[pom] = location +} + +// DirectPomDependencies returns all dependencies directly defined in a project, including all defined in profiles. +// This does not resolve any parent or transitive dependencies +func DirectPomDependencies(pom *Project) []Dependency { dependencies := deref(pom.Dependencies) for _, profile := range deref(pom.Profiles) { dependencies = append(dependencies, deref(profile.Dependencies)...) @@ -633,8 +694,8 @@ func pomDependencies(pom *gopom.Project) []gopom.Dependency { // pomManagedDependencies returns all directly defined managed dependencies in a project pom, including all defined in profiles. // does not resolve parent managed dependencies -func pomManagedDependencies(pom *gopom.Project) []gopom.Dependency { - var dependencies []gopom.Dependency +func pomManagedDependencies(pom *Project) []Dependency { + var dependencies []Dependency if pom.DependencyManagement != nil { dependencies = append(dependencies, deref(pom.DependencyManagement.Dependencies)...) } @@ -645,3 +706,12 @@ func pomManagedDependencies(pom *gopom.Project) []gopom.Dependency { } return dependencies } + +// deref dereferences ptr if not nil, or returns the type default value if ptr is nil +func deref[T any](ptr *T) T { + if ptr == nil { + var t T + return t + } + return *ptr +} diff --git a/syft/pkg/cataloger/java/maven_resolver_test.go b/syft/pkg/cataloger/java/internal/maven/resolver_test.go similarity index 59% rename from syft/pkg/cataloger/java/maven_resolver_test.go rename to syft/pkg/cataloger/java/internal/maven/resolver_test.go index bec9b669156..e2090b3dc39 100644 --- a/syft/pkg/cataloger/java/maven_resolver_test.go +++ b/syft/pkg/cataloger/java/internal/maven/resolver_test.go @@ -1,34 +1,30 @@ -package java +package maven import ( "context" - "io" - "net/http" - "net/http/httptest" - "os" "path/filepath" + "strings" "testing" - "github.com/bmatcuk/doublestar/v4" "github.com/stretchr/testify/require" - "github.com/vifraa/gopom" "github.com/anchore/syft/internal" "github.com/anchore/syft/syft/internal/fileresolver" + maventest "github.com/anchore/syft/syft/pkg/cataloger/java/internal/maven/test" ) func Test_resolveProperty(t *testing.T) { tests := []struct { name string property string - pom gopom.Project + pom Project expected string }{ { name: "property", property: "${version.number}", - pom: gopom.Project{ - Properties: &gopom.Properties{ + pom: Project{ + Properties: &Properties{ Entries: map[string]string{ "version.number": "12.5.0", }, @@ -39,7 +35,7 @@ func Test_resolveProperty(t *testing.T) { { name: "groupId", property: "${project.groupId}", - pom: gopom.Project{ + pom: Project{ GroupID: ptr("org.some.group"), }, expected: "org.some.group", @@ -47,8 +43,8 @@ func Test_resolveProperty(t *testing.T) { { name: "parent groupId", property: "${project.parent.groupId}", - pom: gopom.Project{ - Parent: &gopom.Parent{ + pom: Project{ + Parent: &Parent{ GroupID: ptr("org.some.parent"), }, }, @@ -57,7 +53,7 @@ func Test_resolveProperty(t *testing.T) { { name: "nil pointer halts search", property: "${project.parent.groupId}", - pom: gopom.Project{ + pom: Project{ Parent: nil, }, expected: "", @@ -65,8 +61,8 @@ func Test_resolveProperty(t *testing.T) { { name: "nil string pointer halts search", property: "${project.parent.groupId}", - pom: gopom.Project{ - Parent: &gopom.Parent{ + pom: Project{ + Parent: &Parent{ GroupID: nil, }, }, @@ -75,11 +71,11 @@ func Test_resolveProperty(t *testing.T) { { name: "double dereference", property: "${springboot.version}", - pom: gopom.Project{ - Parent: &gopom.Parent{ + pom: Project{ + Parent: &Parent{ Version: ptr("1.2.3"), }, - Properties: &gopom.Properties{ + Properties: &Properties{ Entries: map[string]string{ "springboot.version": "${project.parent.version}", }, @@ -90,8 +86,8 @@ func Test_resolveProperty(t *testing.T) { { name: "map missing stops double dereference", property: "${springboot.version}", - pom: gopom.Project{ - Parent: &gopom.Parent{ + pom: Project{ + Parent: &Parent{ Version: ptr("1.2.3"), }, }, @@ -100,11 +96,11 @@ func Test_resolveProperty(t *testing.T) { { name: "resolution halts even if it resolves to a variable", property: "${springboot.version}", - pom: gopom.Project{ - Parent: &gopom.Parent{ + pom: Project{ + Parent: &Parent{ Version: ptr("${undefined.version}"), }, - Properties: &gopom.Properties{ + Properties: &Properties{ Entries: map[string]string{ "springboot.version": "${project.parent.version}", }, @@ -115,8 +111,8 @@ func Test_resolveProperty(t *testing.T) { { name: "resolution halts even if cyclic", property: "${springboot.version}", - pom: gopom.Project{ - Properties: &gopom.Properties{ + pom: Project{ + Properties: &Properties{ Entries: map[string]string{ "springboot.version": "${springboot.version}", }, @@ -127,8 +123,8 @@ func Test_resolveProperty(t *testing.T) { { name: "resolution halts even if cyclic more steps", property: "${cyclic.version}", - pom: gopom.Project{ - Properties: &gopom.Properties{ + pom: Project{ + Properties: &Properties{ Entries: map[string]string{ "other.version": "${cyclic.version}", "springboot.version": "${other.version}", @@ -141,11 +137,11 @@ func Test_resolveProperty(t *testing.T) { { name: "resolution halts even if cyclic involving parent", property: "${cyclic.version}", - pom: gopom.Project{ - Parent: &gopom.Parent{ + pom: Project{ + Parent: &Parent{ Version: ptr("${cyclic.version}"), }, - Properties: &gopom.Properties{ + Properties: &Properties{ Entries: map[string]string{ "other.version": "${parent.version}", "springboot.version": "${other.version}", @@ -159,15 +155,15 @@ func Test_resolveProperty(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - r := newMavenResolver(nil, DefaultArchiveCatalogerConfig()) - resolved := r.getPropertyValue(context.Background(), ptr(test.property), &test.pom) + r := NewResolver(nil, DefaultConfig()) + resolved := r.ResolveProperty(context.Background(), ptr(test.property), &test.pom) require.Equal(t, test.expected, resolved) }) } } func Test_mavenResolverLocal(t *testing.T) { - dir, err := filepath.Abs("test-fixtures/pom/maven-repo") + dir, err := filepath.Abs("test-fixtures/maven-repo") require.NoError(t, err) tests := []struct { @@ -211,26 +207,26 @@ func Test_mavenResolverLocal(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { ctx := context.Background() - r := newMavenResolver(nil, ArchiveCatalogerConfig{ + r := NewResolver(nil, Config{ UseNetwork: false, - UseMavenLocalRepository: true, - MavenLocalRepositoryDir: dir, + UseLocalRepository: true, + LocalRepositoryDir: dir, MaxParentRecursiveDepth: test.maxDepth, }) - pom, err := r.findPom(ctx, test.groupID, test.artifactID, test.version) + pom, err := r.FindPom(ctx, test.groupID, test.artifactID, test.version) if test.wantErr != nil { test.wantErr(t, err) } else { require.NoError(t, err) } - got := r.getPropertyValue(context.Background(), &test.expression, pom) + got := r.ResolveProperty(context.Background(), &test.expression, pom) require.Equal(t, test.expected, got) }) } } func Test_mavenResolverRemote(t *testing.T) { - url := mockMavenRepo(t) + url := maventest.MockRepo(t, "test-fixtures/maven-repo") tests := []struct { groupID string @@ -252,25 +248,25 @@ func Test_mavenResolverRemote(t *testing.T) { for _, test := range tests { t.Run(test.artifactID, func(t *testing.T) { ctx := context.Background() - r := newMavenResolver(nil, ArchiveCatalogerConfig{ - UseNetwork: true, - UseMavenLocalRepository: false, - MavenBaseURL: url, + r := NewResolver(nil, Config{ + UseNetwork: true, + UseLocalRepository: false, + Repositories: strings.Split(url, ","), }) - pom, err := r.findPom(ctx, test.groupID, test.artifactID, test.version) + pom, err := r.FindPom(ctx, test.groupID, test.artifactID, test.version) if test.wantErr != nil { test.wantErr(t, err) } else { require.NoError(t, err) } - got := r.getPropertyValue(context.Background(), &test.expression, pom) + got := r.ResolveProperty(context.Background(), &test.expression, pom) require.Equal(t, test.expected, got) }) } } func Test_relativePathParent(t *testing.T) { - resolver, err := fileresolver.NewFromDirectory("test-fixtures/pom/local", "") + resolver, err := fileresolver.NewFromDirectory("test-fixtures/local", "") require.NoError(t, err) ctx := context.Background() @@ -278,12 +274,12 @@ func Test_relativePathParent(t *testing.T) { tests := []struct { name string pom string - validate func(t *testing.T, r *mavenResolver, pom *gopom.Project) + validate func(t *testing.T, r *Resolver, pom *Project) }{ { name: "basic", pom: "child-1/pom.xml", - validate: func(t *testing.T, r *mavenResolver, pom *gopom.Project) { + validate: func(t *testing.T, r *Resolver, pom *Project) { parent, err := r.resolveParent(ctx, pom) require.NoError(t, err) require.Contains(t, r.pomLocations, parent) @@ -292,16 +288,15 @@ func Test_relativePathParent(t *testing.T) { require.NoError(t, err) require.Contains(t, r.pomLocations, parent) - got := r.getPropertyValue(ctx, ptr("${commons-exec_subversion}"), pom) + got := r.ResolveProperty(ctx, ptr("${commons-exec_subversion}"), pom) require.Equal(t, "3", got) - }, }, { name: "parent property", pom: "child-2/pom.xml", - validate: func(t *testing.T, r *mavenResolver, pom *gopom.Project) { - id := r.getMavenID(ctx, pom) + validate: func(t *testing.T, r *Resolver, pom *Project) { + id := r.ResolveID(ctx, pom) // child.parent.version = ${revision} // parent.revision = 3.3.3 require.Equal(t, id.Version, "3.3.3") @@ -310,9 +305,9 @@ func Test_relativePathParent(t *testing.T) { { name: "invalid parent", pom: "child-3/pom.xml", - validate: func(t *testing.T, r *mavenResolver, pom *gopom.Project) { + validate: func(t *testing.T, r *Resolver, pom *Project) { require.NotNil(t, pom) - id := r.getMavenID(ctx, pom) + id := r.ResolveID(ctx, pom) // version should not be resolved to anything require.Equal(t, "", id.Version) }, @@ -321,7 +316,7 @@ func Test_relativePathParent(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - r := newMavenResolver(resolver, DefaultArchiveCatalogerConfig()) + r := NewResolver(resolver, DefaultConfig()) locs, err := resolver.FilesByPath(test.pom) require.NoError(t, err) require.Len(t, locs, 1) @@ -331,7 +326,7 @@ func Test_relativePathParent(t *testing.T) { require.NoError(t, err) defer internal.CloseAndLogError(contents, loc.RealPath) - pom, err := decodePomXML(contents) + pom, err := ParsePomXML(contents) require.NoError(t, err) r.pomLocations[pom] = loc @@ -341,59 +336,7 @@ func Test_relativePathParent(t *testing.T) { } } -// mockMavenRepo starts a remote maven repo serving all the pom files found in test-fixtures/pom/maven-repo -func mockMavenRepo(t *testing.T) (url string) { - t.Helper() - - return mockMavenRepoAt(t, "test-fixtures/pom/maven-repo") -} - -// mockMavenRepoAt starts a remote maven repo serving all the pom files found in the given directory -func mockMavenRepoAt(t *testing.T, dir string) (url string) { - t.Helper() - - // mux is the HTTP request multiplexer used with the test server. - mux := http.NewServeMux() - - // We want to ensure that tests catch mistakes where the endpoint URL is - // specified as absolute rather than relative. It only makes a difference - // when there's a non-empty base URL path. So, use that. See issue #752. - apiHandler := http.NewServeMux() - apiHandler.Handle("/", mux) - // server is a test HTTP server used to provide mock API responses. - server := httptest.NewServer(apiHandler) - - t.Cleanup(server.Close) - - matches, err := doublestar.Glob(os.DirFS(dir), filepath.Join("**", "*.pom")) - require.NoError(t, err) - - for _, match := range matches { - fullPath, err := filepath.Abs(filepath.Join(dir, match)) - require.NoError(t, err) - match = "/" + filepath.ToSlash(match) - mux.HandleFunc(match, mockMavenHandler(fullPath)) - } - - return server.URL -} - -func mockMavenHandler(responseFixture string) func(w http.ResponseWriter, r *http.Request) { - return func(w http.ResponseWriter, r *http.Request) { - w.WriteHeader(http.StatusOK) - // Set the Content-Type header to indicate that the response is XML - w.Header().Set("Content-Type", "application/xml") - // Copy the file's content to the response writer - f, err := os.Open(responseFixture) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - defer internal.CloseAndLogError(f, responseFixture) - _, err = io.Copy(w, f) - if err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - } +// ptr returns a pointer to the given value +func ptr[T any](value T) *T { + return &value } diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/declared-iso-8859-encoded-pom.xml.base64 b/syft/pkg/cataloger/java/internal/maven/test-fixtures/declared-iso-8859-encoded-pom.xml.base64 similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/pom/declared-iso-8859-encoded-pom.xml.base64 rename to syft/pkg/cataloger/java/internal/maven/test-fixtures/declared-iso-8859-encoded-pom.xml.base64 diff --git a/syft/pkg/cataloger/java/test-fixtures/local-repository-settings/.m2/settings.xml b/syft/pkg/cataloger/java/internal/maven/test-fixtures/local-repository-settings/.m2/settings.xml similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/local-repository-settings/.m2/settings.xml rename to syft/pkg/cataloger/java/internal/maven/test-fixtures/local-repository-settings/.m2/settings.xml diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/local/child-1/pom.xml b/syft/pkg/cataloger/java/internal/maven/test-fixtures/local/child-1/pom.xml similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/pom/local/child-1/pom.xml rename to syft/pkg/cataloger/java/internal/maven/test-fixtures/local/child-1/pom.xml diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/local/child-2/pom.xml b/syft/pkg/cataloger/java/internal/maven/test-fixtures/local/child-2/pom.xml similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/pom/local/child-2/pom.xml rename to syft/pkg/cataloger/java/internal/maven/test-fixtures/local/child-2/pom.xml diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/local/child-3/pom.xml b/syft/pkg/cataloger/java/internal/maven/test-fixtures/local/child-3/pom.xml similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/pom/local/child-3/pom.xml rename to syft/pkg/cataloger/java/internal/maven/test-fixtures/local/child-3/pom.xml diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/local/contains-child-1/pom.xml b/syft/pkg/cataloger/java/internal/maven/test-fixtures/local/contains-child-1/pom.xml similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/pom/local/contains-child-1/pom.xml rename to syft/pkg/cataloger/java/internal/maven/test-fixtures/local/contains-child-1/pom.xml diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/local/parent-1/pom.xml b/syft/pkg/cataloger/java/internal/maven/test-fixtures/local/parent-1/pom.xml similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/pom/local/parent-1/pom.xml rename to syft/pkg/cataloger/java/internal/maven/test-fixtures/local/parent-1/pom.xml diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/local/parent-2/pom.xml b/syft/pkg/cataloger/java/internal/maven/test-fixtures/local/parent-2/pom.xml similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/pom/local/parent-2/pom.xml rename to syft/pkg/cataloger/java/internal/maven/test-fixtures/local/parent-2/pom.xml diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/local/parent-3/pom.xml b/syft/pkg/cataloger/java/internal/maven/test-fixtures/local/parent-3/pom.xml similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/pom/local/parent-3/pom.xml rename to syft/pkg/cataloger/java/internal/maven/test-fixtures/local/parent-3/pom.xml diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/my/org/child-one/1.3.6/child-one-1.3.6.pom b/syft/pkg/cataloger/java/internal/maven/test-fixtures/maven-repo/my/org/child-one/1.3.6/child-one-1.3.6.pom similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/my/org/child-one/1.3.6/child-one-1.3.6.pom rename to syft/pkg/cataloger/java/internal/maven/test-fixtures/maven-repo/my/org/child-one/1.3.6/child-one-1.3.6.pom diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/my/org/child-two/2.1.90/child-two-2.1.90.pom b/syft/pkg/cataloger/java/internal/maven/test-fixtures/maven-repo/my/org/child-two/2.1.90/child-two-2.1.90.pom similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/my/org/child-two/2.1.90/child-two-2.1.90.pom rename to syft/pkg/cataloger/java/internal/maven/test-fixtures/maven-repo/my/org/child-two/2.1.90/child-two-2.1.90.pom diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/my/org/parent-one/3.11.0/parent-one-3.11.0.pom b/syft/pkg/cataloger/java/internal/maven/test-fixtures/maven-repo/my/org/parent-one/3.11.0/parent-one-3.11.0.pom similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/my/org/parent-one/3.11.0/parent-one-3.11.0.pom rename to syft/pkg/cataloger/java/internal/maven/test-fixtures/maven-repo/my/org/parent-one/3.11.0/parent-one-3.11.0.pom diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/my/org/parent-two/13.7.8/parent-two-13.7.8.pom b/syft/pkg/cataloger/java/internal/maven/test-fixtures/maven-repo/my/org/parent-two/13.7.8/parent-two-13.7.8.pom similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/my/org/parent-two/13.7.8/parent-two-13.7.8.pom rename to syft/pkg/cataloger/java/internal/maven/test-fixtures/maven-repo/my/org/parent-two/13.7.8/parent-two-13.7.8.pom diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/net/shibboleth/parent/7.11.2/parent-7.11.2.pom b/syft/pkg/cataloger/java/internal/maven/test-fixtures/maven-repo/net/shibboleth/parent/7.11.2/parent-7.11.2.pom similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/net/shibboleth/parent/7.11.2/parent-7.11.2.pom rename to syft/pkg/cataloger/java/internal/maven/test-fixtures/maven-repo/net/shibboleth/parent/7.11.2/parent-7.11.2.pom diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/org/apache/commons/commons-parent/54/commons-parent-54.pom b/syft/pkg/cataloger/java/internal/maven/test-fixtures/maven-repo/org/apache/commons/commons-parent/54/commons-parent-54.pom similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/org/apache/commons/commons-parent/54/commons-parent-54.pom rename to syft/pkg/cataloger/java/internal/maven/test-fixtures/maven-repo/org/apache/commons/commons-parent/54/commons-parent-54.pom diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/org/junit/junit-bom/5.9.0/junit-bom-5.9.0.pom b/syft/pkg/cataloger/java/internal/maven/test-fixtures/maven-repo/org/junit/junit-bom/5.9.0/junit-bom-5.9.0.pom similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/org/junit/junit-bom/5.9.0/junit-bom-5.9.0.pom rename to syft/pkg/cataloger/java/internal/maven/test-fixtures/maven-repo/org/junit/junit-bom/5.9.0/junit-bom-5.9.0.pom diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/org/junit/junit-bom/5.9.1/junit-bom-5.9.1.pom b/syft/pkg/cataloger/java/internal/maven/test-fixtures/maven-repo/org/junit/junit-bom/5.9.1/junit-bom-5.9.1.pom similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/org/junit/junit-bom/5.9.1/junit-bom-5.9.1.pom rename to syft/pkg/cataloger/java/internal/maven/test-fixtures/maven-repo/org/junit/junit-bom/5.9.1/junit-bom-5.9.1.pom diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/org/opensaml/opensaml-parent/3.4.6/opensaml-parent-3.4.6.pom b/syft/pkg/cataloger/java/internal/maven/test-fixtures/maven-repo/org/opensaml/opensaml-parent/3.4.6/opensaml-parent-3.4.6.pom similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/org/opensaml/opensaml-parent/3.4.6/opensaml-parent-3.4.6.pom rename to syft/pkg/cataloger/java/internal/maven/test-fixtures/maven-repo/org/opensaml/opensaml-parent/3.4.6/opensaml-parent-3.4.6.pom diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/undeclared-iso-8859-encoded-pom.xml.base64 b/syft/pkg/cataloger/java/internal/maven/test-fixtures/undeclared-iso-8859-encoded-pom.xml.base64 similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/pom/undeclared-iso-8859-encoded-pom.xml.base64 rename to syft/pkg/cataloger/java/internal/maven/test-fixtures/undeclared-iso-8859-encoded-pom.xml.base64 diff --git a/syft/pkg/cataloger/java/internal/maven/test/mock_repo.go b/syft/pkg/cataloger/java/internal/maven/test/mock_repo.go new file mode 100644 index 00000000000..6ab462f6b9d --- /dev/null +++ b/syft/pkg/cataloger/java/internal/maven/test/mock_repo.go @@ -0,0 +1,65 @@ +package maventest + +import ( + "io" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "testing" + + "github.com/bmatcuk/doublestar/v4" + "github.com/stretchr/testify/require" + + "github.com/anchore/syft/internal" +) + +// MockRepo starts a remote maven repo serving all the pom files found in a maven-structured directory +func MockRepo(t *testing.T, dir string) (url string) { + t.Helper() + + // mux is the HTTP request multiplexer used with the test server. + mux := http.NewServeMux() + + // We want to ensure that tests catch mistakes where the endpoint URL is + // specified as absolute rather than relative. It only makes a difference + // when there's a non-empty base URL path. So, use that. See issue #752. + apiHandler := http.NewServeMux() + apiHandler.Handle("/", mux) + // server is a test HTTP server used to provide mock API responses. + server := httptest.NewServer(apiHandler) + + t.Cleanup(server.Close) + + matches, err := doublestar.Glob(os.DirFS(dir), filepath.Join("**", "*.pom")) + require.NoError(t, err) + + for _, match := range matches { + fullPath, err := filepath.Abs(filepath.Join(dir, match)) + require.NoError(t, err) + match = "/" + filepath.ToSlash(match) + mux.HandleFunc(match, mockMavenHandler(fullPath)) + } + + return server.URL +} + +func mockMavenHandler(responseFixture string) http.HandlerFunc { + return func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + // Set the Content-Type header to indicate that the response is XML + w.Header().Set("Content-Type", "application/xml") + // Copy the file's content to the response writer + f, err := os.Open(responseFixture) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + defer internal.CloseAndLogError(f, responseFixture) + _, err = io.Copy(w, f) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + } +} diff --git a/syft/pkg/cataloger/java/parse_pom_xml.go b/syft/pkg/cataloger/java/parse_pom_xml.go index 370ebc2c84a..b83347a1e77 100644 --- a/syft/pkg/cataloger/java/parse_pom_xml.go +++ b/syft/pkg/cataloger/java/parse_pom_xml.go @@ -1,23 +1,16 @@ package java import ( - "bytes" "context" - "encoding/xml" "errors" - "fmt" - "io" "strings" - "github.com/saintfish/chardet" - "github.com/vifraa/gopom" - "golang.org/x/net/html/charset" - "github.com/anchore/syft/internal" "github.com/anchore/syft/internal/log" "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/pkg/cataloger/java/internal/maven" ) const ( @@ -39,9 +32,9 @@ func (p pomXMLCataloger) Catalog(ctx context.Context, fileResolver file.Resolver return nil, nil, err } - r := newMavenResolver(fileResolver, p.cfg) + r := maven.NewResolver(fileResolver, p.cfg.mavenConfig()) - var poms []*gopom.Project + poms := map[*maven.Project]file.Location{} for _, pomLocation := range locations { pom, err := readPomFromLocation(fileResolver, pomLocation) if err != nil || pom == nil { @@ -49,35 +42,55 @@ func (p pomXMLCataloger) Catalog(ctx context.Context, fileResolver file.Resolver continue } - poms = append(poms, pom) - - // store information about this pom for future lookups - r.pomLocations[pom] = pomLocation - r.resolved[r.getMavenID(ctx, pom)] = pom + poms[pom] = pomLocation + r.AddPom(pom, pomLocation) } var pkgs []pkg.Package - for _, pom := range poms { - pkgs = append(pkgs, processPomXML(ctx, r, pom, r.pomLocations[pom])...) + var relationships []artifact.Relationship + var errs []error + + for pom, location := range poms { + mainPkg := newPackageFromMavenPom(ctx, r, pom, location) + mainPkg.SetID() + + if mainPkg != nil { + pkgs = append(pkgs, *mainPkg) + } + + newPkgs, newRelationships, newErrs := collectDependencies(ctx, r, mainPkg, pom, location, p.cfg.IncludeTransitiveDependencies) + pkgs = append(pkgs, newPkgs...) + relationships = append(relationships, newRelationships...) + errs = append(errs, newErrs...) + // add dependency-of relationships from the dependencies to the main pkg + for _, newPkg := range newPkgs { + relationships = append(relationships, artifact.Relationship{ + From: mainPkg, + To: newPkg, + Type: artifact.DependencyOfRelationship, + }) + } } - return pkgs, nil, nil + return pkgs, relationships, errors.Join(errs...) } -func readPomFromLocation(fileResolver file.Resolver, pomLocation file.Location) (*gopom.Project, error) { +func readPomFromLocation(fileResolver file.Resolver, pomLocation file.Location) (*maven.Project, error) { contents, err := fileResolver.FileContentsByLocation(pomLocation) if err != nil { return nil, err } defer internal.CloseAndLogError(contents, pomLocation.RealPath) - return decodePomXML(contents) + return maven.ParsePomXML(contents) } -func processPomXML(ctx context.Context, r *mavenResolver, pom *gopom.Project, loc file.Location) []pkg.Package { +func collectDependencies(ctx context.Context, r *maven.Resolver, parentPkg *pkg.Package, pom *maven.Project, loc file.Location, includeTransitiveDependencies bool) ([]pkg.Package, []artifact.Relationship, []error) { + var errs []error var pkgs []pkg.Package + var relationships []artifact.Relationship - pomID := r.getMavenID(ctx, pom) - for _, dep := range pomDependencies(pom) { - depID := r.resolveDependencyID(ctx, pom, dep) + pomID := r.ResolveID(ctx, pom) + for _, dep := range maven.DirectPomDependencies(pom) { + depID := r.ResolveDependencyID(ctx, pom, dep) log.WithFields("pomLocation", loc, "mavenID", pomID, "dependencyID", depID).Trace("adding maven pom dependency") p, err := newPackageFromDependency( @@ -94,15 +107,36 @@ func processPomXML(ctx context.Context, r *mavenResolver, pom *gopom.Project, lo continue } pkgs = append(pkgs, *p) + if parentPkg != nil { + relationships = append(relationships, artifact.Relationship{ + From: *p, + To: *parentPkg, + Type: artifact.DependencyOfRelationship, + }) + } + + if includeTransitiveDependencies { + depPom, err := r.FindPom(ctx, depID.GroupID, depID.ArtifactID, depID.Version) + if err != nil { + errs = append(errs, err) + } + if depPom == nil { + continue + } + transitivePkgs, transitiveRelationships, transitiveErrs := collectDependencies(ctx, r, p, depPom, loc, includeTransitiveDependencies) + pkgs = append(pkgs, transitivePkgs...) + relationships = append(relationships, transitiveRelationships...) + errs = append(errs, transitiveErrs...) + } } - return pkgs + return pkgs, relationships, errs } -func newPomProject(ctx context.Context, r *mavenResolver, path string, pom *gopom.Project) *pkg.JavaPomProject { - id := r.getMavenID(ctx, pom) - name := r.getPropertyValue(ctx, pom.Name, pom) - projectURL := r.getPropertyValue(ctx, pom.URL, pom) +func newPomProject(ctx context.Context, r *maven.Resolver, path string, pom *maven.Project) *pkg.JavaPomProject { + id := r.ResolveID(ctx, pom) + name := r.ResolveProperty(ctx, pom.Name, pom) + projectURL := r.ResolveProperty(ctx, pom.URL, pom) log.WithFields("path", path, "artifactID", id.ArtifactID, "name", name, "projectURL", projectURL).Trace("parsing pom.xml") return &pkg.JavaPomProject{ @@ -112,34 +146,32 @@ func newPomProject(ctx context.Context, r *mavenResolver, path string, pom *gopo ArtifactID: id.ArtifactID, Version: id.Version, Name: name, - Description: cleanDescription(r.getPropertyValue(ctx, pom.Description, pom)), + Description: cleanDescription(r.ResolveProperty(ctx, pom.Description, pom)), URL: projectURL, } } -func newPackageFromDependency(ctx context.Context, r *mavenResolver, pom *gopom.Project, dep gopom.Dependency, locations ...file.Location) (*pkg.Package, error) { - id := r.resolveDependencyID(ctx, pom, dep) +func newPackageFromDependency(ctx context.Context, r *maven.Resolver, pom *maven.Project, dep maven.Dependency, locations ...file.Location) (*pkg.Package, error) { + id := r.ResolveDependencyID(ctx, pom, dep) m := pkg.JavaArchive{ PomProperties: &pkg.JavaPomProperties{ GroupID: id.GroupID, ArtifactID: id.ArtifactID, - Scope: r.getPropertyValue(ctx, dep.Scope, pom), + Scope: r.ResolveProperty(ctx, dep.Scope, pom), }, } var err error var licenses []pkg.License - dependencyPom, depErr := r.findPom(ctx, id.GroupID, id.ArtifactID, id.Version) + dependencyPom, depErr := r.FindPom(ctx, id.GroupID, id.ArtifactID, id.Version) if depErr != nil { err = errors.Join(err, depErr) } if dependencyPom != nil { - depLicenses, _ := r.resolveLicenses(ctx, dependencyPom) - for _, license := range depLicenses { - licenses = append(licenses, pkg.NewLicenseFromFields(deref(license.Name), deref(license.URL), nil)) - } + depLicenses, _ := r.ResolveLicenses(ctx, dependencyPom) + licenses = append(licenses, toPkgLicenses(nil, depLicenses)...) } p := &pkg.Package{ @@ -159,60 +191,14 @@ func newPackageFromDependency(ctx context.Context, r *mavenResolver, pom *gopom. return p, err } -// decodePomXML decodes a pom XML file, detecting and converting non-UTF-8 charsets. this DOES NOT perform any logic to resolve properties such as groupID, artifactID, and version -func decodePomXML(content io.Reader) (project *gopom.Project, err error) { - inputReader, err := getUtf8Reader(content) - if err != nil { - return nil, fmt.Errorf("unable to read pom.xml: %w", err) - } - - decoder := xml.NewDecoder(inputReader) - // when an xml file has a character set declaration (e.g. '') read that and use the correct decoder - decoder.CharsetReader = charset.NewReaderLabel - - project = &gopom.Project{} - if err := decoder.Decode(project); err != nil { - return nil, fmt.Errorf("unable to unmarshal pom.xml: %w", err) - } - - return project, nil -} - -func getUtf8Reader(content io.Reader) (io.Reader, error) { - pomContents, err := io.ReadAll(content) - if err != nil { - return nil, err - } - - detector := chardet.NewTextDetector() - detection, err := detector.DetectBest(pomContents) - - var inputReader io.Reader - if err == nil && detection != nil { - if detection.Charset == "UTF-8" { - inputReader = bytes.NewReader(pomContents) - } else { - inputReader, err = charset.NewReaderLabel(detection.Charset, bytes.NewReader(pomContents)) - if err != nil { - return nil, fmt.Errorf("unable to get encoding: %w", err) - } - } - } else { - // we could not detect the encoding, but we want a valid file to read. Replace unreadable - // characters with the UTF-8 replacement character. - inputReader = strings.NewReader(strings.ToValidUTF8(string(pomContents), "�")) - } - return inputReader, nil -} - -func pomParent(ctx context.Context, r *mavenResolver, pom *gopom.Project) *pkg.JavaPomParent { +func pomParent(ctx context.Context, r *maven.Resolver, pom *maven.Project) *pkg.JavaPomParent { if pom == nil || pom.Parent == nil { return nil } - groupID := r.getPropertyValue(ctx, pom.Parent.GroupID, pom) - artifactID := r.getPropertyValue(ctx, pom.Parent.ArtifactID, pom) - version := r.getPropertyValue(ctx, pom.Parent.Version, pom) + groupID := r.ResolveProperty(ctx, pom.Parent.GroupID, pom) + artifactID := r.ResolveProperty(ctx, pom.Parent.ArtifactID, pom) + version := r.ResolveProperty(ctx, pom.Parent.Version, pom) if groupID == "" && artifactID == "" && version == "" { return nil diff --git a/syft/pkg/cataloger/java/parse_pom_xml_test.go b/syft/pkg/cataloger/java/parse_pom_xml_test.go index 45650049072..cd5d7ba2168 100644 --- a/syft/pkg/cataloger/java/parse_pom_xml_test.go +++ b/syft/pkg/cataloger/java/parse_pom_xml_test.go @@ -2,21 +2,19 @@ package java import ( "context" - "encoding/base64" - "io" "os" - "strings" "testing" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "github.com/vifraa/gopom" "github.com/anchore/syft/syft/cataloging" "github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/license" "github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest" + "github.com/anchore/syft/syft/pkg/cataloger/java/internal/maven" + maventest "github.com/anchore/syft/syft/pkg/cataloger/java/internal/maven/test" "github.com/anchore/syft/syft/source" "github.com/anchore/syft/syft/source/directorysource" ) @@ -27,7 +25,7 @@ func Test_parsePomXML(t *testing.T) { expected []pkg.Package }{ { - dir: "test-fixtures/pom/local/example-java-app-maven", + dir: "test-fixtures/pom/example-java-app-maven", expected: []pkg.Package{ { Name: "joda-time", @@ -80,67 +78,13 @@ func Test_parsePomXML(t *testing.T) { } } -func Test_decodePomXML_surviveNonUtf8Encoding(t *testing.T) { - // regression for https://github.com/anchore/syft/issues/2044 - - // we are storing the base64 contents of the pom.xml file. We are doing this to prevent accidental changes to the - // file, which is extremely important for this test. - - // for instance, even changing a single character in the file and saving in an IntelliJ IDE will automatically - // convert the file to UTF-8, which will break this test: - - // xxd with the original pom.xml - // 00000780: 6964 3e0d 0a20 2020 2020 2020 2020 2020 id>.. - // 00000790: 203c 6e61 6d65 3e4a e972 f46d 6520 4d69 J.r.me Mi - // 000007a0: 7263 3c2f 6e61 6d65 3e0d 0a20 2020 2020 rc.. - - // xxd with the pom.xml converted to UTF-8 (from a simple change with IntelliJ) - // 00000780: 6964 3e0d 0a20 2020 2020 2020 2020 2020 id>.. - // 00000790: 203c 6e61 6d65 3e4a efbf bd72 efbf bd6d J...r...m - // 000007a0: 6520 4d69 7263 3c2f 6e61 6d65 3e0d 0a20 e Mirc.. - - // Note that the name "Jérôme Mirc" was originally interpreted as "J.r.me Mi" and after the save - // is now encoded as "J...r...m" which is not what we want (note the extra bytes for each non UTF-8 character. - // The original 0xe9 byte (é) was converted to 0xefbfbd (�) which is the UTF-8 replacement character. - // This is quite silly on the part of IntelliJ, but it is what it is. - - cases := []struct { - name string - fixture string - }{ - { - name: "undeclared encoding", - fixture: "test-fixtures/pom/undeclared-iso-8859-encoded-pom.xml.base64", - }, - { - name: "declared encoding", - fixture: "test-fixtures/pom/declared-iso-8859-encoded-pom.xml.base64", - }, - } - - for _, c := range cases { - t.Run(c.name, func(t *testing.T) { - fh, err := os.Open(c.fixture) - require.NoError(t, err) - - decoder := base64.NewDecoder(base64.StdEncoding, fh) - - proj, err := decodePomXML(decoder) - - require.NoError(t, err) - require.NotEmpty(t, proj.Developers) - }) - } - -} - func Test_parseCommonsTextPomXMLProject(t *testing.T) { tests := []struct { dir string expected []pkg.Package }{ { - dir: "test-fixtures/pom/local/commons-text-1.10.0", + dir: "test-fixtures/pom/commons-text-1.10.0", expected: getCommonsTextExpectedPackages(), }, @@ -165,6 +109,8 @@ func Test_parseCommonsTextPomXMLProject(t *testing.T) { } func Test_parseCommonsTextPomXMLProjectWithLocalRepository(t *testing.T) { + mavenLocalRepoDir := "internal/maven/test-fixtures/maven-repo" + // Using the local repository, the version of junit-jupiter will be resolved expectedPackages := getCommonsTextExpectedPackages() @@ -188,7 +134,7 @@ func Test_parseCommonsTextPomXMLProjectWithLocalRepository(t *testing.T) { expected []pkg.Package }{ { - dir: "test-fixtures/pom/local/commons-text-1.10.0", + dir: "test-fixtures/pom/commons-text-1.10.0", expected: expectedPackages, }, } @@ -205,7 +151,7 @@ func Test_parseCommonsTextPomXMLProjectWithLocalRepository(t *testing.T) { IncludeUnindexedArchives: true, }, UseMavenLocalRepository: true, - MavenLocalRepositoryDir: "test-fixtures/pom/maven-repo", + MavenLocalRepositoryDir: mavenLocalRepoDir, }) pkgtest.TestCataloger(t, test.dir, cat, test.expected, nil) }) @@ -213,7 +159,7 @@ func Test_parseCommonsTextPomXMLProjectWithLocalRepository(t *testing.T) { } func Test_parseCommonsTextPomXMLProjectWithNetwork(t *testing.T) { - url := mockMavenRepo(t) + url := maventest.MockRepo(t, "internal/maven/test-fixtures/maven-repo") // Using the local repository, the version of junit-jupiter will be resolved expectedPackages := getCommonsTextExpectedPackages() @@ -238,7 +184,7 @@ func Test_parseCommonsTextPomXMLProjectWithNetwork(t *testing.T) { expected []pkg.Package }{ { - dir: "test-fixtures/pom/local/commons-text-1.10.0", + dir: "test-fixtures/pom/commons-text-1.10.0", expected: expectedPackages, }, } @@ -272,7 +218,7 @@ func Test_parsePomXMLProject(t *testing.T) { licenses []pkg.License }{ { - name: "go case", + name: "no license info", project: &pkg.JavaPomProject{ Path: "test-fixtures/pom/commons-codec.pom.xml", Parent: &pkg.JavaPomParent{ @@ -331,17 +277,17 @@ func Test_parsePomXMLProject(t *testing.T) { t.Run(test.name, func(t *testing.T) { fixture, err := os.Open(test.project.Path) assert.NoError(t, err) - r := newMavenResolver(nil, ArchiveCatalogerConfig{}) + r := maven.NewResolver(nil, maven.Config{}) - pom, err := gopom.ParseFromReader(fixture) + pom, err := maven.ParsePomXML(fixture) require.NoError(t, err) actual := newPomProject(context.Background(), r, fixture.Name(), pom) assert.NoError(t, err) assert.Equal(t, test.project, actual) - licenses := r.pomLicenses(context.Background(), pom) - assert.NoError(t, err) + licenses, err := r.GetLicenses(context.Background(), pom) + //assert.NoError(t, err) assert.Equal(t, test.licenses, toPkgLicenses(&jarLocation, licenses)) }) } @@ -350,12 +296,12 @@ func Test_parsePomXMLProject(t *testing.T) { func Test_pomParent(t *testing.T) { tests := []struct { name string - input *gopom.Parent + input *maven.Parent expected *pkg.JavaPomParent }{ { name: "only group ID", - input: &gopom.Parent{ + input: &maven.Parent{ GroupID: ptr("org.something"), }, expected: &pkg.JavaPomParent{ @@ -364,7 +310,7 @@ func Test_pomParent(t *testing.T) { }, { name: "only artifact ID", - input: &gopom.Parent{ + input: &maven.Parent{ ArtifactID: ptr("something"), }, expected: &pkg.JavaPomParent{ @@ -373,7 +319,7 @@ func Test_pomParent(t *testing.T) { }, { name: "only Version", - input: &gopom.Parent{ + input: &maven.Parent{ Version: ptr("something"), }, expected: &pkg.JavaPomParent{ @@ -387,12 +333,12 @@ func Test_pomParent(t *testing.T) { }, { name: "empty", - input: &gopom.Parent{}, + input: &maven.Parent{}, expected: nil, }, { name: "unused field", - input: &gopom.Parent{ + input: &maven.Parent{ RelativePath: ptr("something"), }, expected: nil, @@ -401,8 +347,8 @@ func Test_pomParent(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - r := newMavenResolver(nil, DefaultArchiveCatalogerConfig()) - assert.Equal(t, test.expected, pomParent(context.Background(), r, &gopom.Project{Parent: test.input})) + r := maven.NewResolver(nil, maven.DefaultConfig()) + assert.Equal(t, test.expected, pomParent(context.Background(), r, &maven.Project{Parent: test.input})) }) } } @@ -431,10 +377,10 @@ func Test_cleanDescription(t *testing.T) { } func Test_resolveLicenses(t *testing.T) { - mavenURL := mockMavenRepo(t) - localM2 := "test-fixtures/pom/maven-repo" - localDir := "test-fixtures/pom/local" - containingDir := "test-fixtures/pom/local/contains-child-1" + mavenURL := maventest.MockRepo(t, "internal/maven/test-fixtures/maven-repo") + localM2 := "internal/maven/test-fixtures/maven-repo" + localDir := "internal/maven/test-fixtures/local" + containingDir := "internal/maven/test-fixtures/local/contains-child-1" expectedLicenses := []pkg.License{ { @@ -527,31 +473,6 @@ func Test_resolveLicenses(t *testing.T) { } } -func Test_getUtf8Reader(t *testing.T) { - tests := []struct { - name string - contents string - }{ - { - name: "unknown encoding", - // random binary contents - contents: "BkiJz02JyEWE0nXR6TH///9NicpJweEETIucJIgAAABJicxPjQwhTY1JCE05WQh0BU2J0eunTYshTIusJIAAAAAPHwBNOeV1BUUx2+tWTIlUJDhMiUwkSEyJRCQgSIl8JFBMiQ==", - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - decoder := base64.NewDecoder(base64.StdEncoding, strings.NewReader(tt.contents)) - - got, err := getUtf8Reader(decoder) - require.NoError(t, err) - gotBytes, err := io.ReadAll(got) - require.NoError(t, err) - // if we couldn't decode the section as UTF-8, we should get a replacement character - assert.Contains(t, string(gotBytes), "�") - }) - } -} - func getCommonsTextExpectedPackages() []pkg.Package { return []pkg.Package{ { diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/local/commons-text-1.10.0/pom.xml b/syft/pkg/cataloger/java/test-fixtures/pom/commons-text-1.10.0/pom.xml similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/pom/local/commons-text-1.10.0/pom.xml rename to syft/pkg/cataloger/java/test-fixtures/pom/commons-text-1.10.0/pom.xml diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/local/example-java-app-maven/pom.xml b/syft/pkg/cataloger/java/test-fixtures/pom/example-java-app-maven/pom.xml similarity index 100% rename from syft/pkg/cataloger/java/test-fixtures/pom/local/example-java-app-maven/pom.xml rename to syft/pkg/cataloger/java/test-fixtures/pom/example-java-app-maven/pom.xml From f0475b557908aa343035c7e0662080db6033049c Mon Sep 17 00:00:00 2001 From: Keith Zantow Date: Tue, 24 Sep 2024 17:50:30 -0400 Subject: [PATCH 2/2] fix: archive parser dependency graph for nested jars, source poms --- syft/pkg/cataloger/java/archive_parser.go | 69 ++++++++++++++--------- syft/pkg/cataloger/java/cataloger.go | 2 +- syft/pkg/cataloger/java/parse_pom_xml.go | 9 +-- 3 files changed, 45 insertions(+), 35 deletions(-) diff --git a/syft/pkg/cataloger/java/archive_parser.go b/syft/pkg/cataloger/java/archive_parser.go index e5ea00bf2d6..93e3d9c33d2 100644 --- a/syft/pkg/cataloger/java/archive_parser.go +++ b/syft/pkg/cataloger/java/archive_parser.go @@ -67,14 +67,19 @@ func newGenericArchiveParserAdapter(cfg ArchiveCatalogerConfig) genericArchivePa } // parseJavaArchive is a parser function for java archive contents, returning all Java libraries and nested archives. -func (gap genericArchiveParserAdapter) parseJavaArchive(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { +func (gap genericArchiveParserAdapter) parseJavaArchiveMain(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { + return gap.parseJavaArchive(ctx, reader, nil) +} + +// parseJavaArchive is a parser function for java archive contents, returning all Java libraries and nested archives. +func (gap genericArchiveParserAdapter) parseJavaArchive(ctx context.Context, reader file.LocationReadCloser, parentPkg *pkg.Package) ([]pkg.Package, []artifact.Relationship, error) { parser, cleanupFn, err := newJavaArchiveParser(reader, true, gap.cfg) // note: even on error, we should always run cleanup functions defer cleanupFn() if err != nil { return nil, nil, err } - return parser.parse(ctx) + return parser.parse(ctx, parentPkg) } // uniquePkgKey creates a unique string to identify the given package. @@ -115,27 +120,40 @@ func newJavaArchiveParser(reader file.LocationReadCloser, detectNested bool, cfg } // parse the loaded archive and return all packages found. -func (j *archiveParser) parse(ctx context.Context) ([]pkg.Package, []artifact.Relationship, error) { +func (j *archiveParser) parse(ctx context.Context, parentPkg *pkg.Package) ([]pkg.Package, []artifact.Relationship, error) { var pkgs []pkg.Package var relationships []artifact.Relationship // find the parent package from the java manifest - parentPkg, err := j.discoverMainPackage(ctx) + mainPkg, err := j.discoverMainPackage(ctx) if err != nil { return nil, nil, fmt.Errorf("could not generate package from %s: %w", j.location, err) } // find aux packages from pom.properties/pom.xml and potentially modify the existing parentPkg // NOTE: we cannot generate sha1 digests from packages discovered via pom.properties/pom.xml - auxPkgs, err := j.discoverPkgsFromAllMavenFiles(ctx, parentPkg) + auxPkgs, err := j.discoverPkgsFromAllMavenFiles(ctx, mainPkg) if err != nil { return nil, nil, err } pkgs = append(pkgs, auxPkgs...) + if mainPkg != nil { + finalizePackage(mainPkg) + pkgs = append(pkgs, *mainPkg) + + if parentPkg != nil { + relationships = append(relationships, artifact.Relationship{ + From: *mainPkg, + To: *parentPkg, + Type: artifact.DependencyOfRelationship, + }) + } + } + if j.detectNested { // find nested java archive packages - nestedPkgs, nestedRelationships, err := j.discoverPkgsFromNestedArchives(ctx, parentPkg) + nestedPkgs, nestedRelationships, err := j.discoverPkgsFromNestedArchives(ctx, mainPkg) if err != nil { return nil, nil, err } @@ -143,30 +161,29 @@ func (j *archiveParser) parse(ctx context.Context) ([]pkg.Package, []artifact.Re relationships = append(relationships, nestedRelationships...) } - // lastly, add the parent package to the list (assuming the parent exists) - if parentPkg != nil { - pkgs = append([]pkg.Package{*parentPkg}, pkgs...) - } - // add pURLs to all packages found // note: since package information may change after initial creation when parsing multiple locations within the // jar, we wait until the conclusion of the parsing process before synthesizing pURLs. for i := range pkgs { - p := &pkgs[i] - if m, ok := p.Metadata.(pkg.JavaArchive); ok { - p.PURL = packageURL(p.Name, p.Version, m) + finalizePackage(&pkgs[i]) + } + return pkgs, relationships, nil +} - if strings.Contains(p.PURL, "io.jenkins.plugins") || strings.Contains(p.PURL, "org.jenkins-ci.plugins") { - p.Type = pkg.JenkinsPluginPkg - } - } else { - log.WithFields("package", p.String()).Warn("unable to extract java metadata to generate purl") - } +// finalizePackage sets the PURL, and performs some checks to determine if the package should be +// classified as a Jenkins plugin, updates some information and calls p.SetID() +func finalizePackage(p *pkg.Package) { + if m, ok := p.Metadata.(pkg.JavaArchive); ok { + p.PURL = packageURL(p.Name, p.Version, m) - p.SetID() + if strings.Contains(p.PURL, "io.jenkins.plugins") || strings.Contains(p.PURL, "org.jenkins-ci.plugins") { + p.Type = pkg.JenkinsPluginPkg + } + } else { + log.WithFields("package", p.String()).Warn("unable to extract java metadata to generate purl") } - return pkgs, relationships, nil + p.SetID() } // discoverMainPackage parses the root Java manifest used as the parent package to all discovered nested packages. @@ -499,7 +516,7 @@ func discoverPkgsFromOpeners(ctx context.Context, location file.Location, opener var relationships []artifact.Relationship for pathWithinArchive, archiveOpener := range openers { - nestedPkgs, nestedRelationships, err := discoverPkgsFromOpener(ctx, location, pathWithinArchive, archiveOpener, cfg) + nestedPkgs, nestedRelationships, err := discoverPkgsFromOpener(ctx, location, pathWithinArchive, archiveOpener, cfg, parentPkg) if err != nil { log.WithFields("location", location.Path()).Warnf("unable to discover java packages from opener: %+v", err) continue @@ -523,7 +540,7 @@ func discoverPkgsFromOpeners(ctx context.Context, location file.Location, opener } // discoverPkgsFromOpener finds Java archives within the given file. -func discoverPkgsFromOpener(ctx context.Context, location file.Location, pathWithinArchive string, archiveOpener intFile.Opener, cfg ArchiveCatalogerConfig) ([]pkg.Package, []artifact.Relationship, error) { +func discoverPkgsFromOpener(ctx context.Context, location file.Location, pathWithinArchive string, archiveOpener intFile.Opener, cfg ArchiveCatalogerConfig, parentPkg *pkg.Package) ([]pkg.Package, []artifact.Relationship, error) { archiveReadCloser, err := archiveOpener.Open() if err != nil { return nil, nil, fmt.Errorf("unable to open archived file from tempdir: %w", err) @@ -538,10 +555,10 @@ func discoverPkgsFromOpener(ctx context.Context, location file.Location, pathWit nestedLocation := file.NewLocationFromCoordinates(location.Coordinates) nestedLocation.AccessPath = nestedPath gap := newGenericArchiveParserAdapter(cfg) - nestedPkgs, nestedRelationships, err := gap.parseJavaArchive(ctx, nil, nil, file.LocationReadCloser{ + nestedPkgs, nestedRelationships, err := gap.parseJavaArchive(ctx, file.LocationReadCloser{ Location: nestedLocation, ReadCloser: archiveReadCloser, - }) + }, parentPkg) if err != nil { return nil, nil, fmt.Errorf("unable to process nested java archive (%s): %w", pathWithinArchive, err) } diff --git a/syft/pkg/cataloger/java/cataloger.go b/syft/pkg/cataloger/java/cataloger.go index 11e48b7f5ad..50e245097a2 100644 --- a/syft/pkg/cataloger/java/cataloger.go +++ b/syft/pkg/cataloger/java/cataloger.go @@ -13,7 +13,7 @@ func NewArchiveCataloger(cfg ArchiveCatalogerConfig) pkg.Cataloger { gap := newGenericArchiveParserAdapter(cfg) c := generic.NewCataloger("java-archive-cataloger"). - WithParserByGlobs(gap.parseJavaArchive, archiveFormatGlobs...) + WithParserByGlobs(gap.parseJavaArchiveMain, archiveFormatGlobs...) if cfg.IncludeIndexedArchives { // java archives wrapped within zip files diff --git a/syft/pkg/cataloger/java/parse_pom_xml.go b/syft/pkg/cataloger/java/parse_pom_xml.go index b83347a1e77..5deac07587a 100644 --- a/syft/pkg/cataloger/java/parse_pom_xml.go +++ b/syft/pkg/cataloger/java/parse_pom_xml.go @@ -62,14 +62,6 @@ func (p pomXMLCataloger) Catalog(ctx context.Context, fileResolver file.Resolver pkgs = append(pkgs, newPkgs...) relationships = append(relationships, newRelationships...) errs = append(errs, newErrs...) - // add dependency-of relationships from the dependencies to the main pkg - for _, newPkg := range newPkgs { - relationships = append(relationships, artifact.Relationship{ - From: mainPkg, - To: newPkg, - Type: artifact.DependencyOfRelationship, - }) - } } return pkgs, relationships, errors.Join(errs...) } @@ -107,6 +99,7 @@ func collectDependencies(ctx context.Context, r *maven.Resolver, parentPkg *pkg. continue } pkgs = append(pkgs, *p) + if parentPkg != nil { relationships = append(relationships, artifact.Relationship{ From: *p,