Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: add dependency relationships for Java archives and Maven sources #3273

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
173 changes: 123 additions & 50 deletions syft/pkg/cataloger/java/archive_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import (
"slices"
"strings"

"github.com/vifraa/gopom"
"golang.org/x/exp/maps"

"github.com/anchore/syft/internal"
Expand All @@ -20,6 +19,7 @@ import (
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
"github.com/anchore/syft/syft/pkg/cataloger/java/internal/maven"
)

var archiveFormatGlobs = []string{
Expand Down Expand Up @@ -55,7 +55,7 @@ type archiveParser struct {
fileInfo archiveFilename
detectNested bool
cfg ArchiveCatalogerConfig
maven *mavenResolver
maven *maven.Resolver
}

type genericArchiveParserAdapter struct {
Expand All @@ -67,14 +67,19 @@ func newGenericArchiveParserAdapter(cfg ArchiveCatalogerConfig) genericArchivePa
}

// parseJavaArchive is a parser function for java archive contents, returning all Java libraries and nested archives.
func (gap genericArchiveParserAdapter) parseJavaArchive(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
func (gap genericArchiveParserAdapter) parseJavaArchiveMain(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
return gap.parseJavaArchive(ctx, reader, nil)
}

// parseJavaArchive is a parser function for java archive contents, returning all Java libraries and nested archives.
func (gap genericArchiveParserAdapter) parseJavaArchive(ctx context.Context, reader file.LocationReadCloser, parentPkg *pkg.Package) ([]pkg.Package, []artifact.Relationship, error) {
parser, cleanupFn, err := newJavaArchiveParser(reader, true, gap.cfg)
// note: even on error, we should always run cleanup functions
defer cleanupFn()
if err != nil {
return nil, nil, err
}
return parser.parse(ctx)
return parser.parse(ctx, parentPkg)
}

// uniquePkgKey creates a unique string to identify the given package.
Expand Down Expand Up @@ -110,63 +115,75 @@ func newJavaArchiveParser(reader file.LocationReadCloser, detectNested bool, cfg
fileInfo: newJavaArchiveFilename(currentFilepath),
detectNested: detectNested,
cfg: cfg,
maven: newMavenResolver(nil, cfg),
maven: maven.NewResolver(nil, cfg.mavenConfig()),
}, cleanupFn, nil
}

// parse the loaded archive and return all packages found.
func (j *archiveParser) parse(ctx context.Context) ([]pkg.Package, []artifact.Relationship, error) {
func (j *archiveParser) parse(ctx context.Context, parentPkg *pkg.Package) ([]pkg.Package, []artifact.Relationship, error) {
var pkgs []pkg.Package
var relationships []artifact.Relationship

// find the parent package from the java manifest
parentPkg, err := j.discoverMainPackage(ctx)
mainPkg, err := j.discoverMainPackage(ctx)
if err != nil {
return nil, nil, fmt.Errorf("could not generate package from %s: %w", j.location, err)
}

// find aux packages from pom.properties/pom.xml and potentially modify the existing mainPkg
// NOTE: we cannot generate sha1 digests from packages discovered via pom.properties/pom.xml
auxPkgs, err := j.discoverPkgsFromAllMavenFiles(ctx, parentPkg)
auxPkgs, err := j.discoverPkgsFromAllMavenFiles(ctx, mainPkg)
if err != nil {
return nil, nil, err
}
pkgs = append(pkgs, auxPkgs...)

if mainPkg != nil {
finalizePackage(mainPkg)
pkgs = append(pkgs, *mainPkg)

if parentPkg != nil {
relationships = append(relationships, artifact.Relationship{
From: *mainPkg,
To: *parentPkg,
Type: artifact.DependencyOfRelationship,
})
}
}

if j.detectNested {
// find nested java archive packages
nestedPkgs, nestedRelationships, err := j.discoverPkgsFromNestedArchives(ctx, parentPkg)
nestedPkgs, nestedRelationships, err := j.discoverPkgsFromNestedArchives(ctx, mainPkg)
if err != nil {
return nil, nil, err
}
pkgs = append(pkgs, nestedPkgs...)
relationships = append(relationships, nestedRelationships...)
}

// lastly, add the parent package to the list (assuming the parent exists)
if parentPkg != nil {
pkgs = append([]pkg.Package{*parentPkg}, pkgs...)
}

// add pURLs to all packages found
// note: since package information may change after initial creation when parsing multiple locations within the
// jar, we wait until the conclusion of the parsing process before synthesizing pURLs.
for i := range pkgs {
p := &pkgs[i]
if m, ok := p.Metadata.(pkg.JavaArchive); ok {
p.PURL = packageURL(p.Name, p.Version, m)
finalizePackage(&pkgs[i])
}
return pkgs, relationships, nil
}

if strings.Contains(p.PURL, "io.jenkins.plugins") || strings.Contains(p.PURL, "org.jenkins-ci.plugins") {
p.Type = pkg.JenkinsPluginPkg
}
} else {
log.WithFields("package", p.String()).Warn("unable to extract java metadata to generate purl")
}
// finalizePackage sets the PURL, and performs some checks to determine if the package should be
// classified as a Jenkins plugin, updates some information and calls p.SetID()
func finalizePackage(p *pkg.Package) {
if m, ok := p.Metadata.(pkg.JavaArchive); ok {
p.PURL = packageURL(p.Name, p.Version, m)

p.SetID()
if strings.Contains(p.PURL, "io.jenkins.plugins") || strings.Contains(p.PURL, "org.jenkins-ci.plugins") {
p.Type = pkg.JenkinsPluginPkg
}
} else {
log.WithFields("package", p.String()).Warn("unable to extract java metadata to generate purl")
}

return pkgs, relationships, nil
p.SetID()
}

// discoverMainPackage parses the root Java manifest used as the parent package to all discovered nested packages.
Expand Down Expand Up @@ -279,45 +296,56 @@ func (j *archiveParser) findLicenseFromJavaMetadata(ctx context.Context, groupID
}

var err error
var pomLicenses []gopom.License
var pomLicenses []maven.License
if parsedPom != nil {
pomLicenses, err = j.maven.resolveLicenses(ctx, parsedPom.project)
pomLicenses, err = j.maven.ResolveLicenses(ctx, parsedPom.project)
if err != nil {
log.WithFields("error", err, "mavenID", j.maven.getMavenID(ctx, parsedPom.project)).Debug("error attempting to resolve pom licenses")
log.WithFields("error", err, "maven.ID", j.maven.ResolveID(ctx, parsedPom.project)).Debug("error attempting to resolve pom licenses")
}
}

if err == nil && len(pomLicenses) == 0 {
pomLicenses, err = j.maven.findLicenses(ctx, groupID, artifactID, version)
pomLicenses, err = j.maven.FindLicenses(ctx, groupID, artifactID, version)
if err != nil {
log.WithFields("error", err, "mavenID", mavenID{groupID, artifactID, version}).Debug("error attempting to find licenses")
log.WithFields("error", err, "maven.ID", maven.NewID(groupID, artifactID, version)).Debug("error attempting to find licenses")
}
}

if len(pomLicenses) == 0 {
// Try removing the last part of the groupId, as sometimes it duplicates the artifactId
packages := strings.Split(groupID, ".")
groupID = strings.Join(packages[:len(packages)-1], ".")
pomLicenses, err = j.maven.findLicenses(ctx, groupID, artifactID, version)
pomLicenses, err = j.maven.FindLicenses(ctx, groupID, artifactID, version)
if err != nil {
log.WithFields("error", err, "mavenID", mavenID{groupID, artifactID, version}).Debug("error attempting to find sub-group licenses")
log.WithFields("error", err, "maven.ID", maven.NewID(groupID, artifactID, version)).Debug("error attempting to find sub-group licenses")
}
}

return toPkgLicenses(&j.location, pomLicenses)
}

func toPkgLicenses(location *file.Location, licenses []gopom.License) []pkg.License {
func toPkgLicenses(location *file.Location, licenses []maven.License) []pkg.License {
var out []pkg.License
for _, license := range licenses {
out = append(out, pkg.NewLicenseFromFields(deref(license.Name), deref(license.URL), location))
name := ""
if license.Name != nil {
name = *license.Name
}
url := ""
if license.URL != nil {
url = *license.URL
}
if name == "" && url == "" {
continue
}
out = append(out, pkg.NewLicenseFromFields(name, url, location))
}
return out
}

type parsedPomProject struct {
path string
project *gopom.Project
project *maven.Project
}

// discoverMainPackageFromPomInfo attempts to resolve maven groupId, artifactId, version and other info from found pom information
Expand Down Expand Up @@ -352,7 +380,7 @@ func (j *archiveParser) discoverMainPackageFromPomInfo(ctx context.Context) (gro
version = pomProperties.Version

if parsedPom != nil && parsedPom.project != nil {
id := j.maven.getMavenID(ctx, parsedPom.project)
id := j.maven.ResolveID(ctx, parsedPom.project)
if group == "" {
group = id.GroupID
}
Expand Down Expand Up @@ -488,7 +516,7 @@ func discoverPkgsFromOpeners(ctx context.Context, location file.Location, opener
var relationships []artifact.Relationship

for pathWithinArchive, archiveOpener := range openers {
nestedPkgs, nestedRelationships, err := discoverPkgsFromOpener(ctx, location, pathWithinArchive, archiveOpener, cfg)
nestedPkgs, nestedRelationships, err := discoverPkgsFromOpener(ctx, location, pathWithinArchive, archiveOpener, cfg, parentPkg)
if err != nil {
log.WithFields("location", location.Path()).Warnf("unable to discover java packages from opener: %+v", err)
continue
Expand All @@ -512,7 +540,7 @@ func discoverPkgsFromOpeners(ctx context.Context, location file.Location, opener
}

// discoverPkgsFromOpener finds Java archives within the given file.
func discoverPkgsFromOpener(ctx context.Context, location file.Location, pathWithinArchive string, archiveOpener intFile.Opener, cfg ArchiveCatalogerConfig) ([]pkg.Package, []artifact.Relationship, error) {
func discoverPkgsFromOpener(ctx context.Context, location file.Location, pathWithinArchive string, archiveOpener intFile.Opener, cfg ArchiveCatalogerConfig, parentPkg *pkg.Package) ([]pkg.Package, []artifact.Relationship, error) {
archiveReadCloser, err := archiveOpener.Open()
if err != nil {
return nil, nil, fmt.Errorf("unable to open archived file from tempdir: %w", err)
Expand All @@ -527,10 +555,10 @@ func discoverPkgsFromOpener(ctx context.Context, location file.Location, pathWit
nestedLocation := file.NewLocationFromCoordinates(location.Coordinates)
nestedLocation.AccessPath = nestedPath
gap := newGenericArchiveParserAdapter(cfg)
nestedPkgs, nestedRelationships, err := gap.parseJavaArchive(ctx, nil, nil, file.LocationReadCloser{
nestedPkgs, nestedRelationships, err := gap.parseJavaArchive(ctx, file.LocationReadCloser{
Location: nestedLocation,
ReadCloser: archiveReadCloser,
})
}, parentPkg)
if err != nil {
return nil, nil, fmt.Errorf("unable to process nested java archive (%s): %w", pathWithinArchive, err)
}
Expand Down Expand Up @@ -576,7 +604,7 @@ func pomProjectByParentPath(archivePath string, location file.Location, extractP
projectByParentPath := make(map[string]*parsedPomProject)
for filePath, fileContents := range contentsOfMavenProjectFiles {
// TODO: when we support locations of paths within archives we should start passing the specific pom.xml location object instead of the top jar
pom, err := decodePomXML(strings.NewReader(fileContents))
pom, err := maven.ParsePomXML(strings.NewReader(fileContents))
if err != nil {
log.WithFields("contents-path", filePath, "location", location.Path()).Warnf("failed to parse pom.xml: %+v", err)
continue
Expand All @@ -593,9 +621,57 @@ func pomProjectByParentPath(archivePath string, location file.Location, extractP
return projectByParentPath, nil
}

// newPackageFromMavenPom builds a single package describing the project declared by the given Maven POM.
// The resolver is used to resolve the project ID, the (optional) parent project, the licenses, and the
// name/description/URL properties. Failures to resolve the parent or the licenses are logged at trace
// level and do not prevent a package from being returned. The given location is recorded as the
// primary evidence for the package and is attached to every license.
func newPackageFromMavenPom(ctx context.Context, r *maven.Resolver, pom *maven.Project, location file.Location) *pkg.Package {
	id := r.ResolveID(ctx, pom)
	parent, err := r.ResolveParent(ctx, pom)
	if err != nil {
		// this is expected in many cases, there will be no network access and the maven resolver is unable to
		// look up information, so we can continue with what little information we have
		log.Tracef("unable to resolve parent due to: %v", err)
	}

	// only record parent coordinates when a parent project was actually resolved
	var javaPomParent *pkg.JavaPomParent
	if parent != nil {
		parentID := r.ResolveID(ctx, parent)
		javaPomParent = &pkg.JavaPomParent{
			GroupID:    parentID.GroupID,
			ArtifactID: parentID.ArtifactID,
			Version:    parentID.Version,
		}
	}

	pomLicenses, err := r.ResolveLicenses(ctx, pom)
	if err != nil {
		log.Tracef("error resolving licenses: %v", err)
	}
	licenses := toPkgLicenses(&location, pomLicenses)

	p := pkg.Package{
		Name:    id.ArtifactID,
		Version: id.Version,
		Locations: file.NewLocationSet(
			location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
		),
		Licenses: pkg.NewLicenseSet(licenses...),
		Language: pkg.Java,
		Type:     pkg.JavaPkg, // FIXME this is not necessarily accurate
		Metadata: pkg.JavaPomProject{
			Parent:      javaPomParent,
			GroupID:     id.GroupID,
			ArtifactID:  id.ArtifactID,
			Version:     id.Version,
			Name:        r.ResolveProperty(ctx, pom.Name, pom),
			Description: r.ResolveProperty(ctx, pom.Description, pom),
			URL:         r.ResolveProperty(ctx, pom.URL, pom),
		},
	}
	return &p
}

// newPackageFromMavenData processes a single Maven POM properties for a given parent package, returning all listed Java packages found and
// associating each discovered package to the given parent package. Note the pom.xml is optional, the pom.properties is not.
func newPackageFromMavenData(ctx context.Context, r *mavenResolver, pomProperties pkg.JavaPomProperties, parsedPom *parsedPomProject, parentPkg *pkg.Package, location file.Location) *pkg.Package {
func newPackageFromMavenData(ctx context.Context, r *maven.Resolver, pomProperties pkg.JavaPomProperties, parsedPom *parsedPomProject, parentPkg *pkg.Package, location file.Location) *pkg.Package {
// keep the artifact name within the virtual path if this package does not match the parent package
vPathSuffix := ""
groupID := ""
Expand All @@ -620,23 +696,20 @@ func newPackageFromMavenData(ctx context.Context, r *mavenResolver, pomPropertie
var pkgPomProject *pkg.JavaPomProject

var err error
var pomLicenses []gopom.License
var pomLicenses []maven.License
if parsedPom == nil {
// If we have no pom.xml, check maven central using pom.properties
pomLicenses, err = r.findLicenses(ctx, pomProperties.GroupID, pomProperties.ArtifactID, pomProperties.Version)
pomLicenses, err = r.FindLicenses(ctx, pomProperties.GroupID, pomProperties.ArtifactID, pomProperties.Version)
} else {
pkgPomProject = newPomProject(ctx, r, parsedPom.path, parsedPom.project)
pomLicenses, err = r.resolveLicenses(ctx, parsedPom.project)
pomLicenses, err = r.ResolveLicenses(ctx, parsedPom.project)
}

if err != nil {
log.WithFields("error", err, "mavenID", mavenID{pomProperties.GroupID, pomProperties.ArtifactID, pomProperties.Version}).Debug("error attempting to resolve licenses")
log.WithFields("error", err, "maven.ID", maven.NewID(pomProperties.GroupID, pomProperties.ArtifactID, pomProperties.Version)).Debug("error attempting to resolve licenses")
}

licenses := make([]pkg.License, 0)
for _, license := range pomLicenses {
licenses = append(licenses, pkg.NewLicenseFromFields(deref(license.Name), deref(license.URL), &location))
}
licenses := toPkgLicenses(&location, pomLicenses)

p := pkg.Package{
Name: pomProperties.ArtifactID,
Expand Down
Loading
Loading