Skip to content

Commit

Permalink
Parse the Maven license from the pom.xml if not contained in the manifest (
Browse files Browse the repository at this point in the history
#2115)

* Parse the Maven license from the pom.xml if not contained in the manifest

Signed-off-by: Colm O hEigeartaigh <[email protected]>

* chore: restore 10.0.2 schema

Signed-off-by: Christopher Phillips <[email protected]>

* chore: generate new 11.0.1 schema

Signed-off-by: Christopher Phillips <[email protected]>

* refactor: remove schema change

Signed-off-by: Christopher Phillips <[email protected]>

* test: update unit tests to align with new pattern

Signed-off-by: Christopher Phillips <[email protected]>

* chore: pr feedback

Signed-off-by: Christopher Phillips <[email protected]>

* chore: remove struct tags

Signed-off-by: Christopher Phillips <[email protected]>

* keep license name and url semantics preserved on the pkg object

Signed-off-by: Alex Goodman <[email protected]>

---------

Signed-off-by: Colm O hEigeartaigh <[email protected]>
Signed-off-by: Christopher Phillips <[email protected]>
Signed-off-by: Alex Goodman <[email protected]>
Co-authored-by: Christopher Angelo Phillips <[email protected]>
Co-authored-by: Christopher Phillips <[email protected]>
Co-authored-by: Alex Goodman <[email protected]>
  • Loading branch information
4 people authored Oct 10, 2023
1 parent 185d0d1 commit 0748945
Show file tree
Hide file tree
Showing 6 changed files with 692 additions and 83 deletions.
77 changes: 54 additions & 23 deletions syft/pkg/cataloger/java/archive_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,19 +171,14 @@ func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) {
return nil, nil
}

archiveCloser, err := os.Open(j.archivePath)
if err != nil {
return nil, fmt.Errorf("unable to open archive path (%s): %w", j.archivePath, err)
}
defer archiveCloser.Close()

// grab and assign digest for the entire archive
digests, err := intFile.NewDigestsFromFile(archiveCloser, javaArchiveHashes)
digests, err := getDigestsFromArchive(j.archivePath)
if err != nil {
log.Warnf("failed to create digest for file=%q: %+v", j.archivePath, err)
return nil, err
}

// we use j.location because we want to associate the license declaration with where we discovered the contents in the manifest
// TODO: when we support locations of paths within archives we should start passing the specific manifest location object instead of the top jar
licenses := pkg.NewLicensesFromLocation(j.location, selectLicenses(manifest)...)
/*
We should derive the name and version from, in this order:
Expand All @@ -192,13 +187,20 @@ func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) {
3. manifest
4. filename
*/
name, version := j.guessMainPackageNameAndVersionFromPomInfo()
name, version, pomLicenses := j.guessMainPackageNameAndVersionFromPomInfo()
if name == "" {
name = selectName(manifest, j.fileInfo)
}
if version == "" {
version = selectVersion(manifest, j.fileInfo)
}
if len(licenses) == 0 {
// Today we don't have a way to distinguish between licenses from the manifest and licenses from the pom.xml
// until the file.Location object can support sub-paths (i.e. paths within archives, recursively; issue https://github.com/anchore/syft/issues/2211).
// Until then it's less confusing to use the licenses from the pom.xml only if the manifest did not list any.
licenses = append(licenses, pomLicenses...)
}

return &pkg.Package{
// TODO: maybe select name should just have a pom properties in it?
Name: name,
Expand All @@ -218,11 +220,16 @@ func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) {
}, nil
}

func (j *archiveParser) guessMainPackageNameAndVersionFromPomInfo() (string, string) {
// parsedPomProject pairs a pom.xml project description with a list of licenses.
// The *pkg.PomProject is embedded, so its fields (e.g. ArtifactID, Version) are promoted onto
// this type; the embedded pointer may be nil, so callers check it before reading promoted fields.
// Licenses is held next to the project rather than inside pkg.PomProject.
type parsedPomProject struct {
*pkg.PomProject
Licenses []pkg.License
}

func (j *archiveParser) guessMainPackageNameAndVersionFromPomInfo() (name, version string, licenses []pkg.License) {
pomPropertyMatches := j.fileManifest.GlobMatch(pomPropertiesGlob)
pomMatches := j.fileManifest.GlobMatch(pomXMLGlob)
var pomPropertiesObject pkg.PomProperties
var pomProjectObject pkg.PomProject
var pomProjectObject parsedPomProject
if len(pomPropertyMatches) == 1 || len(pomMatches) == 1 {
// we have exactly 1 pom.properties or pom.xml in the archive; assume it represents the
// package we're scanning if the names seem like a plausible match
Expand All @@ -238,15 +245,15 @@ func (j *archiveParser) guessMainPackageNameAndVersionFromPomInfo() (string, str
}
}
}
name := pomPropertiesObject.ArtifactID
if name == "" {
name = pomPropertiesObject.ArtifactID
if name == "" && pomProjectObject.PomProject != nil {
name = pomProjectObject.ArtifactID
}
version := pomPropertiesObject.Version
if version == "" {
version = pomPropertiesObject.Version
if version == "" && pomProjectObject.PomProject != nil {
version = pomProjectObject.Version
}
return name, version
return name, version, pomProjectObject.Licenses
}

// discoverPkgsFromAllMavenFiles parses Maven POM properties/xml for a given
Expand All @@ -273,7 +280,7 @@ func (j *archiveParser) discoverPkgsFromAllMavenFiles(parentPkg *pkg.Package) ([
}

for parentPath, propertiesObj := range properties {
var pomProject *pkg.PomProject
var pomProject *parsedPomProject
if proj, exists := projects[parentPath]; exists {
pomProject = &proj
}
Expand All @@ -287,6 +294,22 @@ func (j *archiveParser) discoverPkgsFromAllMavenFiles(parentPkg *pkg.Package) ([
return pkgs, nil
}

// getDigestsFromArchive opens the archive at archivePath and computes digests for the whole
// file using javaArchiveHashes.
//
// An error is returned only when the archive cannot be opened. A failure while computing the
// digests is logged as a warning and otherwise ignored: whatever NewDigestsFromFile returned is
// handed back together with a nil error.
func getDigestsFromArchive(archivePath string) ([]file.Digest, error) {
	fh, openErr := os.Open(archivePath)
	if openErr != nil {
		return nil, fmt.Errorf("unable to open archive path (%s): %w", archivePath, openErr)
	}
	defer fh.Close()

	// hashing is best-effort: warn on failure, but never fail the caller because of it
	archiveDigests, digestErr := intFile.NewDigestsFromFile(fh, javaArchiveHashes)
	if digestErr != nil {
		log.Warnf("failed to create digest for file=%q: %+v", archivePath, digestErr)
	}

	return archiveDigests, nil
}

func (j *archiveParser) discoverPkgsFromNestedArchives(parentPkg *pkg.Package) ([]pkg.Package, []artifact.Relationship, error) {
// we know that all java archives are zip formatted files, so we can use the shared zip helper
return discoverPkgsFromZip(j.location, j.archivePath, j.contentPath, j.fileManifest, parentPkg)
Expand Down Expand Up @@ -388,15 +411,16 @@ func pomPropertiesByParentPath(archivePath string, location file.Location, extra
return propertiesByParentPath, nil
}

func pomProjectByParentPath(archivePath string, location file.Location, extractPaths []string) (map[string]pkg.PomProject, error) {
func pomProjectByParentPath(archivePath string, location file.Location, extractPaths []string) (map[string]parsedPomProject, error) {
contentsOfMavenProjectFiles, err := intFile.ContentsFromZip(archivePath, extractPaths...)
if err != nil {
return nil, fmt.Errorf("unable to extract maven files: %w", err)
}

projectByParentPath := make(map[string]pkg.PomProject)
projectByParentPath := make(map[string]parsedPomProject)
for filePath, fileContents := range contentsOfMavenProjectFiles {
pomProject, err := parsePomXMLProject(filePath, strings.NewReader(fileContents))
// TODO: when we support locations of paths within archives we should start passing the specific pom.xml location object instead of the top jar
pomProject, err := parsePomXMLProject(filePath, strings.NewReader(fileContents), location)
if err != nil {
log.WithFields("contents-path", filePath, "location", location.AccessPath()).Warnf("failed to parse pom.xml: %+v", err)
continue
Expand All @@ -418,7 +442,7 @@ func pomProjectByParentPath(archivePath string, location file.Location, extractP

// newPackageFromMavenData processes a single Maven POM properties for a given parent package, returning all listed Java packages found and
// associating each discovered package to the given parent package. Note the pom.xml is optional, the pom.properties is not.
func newPackageFromMavenData(pomProperties pkg.PomProperties, pomProject *pkg.PomProject, parentPkg *pkg.Package, location file.Location) *pkg.Package {
func newPackageFromMavenData(pomProperties pkg.PomProperties, parsedPomProject *parsedPomProject, parentPkg *pkg.Package, location file.Location) *pkg.Package {
// keep the artifact name within the virtual path if this package does not match the parent package
vPathSuffix := ""
groupID := ""
Expand All @@ -440,20 +464,27 @@ func newPackageFromMavenData(pomProperties pkg.PomProperties, pomProject *pkg.Po
}
virtualPath := location.AccessPath() + vPathSuffix

// discovered props = new package
var pkgPomProject *pkg.PomProject
licenses := make([]pkg.License, 0)
if parsedPomProject != nil {
pkgPomProject = parsedPomProject.PomProject
licenses = append(licenses, parsedPomProject.Licenses...)
}

p := pkg.Package{
Name: pomProperties.ArtifactID,
Version: pomProperties.Version,
Locations: file.NewLocationSet(
location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
),
Licenses: pkg.NewLicenseSet(licenses...),
Language: pkg.Java,
Type: pomProperties.PkgTypeIndicated(),
MetadataType: pkg.JavaMetadataType,
Metadata: pkg.JavaMetadata{
VirtualPath: virtualPath,
PomProperties: &pomProperties,
PomProject: pomProject,
PomProject: pkgPomProject,
Parent: parentPkg,
},
}
Expand Down
80 changes: 62 additions & 18 deletions syft/pkg/cataloger/java/archive_parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import (
"io"
"os"
"os/exec"
"path"
"path/filepath"
"strings"
"syscall"
Expand All @@ -17,6 +16,7 @@ import (

"github.com/anchore/syft/internal"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/license"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
)
Expand Down Expand Up @@ -83,11 +83,13 @@ func generateJavaBuildFixture(t *testing.T, fixturePath string) {

func TestParseJar(t *testing.T) {
tests := []struct {
name string
fixture string
expected map[string]pkg.Package
ignoreExtras []string
}{
{
name: "example-jenkins-plugin",
fixture: "test-fixtures/java-builds/packages/example-jenkins-plugin.hpi",
ignoreExtras: []string{
"Plugin-Version", // has dynamic date
Expand Down Expand Up @@ -146,6 +148,7 @@ func TestParseJar(t *testing.T) {
},
},
{
name: "example-java-app-gradle",
fixture: "test-fixtures/java-builds/packages/example-java-app-gradle-0.1.0.jar",
expected: map[string]pkg.Package{
"example-java-app-gradle": {
Expand All @@ -172,6 +175,16 @@ func TestParseJar(t *testing.T) {
Language: pkg.Java,
Type: pkg.JavaPkg,
MetadataType: pkg.JavaMetadataType,
Licenses: pkg.NewLicenseSet(
pkg.NewLicenseFromFields(
"Apache 2",
"http://www.apache.org/licenses/LICENSE-2.0.txt",
func() *file.Location {
l := file.NewLocation("test-fixtures/java-builds/packages/example-java-app-gradle-0.1.0.jar")
return &l
}(),
),
),
Metadata: pkg.JavaMetadata{
// ensure that nested packages with different names than that of the parent are appended as
// a suffix on the virtual path with a colon separator between group name and artifact name
Expand All @@ -196,6 +209,7 @@ func TestParseJar(t *testing.T) {
},
},
{
name: "example-java-app-maven",
fixture: "test-fixtures/java-builds/packages/example-java-app-maven-0.1.0.jar",
ignoreExtras: []string{
"Build-Jdk", // can't guarantee the JDK used at build time
Expand Down Expand Up @@ -231,9 +245,19 @@ func TestParseJar(t *testing.T) {
},
},
"joda-time": {
Name: "joda-time",
Version: "2.9.2",
PURL: "pkg:maven/joda-time/joda-time@2.9.2",
Name: "joda-time",
Version: "2.9.2",
PURL: "pkg:maven/joda-time/joda-time@2.9.2",
Licenses: pkg.NewLicenseSet(
pkg.NewLicenseFromFields(
"Apache 2",
"http://www.apache.org/licenses/LICENSE-2.0.txt",
func() *file.Location {
l := file.NewLocation("test-fixtures/java-builds/packages/example-java-app-maven-0.1.0.jar")
return &l
}(),
),
),
Language: pkg.Java,
Type: pkg.JavaPkg,
MetadataType: pkg.JavaMetadataType,
Expand Down Expand Up @@ -263,7 +287,7 @@ func TestParseJar(t *testing.T) {
}

for _, test := range tests {
t.Run(path.Base(test.fixture), func(t *testing.T) {
t.Run(test.name, func(t *testing.T) {

generateJavaBuildFixture(t, test.fixture)

Expand Down Expand Up @@ -618,7 +642,7 @@ func Test_newPackageFromMavenData(t *testing.T) {
tests := []struct {
name string
props pkg.PomProperties
project *pkg.PomProject
project *parsedPomProject
parent *pkg.Package
expectedParent pkg.Package
expectedPackage *pkg.Package
Expand Down Expand Up @@ -687,18 +711,29 @@ func Test_newPackageFromMavenData(t *testing.T) {
ArtifactID: "some-artifact-id",
Version: "1.0",
},
project: &pkg.PomProject{
Parent: &pkg.PomParent{
GroupID: "some-parent-group-id",
ArtifactID: "some-parent-artifact-id",
Version: "1.0-parent",
},
Name: "some-name",
GroupID: "some-group-id",
ArtifactID: "some-artifact-id",
Version: "1.0",
Description: "desc",
URL: "aweso.me",
project: &parsedPomProject{
PomProject: &pkg.PomProject{
Parent: &pkg.PomParent{
GroupID: "some-parent-group-id",
ArtifactID: "some-parent-artifact-id",
Version: "1.0-parent",
},
Name: "some-name",
GroupID: "some-group-id",
ArtifactID: "some-artifact-id",
Version: "1.0",
Description: "desc",
URL: "aweso.me",
},
Licenses: []pkg.License{
{
Value: "MIT",
SPDXExpression: "MIT",
Type: license.Declared,
URLs: internal.NewStringSet("https://opensource.org/licenses/MIT"),
Locations: file.NewLocationSet(file.NewLocation("some-license-path")),
},
},
},
parent: &pkg.Package{
Name: "some-parent-name",
Expand Down Expand Up @@ -727,6 +762,15 @@ func Test_newPackageFromMavenData(t *testing.T) {
Language: pkg.Java,
Type: pkg.JavaPkg,
MetadataType: pkg.JavaMetadataType,
Licenses: pkg.NewLicenseSet(
pkg.License{
Value: "MIT",
SPDXExpression: "MIT",
Type: license.Declared,
URLs: internal.NewStringSet("https://opensource.org/licenses/MIT"),
Locations: file.NewLocationSet(file.NewLocation("some-license-path")),
},
),
Metadata: pkg.JavaMetadata{
VirtualPath: virtualPath + ":" + "some-group-id" + ":" + "some-artifact-id",
PomProperties: &pkg.PomProperties{
Expand Down
Loading

0 comments on commit 0748945

Please sign in to comment.