diff --git a/syft/format/internal/spdxutil/helpers/license.go b/syft/format/internal/spdxutil/helpers/license.go index b01539f2d0c..4121bc2c938 100644 --- a/syft/format/internal/spdxutil/helpers/license.go +++ b/syft/format/internal/spdxutil/helpers/license.go @@ -64,23 +64,16 @@ type SPDXLicense struct { func ParseLicenses(raw []pkg.License) (concluded, declared []SPDXLicense) { for _, l := range raw { - if l.Value == "" { + if l.Empty() { continue } candidate := SPDXLicense{} + // a pkg license can have a couple combinations of values if l.SPDXExpression != "" { candidate.ID = l.SPDXExpression - } else { - // we did not find a valid SPDX license ID so treat as separate license - if len(l.Value) <= 64 { - // if the license text is less than the size of the hash, - // just use it directly so the id is more readable - candidate.ID = spdxlicense.LicenseRefPrefix + SanitizeElementID(l.Value) - } else { - hash := sha256.Sum256([]byte(l.Value)) - candidate.ID = fmt.Sprintf("%s%x", spdxlicense.LicenseRefPrefix, hash) - } + hash := sha256.Sum256([]byte(l.Value)) + candidate.ID = fmt.Sprintf("%s%x", spdxlicense.LicenseRefPrefix, hash) candidate.Value = l.Value } diff --git a/syft/pkg/license.go b/syft/pkg/license.go index 9da0f0aec47..589f3b94646 100644 --- a/syft/pkg/license.go +++ b/syft/pkg/license.go @@ -24,6 +24,7 @@ var _ sort.Interface = (*Licenses)(nil) // in order to distinguish if packages should be kept separate // this is different for licenses since we're only looking for evidence // of where a license was declared/concluded for a given package +// Licenses can either be FullText, A valid SPDXExpression, or the Value passed from the cataloger if none of the above type License struct { Value string SPDXExpression string @@ -33,6 +34,13 @@ type License struct { Locations file.LocationSet `hash:"ignore"` } +func (l License) Empty() bool { + if l.FullText == "" && l.SPDXExpression == "" && l.Value == "" { + return true + } + return false +} + type Licenses []License func (l Licenses) Len() int { @@ -68,25 +76,28 @@ func NewLicense(value string) License { func NewLicenseFromType(value string, t license.Type) License { var spdxExpression string - if value != "" { - // when a metadata field contains a newline this is most likely an indicator - // of a full text license having made it to the constructor - // in this case we annotate this as the full text to not lose value and do not extract the complex case - if strings.Contains(value, "\n") { - return License{ - FullText: value, - } + // when a metadata field contains a newline this is most likely an indicator + // of a full text license having made it to the constructor + // in this case we annotate this as the full text to not lose value and do not extract the complex case + if strings.Contains(value, "\n") { + return License{ + FullText: value, } + } - var err error - spdxExpression, err = license.ParseExpression(value) - if err != nil { - log.WithFields("error", err, "license", value).Trace("unable to parse license expression") + // If we can't find a valid SPDX Expression we just return the value as is + var err error + spdxExpression, err = license.ParseExpression(value) + if err != nil { + log.WithFields("error", err, "license", value).Trace("unable to parse license expression") + return License{ + Value: value, + Type: t, + Locations: file.NewLocationSet(), } } return License{ - Value: value, SPDXExpression: spdxExpression, Type: t, Locations: file.NewLocationSet(), @@ -148,12 +159,12 @@ func NewLicenseFromFields(value, url string, location *file.Location) License { // Merge two licenses into a new license object. If the merge is not possible due to conflicting fields // (e.g. different values for Value, SPDXExpression, Type, or any non-collection type) an error is returned. // TODO: this is a bit of a hack to not infinitely recurse when hashing a license -func (s License) Merge(l License) (*License, error) { - sHash, err := artifact.IDByHash(s) +func (l License) Merge(lic License) (*License, error) { + sHash, err := artifact.IDByHash(l) if err != nil { return nil, err } - lHash, err := artifact.IDByHash(l) + lHash, err := artifact.IDByHash(lic) if err != nil { return nil, err } @@ -162,23 +173,23 @@ func (s License) Merge(l License) (*License, error) { } // try to keep s.URLs unallocated unless necessary (which is the default state from the constructor) - if len(l.URLs) > 0 { - s.URLs = append(s.URLs, l.URLs...) + if len(lic.URLs) > 0 { + l.URLs = append(l.URLs, lic.URLs...) } - if len(s.URLs) > 0 { - s.URLs = strset.New(s.URLs...).List() - sort.Strings(s.URLs) + if len(l.URLs) > 0 { + l.URLs = strset.New(l.URLs...).List() + sort.Strings(l.URLs) } - if l.Locations.Empty() { - return &s, nil + if lic.Locations.Empty() { + return &l, nil } // since the set instance has a reference type (map) we must make a new instance - locations := file.NewLocationSet(s.Locations.ToSlice()...) - locations.Add(l.Locations.ToSlice()...) - s.Locations = locations + locations := file.NewLocationSet(l.Locations.ToSlice()...) + locations.Add(lic.Locations.ToSlice()...) + l.Locations = locations - return &s, nil + return &l, nil } diff --git a/syft/pkg/license_set.go b/syft/pkg/license_set.go index 99593fae2a2..b907213346d 100644 --- a/syft/pkg/license_set.go +++ b/syft/pkg/license_set.go @@ -52,13 +52,15 @@ func (s *LicenseSet) Add(licenses ...License) { for _, l := range licenses { // we only want to add licenses that have a value // note, this check should be moved to the license constructor in the future - if l.Value != "" { - if id, merged, err := s.addToExisting(l); err == nil && !merged { - // doesn't exist, add it - s.set[id] = l - } else if err != nil { - log.Trace("license set failed to add license %#v: %+v", l, err) - } + if l.Value != "" && l.FullText != "" { + continue + } + + if id, merged, err := s.addToExisting(l); err == nil && !merged { + // doesn't exist, add it + s.set[id] = l + } else if err != nil { + log.Trace("license set failed to add license %#v: %+v", l, err) } } }