-
Notifications
You must be signed in to change notification settings - Fork 560
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Introduce indexed embedded CPE dictionary
Signed-off-by: Dan Luhring <[email protected]>
- Loading branch information
Showing 12 changed files with 25,339 additions and 7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -42,3 +42,4 @@ bin/ | |
# attestation | ||
cosign.key | ||
cosign.pub | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
cpe-index.json |
228 changes: 228 additions & 0 deletions
228
syft/pkg/cataloger/common/cpe/dictionary/index-generator/generate.go
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,228 @@ | ||
package main | ||
|
||
import ( | ||
"compress/gzip" | ||
"encoding/json" | ||
"encoding/xml" | ||
"fmt" | ||
"io" | ||
"log" | ||
"strings" | ||
|
||
"github.com/facebookincubator/nvdtools/wfn" | ||
"golang.org/x/exp/slices" | ||
|
||
"github.com/anchore/syft/syft/pkg/cataloger/common/cpe/dictionary" | ||
) | ||
|
||
func generateIndexedDictionaryJSON(rawGzipData io.Reader) ([]byte, error) { | ||
gzipReader, err := gzip.NewReader(rawGzipData) | ||
if err != nil { | ||
return nil, fmt.Errorf("unable to decompress CPE dictionary: %w", err) | ||
} | ||
defer gzipReader.Close() | ||
|
||
// Read XML data | ||
data, err := io.ReadAll(gzipReader) | ||
if err != nil { | ||
return nil, fmt.Errorf("unable to read CPE dictionary: %w", err) | ||
} | ||
|
||
// Unmarshal XML | ||
var cpeList CpeList | ||
if err := xml.Unmarshal(data, &cpeList); err != nil { | ||
return nil, fmt.Errorf("unable to unmarshal CPE dictionary XML: %w", err) | ||
} | ||
|
||
// Filter out data that's not applicable here | ||
cpeList = filterCpeList(cpeList) | ||
|
||
// Create indexed dictionary to help with looking up CPEs | ||
indexedDictionary := indexCPEList(cpeList) | ||
|
||
// Convert to JSON | ||
jsonData, err := json.Marshal(indexedDictionary) | ||
if err != nil { | ||
return nil, fmt.Errorf("unable to marshal CPE dictionary to JSON: %w", err) | ||
} | ||
return jsonData, nil | ||
} | ||
|
||
// filterCpeList removes CPE items that are not applicable to software packages. | ||
func filterCpeList(cpeList CpeList) CpeList { | ||
var processedCpeList CpeList | ||
|
||
seen := make(map[string]struct{}) | ||
|
||
for _, cpeItem := range cpeList.CpeItems { | ||
// Skip CPE items that don't have any references. | ||
if len(cpeItem.References) == 0 { | ||
continue | ||
} | ||
|
||
// Skip CPE items where the CPE URI doesn't meet our criteria. | ||
parsedName, err := wfn.Parse(cpeItem.Name) | ||
if err != nil { | ||
log.Printf("unable to parse CPE URI %q: %s", cpeItem.Name, err) | ||
} | ||
|
||
if slices.Contains([]string{"h", "o"}, parsedName.Part) { | ||
continue | ||
} | ||
|
||
normalizedName := normalizeCPE(parsedName).BindToURI() | ||
if _, ok := seen[normalizedName]; ok { | ||
continue | ||
} | ||
seen[normalizedName] = struct{}{} | ||
cpeItem.Name = normalizedName | ||
|
||
parsedCPE, err := wfn.Parse(cpeItem.Cpe23Item.Name) | ||
if err != nil { | ||
log.Printf("unable to parse CPE value %q: %s", cpeItem.Cpe23Item.Name, err) | ||
} | ||
|
||
cpeItem.Cpe23Item.Name = normalizeCPE(parsedCPE).BindToFmtString() | ||
|
||
processedCpeList.CpeItems = append(processedCpeList.CpeItems, cpeItem) | ||
} | ||
|
||
return processedCpeList | ||
} | ||
|
||
// normalizeCPE removes the version and update parts of a CPE. | ||
func normalizeCPE(cpe *wfn.Attributes) *wfn.Attributes { | ||
cpeCopy := *cpe | ||
|
||
cpeCopy.Version = "" | ||
cpeCopy.Update = "" | ||
|
||
return &cpeCopy | ||
} | ||
|
||
// URL prefixes used to recognize which ecosystem a CPE reference link points at.
const (
	// prefixForNPMPackages marks a reference to an npm package page.
	prefixForNPMPackages = "https://www.npmjs.com/package/"

	// prefixForRubyGems marks a reference to a gem page on rubygems.org (HTTPS).
	prefixForRubyGems = "https://rubygems.org/gems/"

	// prefixForRubyGemsHTTP is the plain-HTTP variant of prefixForRubyGems.
	prefixForRubyGemsHTTP = "http://rubygems.org/gems/"

	// prefixForNativeRubyGems marks a repository under the "ruby" GitHub
	// organization; these are indexed as RubyGems entries.
	prefixForNativeRubyGems = "https://github.com/ruby/"

	// prefixForPyPIPackages marks a reference to a project page on pypi.org.
	prefixForPyPIPackages = "https://pypi.org/project/"

	// prefixForJenkinsPlugins marks a repository under the "jenkinsci" GitHub
	// organization; only repositories named "*-plugin" are indexed as plugins.
	prefixForJenkinsPlugins = "https://github.com/jenkinsci/"

	// prefixForRustCrates marks a reference to a crate page on crates.io.
	prefixForRustCrates = "https://crates.io/crates/"
)
|
||
// indexCPEList creates an index of CPEs by ecosystem. | ||
func indexCPEList(list CpeList) *dictionary.Indexed { | ||
indexed := &dictionary.Indexed{ | ||
EcosystemPackages: make(map[string]dictionary.Packages), | ||
} | ||
|
||
for _, cpeItem := range list.CpeItems { | ||
for _, reference := range cpeItem.References { | ||
ref := reference.Reference.Href | ||
|
||
switch { | ||
case strings.HasPrefix(ref, prefixForNPMPackages): | ||
addEntryForNPMPackage(indexed, ref, cpeItem) | ||
|
||
case strings.HasPrefix(ref, prefixForRubyGems), strings.HasPrefix(ref, prefixForRubyGemsHTTP): | ||
addEntryForRubyGem(indexed, ref, cpeItem) | ||
|
||
case strings.HasPrefix(ref, prefixForNativeRubyGems): | ||
addEntryForNativeRubyGem(indexed, ref, cpeItem) | ||
|
||
case strings.HasPrefix(ref, prefixForPyPIPackages): | ||
addEntryForPyPIPackage(indexed, ref, cpeItem) | ||
|
||
case strings.HasPrefix(ref, prefixForJenkinsPlugins): | ||
// It _might_ be a jenkins plugin! | ||
addEntryForJenkinsPlugin(indexed, ref, cpeItem) | ||
|
||
case strings.HasPrefix(ref, prefixForRustCrates): | ||
addEntryForRustCrate(indexed, ref, cpeItem) | ||
} | ||
} | ||
} | ||
|
||
return indexed | ||
} | ||
|
||
func addEntryForRustCrate(indexed *dictionary.Indexed, ref string, cpeItem CpeItem) { | ||
// Prune off the non-package-name parts of the URL | ||
ref = strings.TrimPrefix(ref, prefixForRustCrates) | ||
ref = strings.Split(ref, "/")[0] | ||
|
||
if _, ok := indexed.EcosystemPackages[dictionary.EcosystemRustCrates]; !ok { | ||
indexed.EcosystemPackages[dictionary.EcosystemRustCrates] = make(dictionary.Packages) | ||
} | ||
|
||
indexed.EcosystemPackages[dictionary.EcosystemRustCrates][ref] = cpeItem.Cpe23Item.Name | ||
} | ||
|
||
func addEntryForJenkinsPlugin(indexed *dictionary.Indexed, ref string, cpeItem CpeItem) { | ||
// Prune off the non-package-name parts of the URL | ||
ref = strings.TrimPrefix(ref, prefixForJenkinsPlugins) | ||
ref = strings.Split(ref, "/")[0] | ||
|
||
if !strings.HasSuffix(ref, "-plugin") { | ||
// It's not a jenkins plugin! | ||
return | ||
} | ||
|
||
ref = strings.TrimSuffix(ref, "-plugin") | ||
|
||
if _, ok := indexed.EcosystemPackages[dictionary.EcosystemJenkinsPlugins]; !ok { | ||
indexed.EcosystemPackages[dictionary.EcosystemJenkinsPlugins] = make(dictionary.Packages) | ||
} | ||
|
||
indexed.EcosystemPackages[dictionary.EcosystemJenkinsPlugins][ref] = cpeItem.Cpe23Item.Name | ||
} | ||
|
||
func addEntryForPyPIPackage(indexed *dictionary.Indexed, ref string, cpeItem CpeItem) { | ||
// Prune off the non-package-name parts of the URL | ||
ref = strings.TrimPrefix(ref, prefixForPyPIPackages) | ||
ref = strings.Split(ref, "/")[0] | ||
|
||
if _, ok := indexed.EcosystemPackages[dictionary.EcosystemPyPI]; !ok { | ||
indexed.EcosystemPackages[dictionary.EcosystemPyPI] = make(dictionary.Packages) | ||
} | ||
|
||
indexed.EcosystemPackages[dictionary.EcosystemPyPI][ref] = cpeItem.Cpe23Item.Name | ||
} | ||
|
||
func addEntryForNativeRubyGem(indexed *dictionary.Indexed, ref string, cpeItem CpeItem) { | ||
// Prune off the non-package-name parts of the URL | ||
ref = strings.TrimPrefix(ref, prefixForNativeRubyGems) | ||
ref = strings.Split(ref, "/")[0] | ||
|
||
if _, ok := indexed.EcosystemPackages[dictionary.EcosystemRubyGems]; !ok { | ||
indexed.EcosystemPackages[dictionary.EcosystemRubyGems] = make(dictionary.Packages) | ||
} | ||
|
||
indexed.EcosystemPackages[dictionary.EcosystemRubyGems][ref] = cpeItem.Cpe23Item.Name | ||
} | ||
|
||
func addEntryForRubyGem(indexed *dictionary.Indexed, ref string, cpeItem CpeItem) { | ||
// Prune off the non-package-name parts of the URL | ||
ref = strings.TrimPrefix(ref, prefixForRubyGems) | ||
ref = strings.TrimPrefix(ref, prefixForRubyGemsHTTP) | ||
ref = strings.Split(ref, "/")[0] | ||
|
||
if _, ok := indexed.EcosystemPackages[dictionary.EcosystemRubyGems]; !ok { | ||
indexed.EcosystemPackages[dictionary.EcosystemRubyGems] = make(dictionary.Packages) | ||
} | ||
|
||
indexed.EcosystemPackages[dictionary.EcosystemRubyGems][ref] = cpeItem.Cpe23Item.Name | ||
} | ||
|
||
func addEntryForNPMPackage(indexed *dictionary.Indexed, ref string, cpeItem CpeItem) { | ||
// Prune off the non-package-name parts of the URL | ||
ref = strings.Split(ref, "/v/")[0] | ||
ref = strings.Split(ref, "?")[0] | ||
ref = strings.TrimPrefix(ref, prefixForNPMPackages) | ||
|
||
if _, ok := indexed.EcosystemPackages[dictionary.EcosystemNPM]; !ok { | ||
indexed.EcosystemPackages[dictionary.EcosystemNPM] = make(dictionary.Packages) | ||
} | ||
|
||
indexed.EcosystemPackages[dictionary.EcosystemNPM][ref] = cpeItem.Cpe23Item.Name | ||
} |
42 changes: 42 additions & 0 deletions
42
syft/pkg/cataloger/common/cpe/dictionary/index-generator/generate_test.go
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
package main | ||
|
||
import ( | ||
"bytes" | ||
"compress/gzip" | ||
"io" | ||
"os" | ||
"testing" | ||
|
||
"github.com/google/go-cmp/cmp" | ||
"github.com/stretchr/testify/assert" | ||
"github.com/stretchr/testify/require" | ||
) | ||
|
||
func Test_generateIndexedDictionaryJSON(t *testing.T) { | ||
f, err := os.Open("testdata/official-cpe-dictionary_v2.3.xml") | ||
require.NoError(t, err) | ||
|
||
// Create a buffer to store the gzipped data in memory | ||
buf := new(bytes.Buffer) | ||
|
||
w := gzip.NewWriter(buf) | ||
_, err = io.Copy(w, f) | ||
require.NoError(t, err) | ||
|
||
// (finalize the gzip stream) | ||
err = w.Close() | ||
require.NoError(t, err) | ||
|
||
dictionaryJSON, err := generateIndexedDictionaryJSON(buf) | ||
assert.NoError(t, err) | ||
|
||
expected, err := os.ReadFile("./testdata/expected-cpe-index.json") | ||
require.NoError(t, err) | ||
|
||
expectedDictionaryJSONString := string(expected) | ||
dictionaryJSONString := string(dictionaryJSON) | ||
|
||
if diff := cmp.Diff(expectedDictionaryJSONString, dictionaryJSONString); diff != "" { | ||
t.Errorf("generateIndexedDictionaryJSON() mismatch (-want +got):\n%s", diff) | ||
} | ||
} |
Oops, something went wrong.