forked from anchore/syft
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Introduce indexed embedded CPE dictionary (anchore#1897)
* Introduce indexed embedded CPE dictionary Signed-off-by: Dan Luhring <[email protected]> * Don't generate cpe-index on make snapshot Signed-off-by: Dan Luhring <[email protected]> * Add unit tests for individual addEntry funcs Signed-off-by: Dan Luhring <[email protected]> * migrate CPE index build to go generate and add periodic workflow Signed-off-by: Alex Goodman <[email protected]> * add test to ensure generated cpe index is wired up to function that uses it Signed-off-by: Alex Goodman <[email protected]> --------- Signed-off-by: Dan Luhring <[email protected]> Signed-off-by: Alex Goodman <[email protected]> Co-authored-by: Alex Goodman <[email protected]>
- Loading branch information
Showing
16 changed files
with
26,855 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,7 +6,7 @@ on: | |
workflow_dispatch: | ||
|
||
env: | ||
GO_VERSION: "1.19.x" | ||
GO_VERSION: "1.20.x" | ||
GO_STABLE_VERSION: true | ||
|
||
jobs: | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
# Regenerates the CPE dictionary index and proposes the result as a pull request.
name: PR to update CPE dictionary index
on:
  schedule:
    - cron: "0 1 * * 1" # every monday at 1 AM

  # also allow the workflow to be started by hand
  workflow_dispatch:

env:
  GO_VERSION: "1.20.x"
  GO_STABLE_VERSION: true

jobs:
  upgrade-cpe-dictionary-index:
    runs-on: ubuntu-latest
    if: github.repository == 'anchore/syft' # only run for main repo
    steps:
      - uses: actions/checkout@v3

      - uses: actions/setup-go@v4
        with:
          go-version: ${{ env.GO_VERSION }}
          stable: ${{ env.GO_STABLE_VERSION }}

      # rebuild the index file via the repository's make target
      - run: |
          make generate-cpe-dictionary-index
      # create a token from the app credentials held in the repository secrets;
      # the pull request step below uses it
      - uses: tibdex/github-app-token@v1
        id: generate-token
        with:
          app_id: ${{ secrets.TOKEN_APP_ID }}
          private_key: ${{ secrets.TOKEN_APP_PRIVATE_KEY }}

      # open (or update) a pull request from a fixed branch name
      - uses: peter-evans/create-pull-request@v5
        with:
          signoff: true
          delete-branch: true
          branch: auto/latest-cpe-dictionary-index
          labels: dependencies
          commit-message: "chore(deps): update CPE dictionary index"
          title: "chore(deps): update CPE dictionary index"
          body: |
            Update CPE dictionary index based on the latest available CPE dictionary
          token: ${{ steps.generate-token.outputs.token }}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,7 +6,7 @@ on: | |
workflow_dispatch: | ||
|
||
env: | ||
GO_VERSION: "1.19.x" | ||
GO_VERSION: "1.20.x" | ||
GO_STABLE_VERSION: true | ||
|
||
jobs: | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1,296 changes: 1,296 additions & 0 deletions
1,296
syft/pkg/cataloger/common/cpe/dictionary/data/cpe-index.json
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
package dictionary | ||
|
||
//go:generate go run ./index-generator/ -o data/cpe-index.json |
230 changes: 230 additions & 0 deletions
230
syft/pkg/cataloger/common/cpe/dictionary/index-generator/generate.go
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,230 @@ | ||
package main | ||
|
||
import ( | ||
"compress/gzip" | ||
"encoding/json" | ||
"encoding/xml" | ||
"fmt" | ||
"io" | ||
"log" | ||
"strings" | ||
|
||
"github.com/facebookincubator/nvdtools/wfn" | ||
"golang.org/x/exp/slices" | ||
|
||
"github.com/anchore/syft/syft/pkg/cataloger/common/cpe/dictionary" | ||
) | ||
|
||
func generateIndexedDictionaryJSON(rawGzipData io.Reader) ([]byte, error) { | ||
gzipReader, err := gzip.NewReader(rawGzipData) | ||
if err != nil { | ||
return nil, fmt.Errorf("unable to decompress CPE dictionary: %w", err) | ||
} | ||
defer gzipReader.Close() | ||
|
||
// Read XML data | ||
data, err := io.ReadAll(gzipReader) | ||
if err != nil { | ||
return nil, fmt.Errorf("unable to read CPE dictionary: %w", err) | ||
} | ||
|
||
// Unmarshal XML | ||
var cpeList CpeList | ||
if err := xml.Unmarshal(data, &cpeList); err != nil { | ||
return nil, fmt.Errorf("unable to unmarshal CPE dictionary XML: %w", err) | ||
} | ||
|
||
// Filter out data that's not applicable here | ||
cpeList = filterCpeList(cpeList) | ||
|
||
// Create indexed dictionary to help with looking up CPEs | ||
indexedDictionary := indexCPEList(cpeList) | ||
|
||
// Convert to JSON | ||
jsonData, err := json.MarshalIndent(indexedDictionary, "", " ") | ||
if err != nil { | ||
return nil, fmt.Errorf("unable to marshal CPE dictionary to JSON: %w", err) | ||
} | ||
return jsonData, nil | ||
} | ||
|
||
// filterCpeList removes CPE items that are not applicable to software packages. | ||
func filterCpeList(cpeList CpeList) CpeList { | ||
var processedCpeList CpeList | ||
|
||
seen := make(map[string]struct{}) | ||
|
||
for _, cpeItem := range cpeList.CpeItems { | ||
// Skip CPE items that don't have any references. | ||
if len(cpeItem.References) == 0 { | ||
continue | ||
} | ||
|
||
// Skip CPE items where the CPE URI doesn't meet our criteria. | ||
parsedName, err := wfn.Parse(cpeItem.Name) | ||
if err != nil { | ||
log.Printf("unable to parse CPE URI %q: %s", cpeItem.Name, err) | ||
} | ||
|
||
if slices.Contains([]string{"h", "o"}, parsedName.Part) { | ||
continue | ||
} | ||
|
||
normalizedName := normalizeCPE(parsedName).BindToURI() | ||
if _, ok := seen[normalizedName]; ok { | ||
continue | ||
} | ||
seen[normalizedName] = struct{}{} | ||
cpeItem.Name = normalizedName | ||
|
||
parsedCPE, err := wfn.Parse(cpeItem.Cpe23Item.Name) | ||
if err != nil { | ||
log.Printf("unable to parse CPE value %q: %s", cpeItem.Cpe23Item.Name, err) | ||
} | ||
|
||
cpeItem.Cpe23Item.Name = normalizeCPE(parsedCPE).BindToFmtString() | ||
|
||
processedCpeList.CpeItems = append(processedCpeList.CpeItems, cpeItem) | ||
} | ||
|
||
return processedCpeList | ||
} | ||
|
||
// normalizeCPE removes the version and update parts of a CPE. | ||
func normalizeCPE(cpe *wfn.Attributes) *wfn.Attributes { | ||
cpeCopy := *cpe | ||
|
||
cpeCopy.Version = "" | ||
cpeCopy.Update = "" | ||
|
||
return &cpeCopy | ||
} | ||
|
||
// Reference URL prefixes used to recognise which ecosystem a CPE reference
// points at.
const (
	// npm
	prefixForNPMPackages = "https://www.npmjs.com/package/"

	// PyPI
	prefixForPyPIPackages = "https://pypi.org/project/"

	// Rust crates
	prefixForRustCrates = "https://crates.io/crates/"

	// Jenkins plugins
	prefixForJenkinsPlugins = "https://github.com/jenkinsci/"

	// Ruby gems: both URL schemes of rubygems.org, plus the ruby GitHub org
	prefixForRubyGems       = "https://rubygems.org/gems/"
	prefixForRubyGemsHTTP   = "http://rubygems.org/gems/"
	prefixForNativeRubyGems = "https://github.com/ruby/"
)
|
||
// indexCPEList creates an index of CPEs by ecosystem. | ||
func indexCPEList(list CpeList) *dictionary.Indexed { | ||
indexed := &dictionary.Indexed{ | ||
EcosystemPackages: make(map[string]dictionary.Packages), | ||
} | ||
|
||
for _, cpeItem := range list.CpeItems { | ||
cpeItemName := cpeItem.Cpe23Item.Name | ||
|
||
for _, reference := range cpeItem.References { | ||
ref := reference.Reference.Href | ||
|
||
switch { | ||
case strings.HasPrefix(ref, prefixForNPMPackages): | ||
addEntryForNPMPackage(indexed, ref, cpeItemName) | ||
|
||
case strings.HasPrefix(ref, prefixForRubyGems), strings.HasPrefix(ref, prefixForRubyGemsHTTP): | ||
addEntryForRubyGem(indexed, ref, cpeItemName) | ||
|
||
case strings.HasPrefix(ref, prefixForNativeRubyGems): | ||
addEntryForNativeRubyGem(indexed, ref, cpeItemName) | ||
|
||
case strings.HasPrefix(ref, prefixForPyPIPackages): | ||
addEntryForPyPIPackage(indexed, ref, cpeItemName) | ||
|
||
case strings.HasPrefix(ref, prefixForJenkinsPlugins): | ||
// It _might_ be a jenkins plugin! | ||
addEntryForJenkinsPlugin(indexed, ref, cpeItemName) | ||
|
||
case strings.HasPrefix(ref, prefixForRustCrates): | ||
addEntryForRustCrate(indexed, ref, cpeItemName) | ||
} | ||
} | ||
} | ||
|
||
return indexed | ||
} | ||
|
||
func addEntryForRustCrate(indexed *dictionary.Indexed, ref string, cpeItemName string) { | ||
// Prune off the non-package-name parts of the URL | ||
ref = strings.TrimPrefix(ref, prefixForRustCrates) | ||
ref = strings.Split(ref, "/")[0] | ||
|
||
if _, ok := indexed.EcosystemPackages[dictionary.EcosystemRustCrates]; !ok { | ||
indexed.EcosystemPackages[dictionary.EcosystemRustCrates] = make(dictionary.Packages) | ||
} | ||
|
||
indexed.EcosystemPackages[dictionary.EcosystemRustCrates][ref] = cpeItemName | ||
} | ||
|
||
func addEntryForJenkinsPlugin(indexed *dictionary.Indexed, ref string, cpeItemName string) { | ||
// Prune off the non-package-name parts of the URL | ||
ref = strings.TrimPrefix(ref, prefixForJenkinsPlugins) | ||
ref = strings.Split(ref, "/")[0] | ||
|
||
if !strings.HasSuffix(ref, "-plugin") { | ||
// It's not a jenkins plugin! | ||
return | ||
} | ||
|
||
ref = strings.TrimSuffix(ref, "-plugin") | ||
|
||
if _, ok := indexed.EcosystemPackages[dictionary.EcosystemJenkinsPlugins]; !ok { | ||
indexed.EcosystemPackages[dictionary.EcosystemJenkinsPlugins] = make(dictionary.Packages) | ||
} | ||
|
||
indexed.EcosystemPackages[dictionary.EcosystemJenkinsPlugins][ref] = cpeItemName | ||
} | ||
|
||
func addEntryForPyPIPackage(indexed *dictionary.Indexed, ref string, cpeItemName string) { | ||
// Prune off the non-package-name parts of the URL | ||
ref = strings.TrimPrefix(ref, prefixForPyPIPackages) | ||
ref = strings.Split(ref, "/")[0] | ||
|
||
if _, ok := indexed.EcosystemPackages[dictionary.EcosystemPyPI]; !ok { | ||
indexed.EcosystemPackages[dictionary.EcosystemPyPI] = make(dictionary.Packages) | ||
} | ||
|
||
indexed.EcosystemPackages[dictionary.EcosystemPyPI][ref] = cpeItemName | ||
} | ||
|
||
func addEntryForNativeRubyGem(indexed *dictionary.Indexed, ref string, cpeItemName string) { | ||
// Prune off the non-package-name parts of the URL | ||
ref = strings.TrimPrefix(ref, prefixForNativeRubyGems) | ||
ref = strings.Split(ref, "/")[0] | ||
|
||
if _, ok := indexed.EcosystemPackages[dictionary.EcosystemRubyGems]; !ok { | ||
indexed.EcosystemPackages[dictionary.EcosystemRubyGems] = make(dictionary.Packages) | ||
} | ||
|
||
indexed.EcosystemPackages[dictionary.EcosystemRubyGems][ref] = cpeItemName | ||
} | ||
|
||
func addEntryForRubyGem(indexed *dictionary.Indexed, ref string, cpeItemName string) { | ||
// Prune off the non-package-name parts of the URL | ||
ref = strings.TrimPrefix(ref, prefixForRubyGems) | ||
ref = strings.TrimPrefix(ref, prefixForRubyGemsHTTP) | ||
ref = strings.Split(ref, "/")[0] | ||
|
||
if _, ok := indexed.EcosystemPackages[dictionary.EcosystemRubyGems]; !ok { | ||
indexed.EcosystemPackages[dictionary.EcosystemRubyGems] = make(dictionary.Packages) | ||
} | ||
|
||
indexed.EcosystemPackages[dictionary.EcosystemRubyGems][ref] = cpeItemName | ||
} | ||
|
||
func addEntryForNPMPackage(indexed *dictionary.Indexed, ref string, cpeItemName string) { | ||
// Prune off the non-package-name parts of the URL | ||
ref = strings.Split(ref, "/v/")[0] | ||
ref = strings.Split(ref, "?")[0] | ||
ref = strings.TrimPrefix(ref, prefixForNPMPackages) | ||
|
||
if _, ok := indexed.EcosystemPackages[dictionary.EcosystemNPM]; !ok { | ||
indexed.EcosystemPackages[dictionary.EcosystemNPM] = make(dictionary.Packages) | ||
} | ||
|
||
indexed.EcosystemPackages[dictionary.EcosystemNPM][ref] = cpeItemName | ||
} |
Oops, something went wrong.