Skip to content

Commit

Permalink
epss: added epss enricher
Browse files Browse the repository at this point in the history
Signed-off-by: daynewlee <[email protected]>
  • Loading branch information
daynewlee committed Nov 14, 2024
1 parent e7094c2 commit 4384a07
Show file tree
Hide file tree
Showing 2 changed files with 281 additions and 0 deletions.
280 changes: 280 additions & 0 deletions enricher/epss/epss.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,280 @@
package epss

import (
"compress/gzip"
"context"
"encoding/csv"
"encoding/json"
"fmt"
"github.com/google/uuid"
"github.com/pkg/errors"
"github.com/quay/claircore"
"github.com/quay/claircore/libvuln/driver"
"github.com/quay/claircore/pkg/tmp"
"github.com/quay/zlog"
"io"
"net/http"
"net/url"
"path"
"regexp"
"sort"
"strings"
"time"
)

var (
_ driver.Enricher = (*Enricher)(nil)
_ driver.EnrichmentUpdater = (*Enricher)(nil)

defaultFeed *url.URL
)

// This is a slightly more relaxed version of the validation pattern in the NVD
// JSON schema: https://csrc.nist.gov/schema/nvd/feed/1.1/CVE_JSON_4.0_min_1.1.schema
//
// It allows for "CVE" to be case insensitive and for dashes and underscores
// between the different segments.
var cveRegexp = regexp.MustCompile(`(?i:cve)[-_][0-9]{4}[-_][0-9]{4,}`)

const (
// Type is the type of data returned from the Enricher's Enrich method.
Type = `message/vnd.clair.map.vulnerability; enricher=clair.epss schema=https://csrc.nist.gov/schema/nvd/feed/1.1/cvss-v3.x.json`

// DefaultFeeds is the default place to look for EPSS feeds.
// epss_scores-YYYY-MM-DD.csv.gz needs to be specified to get all data
DefaultFeeds = `https://epss.cyentia.com/`

// epssName is the name of the enricher
epssName = `clair.epss`
)

func init() {
var err error
defaultFeed, err = url.Parse(DefaultFeeds)
if err != nil {
panic(err)
}
}

// Enricher provides EPSS data as enrichments to a VulnerabilityReport.
//
// Configure must be called before any other methods.
type Enricher struct {
driver.NoopUpdater
c *http.Client
feed *url.URL
feedPath string
}

// Config is the configuration for Enricher.
type Config struct {
FeedRoot *string `json:"feed_root" yaml:"feed_root"`
}

func (e *Enricher) Configure(ctx context.Context, f driver.ConfigUnmarshaler, c *http.Client) error {
ctx = zlog.ContextWithValues(ctx, "component", "enricher/epss/Enricher/Configure")
var cfg Config
e.c = c

if err := f(&cfg); err != nil {
return err
}

if cfg.FeedRoot != nil {
// validate the URL format
if _, err := url.Parse(*cfg.FeedRoot); err != nil {
return fmt.Errorf("invalid URL format for FeedRoot: %w", err)
}

// Check for a .gz file
if strings.HasSuffix(*cfg.FeedRoot, ".gz") {
e.feedPath = *cfg.FeedRoot
} else {
e.sourceURL() // Fallback to the default source URL if not a .gz file
}
} else {
e.sourceURL()
}

return nil
}

func (e *Enricher) FetchEnrichment(ctx context.Context, fingerprint driver.Fingerprint) (io.ReadCloser, driver.Fingerprint, error) {
ctx = zlog.ContextWithValues(ctx, "component", "enricher/epss/Enricher/FetchEnrichment")
// Force a new hint, to signal updaters that this is new data.
newUUID := uuid.New()
hint := driver.Fingerprint(newUUID.String())
zlog.Info(ctx).Str("hint", string(hint)).Msg("starting fetch")

out, err := tmp.NewFile("", "enricher.epss.*.json")
if err != nil {
return nil, hint, err
}
var success bool
defer func() {
if !success {
if err := out.Close(); err != nil {
zlog.Warn(ctx).Err(err).Msg("unable to close spool")
}
}
}()

if e.feedPath == "" || !strings.HasSuffix(e.feedPath, ".gz") {
e.sourceURL()
}

resp, err := http.Get(e.feedPath)
if err != nil {
return nil, "", fmt.Errorf("failed to fetch file from %s: %w", e.feedPath, err)
}
defer resp.Body.Close()

if resp.StatusCode != http.StatusOK {
return nil, "", fmt.Errorf("failed to fetch file: received status %d", resp.StatusCode)
}

gzipReader, err := gzip.NewReader(resp.Body)
if err != nil {
return nil, "", fmt.Errorf("failed to decompress file: %w", err)
}
defer gzipReader.Close()

csvReader := csv.NewReader(gzipReader)
headers, err := csvReader.Read() // Column names
if err != nil {
return nil, "", fmt.Errorf("failed to read CSV headers: %w", err)
}

enc := json.NewEncoder(out)
totalCVEs := 0

for {
record, err := csvReader.Read()
if err == io.EOF {
break
}
if err != nil {
return nil, "", fmt.Errorf("failed to read CSV row: %w", err)
}

item := make(map[string]string)
for i, value := range record {
item[headers[i]] = value
}
enrichment, err := json.Marshal(item)

r := driver.EnrichmentRecord{
Tags: []string{item["cve"]},
Enrichment: enrichment,
}

if err = enc.Encode(&r); err != nil {
return nil, "", fmt.Errorf("encoding enrichment: %w", err)
}
totalCVEs++
}

zlog.Info(ctx).Int("totalCVEs", totalCVEs).Msg("processed CVEs")
if _, err := out.Seek(0, io.SeekStart); err != nil {
return nil, hint, fmt.Errorf("unable to reset file pointer: %w", err)
}
success = true

return out, hint, nil
}

// ParseEnrichment implements driver.EnrichmentUpdater.
func (e *Enricher) ParseEnrichment(ctx context.Context, rc io.ReadCloser) ([]driver.EnrichmentRecord, error) {
ctx = zlog.ContextWithValues(ctx, "component", "enricher/epss/Enricher/ParseEnrichment")
// Our Fetch method actually has all the smarts w/r/t to constructing the
// records, so this is just decoding in a loop.
defer func() {
_ = rc.Close()
}()
var err error
dec := json.NewDecoder(rc)
ret := make([]driver.EnrichmentRecord, 0, 250_000) // Wild guess at initial capacity.
// This is going to allocate like mad, hold onto your butts.
for err == nil {
ret = append(ret, driver.EnrichmentRecord{})
err = dec.Decode(&ret[len(ret)-1])
}
zlog.Debug(ctx).
Int("count", len(ret)-1).
Msg("decoded enrichments")
if !errors.Is(err, io.EOF) {
return nil, err
}
return ret, nil
}

func (*Enricher) Name() string {
return epssName
}

func (e *Enricher) sourceURL() {
currentDate := time.Now()
formattedDate := currentDate.Format("2006-01-02")
filePath := fmt.Sprintf("epss_scores-%s.csv.gz", formattedDate)
e.feedPath = path.Join(DefaultFeeds, filePath)
}

func (e *Enricher) Enrich(ctx context.Context, g driver.EnrichmentGetter, r *claircore.VulnerabilityReport) (string, []json.RawMessage, error) {
ctx = zlog.ContextWithValues(ctx, "component", "enricher/epss/Enricher/Enrich")

// We return any CVSS blobs for CVEs mentioned in the free-form parts of the
// vulnerability.
m := make(map[string][]json.RawMessage)

erCache := make(map[string][]driver.EnrichmentRecord)
for id, v := range r.Vulnerabilities {
t := make(map[string]struct{})
ctx := zlog.ContextWithValues(ctx,
"vuln", v.Name)
for _, elem := range []string{
v.Description,
v.Name,
v.Links,
} {
for _, m := range cveRegexp.FindAllString(elem, -1) {
t[m] = struct{}{}
}
}
if len(t) == 0 {
continue
}
ts := make([]string, 0, len(t))
for m := range t {
ts = append(ts, m)
}
zlog.Debug(ctx).
Strs("cve", ts).
Msg("found CVEs")

sort.Strings(ts)
cveKey := strings.Join(ts, "_")
rec, ok := erCache[cveKey]
if !ok {
var err error
rec, err = g.GetEnrichment(ctx, ts)
if err != nil {
return "", nil, err
}
erCache[cveKey] = rec
}
zlog.Debug(ctx).
Int("count", len(rec)).
Msg("found records")
for _, r := range rec {
m[id] = append(m[id], r.Enrichment)
}
}
if len(m) == 0 {
return Type, nil, nil
}
b, err := json.Marshal(m)
if err != nil {
return Type, nil, err
}
return Type, []json.RawMessage{b}, nil
}
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ require (
github.com/knqyf263/go-deb-version v0.0.0-20190517075300-09fca494f03d
github.com/knqyf263/go-rpm-version v0.0.0-20170716094938-74609b86c936
github.com/package-url/packageurl-go v0.1.3
github.com/pkg/errors v0.9.1
github.com/prometheus/client_golang v1.20.5
github.com/quay/claircore/toolkit v1.2.4
github.com/quay/claircore/updater/driver v1.0.0
Expand Down

0 comments on commit 4384a07

Please sign in to comment.