From 2cc4e37374c7842e81144d5a205c9ec575a0d522 Mon Sep 17 00:00:00 2001 From: daynewlee Date: Mon, 11 Nov 2024 15:31:57 -0600 Subject: [PATCH 1/5] epss: created epss enricher and enrich test Signed-off-by: daynewlee --- enricher/constants.go | 10 + enricher/cvss/cvss.go | 11 +- enricher/epss/epss.go | 405 ++++++++++++++++++++++++++++++++ enricher/epss/epss_test.go | 395 +++++++++++++++++++++++++++++++ enricher/epss/testdata/data.csv | 31 +++ 5 files changed, 843 insertions(+), 9 deletions(-) create mode 100644 enricher/constants.go create mode 100644 enricher/epss/epss.go create mode 100644 enricher/epss/epss_test.go create mode 100644 enricher/epss/testdata/data.csv diff --git a/enricher/constants.go b/enricher/constants.go new file mode 100644 index 000000000..21296e6e1 --- /dev/null +++ b/enricher/constants.go @@ -0,0 +1,10 @@ +package enricher + +import "regexp" + +// This is a slightly more relaxed version of the validation pattern in the NVD +// JSON schema: https://csrc.nist.gov/schema/nvd/feed/1.1/CVE_JSON_4.0_min_1.1.schema +// +// It allows for "CVE" to be case insensitive and for dashes and underscores +// between the different segments. +var CVERegexp = regexp.MustCompile(`(?i:cve)[-_][0-9]{4}[-_][0-9]{4,}`) diff --git a/enricher/cvss/cvss.go b/enricher/cvss/cvss.go index 887a04309..ca0d31e77 100644 --- a/enricher/cvss/cvss.go +++ b/enricher/cvss/cvss.go @@ -11,7 +11,6 @@ import ( "io" "net/http" "net/url" - "regexp" "sort" "strings" "time" @@ -19,6 +18,7 @@ import ( "github.com/quay/zlog" "github.com/quay/claircore" + "github.com/quay/claircore/enricher" "github.com/quay/claircore/libvuln/driver" "github.com/quay/claircore/pkg/tmp" ) @@ -253,13 +253,6 @@ func (e *Enricher) ParseEnrichment(ctx context.Context, rc io.ReadCloser) ([]dri return ret, nil } -// This is a slightly more relaxed version of the validation pattern in the NVD -// JSON schema: https://csrc.nist.gov/schema/nvd/feed/1.1/CVE_JSON_4.0_min_1.1.schema -// -// It allows for "CVE" to be case insensitive and for dashes and underscores -// between the different segments. -var cveRegexp = regexp.MustCompile(`(?i:cve)[-_][0-9]{4}[-_][0-9]{4,}`) - // Enrich implements driver.Enricher. func (e *Enricher) Enrich(ctx context.Context, g driver.EnrichmentGetter, r *claircore.VulnerabilityReport) (string, []json.RawMessage, error) { ctx = zlog.ContextWithValues(ctx, "component", "enricher/cvss/Enricher/Enrich") @@ -278,7 +271,7 @@ func (e *Enricher) Enrich(ctx context.Context, g driver.EnrichmentGetter, r *cla v.Name, v.Links, } { - for _, m := range cveRegexp.FindAllString(elem, -1) { + for _, m := range enricher.CVERegexp.FindAllString(elem, -1) { t[m] = struct{}{} } } diff --git a/enricher/epss/epss.go b/enricher/epss/epss.go new file mode 100644 index 000000000..6149c56e5 --- /dev/null +++ b/enricher/epss/epss.go @@ -0,0 +1,405 @@ +package epss + +import ( + "compress/gzip" + "context" + "encoding/csv" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "net/url" + "path" + "sort" + "strconv" + "strings" + "time" + + "github.com/quay/claircore" + "github.com/quay/claircore/enricher" + "github.com/quay/claircore/libvuln/driver" + "github.com/quay/claircore/pkg/tmp" + "github.com/quay/zlog" +) + +var ( + _ driver.Enricher = (*Enricher)(nil) + _ driver.EnrichmentUpdater = (*Enricher)(nil) +) + +type EPSSItem struct { + ModelVersion string `json:"modelVersion"` + Date string `json:"date"` + CVE string `json:"cve"` + EPSS float64 `json:"epss"` + Percentile float64 `json:"percentile"` +} + +const ( + // Type is the type of data returned from the Enricher's Enrich method. + Type = `message/vnd.clair.map.vulnerability; enricher=clair.epss schema=https://csrc.nist.gov/schema/nvd/feed/1.1/cvss-v3.x.json` + + // DefaultFeed is the default place to look for EPSS feeds. + // epss_scores-YYYY-MM-DD.csv.gz needs to be specified to get all data + DefaultFeed = `https://epss.cyentia.com/` + + // epssName is the name of the enricher + epssName = `clair.epss` +) + +func init() { + var err error + if err != nil { + panic(err) + } +} + +// Enricher provides EPSS data as enrichments to a VulnerabilityReport. +// +// Configure must be called before any other methods. +type Enricher struct { + driver.NoopUpdater + c *http.Client + feed *url.URL + feedPath string +} + +// Config is the configuration for Enricher. +type Config struct { + FeedRoot *string `json:"feed_root" yaml:"feed_root"` +} + +func (e *Enricher) Configure(ctx context.Context, f driver.ConfigUnmarshaler, c *http.Client) error { + ctx = zlog.ContextWithValues(ctx, "component", "enricher/epss/Enricher/Configure") + var cfg Config + e.c = c + e.feedPath = currentFeedURL() + if f == nil { + zlog.Debug(ctx).Msg("No configuration provided; proceeding with default settings") + return nil + } + if err := f(&cfg); err != nil { + return err + } + if cfg.FeedRoot != nil { + // validate the URL format + if _, err := url.Parse(*cfg.FeedRoot); err != nil { + return fmt.Errorf("invalid URL format for FeedRoot: %w", err) + } + + // only .gz file is supported + if strings.HasSuffix(*cfg.FeedRoot, ".gz") { + //overwrite feedPath is cfg provides another feed path + e.feedPath = *cfg.FeedRoot + } else { + return fmt.Errorf("invalid feed root: expected a '.gz' file, but got '%q'", *cfg.FeedRoot) + } + } + + return nil +} + +// FetchEnrichment implements driver.EnrichmentUpdater. +func (e *Enricher) FetchEnrichment(ctx context.Context, prevFingerprint driver.Fingerprint) (io.ReadCloser, driver.Fingerprint, error) { + ctx = zlog.ContextWithValues(ctx, "component", "enricher/epss/Enricher/FetchEnrichment") + + if e.feedPath == "" || !strings.HasSuffix(e.feedPath, ".gz") { + return nil, "", fmt.Errorf("invalid feed path: %q must be non-empty and end with '.gz'", e.feedPath) + } + + out, err := tmp.NewFile("", "epss.") + if err != nil { + return nil, "", err + } + var success bool + defer func() { + if !success { + if err := out.Close(); err != nil { + zlog.Warn(ctx).Err(err).Msg("unable to close spool") + } + } + }() + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, e.feedPath, nil) + if err != nil { + return nil, "", fmt.Errorf("unable to create request for %s: %w", e.feedPath, err) + } + + resp, err := e.c.Do(req) + if err != nil { + return nil, "", fmt.Errorf("unable to fetch file from %s: %w", e.feedPath, err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, "", fmt.Errorf("unable to fetch file: received status %d", resp.StatusCode) + } + + etag := resp.Header.Get("ETag") + if etag == "" { + return nil, "", fmt.Errorf("ETag not found in response headers") + } + + newFingerprint := driver.Fingerprint(etag) + + if prevFingerprint == newFingerprint { + zlog.Info(ctx).Str("fingerprint", string(newFingerprint)).Msg("file unchanged; skipping processing") + return nil, prevFingerprint, nil + } + + gzipReader, err := gzip.NewReader(resp.Body) + if err != nil { + return nil, "", fmt.Errorf("unable to decompress file: %w", err) + } + defer gzipReader.Close() + + csvReader := csv.NewReader(gzipReader) + csvReader.FieldsPerRecord = -1 // Allow variable-length fields + + // assume metadata is always in the first line + record, err := csvReader.Read() + if err != nil { + return nil, "", fmt.Errorf("unable to read metadata line: %w", err) + } + + var modelVersion, date string + for _, field := range record { + field = strings.TrimSpace(field) + if strings.HasPrefix(field, "#") { + field = strings.TrimPrefix(field, "#") + } + kv := strings.SplitN(field, ":", 2) + if len(kv) == 2 { + switch strings.TrimSpace(kv[0]) { + case "model_version": + modelVersion = strings.TrimSpace(kv[1]) + case "score_date": + date = strings.TrimSpace(kv[1]) + } + } + } + + if modelVersion == "" || date == "" { + return nil, "", fmt.Errorf("missing metadata fields in record: %v", record) + } + + csvReader.Comment = '#' // Ignore subsequent comment lines + + record, err = csvReader.Read() + if err != nil { + return nil, "", fmt.Errorf("unable to read header line: %w", err) + } + if len(record) < 3 || record[0] != "cve" || record[1] != "epss" || record[2] != "percentile" { + return nil, "", fmt.Errorf("unexpected CSV headers: %v", record) + } + headers := record + + enc := json.NewEncoder(out) + totalCVEs := 0 + + for { + record, err = csvReader.Read() + if errors.Is(err, io.EOF) { + break + } + if err != nil { + return nil, "", fmt.Errorf("unable to read line in CSV: %w", err) + } + + if len(record) != len(headers) { + zlog.Warn(ctx).Str("record", fmt.Sprintf("%v", record)).Msg("skipping record with mismatched fields") + continue + } + + r, err := newItemFeed(record, headers, modelVersion, date) + if err != nil { + zlog.Warn(ctx).Str("record", fmt.Sprintf("%v", record)).Msg("skipping invalid record") + continue + } + + if err = enc.Encode(&r); err != nil { + return nil, "", fmt.Errorf("unable to write JSON line to file: %w", err) + } + totalCVEs++ + } + + zlog.Info(ctx).Int("totalCVEs", totalCVEs).Msg("processed CVEs") + if _, err := out.Seek(0, io.SeekStart); err != nil { + return nil, newFingerprint, fmt.Errorf("unable to reset file pointer: %w", err) + } + success = true + + return out, newFingerprint, nil +} + +// ParseEnrichment implements driver.EnrichmentUpdater. +func (e *Enricher) ParseEnrichment(ctx context.Context, rc io.ReadCloser) ([]driver.EnrichmentRecord, error) { + ctx = zlog.ContextWithValues(ctx, "component", "enricher/epss/Enricher/ParseEnrichment") + + defer func() { + _ = rc.Close() + }() + + dec := json.NewDecoder(rc) + ret := make([]driver.EnrichmentRecord, 0, 250_000) + var err error + + for { + var record driver.EnrichmentRecord + if err = dec.Decode(&record); err != nil { + break + } + ret = append(ret, record) + } + + zlog.Debug(ctx). + Int("count", len(ret)). + Msg("decoded enrichments") + + if !errors.Is(err, io.EOF) { + return nil, fmt.Errorf("error decoding enrichment records: %w", err) + } + + return ret, nil +} + +func (*Enricher) Name() string { + return epssName +} + +func currentFeedURL() string { + currentDate := time.Now() + formattedDate := currentDate.Format("2006-01-02") + filePath := fmt.Sprintf("epss_scores-%s.csv.gz", formattedDate) + + feedURL, err := url.Parse(DefaultFeed) + if err != nil { + panic(fmt.Errorf("invalid default feed URL: %w", err)) + } + + feedURL.Path = path.Join(feedURL.Path, filePath) + return feedURL.String() +} + +func (e *Enricher) Enrich(ctx context.Context, g driver.EnrichmentGetter, r *claircore.VulnerabilityReport) (string, []json.RawMessage, error) { + ctx = zlog.ContextWithValues(ctx, "component", "enricher/epss/Enricher/Enrich") + m := make(map[string][]json.RawMessage) + erCache := make(map[string][]driver.EnrichmentRecord) + + for id, v := range r.Vulnerabilities { + t := make(map[string]struct{}) + ctx := zlog.ContextWithValues(ctx, "vuln", v.Name) + + for _, elem := range []string{ + v.Description, + v.Name, + v.Links, + } { + // Check if the element is non-empty before running the regex + if elem == "" { + zlog.Debug(ctx).Str("element", elem).Msg("skipping empty element") + continue + } + + matches := enricher.CVERegexp.FindAllString(elem, -1) + if len(matches) == 0 { + zlog.Debug(ctx).Str("element", elem).Msg("no CVEs found in element") + continue + } + for _, m := range matches { + t[m] = struct{}{} + } + } + + // Skip if no CVEs were found + if len(t) == 0 { + zlog.Debug(ctx).Msg("no CVEs found in vulnerability metadata") + continue + } + + ts := make([]string, 0, len(t)) + for m := range t { + ts = append(ts, m) + } + sort.Strings(ts) + + cveKey := strings.Join(ts, "_") + + rec, ok := erCache[cveKey] + if !ok { + var err error + rec, err = g.GetEnrichment(ctx, ts) + if err != nil { + return "", nil, err + } + erCache[cveKey] = rec + } + + zlog.Debug(ctx).Int("count", len(rec)).Msg("found records") + + // Skip if no enrichment records are found + if len(rec) == 0 { + zlog.Debug(ctx).Strs("cve", ts).Msg("no enrichment records found for CVEs") + continue + } + + for _, r := range rec { + if _, exists := m[id]; !exists { + m[id] = []json.RawMessage{} + } + m[id] = append(m[id], r.Enrichment) + } + } + + if len(m) == 0 { + return Type, nil, nil + } + + b, err := json.Marshal(m) + if err != nil { + return Type, nil, err + } + return Type, []json.RawMessage{b}, nil +} + +func newItemFeed(record []string, headers []string, modelVersion string, scoreDate string) (driver.EnrichmentRecord, error) { + if len(record) != len(headers) { + return driver.EnrichmentRecord{}, fmt.Errorf("record and headers length mismatch") + } + + var item EPSSItem + for i, value := range record { + switch headers[i] { + case "cve": + item.CVE = value + case "epss": + if f, err := strconv.ParseFloat(value, 64); err == nil { + item.EPSS = f + } else { + return driver.EnrichmentRecord{}, fmt.Errorf("invalid float for epss: %w", err) + } + case "percentile": + if f, err := strconv.ParseFloat(value, 64); err == nil { + item.Percentile = f + } else { + return driver.EnrichmentRecord{}, fmt.Errorf("invalid float for percentile: %w", err) + } + } + } + + item.ModelVersion = modelVersion + item.Date = scoreDate + + enrichment, err := json.Marshal(item) + if err != nil { + return driver.EnrichmentRecord{}, fmt.Errorf("unable to encode enrichment: %w", err) + } + + r := driver.EnrichmentRecord{ + Tags: []string{item.CVE}, // CVE field should be set + Enrichment: enrichment, + } + + return r, nil +} diff --git a/enricher/epss/epss_test.go b/enricher/epss/epss_test.go new file mode 100644 index 000000000..3ee4059b7 --- /dev/null +++ b/enricher/epss/epss_test.go @@ -0,0 +1,395 @@ +package epss + +import ( + "compress/gzip" + "context" + "encoding/json" + "errors" + "io" + "log" + "net/http" + "net/http/httptest" + "os" + "path" + "path/filepath" + "testing" + + "github.com/google/go-cmp/cmp" + + "github.com/quay/claircore" + "github.com/quay/claircore/libvuln/driver" + "github.com/quay/zlog" +) + +func TestConfigure(t *testing.T) { + t.Parallel() + ctx := zlog.Test(context.Background(), t) + tt := []configTestcase{ + { + Name: "None", // No configuration provided, should use default + Check: func(t *testing.T, err error) { + if err != nil { + t.Errorf("unexpected error: %v", err) + } + }, + }, + { + Name: "Not OK", // URL without .gz is invalid + Config: func(i interface{}) error { + cfg := i.(*Config) + s := "http://example.com/" + cfg.FeedRoot = &s + return nil + }, + Check: func(t *testing.T, err error) { + if err == nil { + t.Errorf("expected invalid URL error, but got none: %v", err) + } + }, + }, + + { + Name: "UnmarshalError", // Expected error on unmarshaling + Config: func(_ interface{}) error { return errors.New("expected error") }, + Check: func(t *testing.T, err error) { + if err == nil { + t.Error("expected unmarshal error, but got none") + } + }, + }, + { + Name: "BadURL", // Malformed URL in FeedRoot + Config: func(i interface{}) error { + cfg := i.(*Config) + s := "http://[notaurl:/" + cfg.FeedRoot = &s + return nil + }, + Check: func(t *testing.T, err error) { + if err == nil { + t.Error("expected URL parse error, but got none") + } + }, + }, + { + Name: "ValidGZURL", // Proper .gz URL in FeedRoot + Config: func(i interface{}) error { + cfg := i.(*Config) + s := "http://example.com/epss_scores-2024-10-25.csv.gz" + cfg.FeedRoot = &s + return nil + }, + Check: func(t *testing.T, err error) { + if err != nil { + t.Errorf("unexpected error with .gz URL: %v", err) + } + }, + }, + } + + for _, tc := range tt { + t.Run(tc.Name, tc.Run(ctx)) + } +} + +func (tc configTestcase) Run(ctx context.Context) func(*testing.T) { + e := &Enricher{} + return func(t *testing.T) { + ctx := zlog.Test(ctx, t) + f := tc.Config + if f == nil { + f = noopConfig + } + err := e.Configure(ctx, f, nil) + if tc.Check == nil { + if err != nil { + t.Errorf("unexpected err: %v", err) + } + return + } + tc.Check(t, err) + } +} + +func TestFetch(t *testing.T) { + t.Parallel() + ctx := zlog.Test(context.Background(), t) + srv := mockServer(t) + + tt := []fetchTestcase{ + { + Name: "Fetch OK", // Tests successful fetch and data processing + Check: func(t *testing.T, rc io.ReadCloser, fp driver.Fingerprint, err error) { + if err != nil { + t.Errorf("unexpected error: %v", err) + return + } + defer rc.Close() + if rc == nil { + t.Error("expected non-nil ReadCloser for initial fetch") + } + if fp == driver.Fingerprint("") { + t.Error("expected non-empty fingerprint") + } + + // Further check if data is correctly read and structured + data, err := io.ReadAll(rc) + if err != nil { + t.Errorf("failed to read enrichment data: %v", err) + } + t.Logf("enrichment data: %s", string(data)) + }, + }, + } + + for _, tc := range tt { + t.Run(tc.Name, tc.Run(ctx, srv)) + } +} + +type fetchTestcase struct { + Check func(*testing.T, io.ReadCloser, driver.Fingerprint, error) + Name string + Hint string +} + +type configTestcase struct { + Config func(interface{}) error + Check func(*testing.T, error) + Name string +} + +func noopConfig(_ interface{}) error { return nil } + +func mockServer(t *testing.T) *httptest.Server { + const root = `testdata/` + + // Define a static ETag for testing purposes + const etagValue = `"test-etag-12345"` + + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch path.Ext(r.URL.Path) { + case ".gz": // only gz feed is supported + w.Header().Set("ETag", etagValue) + + f, err := os.Open(filepath.Join(root, "data.csv")) + if err != nil { + t.Errorf("open failed: %v", err) + w.WriteHeader(http.StatusInternalServerError) + return + } + defer f.Close() + + gz := gzip.NewWriter(w) + defer gz.Close() + if _, err := io.Copy(gz, f); err != nil { + t.Errorf("write error: %v", err) + w.WriteHeader(http.StatusInternalServerError) + return + } + default: + t.Errorf("unknown request path: %q", r.URL.Path) + w.WriteHeader(http.StatusBadRequest) + } + })) + + t.Cleanup(srv.Close) + return srv +} + +func (tc fetchTestcase) Run(ctx context.Context, srv *httptest.Server) func(*testing.T) { + return func(t *testing.T) { + e := &Enricher{} + ctx := zlog.Test(ctx, t) + configFunc := func(i interface{}) error { + cfg, ok := i.(*Config) + if !ok { + t.Fatal("expected Config type for i, but got a different type") + } + u := srv.URL + "/data.csv.gz" + cfg.FeedRoot = &u + return nil + } + + // Configure Enricher with mock server client and custom config + if err := e.Configure(ctx, configFunc, srv.Client()); err != nil { + t.Errorf("unexpected error: %v", err) + return + } + + // Run FetchEnrichment and validate the result using Check + rc, fp, err := e.FetchEnrichment(ctx, driver.Fingerprint(tc.Hint)) + if rc != nil { + defer rc.Close() + } + if tc.Check != nil { + tc.Check(t, rc, fp, err) + } else if err != nil { + t.Errorf("unexpected error: %v", err) + } + } +} + +func TestParse(t *testing.T) { + t.Parallel() + ctx := zlog.Test(context.Background(), t) + srv := mockServer(t) + tt := []parseTestcase{ + { + Name: "OK", + }, + } + for _, tc := range tt { + t.Run(tc.Name, tc.Run(ctx, srv)) + } +} + +type parseTestcase struct { + Check func(*testing.T, []driver.EnrichmentRecord, error) + Name string +} + +func (tc parseTestcase) Run(ctx context.Context, srv *httptest.Server) func(*testing.T) { + e := &Enricher{} + return func(t *testing.T) { + ctx := zlog.Test(ctx, t) + f := func(i interface{}) error { + cfg, ok := i.(*Config) + if !ok { + t.Fatal("assertion failed") + } + u := srv.URL + "/data.csv.gz" + cfg.FeedRoot = &u + return nil + } + if err := e.Configure(ctx, f, srv.Client()); err != nil { + t.Errorf("unexpected error: %v", err) + } + + hint := driver.Fingerprint("test-e-tag-54321") + rc, _, err := e.FetchEnrichment(ctx, hint) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + defer rc.Close() + rs, err := e.ParseEnrichment(ctx, rc) + if tc.Check == nil { + if err != nil { + t.Errorf("unexpected error: %v", err) + } + return + } + tc.Check(t, rs, err) + } +} + +type fakeGetter struct { + items []driver.EnrichmentRecord +} + +func (g *fakeGetter) GetEnrichment(ctx context.Context, cves []string) ([]driver.EnrichmentRecord, error) { + var results []driver.EnrichmentRecord + for _, cve := range cves { + for _, item := range g.items { + for _, tag := range item.Tags { + if tag == cve { + results = append(results, item) + break + } + } + } + } + return results, nil +} + +func TestEnrich(t *testing.T) { + t.Parallel() + ctx := zlog.Test(context.Background(), t) + srv := mockServer(t) + e := &Enricher{} + f := func(i interface{}) error { + cfg, ok := i.(*Config) + if !ok { + t.Fatal("assertion failed") + } + u := srv.URL + "/data.csv.gz" + cfg.FeedRoot = &u + return nil + } + if err := e.Configure(ctx, f, srv.Client()); err != nil { + t.Errorf("unexpected error: %v", err) + } + rc, _, err := e.FetchEnrichment(ctx, "") + if err != nil { + t.Errorf("unexpected error: %v", err) + } + defer rc.Close() + rs, err := e.ParseEnrichment(ctx, rc) + if err != nil { + t.Fatal(err) + } + g := &fakeGetter{items: rs} + r := &claircore.VulnerabilityReport{ + Vulnerabilities: map[string]*claircore.Vulnerability{ + "-1": { + Description: "This is a fake vulnerability that doesn't have a CVE.", + }, + "1": { + Description: "This is a fake vulnerability that looks like CVE-2022-34667.", + }, + "6004": { + Description: "CVE-2024-9972 is here", + }, + "6005": { + Description: "CVE-2024-9986 is awesome", + }, + }, + } + kind, es, err := e.Enrich(ctx, g, r) + if err != nil { + t.Error(err) + } + if got, want := kind, Type; got != want { + t.Errorf("got: %q, want: %q", got, want) + } + want := map[string][]map[string]interface{}{ + "1": { + { + "cve": "CVE-2022-34667", + "epss": float64(0.00073), + "percentile": float64(0.32799), + "modelVersion": "v2023.03.01", + "date": "2024-10-25T00:00:00+0000", + }, + }, + "6004": { + { + "cve": "CVE-2024-9972", + "epss": float64(0.00091), + "percentile": float64(0.39923), + "modelVersion": "v2023.03.01", + "date": "2024-10-25T00:00:00+0000", + }, + }, + "6005": { + { + "cve": "CVE-2024-9986", + "epss": float64(0.00165), + "percentile": float64(0.53867), + "modelVersion": "v2023.03.01", + "date": "2024-10-25T00:00:00+0000", + }, + }, + } + + got := map[string][]map[string]interface{}{} + if err := json.Unmarshal(es[0], &got); err != nil { + t.Error(err) + } else { + log.Printf("Got: %+v\n", got) + + if !cmp.Equal(got, want) { + t.Error(cmp.Diff(got, want)) + } + } +} diff --git a/enricher/epss/testdata/data.csv b/enricher/epss/testdata/data.csv new file mode 100644 index 000000000..306044130 --- /dev/null +++ b/enricher/epss/testdata/data.csv @@ -0,0 +1,31 @@ +#model_version:v2023.03.01,score_date:2024-10-25T00:00:00+0000 +cve,epss,percentile +CVE-1999-0005,0.91963,0.99030 +CVE-1999-0006,0.03341,0.91563 +CVE-1999-0007,0.00073,0.32734 +CVE-1999-0008,0.13967,0.95792 +CVE-1999-0009,0.09014,0.94772 +CVE-1999-0010,0.00292,0.69634 +CVE-2022-34665,0.00042,0.05099 +CVE-2022-34666,0.00042,0.05099 +CVE-2022-34667,0.00073,0.32799 +CVE-2022-34668,0.00311,0.70519 +CVE-2022-34669,0.00044,0.13516 +CVE-2022-34670,0.00044,0.13516 +CVE-2022-34671,0.00142,0.50809 +CVE-2022-34672,0.00044,0.13516 +CVE-2022-34673,0.00044,0.13516 +CVE-2022-34674,0.00047,0.18133 +CVE-2022-34675,0.00044,0.13516 +CVE-2024-9972,0.00091,0.39923 +CVE-2024-9973,0.00063,0.28042 +CVE-2024-9974,0.00063,0.28042 +CVE-2024-9975,0.00063,0.28515 +CVE-2024-9980,0.00050,0.20281 +CVE-2024-9981,0.00050,0.20281 +CVE-2024-9982,0.00091,0.39923 +CVE-2024-9983,0.00090,0.39372 +CVE-2024-9984,0.00091,0.39923 +CVE-2024-9985,0.00091,0.39923 +CVE-2024-9986,0.00165,0.53867 +CVE-2024-9987,0.00043,0.09778 \ No newline at end of file From 8deea9d0c1764630163133ce15f69d115e7e798f Mon Sep 17 00:00:00 2001 From: Yi Li Date: Thu, 5 Dec 2024 08:19:32 -0600 Subject: [PATCH 2/5] epss: added enricher/common.go and use etag Signed-off-by: Yi Li --- enricher/{constants.go => common.go} | 4 ++-- enricher/epss/epss.go | 12 +++--------- enricher/epss/epss_test.go | 2 +- 3 files changed, 6 insertions(+), 12 deletions(-) rename enricher/{constants.go => common.go} (72%) diff --git a/enricher/constants.go b/enricher/common.go similarity index 72% rename from enricher/constants.go rename to enricher/common.go index 21296e6e1..8e95d502c 100644 --- a/enricher/constants.go +++ b/enricher/common.go @@ -2,8 +2,8 @@ package enricher import "regexp" -// This is a slightly more relaxed version of the validation pattern in the NVD -// JSON schema: https://csrc.nist.gov/schema/nvd/feed/1.1/CVE_JSON_4.0_min_1.1.schema +// CVERegexp is a slightly more relaxed version of the validation pattern in the NVD +// JSON schema: https://csrc.nist.gov/schema/nvd/feed/1.1/CVE_JSON_4.0_min_1.1.schema. // // It allows for "CVE" to be case insensitive and for dashes and underscores // between the different segments. diff --git a/enricher/epss/epss.go b/enricher/epss/epss.go index 6149c56e5..f8551b05e 100644 --- a/enricher/epss/epss.go +++ b/enricher/epss/epss.go @@ -16,11 +16,12 @@ import ( "strings" "time" + "github.com/quay/zlog" + "github.com/quay/claircore" "github.com/quay/claircore/enricher" "github.com/quay/claircore/libvuln/driver" "github.com/quay/claircore/pkg/tmp" - "github.com/quay/zlog" ) var ( @@ -48,13 +49,6 @@ const ( epssName = `clair.epss` ) -func init() { - var err error - if err != nil { - panic(err) - } -} - // Enricher provides EPSS data as enrichments to a VulnerabilityReport. // // Configure must be called before any other methods. @@ -136,7 +130,7 @@ func (e *Enricher) FetchEnrichment(ctx context.Context, prevFingerprint driver.F return nil, "", fmt.Errorf("unable to fetch file: received status %d", resp.StatusCode) } - etag := resp.Header.Get("ETag") + etag := resp.Header.Get("etag") if etag == "" { return nil, "", fmt.Errorf("ETag not found in response headers") } diff --git a/enricher/epss/epss_test.go b/enricher/epss/epss_test.go index 3ee4059b7..8e7182fdc 100644 --- a/enricher/epss/epss_test.go +++ b/enricher/epss/epss_test.go @@ -170,7 +170,7 @@ func mockServer(t *testing.T) *httptest.Server { srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { switch path.Ext(r.URL.Path) { case ".gz": // only gz feed is supported - w.Header().Set("ETag", etagValue) + w.Header().Set("etag", etagValue) f, err := os.Open(filepath.Join(root, "data.csv")) if err != nil { From c1930d50583a0a3a617565106345b4660ea2ce29 Mon Sep 17 00:00:00 2001 From: Yi Li Date: Thu, 5 Dec 2024 09:14:58 -0600 Subject: [PATCH 3/5] epss: parse metadata and new item Signed-off-by: Yi Li --- enricher/epss/epss.go | 130 +++++++++++++++++-------------------- enricher/epss/epss_test.go | 20 +++--- 2 files changed, 71 insertions(+), 79 deletions(-) diff --git a/enricher/epss/epss.go b/enricher/epss/epss.go index f8551b05e..87fd37a60 100644 --- a/enricher/epss/epss.go +++ b/enricher/epss/epss.go @@ -11,7 +11,7 @@ import ( "net/http" "net/url" "path" - "sort" + "slices" "strconv" "strings" "time" @@ -20,6 +20,7 @@ import ( "github.com/quay/claircore" "github.com/quay/claircore/enricher" + "github.com/quay/claircore/internal/httputil" "github.com/quay/claircore/libvuln/driver" "github.com/quay/claircore/pkg/tmp" ) @@ -29,6 +30,8 @@ var ( _ driver.EnrichmentUpdater = (*Enricher)(nil) ) +// EPSSItem represents a single entry in the EPSS feed, containing information +// about a CVE's Exploit Prediction Scoring System (EPSS) score and percentile. type EPSSItem struct { ModelVersion string `json:"modelVersion"` Date string `json:"date"` @@ -39,11 +42,11 @@ type EPSSItem struct { const ( // Type is the type of data returned from the Enricher's Enrich method. - Type = `message/vnd.clair.map.vulnerability; enricher=clair.epss schema=https://csrc.nist.gov/schema/nvd/feed/1.1/cvss-v3.x.json` + Type = `message/vnd.clair.map.vulnerability; enricher=clair.epss schema=https://csrc.nist.gov/schema/nvd/baseURL/1.1/cvss-v3.x.json` - // DefaultFeed is the default place to look for EPSS feeds. + // DefaultBaseURL is the default place to look for EPSS feeds. // epss_scores-YYYY-MM-DD.csv.gz needs to be specified to get all data - DefaultFeed = `https://epss.cyentia.com/` + DefaultBaseURL = `https://epss.cyentia.com/` // epssName is the name of the enricher epssName = `clair.epss` @@ -55,13 +58,13 @@ const ( type Enricher struct { driver.NoopUpdater c *http.Client - feed *url.URL + baseURL *url.URL feedPath string } // Config is the configuration for Enricher. type Config struct { - FeedRoot *string `json:"feed_root" yaml:"feed_root"` + BaseURL *string `json:"url" yaml:"url"` } func (e *Enricher) Configure(ctx context.Context, f driver.ConfigUnmarshaler, c *http.Client) error { @@ -76,18 +79,18 @@ func (e *Enricher) Configure(ctx context.Context, f driver.ConfigUnmarshaler, c if err := f(&cfg); err != nil { return err } - if cfg.FeedRoot != nil { + if cfg.BaseURL != nil { // validate the URL format - if _, err := url.Parse(*cfg.FeedRoot); err != nil { - return fmt.Errorf("invalid URL format for FeedRoot: %w", err) + if _, err := url.Parse(*cfg.BaseURL); err != nil { + return fmt.Errorf("invalid URL format for BaseURL: %w", err) } // only .gz file is supported - if strings.HasSuffix(*cfg.FeedRoot, ".gz") { - //overwrite feedPath is cfg provides another feed path - e.feedPath = *cfg.FeedRoot + if strings.HasSuffix(*cfg.BaseURL, ".gz") { + //overwrite feedPath is cfg provides another baseURL path + e.feedPath = *cfg.BaseURL } else { - return fmt.Errorf("invalid feed root: expected a '.gz' file, but got '%q'", *cfg.FeedRoot) + return fmt.Errorf("invalid baseURL root: expected a '.gz' file, but got '%q'", *cfg.BaseURL) } } @@ -98,10 +101,6 @@ func (e *Enricher) Configure(ctx context.Context, f driver.ConfigUnmarshaler, c func (e *Enricher) FetchEnrichment(ctx context.Context, prevFingerprint driver.Fingerprint) (io.ReadCloser, driver.Fingerprint, error) { ctx = zlog.ContextWithValues(ctx, "component", "enricher/epss/Enricher/FetchEnrichment") - if e.feedPath == "" || !strings.HasSuffix(e.feedPath, ".gz") { - return nil, "", fmt.Errorf("invalid feed path: %q must be non-empty and end with '.gz'", e.feedPath) - } - out, err := tmp.NewFile("", "epss.") if err != nil { return nil, "", err @@ -126,8 +125,8 @@ func (e *Enricher) FetchEnrichment(ctx context.Context, prevFingerprint driver.F } defer resp.Body.Close() - if resp.StatusCode != http.StatusOK { - return nil, "", fmt.Errorf("unable to fetch file: received status %d", resp.StatusCode) + if err = httputil.CheckResponse(resp, http.StatusOK); err != nil { + return nil, "", fmt.Errorf("unable to fetch file: %w", err) } etag := resp.Header.Get("etag") @@ -149,7 +148,7 @@ func (e *Enricher) FetchEnrichment(ctx context.Context, prevFingerprint driver.F defer gzipReader.Close() csvReader := csv.NewReader(gzipReader) - csvReader.FieldsPerRecord = -1 // Allow variable-length fields + csvReader.FieldsPerRecord = 2 // assume metadata is always in the first line record, err := csvReader.Read() @@ -160,40 +159,45 @@ func (e *Enricher) FetchEnrichment(ctx context.Context, prevFingerprint driver.F var modelVersion, date string for _, field := range record { field = strings.TrimSpace(field) - if strings.HasPrefix(field, "#") { - field = strings.TrimPrefix(field, "#") + field = strings.TrimPrefix(strings.TrimSpace(field), "#") + key, value, found := strings.Cut(field, ":") + if !found { + return nil, "", fmt.Errorf("unexpected metadata field format: %q", field) } - kv := strings.SplitN(field, ":", 2) - if len(kv) == 2 { - switch strings.TrimSpace(kv[0]) { - case "model_version": - modelVersion = strings.TrimSpace(kv[1]) - case "score_date": - date = strings.TrimSpace(kv[1]) - } + switch key { + case "model_version": + modelVersion = value + case "score_date": + date = value } } if modelVersion == "" || date == "" { return nil, "", fmt.Errorf("missing metadata fields in record: %v", record) } + csvReader.Comment = '#' + csvReader.FieldsPerRecord = 3 // Expect exactly 3 fields per record - csvReader.Comment = '#' // Ignore subsequent comment lines + if modelVersion == "" || date == "" { + return nil, "", fmt.Errorf("missing metadata fields in record: %v", record) + } + // Read and validate header line record, err = csvReader.Read() if err != nil { return nil, "", fmt.Errorf("unable to read header line: %w", err) } - if len(record) < 3 || record[0] != "cve" || record[1] != "epss" || record[2] != "percentile" { + + expectedHeaders := []string{"cve", "epss", "percentile"} + if !slices.Equal(record, expectedHeaders) { return nil, "", fmt.Errorf("unexpected CSV headers: %v", record) } - headers := record enc := json.NewEncoder(out) totalCVEs := 0 for { - record, err = csvReader.Read() + record, err := csvReader.Read() if errors.Is(err, io.EOF) { break } @@ -201,18 +205,13 @@ func (e *Enricher) FetchEnrichment(ctx context.Context, prevFingerprint driver.F return nil, "", fmt.Errorf("unable to read line in CSV: %w", err) } - if len(record) != len(headers) { - zlog.Warn(ctx).Str("record", fmt.Sprintf("%v", record)).Msg("skipping record with mismatched fields") - continue - } - - r, err := newItemFeed(record, headers, modelVersion, date) + r, err := newItemFeed(record, modelVersion, date) if err != nil { - zlog.Warn(ctx).Str("record", fmt.Sprintf("%v", record)).Msg("skipping invalid record") + zlog.Warn(ctx).Err(err).Msg("skipping invalid record") continue } - if err = enc.Encode(&r); err != nil { + if err := enc.Encode(&r); err != nil { return nil, "", fmt.Errorf("unable to write JSON line to file: %w", err) } totalCVEs++ @@ -267,9 +266,9 @@ func currentFeedURL() string { formattedDate := currentDate.Format("2006-01-02") filePath := fmt.Sprintf("epss_scores-%s.csv.gz", formattedDate) - feedURL, err := url.Parse(DefaultFeed) + feedURL, err := url.Parse(DefaultBaseURL) if err != nil { - panic(fmt.Errorf("invalid default feed URL: %w", err)) + panic(fmt.Errorf("invalid default baseURL URL: %w", err)) } feedURL.Path = path.Join(feedURL.Path, filePath) @@ -316,7 +315,7 @@ func (e *Enricher) Enrich(ctx context.Context, g driver.EnrichmentGetter, r *cla for m := range t { ts = append(ts, m) } - sort.Strings(ts) + slices.Sort(ts) cveKey := strings.Join(ts, "_") @@ -339,9 +338,6 @@ func (e *Enricher) Enrich(ctx context.Context, g driver.EnrichmentGetter, r *cla } for _, r := range rec { - if _, exists := m[id]; !exists { - m[id] = []json.RawMessage{} - } m[id] = append(m[id], r.Enrichment) } } @@ -357,29 +353,25 @@ func (e *Enricher) Enrich(ctx context.Context, g driver.EnrichmentGetter, r *cla return Type, []json.RawMessage{b}, nil } -func newItemFeed(record []string, headers []string, modelVersion string, scoreDate string) (driver.EnrichmentRecord, error) { - if len(record) != len(headers) { - return driver.EnrichmentRecord{}, fmt.Errorf("record and headers length mismatch") +func newItemFeed(record []string, modelVersion string, scoreDate string) (driver.EnrichmentRecord, error) { + // Assuming record has already been validated to have 3 fields + if len(record) != 3 { + return driver.EnrichmentRecord{}, fmt.Errorf("unexpected record length: %d", len(record)) } var item EPSSItem - for i, value := range record { - switch headers[i] { - case "cve": - item.CVE = value - case "epss": - if f, err := strconv.ParseFloat(value, 64); err == nil { - item.EPSS = f - } else { - return driver.EnrichmentRecord{}, fmt.Errorf("invalid float for epss: %w", err) - } - case "percentile": - if f, err := strconv.ParseFloat(value, 64); err == nil { - item.Percentile = f - } else { - return driver.EnrichmentRecord{}, fmt.Errorf("invalid float for percentile: %w", err) - } - } + item.CVE = record[0] + + if f, err := strconv.ParseFloat(record[1], 64); err == nil { + item.EPSS = f + } else { + return driver.EnrichmentRecord{}, fmt.Errorf("invalid float for epss: %w", err) + } + + if f, err := strconv.ParseFloat(record[2], 64); err == nil { + item.Percentile = f + } else { + return driver.EnrichmentRecord{}, fmt.Errorf("invalid float for percentile: %w", err) } item.ModelVersion = modelVersion @@ -391,7 +383,7 @@ func newItemFeed(record []string, headers []string, modelVersion string, scoreDa } r := driver.EnrichmentRecord{ - Tags: []string{item.CVE}, // CVE field should be set + Tags: []string{item.CVE}, Enrichment: enrichment, } diff --git a/enricher/epss/epss_test.go b/enricher/epss/epss_test.go index 8e7182fdc..042a6711c 100644 --- a/enricher/epss/epss_test.go +++ b/enricher/epss/epss_test.go @@ -15,10 +15,10 @@ import ( "testing" "github.com/google/go-cmp/cmp" + "github.com/quay/zlog" "github.com/quay/claircore" "github.com/quay/claircore/libvuln/driver" - "github.com/quay/zlog" ) func TestConfigure(t *testing.T) { @@ -38,7 +38,7 @@ func TestConfigure(t *testing.T) { Config: func(i interface{}) error { cfg := i.(*Config) s := "http://example.com/" - cfg.FeedRoot = &s + cfg.BaseURL = &s return nil }, Check: func(t *testing.T, err error) { @@ -58,11 +58,11 @@ func TestConfigure(t *testing.T) { }, }, { - Name: "BadURL", // Malformed URL in FeedRoot + Name: "BadURL", // Malformed URL in BaseURL Config: func(i interface{}) error { cfg := i.(*Config) s := "http://[notaurl:/" - cfg.FeedRoot = &s + cfg.BaseURL = &s return nil }, Check: func(t *testing.T, err error) { @@ -72,11 +72,11 @@ func TestConfigure(t *testing.T) { }, }, { - Name: "ValidGZURL", // Proper .gz URL in FeedRoot + Name: "ValidGZURL", // Proper .gz URL in BaseURL Config: func(i interface{}) error { cfg := i.(*Config) s := "http://example.com/epss_scores-2024-10-25.csv.gz" - cfg.FeedRoot = &s + cfg.BaseURL = &s return nil }, Check: func(t *testing.T, err error) { @@ -169,7 +169,7 @@ func mockServer(t *testing.T) *httptest.Server { srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { switch path.Ext(r.URL.Path) { - case ".gz": // only gz feed is supported + case ".gz": // only gz baseURL is supported w.Header().Set("etag", etagValue) f, err := os.Open(filepath.Join(root, "data.csv")) @@ -207,7 +207,7 @@ func (tc fetchTestcase) Run(ctx context.Context, srv *httptest.Server) func(*tes t.Fatal("expected Config type for i, but got a different type") } u := srv.URL + "/data.csv.gz" - cfg.FeedRoot = &u + cfg.BaseURL = &u return nil } @@ -259,7 +259,7 @@ func (tc parseTestcase) Run(ctx context.Context, srv *httptest.Server) func(*tes t.Fatal("assertion failed") } u := srv.URL + "/data.csv.gz" - cfg.FeedRoot = &u + cfg.BaseURL = &u return nil } if err := e.Configure(ctx, f, srv.Client()); err != nil { @@ -313,7 +313,7 @@ func TestEnrich(t *testing.T) { t.Fatal("assertion failed") } u := srv.URL + "/data.csv.gz" - cfg.FeedRoot = &u + cfg.BaseURL = &u return nil } if err := e.Configure(ctx, f, srv.Client()); err != nil { From b30694090d0e0c7bf6095028c495cd573deb1b76 Mon Sep 17 00:00:00 2001 From: daynewlee Date: Sun, 8 Dec 2024 15:11:08 -0600 Subject: [PATCH 4/5] epss: add UUID for customized URL Signed-off-by: daynewlee --- enricher/epss/epss.go | 56 +++++++++++++++++++------------------- enricher/epss/epss_test.go | 16 +++++------ 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/enricher/epss/epss.go b/enricher/epss/epss.go index 87fd37a60..ef95a1ec8 100644 --- a/enricher/epss/epss.go +++ b/enricher/epss/epss.go @@ -1,3 +1,4 @@ +// Package epss provides a epss enricher. package epss import ( @@ -16,6 +17,7 @@ import ( "strings" "time" + "github.com/google/uuid" "github.com/quay/zlog" "github.com/quay/claircore" @@ -42,7 +44,7 @@ type EPSSItem struct { const ( // Type is the type of data returned from the Enricher's Enrich method. - Type = `message/vnd.clair.map.vulnerability; enricher=clair.epss schema=https://csrc.nist.gov/schema/nvd/baseURL/1.1/cvss-v3.x.json` + Type = `message/vnd.clair.map.vulnerability; enricher=clair.epss schema=none` // DefaultBaseURL is the default place to look for EPSS feeds. // epss_scores-YYYY-MM-DD.csv.gz needs to be specified to get all data @@ -64,7 +66,7 @@ type Enricher struct { // Config is the configuration for Enricher. type Config struct { - BaseURL *string `json:"url" yaml:"url"` + URL *string `json:"url" yaml:"url"` } func (e *Enricher) Configure(ctx context.Context, f driver.ConfigUnmarshaler, c *http.Client) error { @@ -73,24 +75,23 @@ func (e *Enricher) Configure(ctx context.Context, f driver.ConfigUnmarshaler, c e.c = c e.feedPath = currentFeedURL() if f == nil { - zlog.Debug(ctx).Msg("No configuration provided; proceeding with default settings") - return nil + return fmt.Errorf("configuration is nil") } if err := f(&cfg); err != nil { return err } - if cfg.BaseURL != nil { + if cfg.URL != nil { // validate the URL format - if _, err := url.Parse(*cfg.BaseURL); err != nil { - return fmt.Errorf("invalid URL format for BaseURL: %w", err) + if _, err := url.Parse(*cfg.URL); err != nil { + return fmt.Errorf("invalid URL format for URL: %w", err) } // only .gz file is supported - if strings.HasSuffix(*cfg.BaseURL, ".gz") { + if strings.HasSuffix(*cfg.URL, ".gz") { //overwrite feedPath is cfg provides another baseURL path - e.feedPath = *cfg.BaseURL + e.feedPath = *cfg.URL } else { - return fmt.Errorf("invalid baseURL root: expected a '.gz' file, but got '%q'", *cfg.BaseURL) + return fmt.Errorf("invalid baseURL root: expected a '.gz' file, but got '%q'", *cfg.URL) } } @@ -128,14 +129,19 @@ func (e *Enricher) FetchEnrichment(ctx context.Context, prevFingerprint driver.F if err = httputil.CheckResponse(resp, http.StatusOK); err != nil { return nil, "", fmt.Errorf("unable to fetch file: %w", err) } - - etag := resp.Header.Get("etag") - if etag == "" { - return nil, "", fmt.Errorf("ETag not found in response headers") + var str string + var newFingerprint driver.Fingerprint + str = resp.Header.Get("etag") + if str == "" { + newUUID, err := uuid.NewRandom() + if err != nil { + return nil, "", fmt.Errorf("failed to generate UUID: %w", err) + } + // Generate a UUID for customized URL + str = newUUID.String() + zlog.Warn(ctx).Msg("ETag not found; generated UUID for fingerprint") } - - newFingerprint := driver.Fingerprint(etag) - + newFingerprint = driver.Fingerprint(str) if prevFingerprint == newFingerprint { zlog.Info(ctx).Str("fingerprint", string(newFingerprint)).Msg("file unchanged; skipping processing") return nil, prevFingerprint, nil @@ -158,7 +164,6 @@ func (e *Enricher) FetchEnrichment(ctx context.Context, prevFingerprint driver.F var modelVersion, date string for _, field := range record { - field = strings.TrimSpace(field) field = strings.TrimPrefix(strings.TrimSpace(field), "#") key, value, found := strings.Cut(field, ":") if !found { @@ -176,11 +181,8 @@ func (e *Enricher) FetchEnrichment(ctx context.Context, prevFingerprint driver.F return nil, "", fmt.Errorf("missing metadata fields in record: %v", record) } csvReader.Comment = '#' - csvReader.FieldsPerRecord = 3 // Expect exactly 3 fields per record - if modelVersion == "" || date == "" { - return nil, "", fmt.Errorf("missing metadata fields in record: %v", record) - } + csvReader.FieldsPerRecord = 3 // Expect exactly 3 fields per record // Read and validate header line record, err = csvReader.Read() @@ -230,9 +232,7 @@ func (e *Enricher) FetchEnrichment(ctx context.Context, prevFingerprint driver.F func (e *Enricher) ParseEnrichment(ctx context.Context, rc io.ReadCloser) ([]driver.EnrichmentRecord, error) { ctx = zlog.ContextWithValues(ctx, "component", "enricher/epss/Enricher/ParseEnrichment") - defer func() { - _ = rc.Close() - }() + defer rc.Close() dec := json.NewDecoder(rc) ret := make([]driver.EnrichmentRecord, 0, 250_000) @@ -262,8 +262,8 @@ func (*Enricher) Name() string { } func currentFeedURL() string { - currentDate := time.Now() - formattedDate := currentDate.Format("2006-01-02") + yesterday := time.Now().AddDate(0, 0, -1) // Get yesterday's date + formattedDate := yesterday.Format("2006-01-02") filePath := fmt.Sprintf("epss_scores-%s.csv.gz", formattedDate) feedURL, err := url.Parse(DefaultBaseURL) @@ -354,7 +354,7 @@ func (e *Enricher) Enrich(ctx context.Context, g driver.EnrichmentGetter, r *cla } func newItemFeed(record []string, modelVersion string, scoreDate string) (driver.EnrichmentRecord, error) { - // Assuming record has already been validated to have 3 fields + // Validate the record has the expected length if len(record) != 3 { return driver.EnrichmentRecord{}, fmt.Errorf("unexpected record length: %d", len(record)) } diff --git a/enricher/epss/epss_test.go b/enricher/epss/epss_test.go index 042a6711c..32eeefcbf 100644 --- a/enricher/epss/epss_test.go +++ b/enricher/epss/epss_test.go @@ -38,7 +38,7 @@ func TestConfigure(t *testing.T) { Config: func(i interface{}) error { cfg := i.(*Config) s := "http://example.com/" - cfg.BaseURL = &s + cfg.URL = &s return nil }, Check: func(t *testing.T, err error) { @@ -58,11 +58,11 @@ func TestConfigure(t *testing.T) { }, }, { - Name: "BadURL", // Malformed URL in BaseURL + Name: "BadURL", // Malformed URL in URL Config: func(i interface{}) error { cfg := i.(*Config) s := "http://[notaurl:/" - cfg.BaseURL = &s + cfg.URL = &s return nil }, Check: func(t *testing.T, err error) { @@ -72,11 +72,11 @@ func TestConfigure(t *testing.T) { }, }, { - Name: "ValidGZURL", // Proper .gz URL in BaseURL + Name: "ValidGZURL", // Proper .gz URL in URL Config: func(i interface{}) error { cfg := i.(*Config) s := "http://example.com/epss_scores-2024-10-25.csv.gz" - cfg.BaseURL = &s + cfg.URL = &s return nil }, Check: func(t *testing.T, err error) { @@ -207,7 +207,7 @@ func (tc fetchTestcase) Run(ctx context.Context, srv *httptest.Server) func(*tes t.Fatal("expected Config type for i, but got a different type") } u := srv.URL + "/data.csv.gz" - cfg.BaseURL = &u + cfg.URL = &u return nil } @@ -259,7 +259,7 @@ func (tc parseTestcase) Run(ctx context.Context, srv *httptest.Server) func(*tes t.Fatal("assertion failed") } u := srv.URL + "/data.csv.gz" - cfg.BaseURL = &u + cfg.URL = &u return nil } if err := e.Configure(ctx, f, srv.Client()); err != nil { @@ -313,7 +313,7 @@ func TestEnrich(t *testing.T) { t.Fatal("assertion failed") } u := srv.URL + "/data.csv.gz" - cfg.BaseURL = &u + cfg.URL = &u return nil } if err := e.Configure(ctx, f, srv.Client()); err != nil { From c5d4b9a657cabc8ba2fa8f1bf6e5e0d80eb1e861 Mon Sep 17 00:00:00 2001 From: daynewlee Date: Fri, 13 Dec 2024 13:40:54 -0600 Subject: [PATCH 5/5] epss: no new fingerprint when etag is missing Signed-off-by: daynewlee --- enricher/epss/epss.go | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/enricher/epss/epss.go b/enricher/epss/epss.go index ef95a1ec8..fcbc013ee 100644 --- a/enricher/epss/epss.go +++ b/enricher/epss/epss.go @@ -17,7 +17,6 @@ import ( "strings" "time" - "github.com/google/uuid" "github.com/quay/zlog" "github.com/quay/claircore" @@ -129,24 +128,16 @@ func (e *Enricher) FetchEnrichment(ctx context.Context, prevFingerprint driver.F if err = httputil.CheckResponse(resp, http.StatusOK); err != nil { return nil, "", fmt.Errorf("unable to fetch file: %w", err) } - var str string + var newFingerprint driver.Fingerprint - str = resp.Header.Get("etag") - if str == "" { - newUUID, err := uuid.NewRandom() - if err != nil { - return nil, "", fmt.Errorf("failed to generate UUID: %w", err) + if etag := resp.Header.Get("etag"); etag != "" { + newFingerprint = driver.Fingerprint(etag) + if prevFingerprint == newFingerprint { + zlog.Info(ctx).Str("fingerprint", string(newFingerprint)).Msg("file unchanged; skipping processing") + return nil, prevFingerprint, nil } - // Generate a UUID for customized URL - str = newUUID.String() - zlog.Warn(ctx).Msg("ETag not found; generated UUID for fingerprint") + newFingerprint = driver.Fingerprint(etag) } - newFingerprint = driver.Fingerprint(str) - if prevFingerprint == newFingerprint { - zlog.Info(ctx).Str("fingerprint", string(newFingerprint)).Msg("file unchanged; skipping processing") - return nil, prevFingerprint, nil - } - gzipReader, err := gzip.NewReader(resp.Body) if err != nil { return nil, "", fmt.Errorf("unable to decompress file: %w", err)