diff --git a/enricher/epss/epss.go b/enricher/epss/epss.go
index c3642099f..97e96db32 100644
--- a/enricher/epss/epss.go
+++ b/enricher/epss/epss.go
@@ -1,9 +1,9 @@
 package epss
 
 import (
+	"bufio"
 	"compress/gzip"
 	"context"
-	"encoding/csv"
 	"encoding/json"
 	"fmt"
 	"github.com/google/uuid"
@@ -99,9 +99,8 @@ func (e *Enricher) Configure(ctx context.Context, f driver.ConfigUnmarshaler, c
 	return nil
 }
 
-func (e *Enricher) FetchEnrichment(ctx context.Context, fingerprint driver.Fingerprint) (io.ReadCloser, driver.Fingerprint, error) {
+func (e *Enricher) FetchEnrichment(ctx context.Context, _ driver.Fingerprint) (io.ReadCloser, driver.Fingerprint, error) {
 	ctx = zlog.ContextWithValues(ctx, "component", "enricher/epss/Enricher/FetchEnrichment")
-	// Force a new hint, to signal updaters that this is new data.
 	newUUID := uuid.New()
 	hint := driver.Fingerprint(newUUID.String())
 	zlog.Info(ctx).Str("hint", string(hint)).Msg("starting fetch")
@@ -139,29 +138,42 @@ func (e *Enricher) FetchEnrichment(ctx context.Context, fingerprint driver.Finge
 	}
 	defer gzipReader.Close()
 
-	csvReader := csv.NewReader(gzipReader)
-	headers, err := csvReader.Read() // Column names
-	if err != nil {
-		return nil, "", fmt.Errorf("failed to read CSV headers: %w", err)
-	}
-
+	scanner := bufio.NewScanner(gzipReader)
+	var headers []string
 	enc := json.NewEncoder(out)
 	totalCVEs := 0
 
-	for {
-		record, err := csvReader.Read()
-		if err == io.EOF {
-			break
+	// get headers
+	for scanner.Scan() {
+		line := strings.TrimSpace(scanner.Text())
+		if strings.HasPrefix(line, "#") || line == "" {
+			continue // Skip comment or empty lines
 		}
 
-		if err != nil {
-			return nil, "", fmt.Errorf("failed to read CSV row: %w", err)
+		headers = strings.Split(line, ",")
+		break
+	}
+
+	for scanner.Scan() {
+		line := strings.TrimSpace(scanner.Text())
+		if strings.HasPrefix(line, "#") || line == "" {
+			continue
+		}
+
+		record := strings.Split(line, ",")
+		if len(record) != len(headers) {
+			zlog.Warn(ctx).Str("line", line).Msg("skipping line with mismatched fields")
+			continue // Skip lines with mismatched number of fields
 		}
 
 		item := make(map[string]string)
 		for i, value := range record {
 			item[headers[i]] = value
 		}
+		enrichment, err := json.Marshal(item)
+		if err != nil {
+			return nil, "", fmt.Errorf("failed to encode enrichment: %w", err)
+		}
 
 		r := driver.EnrichmentRecord{
 			Tags:       []string{item["cve"]},
@@ -169,11 +181,15 @@ func (e *Enricher) FetchEnrichment(ctx context.Context, fingerprint driver.Finge
 		}
 
 		if err = enc.Encode(&r); err != nil {
-			return nil, "", fmt.Errorf("encoding enrichment: %w", err)
+			return nil, "", fmt.Errorf("failed to write JSON line to file: %w", err)
 		}
 		totalCVEs++
 	}
 
+	if err := scanner.Err(); err != nil {
+		return nil, "", fmt.Errorf("error reading file: %w", err)
+	}
+
 	zlog.Info(ctx).Int("totalCVEs", totalCVEs).Msg("processed CVEs")
 	if _, err := out.Seek(0, io.SeekStart); err != nil {
 		return nil, hint, fmt.Errorf("unable to reset file pointer: %w", err)
diff --git a/enricher/epss/epss_test.go b/enricher/epss/epss_test.go
new file mode 100644
index 000000000..a7c7f9beb
--- /dev/null
+++ b/enricher/epss/epss_test.go
@@ -0,0 +1,240 @@
+package epss
+
+import (
+	"compress/gzip"
+	"context"
+	"errors"
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"os"
+	"path"
+	"path/filepath"
+	"testing"
+
+	"github.com/quay/claircore/libvuln/driver"
+	"github.com/quay/zlog"
+)
+
+// TestConfigure runs Enricher.Configure against a table of configuration
+// callbacks and hands the resulting error to each case's Check function.
+func TestConfigure(t *testing.T) {
+	t.Parallel()
+	ctx := zlog.Test(context.Background(), t)
+	tt := []configTestcase{
+		{
+			Name: "None", // No configuration provided, should use default
+			Check: func(t *testing.T, err error) {
+				if err != nil {
+					t.Errorf("unexpected error: %v", err)
+				}
+			},
+		},
+		{
+			Name: "OK", // URL without .gz will be replaced with default URL
+			Config: func(i interface{}) error {
+				cfg := i.(*Config)
+				s := "http://example.com/"
+				cfg.FeedRoot = &s
+				return nil
+			},
+			Check: func(t *testing.T, err error) {
+				if err != nil {
+					t.Errorf("unexpected error with non-.gz URL: %v", err)
+				}
+			},
+		},
+		{
+			Name:   "UnmarshalError", // Expected error on unmarshaling
+			Config: func(_ interface{}) error { return errors.New("expected error") },
+			Check: func(t *testing.T, err error) {
+				if err == nil {
+					t.Error("expected unmarshal error, but got none")
+				}
+			},
+		},
+		{
+			Name: "BadURL", // Malformed URL in FeedRoot
+			Config: func(i interface{}) error {
+				cfg := i.(*Config)
+				s := "http://[notaurl:/"
+				cfg.FeedRoot = &s
+				return nil
+			},
+			Check: func(t *testing.T, err error) {
+				if err == nil {
+					t.Error("expected URL parse error, but got none")
+				}
+			},
+		},
+		{
+			Name: "ValidGZURL", // Proper .gz URL in FeedRoot
+			Config: func(i interface{}) error {
+				cfg := i.(*Config)
+				s := "http://example.com/epss_scores-2024-10-25.csv.gz"
+				cfg.FeedRoot = &s
+				return nil
+			},
+			Check: func(t *testing.T, err error) {
+				if err != nil {
+					t.Errorf("unexpected error with .gz URL: %v", err)
+				}
+			},
+		},
+	}
+
+	for _, tc := range tt {
+		t.Run(tc.Name, tc.Run(ctx))
+	}
+}
+
+// Run returns a subtest that configures a new Enricher with tc.Config
+// (noopConfig when unset) and passes the Configure error to tc.Check. When
+// tc.Check is nil, any error fails the subtest.
+func (tc configTestcase) Run(ctx context.Context) func(*testing.T) {
+	e := &Enricher{}
+	return func(t *testing.T) {
+		ctx := zlog.Test(ctx, t)
+		f := tc.Config
+		if f == nil {
+			f = noopConfig
+		}
+		err := e.Configure(ctx, f, nil)
+		if tc.Check == nil {
+			if err != nil {
+				t.Errorf("unexpected err: %v", err)
+			}
+			return
+		}
+		tc.Check(t, err)
+	}
+}
+
+// TestFetch runs FetchEnrichment against the mock feed server and checks
+// that it yields a readable stream and a non-empty fingerprint.
+func TestFetch(t *testing.T) {
+	t.Parallel()
+	ctx := zlog.Test(context.Background(), t)
+	srv := mockServer(t)
+
+	tt := []fetchTestcase{
+		{
+			Name: "Fetch OK", // Tests successful fetch and data processing
+			Check: func(t *testing.T, rc io.ReadCloser, fp driver.Fingerprint, err error) {
+				if err != nil {
+					t.Errorf("unexpected error: %v", err)
+					return
+				}
+				// Check for nil before using rc: a method call on a nil
+				// ReadCloser panics. Closing is left to fetchTestcase.Run,
+				// which already defers rc.Close().
+				if rc == nil {
+					t.Error("expected non-nil ReadCloser for initial fetch")
+					return
+				}
+				if fp == driver.Fingerprint("") {
+					t.Error("expected non-empty fingerprint")
+				}
+
+				// Further check if data is correctly read and structured
+				data, err := io.ReadAll(rc)
+				if err != nil {
+					t.Errorf("failed to read enrichment data: %v", err)
+					return
+				}
+				t.Logf("enrichment data: %s", string(data))
+			},
+		},
+	}
+
+	for _, tc := range tt {
+		t.Run(tc.Name, tc.Run(ctx, srv))
+	}
+}
+
+// fetchTestcase is one FetchEnrichment scenario: Hint is converted to a
+// driver.Fingerprint and passed to FetchEnrichment, and Check receives
+// everything FetchEnrichment returns.
+type fetchTestcase struct {
+	Check func(*testing.T, io.ReadCloser, driver.Fingerprint, error)
+	Name  string
+	Hint  string
+}
+
+// configTestcase is one Configure scenario: Config is handed to Configure as
+// the unmarshal callback and Check receives the error Configure returns.
+type configTestcase struct {
+	Config func(interface{}) error
+	Check  func(*testing.T, error)
+	Name   string
+}
+
+// noopConfig is a configuration callback that does nothing and returns nil.
+func noopConfig(_ interface{}) error { return nil }
+
+// mockServer starts a test HTTP server that answers any path ending in ".gz"
+// with a gzip-compressed copy of testdata/data.csv and reports every other
+// path as a test error. The server is closed when the test finishes.
+func mockServer(t *testing.T) *httptest.Server {
+	const root = `testdata/`
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		switch path.Ext(r.URL.Path) {
+		case ".gz": // only gz feed is supported
+			f, err := os.Open(filepath.Join(root, "data.csv"))
+			if err != nil {
+				t.Errorf("open failed: %v", err)
+				w.WriteHeader(http.StatusInternalServerError)
+				break
+			}
+			defer f.Close()
+			gz := gzip.NewWriter(w)
+			defer gz.Close()
+			if _, err := io.Copy(gz, f); err != nil {
+				t.Errorf("write error: %v", err)
+				w.WriteHeader(http.StatusInternalServerError)
+				break
+			}
+		default:
+			t.Errorf("unknown request path: %q", r.URL.Path)
+			w.WriteHeader(http.StatusBadRequest)
+		}
+	}))
+	t.Cleanup(srv.Close)
+	return srv
+}
+
+// Run returns a subtest that points a new Enricher at srv, calls
+// FetchEnrichment with tc.Hint, closes any returned stream when the subtest
+// ends, and passes the results to tc.Check. When tc.Check is nil, any error
+// fails the subtest.
+func (tc fetchTestcase) Run(ctx context.Context, srv *httptest.Server) func(*testing.T) {
+	return func(t *testing.T) {
+		e := &Enricher{}
+		ctx := zlog.Test(ctx, t)
+		configFunc := func(i interface{}) error {
+			cfg, ok := i.(*Config)
+			if !ok {
+				t.Fatal("expected Config type for i, but got a different type")
+			}
+			u := srv.URL + "/data.csv.gz"
+			cfg.FeedRoot = &u
+			return nil
+		}
+
+		// Configure Enricher with mock server client and custom config
+		if err := e.Configure(ctx, configFunc, srv.Client()); err != nil {
+			t.Errorf("unexpected error: %v", err)
+			return
+		}
+
+		// Run FetchEnrichment and validate the result using Check
+		rc, fp, err := e.FetchEnrichment(ctx, driver.Fingerprint(tc.Hint))
+		if rc != nil {
+			defer rc.Close()
+		}
+		if tc.Check != nil {
+			tc.Check(t, rc, fp, err)
+		} else if err != nil {
+			t.Errorf("unexpected error: %v", err)
+		}
+	}
+}
diff --git a/enricher/epss/testdata/data.csv b/enricher/epss/testdata/data.csv
new file mode 100644
index 000000000..e65c76241
--- /dev/null
+++ b/enricher/epss/testdata/data.csv
@@ -0,0 +1,34 @@
+#model_version:v2023.03.01,score_date:2024-10-25T00:00:00+0000
+cve,epss,percentile
+CVE-1999-0005,0.91963,0.99030
+CVE-1999-0006,0.03341,0.91563
+CVE-1999-0007,0.00073,0.32734
+CVE-1999-0008,0.13967,0.95792
+CVE-1999-0009,0.09014,0.94772
+CVE-1999-0010,0.00292,0.69634
+CVE-2022-34665,0.00042,0.05099
+CVE-2022-34666,0.00042,0.05099
+CVE-2022-34667,0.00073,0.32799
+CVE-2022-34668,0.00311,0.70519
+CVE-2022-34669,0.00044,0.13516
+CVE-2022-34670,0.00044,0.13516
+CVE-2022-34671,0.00142,0.50809
+CVE-2022-34672,0.00044,0.13516
+CVE-2022-34673,0.00044,0.13516
+CVE-2022-34674,0.00047,0.18133
+CVE-2022-34675,0.00044,0.13516
+CVE-2024-9972,0.00091,0.39923
+CVE-2024-9973,0.00063,0.28042
+CVE-2024-9974,0.00063,0.28042
+CVE-2024-9975,0.00063,0.28515
+CVE-2024-9976,0.00063,0.28042
+CVE-2024-9977,0.00046,0.17291
+CVE-2024-9979,0.00045,0.16569
+CVE-2024-9980,0.00050,0.20281
+CVE-2024-9981,0.00050,0.20281
+CVE-2024-9982,0.00091,0.39923
+CVE-2024-9983,0.00090,0.39372
+CVE-2024-9984,0.00091,0.39923
+CVE-2024-9985,0.00091,0.39923
+CVE-2024-9986,0.00165,0.53867
+CVE-2024-9987,0.00043,0.09778