diff --git a/file.go b/file.go index 5c8bd915a..bf154f8d8 100644 --- a/file.go +++ b/file.go @@ -5,6 +5,7 @@ type FileKind string const ( FileKindWhiteout = FileKind("whiteout") + FileKindRPM = FileKind("rpm") ) // File represents interesting files that are found in the layer. diff --git a/linux/coalescer.go b/linux/coalescer.go index d68ec79b5..2dcebcc92 100644 --- a/linux/coalescer.go +++ b/linux/coalescer.go @@ -48,6 +48,7 @@ func (c *Coalescer) Coalesce(ctx context.Context, layerArtifacts []*indexer.Laye if len(artifacts.Pkgs) == 0 { continue } + lh := artifacts.Hash.String() tmp := make(map[string][]*claircore.Package) for _, pkg := range artifacts.Pkgs { @@ -58,6 +59,12 @@ func (c *Coalescer) Coalesce(ctx context.Context, layerArtifacts []*indexer.Laye for db, pkgs := range tmp { dbs[db] = pkgs } + for _, f := range artifacts.Files { + if c.ir.Files == nil { + c.ir.Files = make(map[string][]claircore.File) + } + c.ir.Files[lh] = append(c.ir.Files[lh], f) + } } for db, packages := range dbs { diff --git a/rpm/ecosystem.go b/rpm/ecosystem.go index 9ee224b5f..e6691ff70 100644 --- a/rpm/ecosystem.go +++ b/rpm/ecosystem.go @@ -28,6 +28,9 @@ func NewEcosystem(_ context.Context) *indexer.Ecosystem { RepositoryScanners: func(ctx context.Context) ([]indexer.RepositoryScanner, error) { return []indexer.RepositoryScanner{}, nil }, + FileScanners: func(ctx context.Context) ([]indexer.FileScanner, error) { + return []indexer.FileScanner{&FileScanner{}}, nil + }, Coalescer: func(ctx context.Context) (indexer.Coalescer, error) { return linux.NewCoalescer(), nil }, diff --git a/rpm/filescanner.go b/rpm/filescanner.go new file mode 100644 index 000000000..bb7c4907f --- /dev/null +++ b/rpm/filescanner.go @@ -0,0 +1,89 @@ +package rpm + +import ( + "context" + "fmt" + "io/fs" + "runtime/trace" + + "github.com/quay/zlog" + + "github.com/quay/claircore" + "github.com/quay/claircore/indexer" +) + +const ( + scannerName = "rpm" + scannerVersion = "1" + scannerKind = "file" +) + +var ( + _ indexer.FileScanner = (*FileScanner)(nil) + _ indexer.VersionedScanner = (*FileScanner)(nil) +) + +// FileScanner implements [indexer.FileScanner], it examines RPM +// databases and reports installed files. +type FileScanner struct{} + +// Name implements [indexer.VersionedScanner]. +func (*FileScanner) Name() string { return scannerName } + +// Version implements [indexer.VersionedScanner]. +func (*FileScanner) Version() string { return scannerVersion } + +// Kind implements [indexer.VersionedScanner] +func (*FileScanner) Kind() string { return scannerKind } + +// Scan reports any found Files that were installed via RPMs in the +// layer. +// +// It's an expected outcome to return (nil, nil) when no RPM packages are found in the Layer. +func (s *FileScanner) Scan(ctx context.Context, layer *claircore.Layer) ([]claircore.File, error) { + if err := ctx.Err(); err != nil { + return nil, err + } + defer trace.StartRegion(ctx, "FileScanner.Scan").End() + trace.Log(ctx, "layer", layer.Hash.String()) + ctx = zlog.ContextWithValues(ctx, + "component", "rpm/FileScanner.Scan", + "version", s.Version(), + "layer", layer.Hash.String()) + zlog.Debug(ctx).Msg("start") + defer zlog.Debug(ctx).Msg("done") + + sys, err := layer.FS() + if err != nil { + return nil, fmt.Errorf("rpm: unable to open layer: %w", err) + } + + found := make([]foundDB, 0) + if err := fs.WalkDir(sys, ".", findDBs(ctx, &found, sys)); err != nil { + return nil, fmt.Errorf("rpm: error walking fs: %w", err) + } + if len(found) == 0 { + return nil, nil + } + + done := map[string]struct{}{} + files := []claircore.File{} + + zlog.Debug(ctx).Int("count", len(found)).Msg("found possible databases") + for _, db := range found { + ctx := zlog.ContextWithValues(ctx, "db", db.String()) + zlog.Debug(ctx).Msg("examining database") + if _, ok := done[db.Path]; ok { + zlog.Debug(ctx).Msg("already seen, skipping") + continue + } + done[db.Path] = struct{}{} + fs, err := getDBObjects(ctx, sys, db, filesFromDB) + if err != nil { + return nil, fmt.Errorf("rpm: error getting native DBs: %w", err) + } + files = append(files, fs...) + } + + return files, nil +} diff --git a/rpm/filescanner_test.go b/rpm/filescanner_test.go new file mode 100644 index 000000000..b016081f6 --- /dev/null +++ b/rpm/filescanner_test.go @@ -0,0 +1,73 @@ +package rpm + +import ( + "context" + "testing" + + "github.com/quay/zlog" + + "github.com/quay/claircore" + "github.com/quay/claircore/test" +) + +var testcases = []struct { + name string + expectedFiles int + ref test.LayerRef +}{ + { + name: "python files", + expectedFiles: 821, + ref: test.LayerRef{ + Registry: "registry.access.redhat.com", + Name: "ubi9/nodejs-18", + Digest: `sha256:1ae06b64755052cef4c32979aded82a18f664c66fa7b50a6d2924afac2849c6e`, + }, + }, +} + +func TestFileScannerLayer(t *testing.T) { + ctx := zlog.Test(context.Background(), t) + var s FileScanner + a := test.NewCachedArena(t) + t.Cleanup(func() { + if err := a.Close(ctx); err != nil { + t.Error(err) + } + }) + + for _, tt := range testcases { + t.Run(tt.name, func(t *testing.T) { + ctx := zlog.Test(ctx, t) + a.LoadLayerFromRegistry(ctx, t, tt.ref) + r := a.Realizer(ctx).(*test.CachedRealizer) + t.Cleanup(func() { + if err := r.Close(); err != nil { + t.Error(err) + } + }) + ls, err := r.RealizeDescriptions(ctx, []claircore.LayerDescription{ + { + Digest: tt.ref.Digest, + URI: "http://example.com", + MediaType: test.MediaType, + Headers: make(map[string][]string), + }, + }) + if err != nil { + t.Fatal(err) + } + + got, err := s.Scan(ctx, &ls[0]) + if err != nil { + t.Error(err) + } + + t.Logf("found %d files", len(got)) + if len(got) != tt.expectedFiles { + t.Fatalf("expected %d files but got %d", tt.expectedFiles, len(got)) + } + t.Log(got) + }) + } +} diff --git a/rpm/native_db.go b/rpm/native_db.go index 7aee7b45e..bfa88c951 100644 --- a/rpm/native_db.go +++ b/rpm/native_db.go @@ -5,6 +5,8 @@ import ( "context" "fmt" "io" + "io/fs" + "os" "path" "regexp" "runtime/trace" @@ -14,7 +16,10 @@ import ( "golang.org/x/crypto/openpgp/packet" "github.com/quay/claircore" + "github.com/quay/claircore/rpm/bdb" "github.com/quay/claircore/rpm/internal/rpm" + "github.com/quay/claircore/rpm/ndb" + "github.com/quay/claircore/rpm/sqlite" ) // NativeDB is the interface implemented for in-process RPM database handlers. @@ -23,6 +28,128 @@ type nativeDB interface { Validate(context.Context) error } +// ObjectResponse is a generic object that we're expecting to extract from +// RPM database, currently either a Package or a File. +type ObjectResponse interface { + []*claircore.Package | []claircore.File +} + +// getDBObjects does all the dirty work of extracting generic claircore objects +// from an RPM database. Provide it with a foundDB, the sys and a fn extract function +// it will create a implementation agnostic nativeDB and extract specific claircore +// objects from it. +func getDBObjects[T ObjectResponse](ctx context.Context, sys fs.FS, db foundDB, fn func(context.Context, string, nativeDB) (T, error)) (T, error) { + var nat nativeDB // see native_db.go:/nativeDB + switch db.Kind { + case kindSQLite: + r, err := sys.Open(path.Join(db.Path, `rpmdb.sqlite`)) + if err != nil { + return nil, fmt.Errorf("rpm: error reading sqlite db: %w", err) + } + defer func() { + if err := r.Close(); err != nil { + zlog.Warn(ctx).Err(err).Msg("unable to close sqlite db") + } + }() + f, err := os.CreateTemp(os.TempDir(), `rpmdb.sqlite.*`) + if err != nil { + return nil, fmt.Errorf("rpm: error reading sqlite db: %w", err) + } + defer func() { + if err := os.Remove(f.Name()); err != nil { + zlog.Error(ctx).Err(err).Msg("unable to unlink sqlite db") + } + if err := f.Close(); err != nil { + zlog.Warn(ctx).Err(err).Msg("unable to close sqlite db") + } + }() + zlog.Debug(ctx).Str("file", f.Name()).Msg("copying sqlite db out of FS") + if _, err := io.Copy(f, r); err != nil { + return nil, fmt.Errorf("rpm: error reading sqlite db: %w", err) + } + if err := f.Sync(); err != nil { + return nil, fmt.Errorf("rpm: error reading sqlite db: %w", err) + } + sdb, err := sqlite.Open(f.Name()) + if err != nil { + return nil, fmt.Errorf("rpm: error reading sqlite db: %w", err) + } + defer sdb.Close() + nat = sdb + case kindBDB: + f, err := sys.Open(path.Join(db.Path, `Packages`)) + if err != nil { + return nil, fmt.Errorf("rpm: error reading bdb db: %w", err) + } + defer f.Close() + r, done, err := mkAt(ctx, db.Kind, f) + if err != nil { + return nil, fmt.Errorf("rpm: error reading bdb db: %w", err) + } + defer done() + var bpdb bdb.PackageDB + if err := bpdb.Parse(r); err != nil { + return nil, fmt.Errorf("rpm: error parsing bdb db: %w", err) + } + nat = &bpdb + case kindNDB: + f, err := sys.Open(path.Join(db.Path, `Packages.db`)) + if err != nil { + return nil, fmt.Errorf("rpm: error reading ndb db: %w", err) + } + defer f.Close() + r, done, err := mkAt(ctx, db.Kind, f) + if err != nil { + return nil, fmt.Errorf("rpm: error reading ndb db: %w", err) + } + defer done() + var npdb ndb.PackageDB + if err := npdb.Parse(r); err != nil { + return nil, fmt.Errorf("rpm: error parsing ndb db: %w", err) + } + nat = &npdb + default: + panic("programmer error: bad kind: " + db.Kind.String()) + } + if err := nat.Validate(ctx); err != nil { + zlog.Warn(ctx). + Err(err). + Msg("rpm: invalid native DB") + return nil, nil + } + ps, err := fn(ctx, db.String(), nat) + if err != nil { + return nil, fmt.Errorf("rpm: error reading native db: %w", err) + } + + return ps, nil +} + +func filesFromDB(ctx context.Context, _ string, db nativeDB) ([]claircore.File, error) { + rds, err := db.AllHeaders(ctx) + if err != nil { + return nil, fmt.Errorf("rpm: error reading headers: %w", err) + } + fs := []claircore.File{} + for _, rd := range rds { + var h rpm.Header + if err := h.Parse(ctx, rd); err != nil { + return nil, err + } + var info Info + if err := info.Load(ctx, &h); err != nil { + return nil, err + } + for _, f := range info.Filenames { + fs = append(fs, claircore.File{ + Kind: claircore.FileKindRPM, + Path: f, + }) + } + } + return fs, nil +} + // PackagesFromDB extracts the packages from the RPM headers provided by // the database. func packagesFromDB(ctx context.Context, pkgdb string, db nativeDB) ([]*claircore.Package, error) { @@ -299,3 +426,106 @@ func constructHint(b *strings.Builder, info *Info) string { } return b.String() } + +func findDBs(ctx context.Context, out *[]foundDB, sys fs.FS) fs.WalkDirFunc { + return func(p string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + if d.IsDir() { + return nil + } + + dir, n := path.Split(p) + dir = path.Clean(dir) + switch n { + case `Packages`: + f, err := sys.Open(p) + if err != nil { + return err + } + ok := bdb.CheckMagic(ctx, f) + f.Close() + if !ok { + return nil + } + *out = append(*out, foundDB{ + Path: dir, + Kind: kindBDB, + }) + case `rpmdb.sqlite`: + *out = append(*out, foundDB{ + Path: dir, + Kind: kindSQLite, + }) + case `Packages.db`: + f, err := sys.Open(p) + if err != nil { + return err + } + ok := ndb.CheckMagic(ctx, f) + f.Close() + if !ok { + return nil + } + *out = append(*out, foundDB{ + Path: dir, + Kind: kindNDB, + }) + } + return nil + } +} + +func mkAt(ctx context.Context, k dbKind, f fs.File) (io.ReaderAt, func(), error) { + if r, ok := f.(io.ReaderAt); ok { + return r, func() {}, nil + } + spool, err := os.CreateTemp(os.TempDir(), `Packages.`+k.String()+`.`) + if err != nil { + return nil, nil, fmt.Errorf("rpm: error spooling db: %w", err) + } + ctx = zlog.ContextWithValues(ctx, "file", spool.Name()) + if err := os.Remove(spool.Name()); err != nil { + zlog.Error(ctx).Err(err).Msg("unable to remove spool; file leaked!") + } + zlog.Debug(ctx). + Msg("copying db out of fs.FS") + if _, err := io.Copy(spool, f); err != nil { + if err := spool.Close(); err != nil { + zlog.Warn(ctx).Err(err).Msg("unable to close spool") + } + return nil, nil, fmt.Errorf("rpm: error spooling db: %w", err) + } + return spool, closeSpool(ctx, spool), nil +} + +func closeSpool(ctx context.Context, f *os.File) func() { + return func() { + if err := f.Close(); err != nil { + zlog.Warn(ctx).Err(err).Msg("unable to close spool") + } + } +} + +type dbKind uint + +//go:generate -command stringer go run golang.org/x/tools/cmd/stringer +//go:generate stringer -linecomment -type dbKind + +const ( + _ dbKind = iota + + kindBDB // bdb + kindSQLite // sqlite + kindNDB // ndb +) + +type foundDB struct { + Path string + Kind dbKind +} + +func (f foundDB) String() string { + return f.Kind.String() + ":" + f.Path +} diff --git a/rpm/packagescanner.go b/rpm/packagescanner.go index 0d48cf9f5..d581f624b 100644 --- a/rpm/packagescanner.go +++ b/rpm/packagescanner.go @@ -4,19 +4,13 @@ package rpm import ( "context" "fmt" - "io" "io/fs" - "os" - "path" "runtime/trace" "github.com/quay/zlog" "github.com/quay/claircore" "github.com/quay/claircore/indexer" - "github.com/quay/claircore/rpm/bdb" - "github.com/quay/claircore/rpm/ndb" - "github.com/quay/claircore/rpm/sqlite" ) const ( @@ -89,194 +83,12 @@ func (ps *Scanner) Scan(ctx context.Context, layer *claircore.Layer) ([]*clairco continue } done[db.Path] = struct{}{} - - var nat nativeDB // see native_db.go:/nativeDB - switch db.Kind { - case kindSQLite: - r, err := sys.Open(path.Join(db.Path, `rpmdb.sqlite`)) - if err != nil { - return nil, fmt.Errorf("rpm: error reading sqlite db: %w", err) - } - defer func() { - if err := r.Close(); err != nil { - zlog.Warn(ctx).Err(err).Msg("unable to close sqlite db") - } - }() - f, err := os.CreateTemp(os.TempDir(), `rpmdb.sqlite.*`) - if err != nil { - return nil, fmt.Errorf("rpm: error reading sqlite db: %w", err) - } - defer func() { - if err := os.Remove(f.Name()); err != nil { - zlog.Error(ctx).Err(err).Msg("unable to unlink sqlite db") - } - if err := f.Close(); err != nil { - zlog.Warn(ctx).Err(err).Msg("unable to close sqlite db") - } - }() - zlog.Debug(ctx).Str("file", f.Name()).Msg("copying sqlite db out of FS") - if _, err := io.Copy(f, r); err != nil { - return nil, fmt.Errorf("rpm: error reading sqlite db: %w", err) - } - if err := f.Sync(); err != nil { - return nil, fmt.Errorf("rpm: error reading sqlite db: %w", err) - } - sdb, err := sqlite.Open(f.Name()) - if err != nil { - return nil, fmt.Errorf("rpm: error reading sqlite db: %w", err) - } - defer sdb.Close() - nat = sdb - case kindBDB: - f, err := sys.Open(path.Join(db.Path, `Packages`)) - if err != nil { - return nil, fmt.Errorf("rpm: error reading bdb db: %w", err) - } - defer f.Close() - r, done, err := mkAt(ctx, db.Kind, f) - if err != nil { - return nil, fmt.Errorf("rpm: error reading bdb db: %w", err) - } - defer done() - var bpdb bdb.PackageDB - if err := bpdb.Parse(r); err != nil { - return nil, fmt.Errorf("rpm: error parsing bdb db: %w", err) - } - nat = &bpdb - case kindNDB: - f, err := sys.Open(path.Join(db.Path, `Packages.db`)) - if err != nil { - return nil, fmt.Errorf("rpm: error reading ndb db: %w", err) - } - defer f.Close() - r, done, err := mkAt(ctx, db.Kind, f) - if err != nil { - return nil, fmt.Errorf("rpm: error reading ndb db: %w", err) - } - defer done() - var npdb ndb.PackageDB - if err := npdb.Parse(r); err != nil { - return nil, fmt.Errorf("rpm: error parsing ndb db: %w", err) - } - nat = &npdb - default: - panic("programmer error: bad kind: " + db.Kind.String()) - } - if err := nat.Validate(ctx); err != nil { - zlog.Warn(ctx). - Err(err). - Msg("rpm: invalid native DB") - continue - } - ps, err := packagesFromDB(ctx, db.String(), nat) + ps, err := getDBObjects(ctx, sys, db, packagesFromDB) if err != nil { - return nil, fmt.Errorf("rpm: error reading native db: %w", err) + return nil, err } pkgs = append(pkgs, ps...) } return pkgs, nil } - -func findDBs(ctx context.Context, out *[]foundDB, sys fs.FS) fs.WalkDirFunc { - return func(p string, d fs.DirEntry, err error) error { - if err != nil { - return err - } - if d.IsDir() { - return nil - } - - dir, n := path.Split(p) - dir = path.Clean(dir) - switch n { - case `Packages`: - f, err := sys.Open(p) - if err != nil { - return err - } - ok := bdb.CheckMagic(ctx, f) - f.Close() - if !ok { - return nil - } - *out = append(*out, foundDB{ - Path: dir, - Kind: kindBDB, - }) - case `rpmdb.sqlite`: - *out = append(*out, foundDB{ - Path: dir, - Kind: kindSQLite, - }) - case `Packages.db`: - f, err := sys.Open(p) - if err != nil { - return err - } - ok := ndb.CheckMagic(ctx, f) - f.Close() - if !ok { - return nil - } - *out = append(*out, foundDB{ - Path: dir, - Kind: kindNDB, - }) - } - return nil - } -} - -func mkAt(ctx context.Context, k dbKind, f fs.File) (io.ReaderAt, func(), error) { - if r, ok := f.(io.ReaderAt); ok { - return r, func() {}, nil - } - spool, err := os.CreateTemp(os.TempDir(), `Packages.`+k.String()+`.`) - if err != nil { - return nil, nil, fmt.Errorf("rpm: error spooling db: %w", err) - } - ctx = zlog.ContextWithValues(ctx, "file", spool.Name()) - if err := os.Remove(spool.Name()); err != nil { - zlog.Error(ctx).Err(err).Msg("unable to remove spool; file leaked!") - } - zlog.Debug(ctx). - Msg("copying db out of fs.FS") - if _, err := io.Copy(spool, f); err != nil { - if err := spool.Close(); err != nil { - zlog.Warn(ctx).Err(err).Msg("unable to close spool") - } - return nil, nil, fmt.Errorf("rpm: error spooling db: %w", err) - } - return spool, closeSpool(ctx, spool), nil -} - -func closeSpool(ctx context.Context, f *os.File) func() { - return func() { - if err := f.Close(); err != nil { - zlog.Warn(ctx).Err(err).Msg("unable to close spool") - } - } -} - -type dbKind uint - -//go:generate -command stringer go run golang.org/x/tools/cmd/stringer -//go:generate stringer -linecomment -type dbKind - -const ( - _ dbKind = iota - - kindBDB // bdb - kindSQLite // sqlite - kindNDB // ndb -) - -type foundDB struct { - Path string - Kind dbKind -} - -func (f foundDB) String() string { - return f.Kind.String() + ":" + f.Path -}