Skip to content

Commit

Permalink
rpm: implement an RPM filescanner to discern RPM filepaths
Browse files Browse the repository at this point in the history
Using the filepaths discovered by the RPM filescanner, we can judge
whether or not a language package has been installed via RPM.

Signed-off-by: crozzy <[email protected]>
  • Loading branch information
crozzy committed Apr 12, 2024
1 parent e59ef5d commit 21db95f
Show file tree
Hide file tree
Showing 15 changed files with 672 additions and 50 deletions.
1 change: 1 addition & 0 deletions file.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ type FileKind string

// Known FileKind values.
const (
// FileKindWhiteout is the FileKind whose value is "whiteout".
FileKindWhiteout = FileKind("whiteout")
// FileKindRPM is the FileKind whose value is "rpm".
// NOTE(review): presumably assigned to files reported by the rpm package's
// file scanner; confirm against its use there.
FileKindRPM = FileKind("rpm")
)

// File represents interesting files that are found in the layer.
Expand Down
2 changes: 1 addition & 1 deletion indexer/controller/coalesce.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ func MergeSR(source *claircore.IndexReport, merge []*claircore.IndexReport) *cla
}

for k, v := range ir.Files {
source.Files[k] = v
source.Files[k] = append(source.Files[k], v...)
}
}
return source
Expand Down
2 changes: 1 addition & 1 deletion indexer/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ func New(options *indexer.Options) *Controller {
Environments: map[string][]*claircore.Environment{},
Distributions: map[string]*claircore.Distribution{},
Repositories: map[string]*claircore.Repository{},
Files: map[string]claircore.File{},
Files: map[string][]claircore.File{},
}

s := &Controller{
Expand Down
2 changes: 1 addition & 1 deletion indexreport.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ type IndexReport struct {
// an error string in the case the index did not succeed
Err string `json:"err"`
// Files doesn't end up in the json report but needs to be available at post-coalesce
Files map[string]File `json:"-"`
Files map[string][]File `json:"-"`
}

// IndexRecords returns a list of IndexRecords derived from the IndexReport
Expand Down
1 change: 1 addition & 0 deletions libindex/libindex.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ func New(ctx context.Context, opts *Options, cl *http.Client) (*Libindex, error)
opts.Ecosystems = append(opts.Ecosystems, whiteout.NewEcosystem(ctx))
opts.Resolvers = []indexer.Resolver{
&whiteout.Resolver{},
&rpm.Resolver{},
}

if cl == nil {
Expand Down
6 changes: 6 additions & 0 deletions linux/coalescer.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,12 @@ func (c *Coalescer) Coalesce(ctx context.Context, layerArtifacts []*indexer.Laye
for db, pkgs := range tmp {
dbs[db] = pkgs
}
for _, f := range artifacts.Files {
if c.ir.Files == nil {
c.ir.Files = make(map[string][]claircore.File)
}
c.ir.Files[artifacts.Hash.String()] = append(c.ir.Files[artifacts.Hash.String()], f)
}
}

for db, packages := range dbs {
Expand Down
3 changes: 3 additions & 0 deletions rpm/ecosystem.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ func NewEcosystem(_ context.Context) *indexer.Ecosystem {
RepositoryScanners: func(ctx context.Context) ([]indexer.RepositoryScanner, error) {
return []indexer.RepositoryScanner{}, nil
},
FileScanners: func(ctx context.Context) ([]indexer.FileScanner, error) {
return []indexer.FileScanner{&FileScanner{}}, nil
},
Coalescer: func(ctx context.Context) (indexer.Coalescer, error) {
return linux.NewCoalescer(), nil
},
Expand Down
165 changes: 165 additions & 0 deletions rpm/filescanner.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
package rpm

import (
"context"
"fmt"
"io"
"io/fs"
"os"
"path"
"runtime/trace"

"github.com/quay/zlog"

"github.com/quay/claircore"
"github.com/quay/claircore/indexer"
"github.com/quay/claircore/rpm/bdb"
"github.com/quay/claircore/rpm/ndb"
"github.com/quay/claircore/rpm/sqlite"
)

// Identity values returned by FileScanner's Name, Version and Kind methods.
const (
// scannerName is the value returned by FileScanner.Name.
scannerName = "rpm"
// scannerVersion is the value returned by FileScanner.Version.
scannerVersion = "1"
// scannerKind is the value returned by FileScanner.Kind.
scannerKind = "file"
)

// Compile-time assertions that *FileScanner satisfies the indexer interfaces
// it is meant to implement.
var (
_ indexer.FileScanner = (*FileScanner)(nil)
_ indexer.VersionedScanner = (*FileScanner)(nil)
)

// FileScanner scans a layer for RPM databases and reports the files they
// record. It carries no state, so the zero value is ready to use.
type FileScanner struct{}

// Name returns the scanner's name.
func (*FileScanner) Name() string {
	return scannerName
}

// Version returns the scanner's version string.
func (*FileScanner) Version() string {
	return scannerVersion
}

// Kind returns the kind of scanner this is.
func (*FileScanner) Kind() string {
	return scannerKind
}

// Scan walks the layer's filesystem looking for RPM databases and returns the
// files extracted from every database that validates.
//
// Each distinct database path is examined once. A database that fails
// validation is logged and skipped; any other failure aborts the scan and is
// returned. If no database is found at all, Scan returns (nil, nil).
func (s *FileScanner) Scan(ctx context.Context, layer *claircore.Layer) ([]claircore.File, error) {
	if err := ctx.Err(); err != nil {
		return nil, err
	}
	defer trace.StartRegion(ctx, "Scanner.Scan").End()
	trace.Log(ctx, "layer", layer.Hash.String())
	ctx = zlog.ContextWithValues(ctx,
		"component", "rpm/FileScanner.Scan",
		"version", s.Version(),
		"layer", layer.Hash.String())
	zlog.Debug(ctx).Msg("start")
	defer zlog.Debug(ctx).Msg("done")

	sys, err := layer.FS()
	if err != nil {
		return nil, fmt.Errorf("rpm: unable to open layer: %w", err)
	}

	found := make([]foundDB, 0)
	if err := fs.WalkDir(sys, ".", findDBs(ctx, &found, sys)); err != nil {
		return nil, fmt.Errorf("rpm: error walking fs: %w", err)
	}
	if len(found) == 0 {
		return nil, nil
	}

	// Kept non-nil so callers still receive an empty slice, not nil, when
	// databases exist but yield no files.
	files := []claircore.File{}
	seen := make(map[string]struct{}, len(found))

	zlog.Debug(ctx).Int("count", len(found)).Msg("found possible databases")
	for _, db := range found {
		ctx := zlog.ContextWithValues(ctx, "db", db.String())
		zlog.Debug(ctx).Msg("examining database")
		if _, ok := seen[db.Path]; ok {
			zlog.Debug(ctx).Msg("already seen, skipping")
			continue
		}
		seen[db.Path] = struct{}{}

		// The per-database work lives in its own method so that its deferred
		// cleanup (file handles, temporary sqlite copies) runs as soon as that
		// database is finished rather than when the whole scan returns.
		dbFiles, err := s.scanDB(ctx, sys, db)
		if err != nil {
			return nil, err
		}
		files = append(files, dbFiles...)
	}

	return files, nil
}

// scanDB opens the single RPM database described by db inside sys, validates
// it, and returns the files it records.
//
// A database that fails validation is logged and yields (nil, nil) so the
// caller can move on to the next one. All resources acquired here are released
// before scanDB returns.
func (*FileScanner) scanDB(ctx context.Context, sys fs.FS, db foundDB) ([]claircore.File, error) {
	var nat nativeDB // see native_db.go:/nativeDB
	switch db.Kind {
	case kindSQLite:
		r, err := sys.Open(path.Join(db.Path, `rpmdb.sqlite`))
		if err != nil {
			return nil, fmt.Errorf("rpm: error reading sqlite db: %w", err)
		}
		defer func() {
			if err := r.Close(); err != nil {
				zlog.Warn(ctx).Err(err).Msg("unable to close sqlite db")
			}
		}()
		// The database is copied out of the layer into a real file because
		// sqlite.Open is given a file name rather than a reader.
		f, err := os.CreateTemp(os.TempDir(), `rpmdb.sqlite.*`)
		if err != nil {
			return nil, fmt.Errorf("rpm: error reading sqlite db: %w", err)
		}
		defer func() {
			if err := os.Remove(f.Name()); err != nil {
				zlog.Error(ctx).Err(err).Msg("unable to unlink sqlite db")
			}
			if err := f.Close(); err != nil {
				zlog.Warn(ctx).Err(err).Msg("unable to close sqlite db")
			}
		}()
		zlog.Debug(ctx).Str("file", f.Name()).Msg("copying sqlite db out of FS")
		if _, err := io.Copy(f, r); err != nil {
			return nil, fmt.Errorf("rpm: error reading sqlite db: %w", err)
		}
		if err := f.Sync(); err != nil {
			return nil, fmt.Errorf("rpm: error reading sqlite db: %w", err)
		}
		sdb, err := sqlite.Open(f.Name())
		if err != nil {
			return nil, fmt.Errorf("rpm: error reading sqlite db: %w", err)
		}
		// Deferred last so it runs first: the handle is closed before the
		// temporary file backing it is removed.
		defer sdb.Close()
		nat = sdb
	case kindBDB:
		f, err := sys.Open(path.Join(db.Path, `Packages`))
		if err != nil {
			return nil, fmt.Errorf("rpm: error reading bdb db: %w", err)
		}
		defer f.Close()
		r, cleanup, err := mkAt(ctx, db.Kind, f)
		if err != nil {
			return nil, fmt.Errorf("rpm: error reading bdb db: %w", err)
		}
		defer cleanup()
		var bpdb bdb.PackageDB
		if err := bpdb.Parse(r); err != nil {
			return nil, fmt.Errorf("rpm: error parsing bdb db: %w", err)
		}
		nat = &bpdb
	case kindNDB:
		f, err := sys.Open(path.Join(db.Path, `Packages.db`))
		if err != nil {
			return nil, fmt.Errorf("rpm: error reading ndb db: %w", err)
		}
		defer f.Close()
		r, cleanup, err := mkAt(ctx, db.Kind, f)
		if err != nil {
			return nil, fmt.Errorf("rpm: error reading ndb db: %w", err)
		}
		defer cleanup()
		var npdb ndb.PackageDB
		if err := npdb.Parse(r); err != nil {
			return nil, fmt.Errorf("rpm: error parsing ndb db: %w", err)
		}
		nat = &npdb
	default:
		// Only reachable if a new database kind is added without a case here.
		panic("programmer error: bad kind: " + db.Kind.String())
	}

	if err := nat.Validate(ctx); err != nil {
		zlog.Warn(ctx).
			Err(err).
			Msg("rpm: invalid native DB")
		return nil, nil
	}
	dbFiles, err := filesFromDB(ctx, db.String(), nat)
	if err != nil {
		return nil, fmt.Errorf("rpm: error reading native db: %w", err)
	}
	return dbFiles, nil
}
75 changes: 75 additions & 0 deletions rpm/filescanner_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
package rpm

import (
"context"
"os"
"path/filepath"
"testing"

"github.com/quay/zlog"

"github.com/quay/claircore"
"github.com/quay/claircore/test"
)

// testcases lists the layer fixtures that TestFileScannerLayer scans, with the
// number of files each scan is expected to return.
var testcases = []struct {
// name is used as the subtest name.
name string
// filename is the fixture's file name under testdata/layers.
filename string
// expectedFiles is the number of files Scan must return for the layer.
expectedFiles int
}{
{
name: "java layer",
filename: "cdc13a947214994058941dee5dab876369896ec672defa07694cec6dd3fc7ca2",
expectedFiles: 82,
},
{
name: "open jdk layer",
filename: "f68995d3d7382737a1ee41fb69ca9369693173dba4263233621f4defcb29c4bd",
expectedFiles: 218,
},
}

// TestFileScannerLayer runs the FileScanner over every layer fixture listed in
// testcases and checks that the number of files returned matches the
// expectation recorded for that fixture.
func TestFileScannerLayer(t *testing.T) {
	base := context.Background()
	var scanner FileScanner
	// One description, with a single random digest, is shared by all fixtures;
	// the layer contents come from the file handed to Init.
	desc := claircore.LayerDescription{
		Digest:    test.RandomSHA256Digest(t).String(),
		URI:       "file:///dev/null",
		MediaType: test.MediaType,
		Headers:   make(map[string][]string),
	}

	for i := range testcases {
		tc := testcases[i]
		t.Run(tc.name, func(t *testing.T) {
			ctx := zlog.Test(base, t)

			fixture := filepath.Join(`testdata/layers`, tc.filename)
			layerFile, err := os.Open(fixture)
			if err != nil {
				t.Fatal(err)
			}
			t.Cleanup(func() {
				if closeErr := layerFile.Close(); closeErr != nil {
					t.Error(closeErr)
				}
			})

			var layer claircore.Layer
			if err := layer.Init(ctx, &desc, layerFile); err != nil {
				t.Fatal(err)
			}
			t.Cleanup(func() {
				if closeErr := layer.Close(); closeErr != nil {
					t.Error(closeErr)
				}
			})

			files, scanErr := scanner.Scan(ctx, &layer)
			if scanErr != nil {
				t.Error(scanErr)
			}
			count := len(files)
			t.Logf("found %d files", count)
			if count != tc.expectedFiles {
				t.Fatalf("expected %d files but got %d", tc.expectedFiles, count)
			}
			t.Log(files)
		})
	}
}
Loading

0 comments on commit 21db95f

Please sign in to comment.