From 9e24595eefcefaf6ced0989f604db8894c795306 Mon Sep 17 00:00:00 2001 From: "J. Victor Martins" Date: Fri, 9 Sep 2022 16:34:19 -0700 Subject: [PATCH] refactor: Move detection to a package (#926) --- localdev/main.go | 3 +- pkg/analyzer/detection/detection.go | 139 +++++++++++++++++++++++++ pkg/analyzer/detection/errors.go | 13 +++ pkg/analyzer/nodes/node.go | 4 +- worker.go | 155 ++-------------------------- 5 files changed, 164 insertions(+), 150 deletions(-) create mode 100644 pkg/analyzer/detection/detection.go create mode 100644 pkg/analyzer/detection/errors.go diff --git a/localdev/main.go b/localdev/main.go index c8db2d01f..af50b7d70 100644 --- a/localdev/main.go +++ b/localdev/main.go @@ -19,6 +19,7 @@ import ( "github.com/stackrox/scanner/database" "github.com/stackrox/scanner/ext/imagefmt" "github.com/stackrox/scanner/pkg/analyzer" + "github.com/stackrox/scanner/pkg/analyzer/detection" "github.com/stackrox/scanner/pkg/component" "github.com/stackrox/scanner/pkg/tarutil" "github.com/stackrox/scanner/singletons/requiredfilenames" @@ -109,7 +110,7 @@ func analyzeLocalImage(path string) { if err != nil { panic(err) } - namespace = clair.DetectNamespace(l, *files, nil, false) + namespace = detection.DetectNamespace(l, *files, nil, false) if namespace != nil { break } diff --git a/pkg/analyzer/detection/detection.go b/pkg/analyzer/detection/detection.go new file mode 100644 index 000000000..6e048b06e --- /dev/null +++ b/pkg/analyzer/detection/detection.go @@ -0,0 +1,139 @@ +package detection + +import ( + "github.com/sirupsen/logrus" + "github.com/stackrox/scanner/database" + "github.com/stackrox/scanner/ext/featurefmt" + "github.com/stackrox/scanner/ext/featurens" + "github.com/stackrox/scanner/pkg/analyzer" + "github.com/stackrox/scanner/pkg/component" + features2 "github.com/stackrox/scanner/pkg/features" + "github.com/stackrox/scanner/pkg/rhelv2/rpm" + "github.com/stackrox/scanner/pkg/wellknownnamespaces" +) + +// LogLayerName is the name of the log field
holding the detection target. +const LogLayerName = "layer" + +// DetectComponents detects the namespace and extracts the components present in +// the files of a filesystem or image layer. For layers, the parent layer should +// be specified. For filesystems, which don't have the concept of intermediate +// layers, or the root layer, use `nil`. Notice that language components are not +// extracted by DetectComponents, but if provided they are annotated with +// certified RHEL dependencies, and returned. +func DetectComponents(name string, files analyzer.Files, parent *database.Layer, languageComponents []*component.Component, uncertifiedRHEL bool) (*database.Namespace, []database.FeatureVersion, *database.RHELv2Components, []*component.Component, error) { + namespace := DetectNamespace(name, files, parent, uncertifiedRHEL) + + var featureVersions []database.FeatureVersion + var rhelfeatures *database.RHELv2Components + + if namespace != nil && wellknownnamespaces.IsRHELNamespace(namespace.Name) { + // This is a RHEL-based image that must be scanned in a certified manner. + // Use the RHELv2 scanner instead. + packages, cpes, err := rpm.ListFeatures(files) + if err != nil { + return nil, nil, nil, nil, err + } + rhelfeatures = &database.RHELv2Components{ + Dist: namespace.Name, + Packages: packages, + CPEs: cpes, + } + logrus.WithFields(logrus.Fields{LogLayerName: name, "rhel package count": len(packages), "rhel cpe count": len(cpes)}).Debug("detected rhelv2 features") + if err := rpm.AnnotateComponentsWithPackageManagerInfo(files, languageComponents); err != nil { + logrus.WithError(err).Errorf("Failed to analyze package manager info for language components: %s", name) + } + } else { + var err error + // Detect features. 
+ featureVersions, err = detectFeatureVersions(name, files, namespace, parent) + if err != nil { + return nil, nil, nil, nil, err + } + if len(featureVersions) > 0 { + logrus.WithFields(logrus.Fields{LogLayerName: name, "feature count": len(featureVersions)}).Debug("detected features") + } + } + return namespace, featureVersions, rhelfeatures, languageComponents, nil +} + +// DetectNamespace detects the layer's namespace. +func DetectNamespace(name string, files analyzer.Files, parent *database.Layer, uncertifiedRHEL bool) *database.Namespace { + namespace := featurens.Detect(files, &featurens.DetectorOptions{ + UncertifiedRHEL: uncertifiedRHEL, + }) + if namespace != nil { + logrus.WithFields(logrus.Fields{LogLayerName: name, "detected namespace": namespace.Name}).Debug("detected namespace") + return namespace + } + + // Fallback to the parent's namespace. + if parent != nil { + namespace = parent.Namespace + if namespace != nil { + logrus.WithFields(logrus.Fields{LogLayerName: name, "detected namespace": namespace.Name}).Debug("detected namespace (from parent)") + return namespace + } + } + + return nil +} + +func detectFeatureVersions(name string, files analyzer.Files, namespace *database.Namespace, parent *database.Layer) (features []database.FeatureVersion, err error) { + // TODO(Quentin-M): We need to pass the parent image to DetectFeatures because it's possible that + // some detectors would need it in order to produce the entire feature list (if they can only + // detect a diff). Also, we should probably pass the detected namespace so detectors could + // make their own decision. + features, err = featurefmt.ListFeatures(files) + if err != nil { + return + } + + // If there are no FeatureVersions, use parent's FeatureVersions if possible. + // TODO(Quentin-M): We eventually want to give the choice to each detectors to use none/some of + // their parent's FeatureVersions. 
It would be useful for detectors that can't find their entire + // result using one Layer. + if len(features) == 0 && parent != nil { + features = parent.Features + return + } + + // Build a map of the namespaces for each FeatureVersion in our parent layer. + parentFeatureNamespaces := make(map[string]database.Namespace) + if parent != nil { + for _, parentFeature := range parent.Features { + parentFeatureNamespaces[parentFeature.Feature.Name+":"+parentFeature.Version] = parentFeature.Feature.Namespace + } + } + + // Ensure that each FeatureVersion has an associated Namespace. + for i, feature := range features { + if feature.Feature.Namespace.Name != "" { + // There is a Namespace associated. + continue + } + + if parentFeatureNamespace, ok := parentFeatureNamespaces[feature.Feature.Name+":"+feature.Version]; ok { + // The FeatureVersion is present in the parent layer; associate with their Namespace. + features[i].Feature.Namespace = parentFeatureNamespace + continue + } + + if namespace != nil { + // The Namespace has been detected in this layer; associate it. + features[i].Feature.Namespace = *namespace + continue + } + + logrus.WithFields(logrus.Fields{"feature name": feature.Feature.Name, "feature version": feature.Version, LogLayerName: name}).Warning("Namespace unknown") + if features2.ContinueUnknownOS.Enabled() { + features = nil + return + } + + err = ErrUnsupported + return + } + + return +} diff --git a/pkg/analyzer/detection/errors.go b/pkg/analyzer/detection/errors.go new file mode 100644 index 000000000..16477dd0f --- /dev/null +++ b/pkg/analyzer/detection/errors.go @@ -0,0 +1,13 @@ +package detection + +import "github.com/stackrox/scanner/pkg/commonerr" + +var ( + // ErrUnsupported is the error that should be raised when an OS or package + // manager is not supported. 
+ ErrUnsupported = commonerr.NewBadRequestError("worker: OS and/or package manager are not supported") + + // ErrParentUnknown is the error that should be raised when a parent layer + // has yet to be processed for the current layer. + ErrParentUnknown = commonerr.NewBadRequestError("worker: parent layer is unknown, it must be processed first") +) diff --git a/pkg/analyzer/nodes/node.go b/pkg/analyzer/nodes/node.go index 3e41588e1..58c1726d6 100644 --- a/pkg/analyzer/nodes/node.go +++ b/pkg/analyzer/nodes/node.go @@ -8,9 +8,9 @@ import ( "github.com/pkg/errors" "github.com/sirupsen/logrus" - clair "github.com/stackrox/scanner" "github.com/stackrox/scanner/database" "github.com/stackrox/scanner/pkg/analyzer" + "github.com/stackrox/scanner/pkg/analyzer/detection" "github.com/stackrox/scanner/pkg/component" "github.com/stackrox/scanner/pkg/matcher" "github.com/stackrox/scanner/pkg/metrics" @@ -60,7 +60,7 @@ func Analyze(nodeName, rootFSdir string, uncertifiedRHEL bool) (*Components, err } c := &Components{} c.OSNamespace, c.OSComponents, c.CertifiedRHELComponents, _, err = - clair.DetectFromFiles(files, nodeName, nil, nil, uncertifiedRHEL) + detection.DetectComponents(nodeName, files, nil, nil, uncertifiedRHEL) if err != nil { return nil, nil } diff --git a/worker.go b/worker.go index a89adcbbb..765b438c0 100644 --- a/worker.go +++ b/worker.go @@ -21,18 +21,14 @@ import ( log "github.com/sirupsen/logrus" "github.com/stackrox/rox/pkg/utils" "github.com/stackrox/scanner/database" - "github.com/stackrox/scanner/ext/featurefmt" - "github.com/stackrox/scanner/ext/featurens" "github.com/stackrox/scanner/ext/imagefmt" "github.com/stackrox/scanner/pkg/analyzer" + "github.com/stackrox/scanner/pkg/analyzer/detection" "github.com/stackrox/scanner/pkg/commonerr" "github.com/stackrox/scanner/pkg/component" "github.com/stackrox/scanner/pkg/env" - featureFlags "github.com/stackrox/scanner/pkg/features" "github.com/stackrox/scanner/pkg/matcher" - rhelv2 
"github.com/stackrox/scanner/pkg/rhelv2/rpm" "github.com/stackrox/scanner/pkg/tarutil" - namespaces "github.com/stackrox/scanner/pkg/wellknownnamespaces" "github.com/stackrox/scanner/singletons/analyzers" "github.com/stackrox/scanner/singletons/requiredfilenames" ) @@ -40,18 +36,7 @@ import ( const ( // Version (integer) represents the worker version. // Increased each time the engine changes. - Version = 3 - logLayerName = "layer" -) - -var ( - // ErrUnsupported is the error that should be raised when an OS or package - // manager is not supported. - ErrUnsupported = commonerr.NewBadRequestError("worker: OS and/or package manager are not supported") - - // ErrParentUnknown is the error that should be raised when a parent layer - // has yet to be processed for the current layer. - ErrParentUnknown = commonerr.NewBadRequestError("worker: parent layer is unknown, it must be processed first") + Version = 3 ) func preProcessLayer(datastore database.Datastore, imageFormat, name, lineage, parentName, parentLineage string, uncertifiedRHEL bool) (database.Layer, bool, error) { @@ -88,8 +73,8 @@ func preProcessLayer(datastore database.Datastore, imageFormat, name, lineage, p return layer, false, err } if err == commonerr.ErrNotFound { - log.WithFields(log.Fields{logLayerName: name, "parent layer": parentName}).Warning("the parent layer is unknown. it must be processed first") - return layer, false, ErrParentUnknown + log.WithFields(log.Fields{detection.LogLayerName: name, "parent layer": parentName}).Warning("the parent layer is unknown. it must be processed first") + return layer, false, detection.ErrParentUnknown } layer.Parent = &parent } @@ -97,10 +82,10 @@ func preProcessLayer(datastore database.Datastore, imageFormat, name, lineage, p } // The layer is already in the database, check if we need to update it. 
if layer.EngineVersion >= Version { - log.WithFields(log.Fields{logLayerName: name, "past engine version": layer.EngineVersion, "current engine version": Version}).Debug("layer content has already been processed in the past with older engine. skipping analysis") + log.WithFields(log.Fields{detection.LogLayerName: name, "past engine version": layer.EngineVersion, "current engine version": Version}).Debug("layer content has already been processed in the past with older engine. skipping analysis") return layer, true, nil } - log.WithFields(log.Fields{logLayerName: name, "past engine version": layer.EngineVersion, "current engine version": Version}).Debug("layer content has already been processed in the past with older engine. analyzing again") + log.WithFields(log.Fields{detection.LogLayerName: name, "past engine version": layer.EngineVersion, "current engine version": Version}).Debug("layer content has already been processed in the past with older engine. analyzing again") return layer, false, nil } @@ -166,49 +151,6 @@ func ProcessLayerFromReader(datastore database.Datastore, imageFormat, name, lin return files, datastore.InsertLayerComponents(layer.Name, lineage, languageComponents, files.GetRemovedFiles(), opts) } -// DetectFromFiles detects the namespace and extracts the components present in -// the files of a filesystem or image layer. For layers, the parent layer should -// be specified. For filesystems, which don't have the concept of intermediate -// layers, or the root layer, use `nil`. Notice that language components are not -// extracted by DetectFromFiles, but if provided they are annotated with -// certified RHEL dependencies, and returned. 
-func DetectFromFiles(files analyzer.Files, name string, parent *database.Layer, languageComponents []*component.Component, uncertifiedRHEL bool) (*database.Namespace, - []database.FeatureVersion, *database.RHELv2Components, []*component.Component, error) { - namespace := DetectNamespace(name, files, parent, uncertifiedRHEL) - - var featureVersions []database.FeatureVersion - var rhelfeatures *database.RHELv2Components - - if namespace != nil && namespaces.IsRHELNamespace(namespace.Name) { - // This is a RHEL-based image that must be scanned in a certified manner. - // Use the RHELv2 scanner instead. - packages, cpes, err := rhelv2.ListFeatures(files) - if err != nil { - return nil, nil, nil, nil, err - } - rhelfeatures = &database.RHELv2Components{ - Dist: namespace.Name, - Packages: packages, - CPEs: cpes, - } - log.WithFields(log.Fields{logLayerName: name, "rhel package count": len(packages), "rhel cpe count": len(cpes)}).Debug("detected rhelv2 features") - if err := rhelv2.AnnotateComponentsWithPackageManagerInfo(files, languageComponents); err != nil { - log.WithError(err).Errorf("Failed to analyze package manager info for language components: %s", name) - } - } else { - var err error - // Detect features. - featureVersions, err = detectFeatureVersions(name, files, namespace, parent) - if err != nil { - return nil, nil, nil, nil, err - } - if len(featureVersions) > 0 { - log.WithFields(log.Fields{logLayerName: name, "feature count": len(featureVersions)}).Debug("detected features") - } - } - return namespace, featureVersions, rhelfeatures, languageComponents, nil -} - // analyzingMatcher is a Matcher implementation that calls ProcessFile on each analyzer, // stores the resulting components, and then delegates to another matcher. 
type analyzingMatcher struct { @@ -251,10 +193,10 @@ func DetectContentFromReader(reader io.ReadCloser, format, name string, parent * files.MergeBaseAndResolveSymlinks(base) if len(m.components) > 0 { - log.WithFields(log.Fields{logLayerName: name, "component count": len(m.components)}).Debug("detected components") + log.WithFields(log.Fields{detection.LogLayerName: name, "component count": len(m.components)}).Debug("detected components") } - namespace, features, rhelv2Components, languageComponents, err := DetectFromFiles(*files, name, parent, m.components, uncertifiedRHEL) + namespace, features, rhelv2Components, languageComponents, err := detection.DetectComponents(name, *files, parent, m.components, uncertifiedRHEL) distroless := isDistroless(*files) || (parent != nil && parent.Distroless) return namespace, distroless, features, rhelv2Components, languageComponents, files, err @@ -264,84 +206,3 @@ func isDistroless(filesMap tarutil.LayerFiles) bool { _, ok := filesMap.Get("var/lib/dpkg/status.d/") return ok } - -// DetectNamespace detects the layer's namespace. -func DetectNamespace(name string, files analyzer.Files, parent *database.Layer, uncertifiedRHEL bool) *database.Namespace { - namespace := featurens.Detect(files, &featurens.DetectorOptions{ - UncertifiedRHEL: uncertifiedRHEL, - }) - if namespace != nil { - log.WithFields(log.Fields{logLayerName: name, "detected namespace": namespace.Name}).Debug("detected namespace") - return namespace - } - - // Fallback to the parent's namespace. 
- if parent != nil { - namespace = parent.Namespace - if namespace != nil { - log.WithFields(log.Fields{logLayerName: name, "detected namespace": namespace.Name}).Debug("detected namespace (from parent)") - return namespace - } - } - - return nil -} - -func detectFeatureVersions(name string, files analyzer.Files, namespace *database.Namespace, parent *database.Layer) (features []database.FeatureVersion, err error) { - // TODO(Quentin-M): We need to pass the parent image to DetectFeatures because it's possible that - // some detectors would need it in order to produce the entire feature list (if they can only - // detect a diff). Also, we should probably pass the detected namespace so detectors could - // make their own decision. - features, err = featurefmt.ListFeatures(files) - if err != nil { - return - } - - // If there are no FeatureVersions, use parent's FeatureVersions if possible. - // TODO(Quentin-M): We eventually want to give the choice to each detectors to use none/some of - // their parent's FeatureVersions. It would be useful for detectors that can't find their entire - // result using one Layer. - if len(features) == 0 && parent != nil { - features = parent.Features - return - } - - // Build a map of the namespaces for each FeatureVersion in our parent layer. - parentFeatureNamespaces := make(map[string]database.Namespace) - if parent != nil { - for _, parentFeature := range parent.Features { - parentFeatureNamespaces[parentFeature.Feature.Name+":"+parentFeature.Version] = parentFeature.Feature.Namespace - } - } - - // Ensure that each FeatureVersion has an associated Namespace. - for i, feature := range features { - if feature.Feature.Namespace.Name != "" { - // There is a Namespace associated. - continue - } - - if parentFeatureNamespace, ok := parentFeatureNamespaces[feature.Feature.Name+":"+feature.Version]; ok { - // The FeatureVersion is present in the parent layer; associate with their Namespace. 
- features[i].Feature.Namespace = parentFeatureNamespace - continue - } - - if namespace != nil { - // The Namespace has been detected in this layer; associate it. - features[i].Feature.Namespace = *namespace - continue - } - - log.WithFields(log.Fields{"feature name": feature.Feature.Name, "feature version": feature.Version, logLayerName: name}).Warning("Namespace unknown") - if featureFlags.ContinueUnknownOS.Enabled() { - features = nil - return - } - - err = ErrUnsupported - return - } - - return -}