Skip to content

Commit

Permalink
Set limit of parallel checks when validating images
Browse files Browse the repository at this point in the history
In order to validate a container image, EC gathers quite a bit of data,
e.g. SLSA Provenance, SBOM, etc. All of this is fed into Rego. At some
point it is all loaded into memory at the same time.

When validating multiple images, EC does that for each image.

This can be quite a bit of data depending on what is attached to each
image.

Prior to this change, the `ec validate image` command would happily
validate all the images in parallel. Given that memory is a constrained
resource, this is problematic.

This commit adds a limit to the number of images that are validated in
parallel. It hardcodes it to 5. If there are fewer than 5 images to
validate, then the remaining workers simply stay idle. If there are more,
at most 5 will be processed at a time. Of course, 5 is a magic number. It
is a starting point. Eventually, we need to make this number configurable,
likely via a new CLI parameter and/or automatically adjusted based on
available resources. That is for another day.

This should resolve RHTAPBUGS-1121.

Signed-off-by: Luiz Carvalho <[email protected]>
  • Loading branch information
lcarva committed Jan 23, 2024
1 parent 5010a91 commit 719f6b3
Showing 1 changed file with 31 additions and 14 deletions.
45 changes: 31 additions & 14 deletions cmd/validate/image.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ import (
"errors"
"fmt"
"sort"
"sync"

hd "github.com/MakeNowJust/heredoc"
"github.com/hashicorp/go-multierror"
app "github.com/redhat-appstudio/application-api/api/v1alpha1"
"github.com/sigstore/cosign/v2/pkg/cosign"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"

"github.com/enterprise-contract/ec-cli/internal/applicationsnapshot"
Expand Down Expand Up @@ -226,14 +226,12 @@ func validateImageCmd(validate imageValidationFunc) *cobra.Command {

appComponents := data.spec.Components

ch := make(chan result, len(appComponents))

var lock sync.WaitGroup
for _, c := range appComponents {
lock.Add(1)
go func(comp app.SnapshotComponent) {
defer lock.Done()

// worker is responsible for processing one component at a time from the jobs channel,
// and for emitting a corresponding result for the component on the results channel.
worker := func(id int, jobs <-chan app.SnapshotComponent, results chan<- result) {
log.Debugf("Starting worker %d", id)
for comp := range jobs {
log.Debugf("Worker %d got a component %q", id, comp.ContainerImage)
ctx := cmd.Context()
out, err := validate(ctx, comp, data.policy, data.info)
res := result{
Expand Down Expand Up @@ -265,18 +263,36 @@ func validateImageCmd(validate imageValidationFunc) *cobra.Command {
}
res.component.Success = err == nil && len(res.component.Violations) == 0

ch <- res
}(c)
results <- res
}
log.Debugf("Done with worker %d", id)
}

lock.Wait()
close(ch)
numComponents := len(appComponents)
// TODO: Eventually, this should either be set as a parameter or adjusted based on the
// available resources. The main constraint seems to be memory.
numWorkers := 5

jobs := make(chan app.SnapshotComponent, numComponents)
results := make(chan result, numComponents)
// Initialize each worker. They will wait patiently until a job is sent to the jobs
// channel, or the jobs channel is closed.
for i := 0; i <= numWorkers; i++ {
go worker(i, jobs, results)
}
// Initialize all the jobs. Each worker will pick a job from the channel when the worker
// is ready to consume a new job.
for _, c := range appComponents {
jobs <- c
}
close(jobs)

var components []applicationsnapshot.Component
var manyData [][]evaluator.Data
var manyPolicyInput [][]byte
var allErrors error = nil
for r := range ch {
for i := 0; i < numComponents; i++ {
r := <-results
if r.err != nil {
e := fmt.Errorf("error validating image %s of component %s: %w", r.component.ContainerImage, r.component.Name, r.err)
allErrors = multierror.Append(allErrors, e)
Expand All @@ -286,6 +302,7 @@ func validateImageCmd(validate imageValidationFunc) *cobra.Command {
manyPolicyInput = append(manyPolicyInput, r.policyInput)
}
}
close(results)
if allErrors != nil {
return allErrors
}
Expand Down

0 comments on commit 719f6b3

Please sign in to comment.