diff --git a/go.mod b/go.mod index 5943a2c9c8..660bf2fc70 100644 --- a/go.mod +++ b/go.mod @@ -34,6 +34,7 @@ require ( github.com/shirou/gopsutil/v3 v3.24.5 github.com/spf13/cobra v1.9.1 github.com/stretchr/testify v1.10.0 + golang.org/x/sys v0.30.0 golang.org/x/tools v0.30.0 gopkg.in/dnaeon/go-vcr.v3 v3.2.0 gopkg.in/yaml.v3 v3.0.1 @@ -153,7 +154,6 @@ require ( golang.org/x/net v0.35.0 // indirect golang.org/x/oauth2 v0.23.0 // indirect golang.org/x/sync v0.11.0 // indirect - golang.org/x/sys v0.30.0 // indirect golang.org/x/term v0.29.0 // indirect golang.org/x/text v0.22.0 // indirect golang.org/x/time v0.7.0 // indirect diff --git a/internal/common/bytesize.go b/internal/common/bytesize.go new file mode 100644 index 0000000000..feb342b22e --- /dev/null +++ b/internal/common/bytesize.go @@ -0,0 +1,168 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package common + +import ( + "encoding/json" + "fmt" + "math" + "regexp" + "strconv" + + "gopkg.in/yaml.v3" +) + +// Common units for sizes in bytes. +const ( + Byte = ByteSize(1) + KiloByte = 1024 * Byte + MegaByte = 1024 * KiloByte + GigaByte = 1024 * MegaByte +) + +const ( + byteString = "B" + kiloByteString = "KB" + megaByteString = "MB" + gigaByteString = "GB" +) + +// ByteSize represents the size of a file. +type ByteSize uint64 + +// Ensure FileSize implements these interfaces. +var ( + _ json.Marshaler = new(ByteSize) + _ json.Unmarshaler = new(ByteSize) + _ yaml.Marshaler = new(ByteSize) + _ yaml.Unmarshaler = new(ByteSize) +) + +func parseFileSizeInt(s string) (uint64, error) { + // os.FileInfo reports size as int64, don't support bigger numbers. + maxBitSize := 63 + return strconv.ParseUint(s, 10, maxBitSize) +} + +// MarshalJSON implements the json.Marshaler interface for FileSize, it returns +// the string representation in a format that can be unmarshaled back to an +// equivalent value. +func (s ByteSize) MarshalJSON() ([]byte, error) { + return json.Marshal(s.String()) +} + +// MarshalYAML implements the yaml.Marshaler interface for FileSize, it returns +// the string representation in a format that can be unmarshaled back to an +// equivalent value. +func (s ByteSize) MarshalYAML() (interface{}, error) { + return s.String(), nil +} + +// UnmarshalJSON implements the json.Unmarshaler interface for FileSize. +func (s *ByteSize) UnmarshalJSON(d []byte) error { + // Support unquoted plain numbers. + bytes, err := parseFileSizeInt(string(d)) + if err == nil { + *s = ByteSize(bytes) + return nil + } + + var text string + err = json.Unmarshal(d, &text) + if err != nil { + return err + } + + return s.unmarshalString(text) +} + +// UnmarshalYAML implements the yaml.Unmarshaler interface for FileSize. +func (s *ByteSize) UnmarshalYAML(value *yaml.Node) error { + // Support unquoted plain numbers. + bytes, err := parseFileSizeInt(value.Value) + if err == nil { + *s = ByteSize(bytes) + return nil + } + + return s.unmarshalString(value.Value) +} + +var bytesPattern = regexp.MustCompile(fmt.Sprintf(`^(\d+(\.\d+)?)(%s|%s|%s|%s|)$`, byteString, kiloByteString, megaByteString, gigaByteString)) + +func (s *ByteSize) unmarshalString(text string) error { + match := bytesPattern.FindStringSubmatch(text) + if len(match) < 3 { + return fmt.Errorf("invalid format for size in bytes (%s)", text) + } + + if match[2] == "" { + q, err := parseFileSizeInt(match[1]) + if err != nil { + return fmt.Errorf("invalid format for size in bytes (%s): %w", text, err) + } + + unit := match[3] + switch unit { + case gigaByteString: + *s = ByteSize(q) * GigaByte + case megaByteString: + *s = ByteSize(q) * MegaByte + case kiloByteString: + *s = ByteSize(q) * KiloByte + case byteString, "": + *s = ByteSize(q) * Byte + default: + return fmt.Errorf("invalid unit for filesize (%s): %s", text, unit) + } + } else { + q, err := strconv.ParseFloat(match[1], 64) + if err != nil { + return fmt.Errorf("invalid format for size in bytes (%s): %w", text, err) + } + + unit := match[3] + switch unit { + case gigaByteString: + *s = approxFloat(q, GigaByte) + case megaByteString: + *s = approxFloat(q, MegaByte) + case kiloByteString: + *s = approxFloat(q, KiloByte) + case byteString, "": + *s = approxFloat(q, Byte) + default: + return fmt.Errorf("invalid unit for filesize (%s): %s", text, unit) + } + } + + return nil +} + +func approxFloat(n float64, unit ByteSize) ByteSize { + approx := n * float64(unit) + return ByteSize(math.Round(approx)) +} + +// String returns the string representation of the FileSize. +func (s ByteSize) String() string { + format := func(q ByteSize, unit string) string { + return fmt.Sprintf("%d%s", q, unit) + } + + if s >= GigaByte && (s%GigaByte == 0) { + return format(s/GigaByte, gigaByteString) + } + + if s >= MegaByte && (s%MegaByte == 0) { + return format(s/MegaByte, megaByteString) + } + + if s >= KiloByte && (s%KiloByte == 0) { + return format(s/KiloByte, kiloByteString) + } + + return format(s, byteString) +} diff --git a/internal/common/bytesize_test.go b/internal/common/bytesize_test.go new file mode 100644 index 0000000000..50b0a3c6e8 --- /dev/null +++ b/internal/common/bytesize_test.go @@ -0,0 +1,103 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package common + +import ( + "encoding/json" + "testing" + + "gopkg.in/yaml.v3" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestFileSizeMarshallJSON(t *testing.T) { + cases := []struct { + fileSize ByteSize + expected string + }{ + {ByteSize(0), `"0B"`}, + {ByteSize(1024), `"1KB"`}, + {ByteSize(1025), `"1025B"`}, + {5 * MegaByte, `"5MB"`}, + {5 * GigaByte, `"5GB"`}, + } + + for _, c := range cases { + t.Run(c.expected, func(t *testing.T) { + d, err := json.Marshal(c.fileSize) + require.NoError(t, err) + assert.Equal(t, c.expected, string(d)) + }) + } +} + +func TestFileSizeMarshallYAML(t *testing.T) { + cases := []struct { + fileSize ByteSize + expected string + }{ + {ByteSize(0), "0B\n"}, + {ByteSize(1024), "1KB\n"}, + {ByteSize(1025), "1025B\n"}, + {5 * MegaByte, "5MB\n"}, + {5 * GigaByte, "5GB\n"}, + } + + for _, c := range cases { + t.Run(c.expected, func(t *testing.T) { + d, err := yaml.Marshal(c.fileSize) + require.NoError(t, err) + assert.Equal(t, c.expected, string(d)) + }) + } +} + +func TestFileSizeUnmarshal(t *testing.T) { + t.Run("json", func(t *testing.T) { + testFileSizeUnmarshalFormat(t, json.Unmarshal) + }) + t.Run("yaml", func(t *testing.T) { + testFileSizeUnmarshalFormat(t, yaml.Unmarshal) + }) +} + +func testFileSizeUnmarshalFormat(t *testing.T, unmarshaler func([]byte, interface{}) error) { + cases := []struct { + json string + expected ByteSize + valid bool + }{ + {"0", 0, true}, + {"1024", 1024 * Byte, true}, + {`"1024"`, 1024 * Byte, true}, + {`"1024B"`, 1024 * Byte, true}, + {`"10MB"`, 10 * MegaByte, true}, + {`"40GB"`, 40 * GigaByte, true}, + {`"56.21GB"`, approxFloat(56.21, GigaByte), true}, + {`"2KB"`, 2 * KiloByte, true}, + {`"KB"`, 0, false}, + {`"1s"`, 0, false}, + {`""`, 0, false}, + {`"B"`, 0, false}, + {`"-200MB"`, 0, false}, + {`"-1"`, 0, false}, + {`"10000000000000000000MB"`, 0, false}, + } + + for _, c := range cases { + t.Run(c.json, func(t *testing.T) { + var found ByteSize + err := unmarshaler([]byte(c.json), &found) + if c.valid { + require.NoError(t, err) + assert.Equal(t, c.expected, found) + } else { + require.Error(t, err) + } + }) + } +} diff --git a/internal/compose/compose.go b/internal/compose/compose.go index 80f9a4291c..d1dce1d9d7 100644 --- a/internal/compose/compose.go +++ b/internal/compose/compose.go @@ -14,6 +14,7 @@ import ( "os" "os/exec" "regexp" + "slices" "strconv" "strings" "time" @@ -60,7 +61,23 @@ type Config struct { Services map[string]service } +// Images lists the images found in the configuration. +func (c *Config) Images() []string { + var images []string + for _, service := range c.Services { + if service.Image == "" { + continue + } + if slices.Contains(images, service.Image) { + continue + } + images = append(images, service.Image) + } + return images +} + type service struct { + Image string Ports []portMapping Environment map[string]string } diff --git a/internal/docker/imagesgc.go b/internal/docker/imagesgc.go new file mode 100644 index 0000000000..677bdbd4b5 --- /dev/null +++ b/internal/docker/imagesgc.go @@ -0,0 +1,289 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package docker + +import ( + "bytes" + "encoding/json" + "errors" + "fmt" + "io/fs" + "os" + "path/filepath" + "regexp" + "slices" + "strings" + "time" + + "golang.org/x/sys/execabs" + + "github.com/elastic/elastic-package/internal/common" +) + +type ImagesGCConfig struct { + // Enabled controls if the garbage collector performs any deletion. + // When set to false, it does not remove anything, but it still keeps track of images. + Enabled bool `yaml:"enabled"` + + // MaxTotalSize removes images only after the total size of images is beyond this number, zero to disable. + MaxTotalSize common.ByteSize `yaml:"max_total_size"` + + // MaxUnused removes only images that haven't been used for the specified time. + MaxUnused time.Duration `yaml:"max_unused"` +} + +func DefaultImagesGCConfig() ImagesGCConfig { + return ImagesGCConfig{ + Enabled: false, + MaxTotalSize: 0, + MaxUnused: 4 * 7 * 24 * time.Hour, // 4 weeks + } +} + +type ImagesGC struct { + // path contains the path to the GC cache when read from disk. + path string + + // images contains the entries of the tracked docker images. + images []gcEntry + + // clock returns the current time. + clock func() time.Time + + // client implements a docker client. + client imagesGCClient + + ImagesGCConfig +} + +type imagesGCClient interface { + // ListImages should list local images in the same format as "docker-compose images". + ListImages() ([]string, error) + + // RemoveImage should try to remove an image. If the image is busy, it returns ErrBusyImage. + RemoveImage(image string) error + + // TotalImagesSize returns the total size of the local images. + TotalImagesSize() (common.ByteSize, error) +} + +var ErrBusyImage = errors.New("image is being used") + +func defaultImagesGC() ImagesGC { + return ImagesGC{ + clock: time.Now, + client: defaultImagesGCClient(), + ImagesGCConfig: DefaultImagesGCConfig(), + } +} + +type gcEntry struct { + ImageTag string `json:"image_tag"` + LastUsed time.Time `json:"last_used"` +} + +func NewImagesGCFromCacheDir(cacheDir string) (*ImagesGC, error) { + return NewImagesGC(filepath.Join(cacheDir, "docker-images-gc.json")) +} + +func NewImagesGC(path string) (*ImagesGC, error) { + d, err := os.Open(path) + if errors.Is(err, fs.ErrNotExist) { + gc := defaultImagesGC() + gc.path = path + return &gc, nil + } + if err != nil { + return nil, err + } + defer d.Close() + dec := json.NewDecoder(d) + var entries []gcEntry + if err := dec.Decode(&entries); err != nil { + return nil, err + } + + gc := defaultImagesGC() + gc.path = path + gc.images = entries + return &gc, nil +} + +// Persist saves the list of images to disk. +func (gc *ImagesGC) Persist() error { + if gc.path == "" { + return errors.New("GC list was not created with a path") + } + if len(gc.images) == 0 { + return nil + } + + err := os.MkdirAll(filepath.Dir(gc.path), 0755) + if err != nil && !errors.Is(err, os.ErrExist) { + return err + } + + d, err := json.Marshal(gc.images) + if err != nil { + return fmt.Errorf("failed to encode list of images: %w", err) + } + return os.WriteFile(gc.path, d, 0644) +} + +// Track images before they are downloaded. Images already present are ignored if they are not already tracked. +func (gc *ImagesGC) Track(images ...string) error { + present, err := gc.client.ListImages() + if err != nil { + return fmt.Errorf("failed to list local Docker images: %w", err) + } + + now := gc.clock() + for _, image := range images { + currentIndex := slices.IndexFunc(gc.images, func(i gcEntry) bool { return i.ImageTag == image }) + if currentIndex >= 0 { + // Already tracked, update last used time. + gc.images[currentIndex].LastUsed = now + continue + } + + if slices.Contains(present, image) { + // Don't track images already present. + continue + } + + gc.images = append(gc.images, gcEntry{ + ImageTag: image, + LastUsed: now, + }) + } + + return nil +} + +// Run runs garbage collection, it removes images according to the rules. +func (gc *ImagesGC) Run() error { + if !gc.Enabled { + return nil + } + + present, err := gc.client.ListImages() + if err != nil { + return fmt.Errorf("failed to list local Docker images: %w", err) + } + + sizeOk := gc.MaxTotalSize == 0 + maxUnused := gc.clock().Add(-gc.MaxUnused) + var images []gcEntry + slices.SortFunc(gc.images, func(a, b gcEntry) int { return a.LastUsed.Compare(b.LastUsed) }) + for i, image := range gc.images { + if !sizeOk { + totalSize, err := gc.client.TotalImagesSize() + if err != nil { + gc.images = append(images, gc.images[i:]...) + return fmt.Errorf("cannot get total images size: %w", err) + } + sizeOk = totalSize <= gc.MaxTotalSize + } + if !sizeOk || image.LastUsed.Before(maxUnused) { + if slices.Contains(present, image.ImageTag) { + err := gc.client.RemoveImage(image.ImageTag) + if errors.Is(err, ErrBusyImage) { + continue + } + if err != nil { + gc.images = append(images, gc.images[i:]...) + return fmt.Errorf("cannot remove image %s: %w", image.ImageTag, err) + } + continue + } + } + + images = append(images, image) + } + + gc.images = images + return nil +} + +type localImagesGCClient struct { +} + +func defaultImagesGCClient() localImagesGCClient { + return localImagesGCClient{} +} + +func (localImagesGCClient) ListImages() ([]string, error) { + cmd := execabs.Command("docker", "image", "list", "--format=json") + errOutput := new(bytes.Buffer) + cmd.Stderr = errOutput + + output, err := cmd.Output() + if err != nil { + return nil, fmt.Errorf("docker image list failed (stderr=%q): %w", errOutput, err) + } + + var line struct { + Repository string `json:"Repository"` + Tag string `json:"Tag"` + } + var result []string + dec := json.NewDecoder(bytes.NewReader(output)) + for dec.More() { + err = dec.Decode(&line) + if err != nil { + return nil, fmt.Errorf("cannot decode output of docker image list: %w", err) + } + result = append(result, line.Repository+":"+line.Tag) + } + + return result, nil +} + +var removeConflictRegexp = regexp.MustCompile("container [^/s]+ is using its referenced image [^/s]+") + +func (localImagesGCClient) RemoveImage(image string) error { + cmd := execabs.Command("docker", "image", "rm", image) + errOutput := new(bytes.Buffer) + cmd.Stderr = errOutput + + err := cmd.Run() + if err != nil { + errMessage := errOutput.String() + if removeConflictRegexp.MatchString(errMessage) { + return ErrBusyImage + } + return fmt.Errorf("%w: %s", err, strings.TrimPrefix(errMessage, "Error response from daemon: ")) + } + + return nil +} + +func (localImagesGCClient) TotalImagesSize() (common.ByteSize, error) { + cmd := execabs.Command("docker", "system", "df", "--format=json") + errOutput := new(bytes.Buffer) + cmd.Stderr = errOutput + + output, err := cmd.Output() + if err != nil { + return 0, fmt.Errorf("docker system df failed (stderr=%q): %w", errOutput, err) + } + + var df struct { + Type string `json:"Type"` + Size common.ByteSize `json:"Size"` + } + dec := json.NewDecoder(bytes.NewReader(output)) + for dec.More() { + err = dec.Decode(&df) + if err != nil { + return 0, fmt.Errorf("cannot decode output of docker system df: %w", err) + } + if df.Type == "Images" { + return df.Size, nil + } + } + + return 0, fmt.Errorf("total images size not found") +} diff --git a/internal/install/application_configuration.go b/internal/install/application_configuration.go index 075761ab9f..0326241f02 100644 --- a/internal/install/application_configuration.go +++ b/internal/install/application_configuration.go @@ -16,6 +16,7 @@ import ( "gopkg.in/yaml.v3" "github.com/elastic/elastic-package/internal/configuration/locations" + "github.com/elastic/elastic-package/internal/docker" "github.com/elastic/elastic-package/internal/environment" "github.com/elastic/elastic-package/internal/logger" "github.com/elastic/elastic-package/internal/profile" @@ -86,7 +87,8 @@ type configFile struct { } type stack struct { - ImageRefOverrides map[string]ImageRefs `yaml:"image_ref_overrides"` + ImageRefOverrides map[string]ImageRefs `yaml:"image_ref_overrides"` + GC docker.ImagesGCConfig `yaml:"gc"` } func checkImageRefOverride(envVar, fallback string) string { @@ -132,6 +134,10 @@ func (ac *ApplicationConfiguration) StackImageRefs() ImageRefs { return refs } +func (ac *ApplicationConfiguration) DockerGCConfig() docker.ImagesGCConfig { + return ac.c.Stack.GC +} + // CurrentProfile returns the current profile, or the default one if not set. func (ac *ApplicationConfiguration) CurrentProfile() string { fromEnv := os.Getenv(ProfileNameEnvVar) @@ -246,6 +252,7 @@ func Configuration(options ...ConfigurationOption) (*ApplicationConfiguration, e } var c configFile + c.Stack.GC = docker.DefaultImagesGCConfig() err = yaml.Unmarshal(cfg, &c) if err != nil { return nil, fmt.Errorf("can't unmarshal configuration file: %w", err) diff --git a/internal/stack/compose.go b/internal/stack/compose.go index 483e638e80..d552e02bcd 100644 --- a/internal/stack/compose.go +++ b/internal/stack/compose.go @@ -10,6 +10,7 @@ import ( "strings" "github.com/elastic/elastic-package/internal/compose" + "github.com/elastic/elastic-package/internal/configuration/locations" "github.com/elastic/elastic-package/internal/docker" "github.com/elastic/elastic-package/internal/install" ) @@ -50,6 +51,34 @@ func (eb *envBuilder) build() []string { return eb.vars } +func runDockerImagesGC(ctx context.Context, project *compose.Project, opts compose.CommandOptions, appConfig *install.ApplicationConfiguration) error { + loc, err := locations.NewLocationManager() + if err != nil { + return err + } + opts = compose.CommandOptions{ + Env: opts.Env, + } + config, err := project.Config(ctx, opts) + if err != nil { + return fmt.Errorf("failed to get project config: %w", err) + } + gc, err := docker.NewImagesGCFromCacheDir(loc.CacheDir("")) + if err != nil { + return fmt.Errorf("failed to open GC data file: %w", err) + } + gc.ImagesGCConfig = appConfig.DockerGCConfig() + err = gc.Track(config.Images()...) + if err != nil { + return fmt.Errorf("failed to track docker images for GC: %w", err) + } + err = gc.Run() + if err != nil { + return fmt.Errorf("failed to run GC: %w", err) + } + return gc.Persist() +} + func dockerComposeBuild(ctx context.Context, options Options) error { c, err := compose.NewProject(DockerComposeProjectName(options.Profile), options.Profile.Path(ProfileStackPath, ComposeFile)) if err != nil { @@ -69,6 +98,10 @@ func dockerComposeBuild(ctx context.Context, options Options) error { build(), Services: withIsReadyServices(withDependentServices(options.Services)), } + err = runDockerImagesGC(ctx, c, opts, appConfig) + if err != nil { + return fmt.Errorf("could not run docker images GC: %w", err) + } if err := c.Build(ctx, opts); err != nil { return fmt.Errorf("running command failed: %w", err) @@ -95,6 +128,10 @@ func dockerComposePull(ctx context.Context, options Options) error { build(), Services: withIsReadyServices(withDependentServices(options.Services)), } + err = runDockerImagesGC(ctx, c, opts, appConfig) + if err != nil { + return fmt.Errorf("could not run docker images GC: %w", err) + } if err := c.Pull(ctx, opts); err != nil { return fmt.Errorf("running command failed: %w", err) @@ -127,6 +164,10 @@ func dockerComposeUp(ctx context.Context, options Options) error { ExtraArgs: args, Services: withIsReadyServices(withDependentServices(options.Services)), } + err = runDockerImagesGC(ctx, c, opts, appConfig) + if err != nil { + return fmt.Errorf("could not run docker images GC: %w", err) + } if err := c.Up(ctx, opts); err != nil { return fmt.Errorf("running command failed: %w", err)