From 43bba3603c442b9c464509a9df272b9a17394dc1 Mon Sep 17 00:00:00 2001 From: Joao Marcal Date: Fri, 24 Nov 2023 16:07:28 +0000 Subject: [PATCH 01/20] Groundwork for azure implementation --- pkg/bloomcompactor/bloomcompactor.go | 2 +- pkg/logcli/query/query.go | 2 + pkg/loki/common/common.go | 7 ++ pkg/loki/config_wrapper.go | 11 +++ pkg/loki/modules.go | 12 +-- pkg/storage/bucket/client.go | 92 +++++++++++++++---- pkg/storage/bucket/filesystem/config.go | 8 +- pkg/storage/bucket/http/config.go | 8 +- pkg/storage/factory.go | 40 +++++--- pkg/storage/factory_test.go | 9 +- pkg/storage/store.go | 20 ++-- .../stores/shipper/bloomshipper/client.go | 3 +- .../boltdb/compactor/util_test.go | 5 +- tools/tsdb/bloom-tester/lib.go | 2 +- tools/tsdb/bloom-tester/readlib.go | 2 +- tools/tsdb/index-analyzer/main.go | 2 +- tools/tsdb/migrate-versions/main.go | 2 +- tools/tsdb/migrate-versions/main_test.go | 3 +- 18 files changed, 161 insertions(+), 69 deletions(-) diff --git a/pkg/bloomcompactor/bloomcompactor.go b/pkg/bloomcompactor/bloomcompactor.go index 9eb7ed11ca5d..7be359dbe062 100644 --- a/pkg/bloomcompactor/bloomcompactor.go +++ b/pkg/bloomcompactor/bloomcompactor.go @@ -128,7 +128,7 @@ func New( } // Configure ObjectClient and IndexShipper for series and chunk management - objectClient, err := storage.NewObjectClient(periodicConfig.ObjectType, storageCfg, clientMetrics) + objectClient, err := storage.NewObjectClient("bloom-compactor", periodicConfig.ObjectType, storageCfg, clientMetrics, r) if err != nil { return nil, fmt.Errorf("error creating object client '%s': %w", periodicConfig.ObjectType, err) } diff --git a/pkg/logcli/query/query.go b/pkg/logcli/query/query.go index fc5be5f393cb..6610d7c8e812 100644 --- a/pkg/logcli/query/query.go +++ b/pkg/logcli/query/query.go @@ -511,9 +511,11 @@ func (q *Query) DoLocalQuery(out output.LogOutput, statistics bool, orgID string func GetObjectClient(store string, conf loki.Config, cm storage.ClientMetrics) (chunk.ObjectClient, error) { oc, err := storage.NewObjectClient( + "log-cli-query", store, conf.StorageConfig, cm, + prometheus.DefaultRegisterer, ) if err != nil { return nil, err diff --git a/pkg/loki/common/common.go b/pkg/loki/common/common.go index 6f7fd1c768ed..e9d5a017c03a 100644 --- a/pkg/loki/common/common.go +++ b/pkg/loki/common/common.go @@ -6,6 +6,7 @@ import ( "github.com/grafana/dskit/flagext" "github.com/grafana/dskit/netutil" + "github.com/grafana/loki/pkg/storage/bucket" "github.com/grafana/loki/pkg/storage/chunk/client/alibaba" "github.com/grafana/loki/pkg/storage/chunk/client/aws" "github.com/grafana/loki/pkg/storage/chunk/client/azure" @@ -58,6 +59,8 @@ func (c *Config) RegisterFlags(f *flag.FlagSet) { c.Ring.RegisterFlagsWithPrefix("common.storage.", "collectors/", f) c.Ring.RegisterFlagsWithPrefix("common.storage.", "collectors/", throwaway) + f.BoolVar(&c.Storage.ThanosObjStore, "common.thanos.enable", false, "Enable the thanos.io/objstore to be the backend for object storage") + // instance related flags. 
c.InstanceInterfaceNames = netutil.PrivateNetworkInterfacesWithFallback([]string{"eth0", "en0"}, util_log.Logger) throwaway.StringVar(&c.InstanceAddr, "common.instance-addr", "", "Default advertised address to be used by Loki components.") @@ -78,6 +81,8 @@ type Storage struct { Hedging hedging.Config `yaml:"hedging"` COS ibmcloud.COSConfig `yaml:"cos"` CongestionControl congestion.Config `yaml:"congestion_control,omitempty"` + ThanosObjStore bool `yaml:"thanos_objstore"` + ObjStoreConf bucket.Config `yaml:"objstore_config"` } func (s *Storage) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { @@ -91,6 +96,8 @@ func (s *Storage) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { s.Hedging.RegisterFlagsWithPrefix(prefix, f) s.COS.RegisterFlagsWithPrefix(prefix, f) s.CongestionControl.RegisterFlagsWithPrefix(prefix, f) + + s.ObjStoreConf.RegisterFlagsWithPrefix(prefix+"thanos.", f) } type FilesystemConfig struct { diff --git a/pkg/loki/config_wrapper.go b/pkg/loki/config_wrapper.go index 41a87775a9ec..efe378741b9b 100644 --- a/pkg/loki/config_wrapper.go +++ b/pkg/loki/config_wrapper.go @@ -564,6 +564,17 @@ func applyStorageConfig(cfg, defaults *ConfigWrapper) error { } } + if !reflect.DeepEqual(cfg.Common.Storage.ObjStoreConf, defaults.StorageConfig.ObjStoreConf) { + configsFound++ + + applyConfig = func(r *ConfigWrapper) { + //TODO (JoaoBraveCoding) Add Ruler config + r.StorageConfig.ThanosObjStore = r.Common.Storage.ThanosObjStore + r.StorageConfig.ObjStoreConf = r.Common.Storage.ObjStoreConf + r.StorageConfig.Hedging = r.Common.Storage.Hedging + } + } + if configsFound > 1 { return ErrTooManyStorageConfigs } diff --git a/pkg/loki/modules.go b/pkg/loki/modules.go index bf450f852be5..5356483a18e2 100644 --- a/pkg/loki/modules.go +++ b/pkg/loki/modules.go @@ -608,9 +608,7 @@ func (t *Loki) initTableManager() (services.Service, error) { os.Exit(1) } - reg := prometheus.WrapRegistererWith(prometheus.Labels{"component": "table-manager-store"}, prometheus.DefaultRegisterer) - - tableClient, err := storage.NewTableClient(lastConfig.IndexType, *lastConfig, t.Cfg.StorageConfig, t.clientMetrics, reg, util_log.Logger) + tableClient, err := storage.NewTableClient("table-manager-store", lastConfig.IndexType, *lastConfig, t.Cfg.StorageConfig, t.clientMetrics, prometheus.DefaultRegisterer, util_log.Logger) if err != nil { return nil, err } @@ -1211,7 +1209,7 @@ func (t *Loki) initCompactor() (services.Service, error) { continue } - objectClient, err := storage.NewObjectClient(periodConfig.ObjectType, t.Cfg.StorageConfig, t.clientMetrics) + objectClient, err := storage.NewObjectClient("compactor", periodConfig.ObjectType, t.Cfg.StorageConfig, t.clientMetrics, prometheus.DefaultRegisterer) if err != nil { return nil, fmt.Errorf("failed to create object client: %w", err) } @@ -1222,7 +1220,7 @@ func (t *Loki) initCompactor() (services.Service, error) { var deleteRequestStoreClient client.ObjectClient if t.Cfg.CompactorConfig.RetentionEnabled { if deleteStore := t.Cfg.CompactorConfig.DeleteRequestStore; deleteStore != "" { - if deleteRequestStoreClient, err = storage.NewObjectClient(deleteStore, t.Cfg.StorageConfig, t.clientMetrics); err != nil { + if deleteRequestStoreClient, err = storage.NewObjectClient("compactor", deleteStore, t.Cfg.StorageConfig, t.clientMetrics, prometheus.DefaultRegisterer); err != nil { return nil, fmt.Errorf("failed to create delete request store object client: %w", err) } } else { @@ -1315,7 +1313,7 @@ func (t *Loki) initIndexGateway() (services.Service, error) { } 
tableRange := period.GetIndexTableNumberRange(periodEndTime) - indexClient, err := storage.NewIndexClient(period, tableRange, t.Cfg.StorageConfig, t.Cfg.SchemaConfig, t.Overrides, t.clientMetrics, shardingStrategy, + indexClient, err := storage.NewIndexClient("index-gateway", period, tableRange, t.Cfg.StorageConfig, t.Cfg.SchemaConfig, t.Overrides, t.clientMetrics, shardingStrategy, prometheus.DefaultRegisterer, log.With(util_log.Logger, "index-store", fmt.Sprintf("%s-%s", period.IndexType, period.From.String())), t.Cfg.MetricsNamespace, ) if err != nil { @@ -1515,7 +1513,7 @@ func (t *Loki) initAnalytics() (services.Service, error) { return nil, err } - objectClient, err := storage.NewObjectClient(period.ObjectType, t.Cfg.StorageConfig, t.clientMetrics) + objectClient, err := storage.NewObjectClient("analytics", period.ObjectType, t.Cfg.StorageConfig, t.clientMetrics, prometheus.DefaultRegisterer) if err != nil { level.Info(util_log.Logger).Log("msg", "failed to initialize usage report", "err", err) return nil, nil diff --git a/pkg/storage/bucket/client.go b/pkg/storage/bucket/client.go index 57751afe3654..0cc8897b3d92 100644 --- a/pkg/storage/bucket/client.go +++ b/pkg/storage/bucket/client.go @@ -5,12 +5,13 @@ import ( "errors" "flag" "fmt" + "regexp" "strings" "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/thanos-io/objstore" - opentracing "github.com/thanos-io/objstore/tracing/opentracing" + objstoretracing "github.com/thanos-io/objstore/tracing/opentracing" "github.com/grafana/loki/pkg/storage/bucket/azure" "github.com/grafana/loki/pkg/storage/bucket/filesystem" @@ -35,17 +36,22 @@ const ( // Filesystem is the value for the filesystem storage backend. Filesystem = "filesystem" + + // validPrefixCharactersRegex allows only alphanumeric characters to prevent subtle bugs and simplify validation + validPrefixCharactersRegex = `^[\da-zA-Z]+$` ) var ( SupportedBackends = []string{S3, GCS, Azure, Swift, Filesystem} - ErrUnsupportedStorageBackend = errors.New("unsupported storage backend") + ErrUnsupportedStorageBackend = errors.New("unsupported storage backend") + ErrInvalidCharactersInStoragePrefix = errors.New("storage prefix contains invalid characters, it may only contain digits and English alphabet letters") ) -// Config holds configuration for accessing long-term storage. -type Config struct { +// StorageBackendConfig holds configuration for accessing long-term storage. +type StorageBackendConfig struct { Backend string `yaml:"backend"` + // Backends S3 s3.Config `yaml:"s3"` GCS gcs.Config `yaml:"gcs"` @@ -53,36 +59,36 @@ type Config struct { Swift swift.Config `yaml:"swift"` Filesystem filesystem.Config `yaml:"filesystem"` - // Not used internally, meant to allow callers to wrap Buckets - // created using this config - Middlewares []func(objstore.Bucket) (objstore.Bucket, error) `yaml:"-"` - // Used to inject additional backends into the config. Allows for this config to // be embedded in multiple contexts and support non-object storage based backends. ExtraBackends []string `yaml:"-"` } // Returns the supportedBackends for the package and any custom backends injected into the config. -func (cfg *Config) supportedBackends() []string { +func (cfg *StorageBackendConfig) supportedBackends() []string { return append(SupportedBackends, cfg.ExtraBackends...) } // RegisterFlags registers the backend storage config. 
-func (cfg *Config) RegisterFlags(f *flag.FlagSet) { +func (cfg *StorageBackendConfig) RegisterFlags(f *flag.FlagSet) { cfg.RegisterFlagsWithPrefix("", f) } -func (cfg *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { +func (cfg *StorageBackendConfig) RegisterFlagsWithPrefixAndDefaultDirectory(prefix, dir string, f *flag.FlagSet) { cfg.S3.RegisterFlagsWithPrefix(prefix, f) cfg.GCS.RegisterFlagsWithPrefix(prefix, f) cfg.Azure.RegisterFlagsWithPrefix(prefix, f) cfg.Swift.RegisterFlagsWithPrefix(prefix, f) - cfg.Filesystem.RegisterFlagsWithPrefix(prefix, f) + cfg.Filesystem.RegisterFlagsWithPrefixAndDefaultDirectory(prefix, dir, f) - f.StringVar(&cfg.Backend, prefix+"backend", S3, fmt.Sprintf("Backend storage to use. Supported backends are: %s.", strings.Join(cfg.supportedBackends(), ", "))) + f.StringVar(&cfg.Backend, prefix+"backend", Filesystem, fmt.Sprintf("Backend storage to use. Supported backends are: %s.", strings.Join(cfg.supportedBackends(), ", "))) } -func (cfg *Config) Validate() error { +func (cfg *StorageBackendConfig) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { + cfg.RegisterFlagsWithPrefixAndDefaultDirectory(prefix, "", f) +} + +func (cfg *StorageBackendConfig) Validate() error { if !util.StringsContain(cfg.supportedBackends(), cfg.Backend) { return ErrUnsupportedStorageBackend } @@ -96,8 +102,49 @@ func (cfg *Config) Validate() error { return nil } +// Config holds configuration for accessing long-term storage. +type Config struct { + StorageBackendConfig `yaml:",inline"` + + StoragePrefix string `yaml:"storage_prefix"` + + // Not used internally, meant to allow callers to wrap Buckets + // created using this config + Middlewares []func(objstore.InstrumentedBucket) (objstore.InstrumentedBucket, error) `yaml:"-"` +} + +// RegisterFlags registers the backend storage config. +func (cfg *Config) RegisterFlags(f *flag.FlagSet) { + cfg.RegisterFlagsWithPrefix("", f) +} + +func (cfg *Config) RegisterFlagsWithPrefixAndDefaultDirectory(prefix, dir string, f *flag.FlagSet) { + cfg.StorageBackendConfig.RegisterFlagsWithPrefixAndDefaultDirectory(prefix, dir, f) + f.StringVar(&cfg.StoragePrefix, prefix+"storage-prefix", "", "Prefix for all objects stored in the backend storage. 
For simplicity, it may only contain digits and English alphabet letters.") +} + +func (cfg *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { + cfg.RegisterFlagsWithPrefixAndDefaultDirectory(prefix, "", f) +} + +func (cfg *Config) Validate() error { + if cfg.StoragePrefix != "" { + acceptablePrefixCharacters := regexp.MustCompile(validPrefixCharactersRegex) + if !acceptablePrefixCharacters.MatchString(cfg.StoragePrefix) { + return ErrInvalidCharactersInStoragePrefix + } + } + + return cfg.StorageBackendConfig.Validate() +} + // NewClient creates a new bucket client based on the configured backend -func NewClient(ctx context.Context, cfg Config, name string, logger log.Logger, reg prometheus.Registerer) (client objstore.Bucket, err error) { +func NewClient(ctx context.Context, cfg Config, name string, logger log.Logger, reg prometheus.Registerer) (objstore.InstrumentedBucket, error) { + var ( + client objstore.Bucket + err error + ) + switch cfg.Backend { case S3: client, err = s3.NewBucketClient(cfg.S3, name, logger) @@ -117,17 +164,21 @@ func NewClient(ctx context.Context, cfg Config, name string, logger log.Logger, return nil, err } - client = opentracing.WrapWithTraces(bucketWithMetrics(client, name, reg)) + if cfg.StoragePrefix != "" { + client = NewPrefixedBucketClient(client, cfg.StoragePrefix) + } + + instrumentedClient := objstoretracing.WrapWithTraces(bucketWithMetrics(client, name, reg)) // Wrap the client with any provided middleware for _, wrap := range cfg.Middlewares { - client, err = wrap(client) + instrumentedClient, err = wrap(instrumentedClient) if err != nil { return nil, err } } - return client, nil + return instrumentedClient, nil } func bucketWithMetrics(bucketClient objstore.Bucket, name string, reg prometheus.Registerer) objstore.Bucket { @@ -135,8 +186,11 @@ func bucketWithMetrics(bucketClient objstore.Bucket, name string, reg prometheus return bucketClient } + reg = prometheus.WrapRegistererWithPrefix("loki_", reg) + reg = prometheus.WrapRegistererWith(prometheus.Labels{"component": name}, reg) + return objstore.WrapWithMetrics( bucketClient, - prometheus.WrapRegistererWith(prometheus.Labels{"component": name}, reg), + reg, "") } diff --git a/pkg/storage/bucket/filesystem/config.go b/pkg/storage/bucket/filesystem/config.go index 923923a03290..873a2eb1ba28 100644 --- a/pkg/storage/bucket/filesystem/config.go +++ b/pkg/storage/bucket/filesystem/config.go @@ -12,7 +12,13 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { cfg.RegisterFlagsWithPrefix("", f) } +// RegisterFlagsWithPrefixAndDefaultDirectory registers the flags for filesystem +// storage with the provided prefix and sets the default directory to dir. 
+func (cfg *Config) RegisterFlagsWithPrefixAndDefaultDirectory(prefix, dir string, f *flag.FlagSet) { + f.StringVar(&cfg.Directory, prefix+"filesystem.dir", dir, "Local filesystem storage directory.") +} + // RegisterFlagsWithPrefix registers the flags for filesystem storage with the provided prefix func (cfg *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { - f.StringVar(&cfg.Directory, prefix+"filesystem.dir", "", "Local filesystem storage directory.") + cfg.RegisterFlagsWithPrefixAndDefaultDirectory(prefix, "", f) } diff --git a/pkg/storage/bucket/http/config.go b/pkg/storage/bucket/http/config.go index 1c83e1f311bb..509de0bf301f 100644 --- a/pkg/storage/bucket/http/config.go +++ b/pkg/storage/bucket/http/config.go @@ -15,6 +15,7 @@ type Config struct { MaxIdleConns int `yaml:"max_idle_connections"` MaxIdleConnsPerHost int `yaml:"max_idle_connections_per_host"` MaxConnsPerHost int `yaml:"max_connections_per_host"` + CAFile string `yaml:"ca_file"` } // RegisterFlags registers the flags for the storage HTTP client. @@ -24,12 +25,13 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { // RegisterFlagsWithPrefix registers the flags for the storage HTTP client with the provided prefix. func (cfg *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { - f.DurationVar(&cfg.IdleConnTimeout, prefix+"http.idle-conn-timeout", 90*time.Second, "The time an idle connection will remain idle before closing.") - f.DurationVar(&cfg.ResponseHeaderTimeout, prefix+"http.response-header-timeout", 2*time.Minute, "The amount of time the client will wait for a servers response headers.") - f.BoolVar(&cfg.InsecureSkipVerify, prefix+"http.insecure-skip-verify", false, "If the client connects via HTTPS and this option is enabled, the client will accept any certificate and hostname.") + f.DurationVar(&cfg.IdleConnTimeout, prefix+"idle-conn-timeout", 90*time.Second, "The time an idle connection will remain idle before closing.") + f.DurationVar(&cfg.ResponseHeaderTimeout, prefix+"response-header-timeout", 2*time.Minute, "The amount of time the client will wait for a servers response headers.") + f.BoolVar(&cfg.InsecureSkipVerify, prefix+"insecure-skip-verify", false, "If the client connects via HTTPS and this option is enabled, the client will accept any certificate and hostname.") f.DurationVar(&cfg.TLSHandshakeTimeout, prefix+"tls-handshake-timeout", 10*time.Second, "Maximum time to wait for a TLS handshake. 0 means no limit.") f.DurationVar(&cfg.ExpectContinueTimeout, prefix+"expect-continue-timeout", 1*time.Second, "The time to wait for a server's first response headers after fully writing the request headers if the request has an Expect header. 0 to send the request body immediately.") f.IntVar(&cfg.MaxIdleConns, prefix+"max-idle-connections", 100, "Maximum number of idle (keep-alive) connections across all hosts. 0 means no limit.") f.IntVar(&cfg.MaxIdleConnsPerHost, prefix+"max-idle-connections-per-host", 100, "Maximum number of idle (keep-alive) connections to keep per-host. If 0, a built-in default value is used.") f.IntVar(&cfg.MaxConnsPerHost, prefix+"max-connections-per-host", 0, "Maximum number of connections per host. 
0 means no limit.") + f.StringVar(&cfg.CAFile, prefix+"ca-file", "", "Path to the trusted CA file that signed the SSL certificate of the object storage endpoint.") } diff --git a/pkg/storage/factory.go b/pkg/storage/factory.go index da687c5ea9c7..5cefa4137bef 100644 --- a/pkg/storage/factory.go +++ b/pkg/storage/factory.go @@ -14,6 +14,7 @@ import ( "github.com/grafana/dskit/flagext" + "github.com/grafana/loki/pkg/storage/bucket" "github.com/grafana/loki/pkg/storage/chunk/cache" "github.com/grafana/loki/pkg/storage/chunk/client" "github.com/grafana/loki/pkg/storage/chunk/client/alibaba" @@ -333,6 +334,9 @@ type Config struct { DisableBroadIndexQueries bool `yaml:"disable_broad_index_queries"` MaxParallelGetChunk int `yaml:"max_parallel_get_chunk"` + ThanosObjStore bool `yaml:"thanos_objstore"` + ObjStoreConf bucket.Config `yaml:"objstore_config"` + MaxChunkBatchSize int `yaml:"max_chunk_batch_size"` BoltDBShipperConfig boltdb.IndexCfg `yaml:"boltdb_shipper" doc:"description=Configures storing index in an Object Store (GCS/S3/Azure/Swift/COS/Filesystem) in the form of boltdb files. Required fields only required when boltdb-shipper is defined in config."` TSDBShipperConfig indexshipper.Config `yaml:"tsdb_shipper" doc:"description=Configures storing index in an Object Store (GCS/S3/Azure/Swift/COS/Filesystem) in a prometheus TSDB-like format. Required fields only required when TSDB is defined in config."` @@ -360,6 +364,9 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { cfg.Hedging.RegisterFlagsWithPrefix("store.", f) cfg.CongestionControl.RegisterFlagsWithPrefix("store.", f) + f.BoolVar(&cfg.ThanosObjStore, "thanos.enable", false, "Enable the thanos.io/objstore to be the backend for object storage") + cfg.ObjStoreConf.RegisterFlagsWithPrefix("thanos.", f) + cfg.IndexQueriesCacheConfig.RegisterFlagsWithPrefix("store.index-cache-read.", "", f) f.DurationVar(&cfg.IndexCacheValidity, "store.index-cache-validity", 5*time.Minute, "Cache validity for active index entries. Should be no higher than -ingester.max-chunk-idle.") f.StringVar(&cfg.ObjectPrefix, "store.object-prefix", "", "The prefix to all keys inserted in object storage. 
Example: loki-instances/west/") @@ -398,11 +405,15 @@ func (cfg *Config) Validate() error { return errors.Wrap(err, "invalid bloom shipper config") } + if err := cfg.ObjStoreConf.Validate(); err != nil { + return err + } + return cfg.NamedStores.Validate() } // NewIndexClient creates a new index client of the desired type specified in the PeriodConfig -func NewIndexClient(periodCfg config.PeriodConfig, tableRange config.TableRange, cfg Config, schemaCfg config.SchemaConfig, limits StoreLimits, cm ClientMetrics, shardingStrategy indexgateway.ShardingStrategy, registerer prometheus.Registerer, logger log.Logger, metricsNamespace string) (index.Client, error) { +func NewIndexClient(component string, periodCfg config.PeriodConfig, tableRange config.TableRange, cfg Config, schemaCfg config.SchemaConfig, limits StoreLimits, cm ClientMetrics, shardingStrategy indexgateway.ShardingStrategy, registerer prometheus.Registerer, logger log.Logger, metricsNamespace string) (index.Client, error) { switch true { case util.StringsContain(testingStorageTypes, periodCfg.IndexType): @@ -433,7 +444,7 @@ func NewIndexClient(periodCfg config.PeriodConfig, tableRange config.TableRange, return client, nil } - objectClient, err := NewObjectClient(periodCfg.ObjectType, cfg, cm) + objectClient, err := NewObjectClient(component, periodCfg.ObjectType, cfg, cm, registerer) if err != nil { return nil, err } @@ -494,7 +505,7 @@ func NewIndexClient(periodCfg config.PeriodConfig, tableRange config.TableRange, } // NewChunkClient makes a new chunk.Client of the desired types. -func NewChunkClient(name string, cfg Config, schemaCfg config.SchemaConfig, cc congestion.Controller, registerer prometheus.Registerer, clientMetrics ClientMetrics, logger log.Logger) (client.Client, error) { +func NewChunkClient(component, name string, cfg Config, schemaCfg config.SchemaConfig, cc congestion.Controller, registerer prometheus.Registerer, clientMetrics ClientMetrics, logger log.Logger) (client.Client, error) { var storeType = name // lookup storeType for named stores @@ -507,7 +518,7 @@ func NewChunkClient(name string, cfg Config, schemaCfg config.SchemaConfig, cc c case util.StringsContain(testingStorageTypes, storeType): switch storeType { case config.StorageTypeInMemory: - c, err := NewObjectClient(name, cfg, clientMetrics) + c, err := NewObjectClient(component, name, cfg, clientMetrics, registerer) if err != nil { return nil, err } @@ -517,21 +528,21 @@ func NewChunkClient(name string, cfg Config, schemaCfg config.SchemaConfig, cc c case util.StringsContain(supportedStorageTypes, storeType): switch storeType { case config.StorageTypeFileSystem: - c, err := NewObjectClient(name, cfg, clientMetrics) + c, err := NewObjectClient(component, name, cfg, clientMetrics, registerer) if err != nil { return nil, err } return client.NewClientWithMaxParallel(c, client.FSEncoder, cfg.MaxParallelGetChunk, schemaCfg), nil case config.StorageTypeAWS, config.StorageTypeS3, config.StorageTypeAzure, config.StorageTypeBOS, config.StorageTypeSwift, config.StorageTypeCOS, config.StorageTypeAlibabaCloud: - c, err := NewObjectClient(name, cfg, clientMetrics) + c, err := NewObjectClient(component, name, cfg, clientMetrics, registerer) if err != nil { return nil, err } return client.NewClientWithMaxParallel(c, nil, cfg.MaxParallelGetChunk, schemaCfg), nil case config.StorageTypeGCS: - c, err := NewObjectClient(name, cfg, clientMetrics) + c, err := NewObjectClient(component, name, cfg, clientMetrics, registerer) if err != nil { return nil, err } @@ -572,7 
+583,7 @@ func NewChunkClient(name string, cfg Config, schemaCfg config.SchemaConfig, cc c
 }

 // NewTableClient makes a new table client based on the configuration.
-func NewTableClient(name string, periodCfg config.PeriodConfig, cfg Config, cm ClientMetrics, registerer prometheus.Registerer, logger log.Logger) (index.TableClient, error) {
+func NewTableClient(component, name string, periodCfg config.PeriodConfig, cfg Config, cm ClientMetrics, registerer prometheus.Registerer, logger log.Logger) (index.TableClient, error) {
 	switch true {
 	case util.StringsContain(testingStorageTypes, name):
 		switch name {
@@ -581,7 +592,7 @@ func NewTableClient(name string, periodCfg config.PeriodConfig, cfg Config, cm C
 		}

 	case util.StringsContain(supportedIndexTypes, name):
-		objectClient, err := NewObjectClient(periodCfg.ObjectType, cfg, cm)
+		objectClient, err := NewObjectClient(component, periodCfg.ObjectType, cfg, cm, registerer)
 		if err != nil {
 			return nil, err
 		}
@@ -636,8 +647,8 @@ func (c *ClientMetrics) Unregister() {
 }

 // NewObjectClient makes a new StorageClient with the prefix in the front.
-func NewObjectClient(name string, cfg Config, clientMetrics ClientMetrics) (client.ObjectClient, error) {
-	actual, err := internalNewObjectClient(name, cfg, clientMetrics)
+func NewObjectClient(component, name string, cfg Config, clientMetrics ClientMetrics, reg prometheus.Registerer) (client.ObjectClient, error) {
+	actual, err := internalNewObjectClient(component, name, cfg, clientMetrics, reg)
 	if err != nil {
 		return nil, err
 	}
@@ -651,12 +662,17 @@ func NewObjectClient(name string, cfg Config, clientMetrics ClientMetrics) (clie
 }

 // internalNewObjectClient makes the underlying StorageClient of the desired types.
-func internalNewObjectClient(name string, cfg Config, clientMetrics ClientMetrics) (client.ObjectClient, error) {
+func internalNewObjectClient(component, name string, cfg Config, clientMetrics ClientMetrics, reg prometheus.Registerer) (client.ObjectClient, error) {
 	var (
 		namedStore string
 		storeType  = name
 	)

+	// preserve the old registerer behaviour
+	if !cfg.ThanosObjStore {
+		reg = prometheus.WrapRegistererWith(prometheus.Labels{"component": component}, reg)
+	}
+
 	// lookup storeType for named stores
 	if nsType, ok := cfg.NamedStores.storeType[name]; ok {
 		storeType = nsType
diff --git a/pkg/storage/factory_test.go b/pkg/storage/factory_test.go
index 2588c9dc69dd..242f99abc9bf 100644
--- a/pkg/storage/factory_test.go
+++ b/pkg/storage/factory_test.go
@@ -8,6 +8,7 @@ import (

 	"github.com/go-kit/log"
 	"github.com/grafana/dskit/flagext"
+	"github.com/prometheus/client_golang/prometheus"
 	"github.com/prometheus/common/model"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
@@ -233,7 +234,7 @@ func TestNewObjectClient_prefixing(t *testing.T) {
 		var cfg Config
 		flagext.DefaultValues(&cfg)

-		objectClient, err := NewObjectClient("inmemory", cfg, cm)
+		objectClient, err := NewObjectClient("inmemory", "inmemory", cfg, cm, prometheus.NewRegistry())
 		require.NoError(t, err)

 		_, ok := objectClient.(client.PrefixedObjectClient)
@@ -245,7 +246,7 @@ func TestNewObjectClient_prefixing(t *testing.T) {
 		flagext.DefaultValues(&cfg)
 		cfg.ObjectPrefix = "my/prefix/"

-		objectClient, err := NewObjectClient("inmemory", cfg, cm)
+		objectClient, err := NewObjectClient("inmemory", "inmemory", cfg, cm, prometheus.NewRegistry())
 		require.NoError(t, err)

 		prefixed, ok := objectClient.(client.PrefixedObjectClient)
@@ -258,7 +259,7 @@ func TestNewObjectClient_prefixing(t *testing.T) {
 		flagext.DefaultValues(&cfg)
cfg.ObjectPrefix = "my/prefix" - objectClient, err := NewObjectClient("inmemory", cfg, cm) + objectClient, err := NewObjectClient("inmemory", "inmemory", cfg, cm, prometheus.NewRegistry()) require.NoError(t, err) prefixed, ok := objectClient.(client.PrefixedObjectClient) @@ -271,7 +272,7 @@ func TestNewObjectClient_prefixing(t *testing.T) { flagext.DefaultValues(&cfg) cfg.ObjectPrefix = "/my/prefix/" - objectClient, err := NewObjectClient("inmemory", cfg, cm) + objectClient, err := NewObjectClient("inmemory", "inmemory", cfg, cm, prometheus.NewRegistry()) require.NoError(t, err) prefixed, ok := objectClient.(client.PrefixedObjectClient) diff --git a/pkg/storage/store.go b/pkg/storage/store.go index 6781dbbff8a3..a71af3dc3d78 100644 --- a/pkg/storage/store.go +++ b/pkg/storage/store.go @@ -216,9 +216,6 @@ func (s *LokiStore) chunkClientForPeriod(p config.PeriodConfig) (client.Client, if objectStoreType == "" { objectStoreType = p.IndexType } - chunkClientReg := prometheus.WrapRegistererWith( - prometheus.Labels{"component": "chunk-store-" + p.From.String()}, s.registerer) - var cc congestion.Controller ccCfg := s.cfg.CongestionControl @@ -230,7 +227,8 @@ func (s *LokiStore) chunkClientForPeriod(p config.PeriodConfig) (client.Client, ) } - chunks, err := NewChunkClient(objectStoreType, s.cfg, s.schemaCfg, cc, chunkClientReg, s.clientMetrics, s.logger) + component := "chunk-store-" + p.From.String() + chunks, err := NewChunkClient(component, objectStoreType, s.cfg, s.schemaCfg, cc, s.registerer, s.clientMetrics, s.logger) if err != nil { return nil, errors.Wrap(err, "error creating object client") } @@ -253,14 +251,8 @@ func shouldUseIndexGatewayClient(cfg indexshipper.Config) bool { } func (s *LokiStore) storeForPeriod(p config.PeriodConfig, tableRange config.TableRange, chunkClient client.Client, f *fetcher.Fetcher) (stores.ChunkWriter, index.ReaderWriter, func(), error) { - indexClientReg := prometheus.WrapRegistererWith( - prometheus.Labels{ - "component": fmt.Sprintf( - "index-store-%s-%s", - p.IndexType, - p.From.String(), - ), - }, s.registerer) + component := fmt.Sprintf("index-store-%s-%s", p.IndexType, p.From.String()) + indexClientReg := prometheus.WrapRegistererWith(prometheus.Labels{"component": component}, s.registerer) indexClientLogger := log.With(s.logger, "index-store", fmt.Sprintf("%s-%s", p.IndexType, p.From.String())) if p.IndexType == config.TSDBType { @@ -278,7 +270,7 @@ func (s *LokiStore) storeForPeriod(p config.PeriodConfig, tableRange config.Tabl }, nil } - objectClient, err := NewObjectClient(p.ObjectType, s.cfg, s.clientMetrics) + objectClient, err := NewObjectClient(component, p.ObjectType, s.cfg, s.clientMetrics, s.registerer) if err != nil { return nil, nil, nil, err } @@ -301,7 +293,7 @@ func (s *LokiStore) storeForPeriod(p config.PeriodConfig, tableRange config.Tabl }, nil } - idx, err := NewIndexClient(p, tableRange, s.cfg, s.schemaCfg, s.limits, s.clientMetrics, nil, indexClientReg, indexClientLogger, s.metricsNamespace) + idx, err := NewIndexClient(component, p, tableRange, s.cfg, s.schemaCfg, s.limits, s.clientMetrics, nil, s.registerer, indexClientLogger, s.metricsNamespace) if err != nil { return nil, nil, nil, errors.Wrap(err, "error creating index client") } diff --git a/pkg/storage/stores/shipper/bloomshipper/client.go b/pkg/storage/stores/shipper/bloomshipper/client.go index d1e9f24ef866..3d8abafa13ea 100644 --- a/pkg/storage/stores/shipper/bloomshipper/client.go +++ b/pkg/storage/stores/shipper/bloomshipper/client.go @@ -11,6 +11,7 @@ import ( 
"strings" "time" + "github.com/prometheus/client_golang/prometheus" "github.com/grafana/dskit/concurrency" "github.com/prometheus/common/model" @@ -104,7 +105,7 @@ type Client interface { func NewBloomClient(periodicConfigs []config.PeriodConfig, storageConfig storage.Config, clientMetrics storage.ClientMetrics) (*BloomClient, error) { periodicObjectClients := make(map[config.DayTime]client.ObjectClient) for _, periodicConfig := range periodicConfigs { - objectClient, err := storage.NewObjectClient(periodicConfig.ObjectType, storageConfig, clientMetrics) + objectClient, err := storage.NewObjectClient("bloom-shipper", periodicConfig.ObjectType, storageConfig, clientMetrics, prometheus.DefaultRegisterer) if err != nil { return nil, fmt.Errorf("error creating object client '%s': %w", periodicConfig.ObjectType, err) } diff --git a/pkg/storage/stores/shipper/indexshipper/boltdb/compactor/util_test.go b/pkg/storage/stores/shipper/indexshipper/boltdb/compactor/util_test.go index 12dbfb8d9a7d..a716f5522041 100644 --- a/pkg/storage/stores/shipper/indexshipper/boltdb/compactor/util_test.go +++ b/pkg/storage/stores/shipper/indexshipper/boltdb/compactor/util_test.go @@ -10,6 +10,7 @@ import ( ww "github.com/grafana/dskit/server" "github.com/grafana/dskit/user" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/model/labels" "github.com/stretchr/testify/require" @@ -128,11 +129,11 @@ type testObjectClient struct { } func newTestObjectClient(path string, clientMetrics storage.ClientMetrics) client.ObjectClient { - c, err := storage.NewObjectClient("filesystem", storage.Config{ + c, err := storage.NewObjectClient("compactor", "filesystem", storage.Config{ FSConfig: local.FSConfig{ Directory: path, }, - }, clientMetrics) + }, clientMetrics, prometheus.NewRegistry()) if err != nil { panic(err) } diff --git a/tools/tsdb/bloom-tester/lib.go b/tools/tsdb/bloom-tester/lib.go index 5b997d903a37..efe6e25d46eb 100644 --- a/tools/tsdb/bloom-tester/lib.go +++ b/tools/tsdb/bloom-tester/lib.go @@ -52,7 +52,7 @@ func execute() { periodCfg, tableRange, tableName, err := helpers.GetPeriodConfigForTableNumber(bucket, conf.SchemaConfig.Configs) helpers.ExitErr("find period config for bucket", err) - objectClient, err := storage.NewObjectClient(periodCfg.ObjectType, conf.StorageConfig, clientMetrics) + objectClient, err := storage.NewObjectClient("bloom-tester", periodCfg.ObjectType, conf.StorageConfig, clientMetrics, prometheus.DefaultRegisterer) helpers.ExitErr("creating object client", err) chunkClient := client.NewClient(objectClient, nil, conf.SchemaConfig) diff --git a/tools/tsdb/bloom-tester/readlib.go b/tools/tsdb/bloom-tester/readlib.go index 6be3b767ec63..8b7b6f38094b 100644 --- a/tools/tsdb/bloom-tester/readlib.go +++ b/tools/tsdb/bloom-tester/readlib.go @@ -65,7 +65,7 @@ func executeRead() { periodCfg, tableRange, tableName, err := helpers.GetPeriodConfigForTableNumber(bucket, conf.SchemaConfig.Configs) helpers.ExitErr("find period config for bucket", err) - objectClient, err := storage.NewObjectClient(periodCfg.ObjectType, conf.StorageConfig, clientMetrics) + objectClient, err := storage.NewObjectClient("bloom-tester", periodCfg.ObjectType, conf.StorageConfig, clientMetrics, prometheus.DefaultRegisterer) helpers.ExitErr("creating object client", err) chunkClient := client.NewClient(objectClient, nil, conf.SchemaConfig) diff --git a/tools/tsdb/index-analyzer/main.go b/tools/tsdb/index-analyzer/main.go index fd59bd4792fd..87162630675b 100644 
--- a/tools/tsdb/index-analyzer/main.go +++ b/tools/tsdb/index-analyzer/main.go @@ -24,7 +24,7 @@ func main() { periodCfg, tableRange, tableName, err := helpers.GetPeriodConfigForTableNumber(bucket, conf.SchemaConfig.Configs) helpers.ExitErr("find period config for bucket", err) - objectClient, err := storage.NewObjectClient(periodCfg.ObjectType, conf.StorageConfig, clientMetrics) + objectClient, err := storage.NewObjectClient("index-analyzer", periodCfg.ObjectType, conf.StorageConfig, clientMetrics, prometheus.DefaultRegisterer) helpers.ExitErr("creating object client", err) shipper, err := indexshipper.NewIndexShipper( diff --git a/tools/tsdb/migrate-versions/main.go b/tools/tsdb/migrate-versions/main.go index b458c80d4c1b..bb9c3b8f23bc 100644 --- a/tools/tsdb/migrate-versions/main.go +++ b/tools/tsdb/migrate-versions/main.go @@ -97,7 +97,7 @@ func main() { } func migrateTables(pCfg config.PeriodConfig, storageCfg storage.Config, clientMetrics storage.ClientMetrics, tableRange config.TableRange) error { - objClient, err := storage.NewObjectClient(pCfg.ObjectType, storageCfg, clientMetrics) + objClient, err := storage.NewObjectClient("tables-migration-tool", pCfg.ObjectType, storageCfg, clientMetrics, prometheus.DefaultRegisterer) if err != nil { return err } diff --git a/tools/tsdb/migrate-versions/main_test.go b/tools/tsdb/migrate-versions/main_test.go index 2f4690fde0a7..28c0c1a2358c 100644 --- a/tools/tsdb/migrate-versions/main_test.go +++ b/tools/tsdb/migrate-versions/main_test.go @@ -10,6 +10,7 @@ import ( "testing" "time" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/model/labels" "github.com/stretchr/testify/require" @@ -52,7 +53,7 @@ func TestMigrateTables(t *testing.T) { } clientMetrics := storage.NewClientMetrics() - objClient, err := storage.NewObjectClient(pcfg.ObjectType, storageCfg, clientMetrics) + objClient, err := storage.NewObjectClient("table-migration-tool", pcfg.ObjectType, storageCfg, clientMetrics, prometheus.DefaultRegisterer) require.NoError(t, err) indexStorageClient := shipperstorage.NewIndexStorageClient(objClient, pcfg.IndexTables.PathPrefix) From b93fbcd4ddc8ae11fadcf4c8ea39dd0e14ea3c3e Mon Sep 17 00:00:00 2001 From: Joao Marcal Date: Fri, 24 Nov 2023 16:46:59 +0000 Subject: [PATCH 02/20] Fixed configuration for bucket/azure --- pkg/storage/bucket/azure/bucket_client.go | 16 ++++----- pkg/storage/bucket/azure/config.go | 17 +++++++--- pkg/storage/bucket/azure/config_test.go | 40 +++++++++++++---------- 3 files changed, 43 insertions(+), 30 deletions(-) diff --git a/pkg/storage/bucket/azure/bucket_client.go b/pkg/storage/bucket/azure/bucket_client.go index 7cc9a9176740..e6df3721a3de 100644 --- a/pkg/storage/bucket/azure/bucket_client.go +++ b/pkg/storage/bucket/azure/bucket_client.go @@ -17,14 +17,14 @@ func NewBucketClient(cfg Config, name string, logger log.Logger) (objstore.Bucke Endpoint: cfg.EndpointSuffix, MaxRetries: cfg.MaxRetries, HTTPConfig: azure.HTTPConfig{ - IdleConnTimeout: model.Duration(cfg.IdleConnTimeout), - ResponseHeaderTimeout: model.Duration(cfg.ResponseHeaderTimeout), - InsecureSkipVerify: cfg.InsecureSkipVerify, - TLSHandshakeTimeout: model.Duration(cfg.TLSHandshakeTimeout), - ExpectContinueTimeout: model.Duration(cfg.ExpectContinueTimeout), - MaxIdleConns: cfg.MaxIdleConns, - MaxIdleConnsPerHost: cfg.MaxIdleConnsPerHost, - MaxConnsPerHost: cfg.MaxConnsPerHost, + IdleConnTimeout: model.Duration(cfg.HTTP.IdleConnTimeout), + ResponseHeaderTimeout: 
model.Duration(cfg.HTTP.ResponseHeaderTimeout),
+			InsecureSkipVerify:    cfg.HTTP.InsecureSkipVerify,
+			TLSHandshakeTimeout:   model.Duration(cfg.HTTP.TLSHandshakeTimeout),
+			ExpectContinueTimeout: model.Duration(cfg.HTTP.ExpectContinueTimeout),
+			MaxIdleConns:          cfg.HTTP.MaxIdleConns,
+			MaxIdleConnsPerHost:   cfg.HTTP.MaxIdleConnsPerHost,
+			MaxConnsPerHost:       cfg.HTTP.MaxConnsPerHost,
 		},
 	}
diff --git a/pkg/storage/bucket/azure/config.go b/pkg/storage/bucket/azure/config.go
index 18d0f74fc3e8..47027c86c82f 100644
--- a/pkg/storage/bucket/azure/config.go
+++ b/pkg/storage/bucket/azure/config.go
@@ -3,11 +3,20 @@ package azure
 import (
 	"flag"

-	"github.com/grafana/dskit/flagext"
+	"net/http"

-	"github.com/grafana/loki/pkg/storage/bucket/http"
+	"github.com/grafana/dskit/flagext"
+	bucket_http "github.com/grafana/loki/pkg/storage/bucket/http"
 )

+// HTTPConfig stores the http.Transport configuration for the blob storage client.
+type HTTPConfig struct {
+	bucket_http.Config `yaml:",inline"`
+
+	// Allow upstream callers to inject a round tripper
+	Transport http.RoundTripper `yaml:"-"`
+}
+
 // Config holds the config options for an Azure backend
 type Config struct {
 	StorageAccountName string         `yaml:"account_name"`
@@ -17,7 +26,7 @@ type Config struct {
 	EndpointSuffix     string         `yaml:"endpoint_suffix"`
 	MaxRetries         int            `yaml:"max_retries"`

-	http.Config `yaml:"http"`
+	HTTP HTTPConfig `yaml:"http"`
 }

 // RegisterFlags registers the flags for Azure storage
@@ -33,5 +42,5 @@ func (cfg *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) {
 	f.StringVar(&cfg.ContainerName, prefix+"azure.container-name", "loki", "Azure storage container name")
 	f.StringVar(&cfg.EndpointSuffix, prefix+"azure.endpoint-suffix", "", "Azure storage endpoint suffix without schema. The account name will be prefixed to this value to create the FQDN")
 	f.IntVar(&cfg.MaxRetries, prefix+"azure.max-retries", 20, "Number of retries for recoverable errors")
-	cfg.Config.RegisterFlagsWithPrefix(prefix+"azure.", f)
+	cfg.HTTP.RegisterFlagsWithPrefix(prefix+"azure.http.", f)
 }
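Commit 1 moved the "http." segment out of the flag names registered by the shared bucket/http.Config, so each caller now carries it in the prefix; that is why the prefix passed to the HTTP sub-config above must end in a dot. A minimal, self-contained sketch of how the pieces compose (the "common.storage.thanos." prefix is an assumed example, not taken from this series):

	package main

	import (
		"flag"
		"fmt"
	)

	func main() {
		fs := flag.NewFlagSet("example", flag.ContinueOnError)
		// bucket/http.Config registers names of the form prefix+"idle-conn-timeout",
		// so composing the prefix as prefix+"azure.http." yields:
		fs.Duration("common.storage.thanos.azure.http.idle-conn-timeout", 0, "example flag")
		fs.VisitAll(func(fl *flag.Flag) { fmt.Println(fl.Name) })
		// prints: common.storage.thanos.azure.http.idle-conn-timeout
	}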
diff --git a/pkg/storage/bucket/azure/config_test.go b/pkg/storage/bucket/azure/config_test.go
index 7d3c6d9f326d..c5486ceb1506 100644
--- a/pkg/storage/bucket/azure/config_test.go
+++ b/pkg/storage/bucket/azure/config_test.go
@@ -15,15 +15,17 @@ import (
 var defaultConfig = Config{
 	ContainerName: "loki",
 	MaxRetries:    20,
-	Config: http.Config{
-		IdleConnTimeout:       90 * time.Second,
-		ResponseHeaderTimeout: 2 * time.Minute,
-		InsecureSkipVerify:    false,
-		TLSHandshakeTimeout:   10 * time.Second,
-		ExpectContinueTimeout: 1 * time.Second,
-		MaxIdleConns:          100,
-		MaxIdleConnsPerHost:   100,
-		MaxConnsPerHost:       0,
+	HTTP: HTTPConfig{
+		Config: http.Config{
+			IdleConnTimeout:       90 * time.Second,
+			ResponseHeaderTimeout: 2 * time.Minute,
+			InsecureSkipVerify:    false,
+			TLSHandshakeTimeout:   10 * time.Second,
+			ExpectContinueTimeout: 1 * time.Second,
+			MaxIdleConns:          100,
+			MaxIdleConnsPerHost:   100,
+			MaxConnsPerHost:       0,
+		},
 	},
 }
@@ -65,15 +67,17 @@ http:
 			ContainerName:  "test-container-name",
 			EndpointSuffix: "test-endpoint-suffix",
 			MaxRetries:     1,
-			Config: http.Config{
-				IdleConnTimeout:       2 * time.Second,
-				ResponseHeaderTimeout: 3 * time.Second,
-				InsecureSkipVerify:    true,
-				TLSHandshakeTimeout:   4 * time.Second,
-				ExpectContinueTimeout: 5 * time.Second,
-				MaxIdleConns:          6,
-				MaxIdleConnsPerHost:   7,
-				MaxConnsPerHost:       8,
+			HTTP: HTTPConfig{
+				Config: http.Config{
+					IdleConnTimeout:       2 * time.Second,
+					ResponseHeaderTimeout: 3 * time.Second,
+					InsecureSkipVerify:    true,
+					TLSHandshakeTimeout:   4 * time.Second,
+					ExpectContinueTimeout: 5 * time.Second,
+					MaxIdleConns:          6,
+					MaxIdleConnsPerHost:   7,
+					MaxConnsPerHost:       8,
+				},
 			},
 		},
 		expectedErr: nil,

From a259abd3c40c23fae1c2cdf78195877b2c6acf6b Mon Sep 17 00:00:00 2001
From: Joao Marcal
Date: Fri, 24 Nov 2023 16:54:42 +0000
Subject: [PATCH 03/20] Added support for BlobStorage using thanos/objstore

---
 .../blob_storage_thanos_object_client.go       | 126 ++++++++++++++++++
 .../blob_storage_thanos_object_client_test.go  | 122 +++++++++++++++++
 pkg/storage/factory.go                         |   4 +
 3 files changed, 252 insertions(+)
 create mode 100644 pkg/storage/chunk/client/azure/blob_storage_thanos_object_client.go
 create mode 100644 pkg/storage/chunk/client/azure/blob_storage_thanos_object_client_test.go

diff --git a/pkg/storage/chunk/client/azure/blob_storage_thanos_object_client.go b/pkg/storage/chunk/client/azure/blob_storage_thanos_object_client.go
new file mode 100644
index 000000000000..f54502e6c43e
--- /dev/null
+++ b/pkg/storage/chunk/client/azure/blob_storage_thanos_object_client.go
@@ -0,0 +1,126 @@
+package azure
+
+import (
+	"context"
+	"io"
+	"strings"
+
+	"github.com/go-kit/log"
+	"github.com/pkg/errors"
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/thanos-io/objstore"
+
+	"github.com/grafana/loki/pkg/storage/bucket"
+	"github.com/grafana/loki/pkg/storage/chunk/client"
+	"github.com/grafana/loki/pkg/storage/chunk/client/hedging"
+)
+
+type BlobStorageThanosObjectClient struct {
+	client       objstore.Bucket
+	hedgedClient objstore.Bucket
+}
+
+// NewBlobStorageThanosObjectClient makes a new BlobStorage-backed ObjectClient.
+func NewBlobStorageThanosObjectClient(ctx context.Context, cfg bucket.Config, component string, logger log.Logger, hedgingCfg hedging.Config, reg prometheus.Registerer) (*BlobStorageThanosObjectClient, error) {
+	client, err := newBlobStorageThanosObjClient(ctx, cfg, component, logger, false, hedgingCfg, prometheus.WrapRegistererWith(prometheus.Labels{"hedging": "false"}, reg))
+	if err != nil {
+		return nil, err
+	}
+	hedgedClient, err := newBlobStorageThanosObjClient(ctx, cfg, component, logger, true, hedgingCfg, prometheus.WrapRegistererWith(prometheus.Labels{"hedging": "true"}, reg))
+	if err != nil {
+		return nil, err
+	}
+	return &BlobStorageThanosObjectClient{
+		client:       client,
+		hedgedClient: hedgedClient,
+	}, nil
+}
+
+func newBlobStorageThanosObjClient(ctx context.Context, cfg bucket.Config, component string, logger log.Logger, hedging bool, hedgingCfg hedging.Config, reg prometheus.Registerer) (objstore.Bucket, error) {
+	if hedging {
+		hedgedTransport, err := hedgingCfg.RoundTripperWithRegisterer(nil, reg)
+		if err != nil {
+			return nil, err
+		}
+
+		cfg.Azure.HTTP.Transport = hedgedTransport
+	}
+
+	return bucket.NewClient(ctx, cfg, component, logger, reg)
+}
+
+// Stop fulfills the chunk.ObjectClient interface
+func (s *BlobStorageThanosObjectClient) Stop() {}
+
+// ObjectExists checks if a given objectKey exists in the BlobStorage bucket
+func (s *BlobStorageThanosObjectClient) ObjectExists(ctx context.Context, objectKey string) (bool, error) {
+	return s.hedgedClient.Exists(ctx, objectKey)
+}
+
+// PutObject into the store
+func (s *BlobStorageThanosObjectClient) PutObject(ctx context.Context, objectKey string, object io.ReadSeeker) error {
+	return s.client.Upload(ctx, objectKey, object)
+}
+
+// DeleteObject deletes the specified objectKey from the appropriate BlobStorage bucket
+func (s *BlobStorageThanosObjectClient) DeleteObject(ctx context.Context, objectKey string) error {
+	return s.client.Delete(ctx, objectKey)
+}
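// A usage sketch for the client above (illustrative only; the filesystem
// backend, directory, and "example" component label are assumptions, not
// values taken from this series):
//
//	ctx := context.Background()
//	cfg := bucket.Config{}
//	cfg.Backend = bucket.Filesystem
//	cfg.Filesystem.Directory = "/tmp/loki-example"
//	c, err := NewBlobStorageThanosObjectClient(ctx, cfg, "example",
//		log.NewNopLogger(), hedging.Config{}, prometheus.NewRegistry())
//	if err != nil {
//		panic(err)
//	}
//	if err := c.PutObject(ctx, "key", strings.NewReader("payload")); err != nil {
//		panic(err)
//	}
//
// Note the split above: reads and existence checks go through the hedged
// bucket, while writes and deletes use the plain one, so request hedging only
// applies to operations that are safe to issue more than once.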
+// GetObject returns a reader and the size for the specified object key from the configured BlobStorage bucket.
+func (s *BlobStorageThanosObjectClient) GetObject(ctx context.Context, objectKey string) (io.ReadCloser, int64, error) {
+	reader, err := s.hedgedClient.Get(ctx, objectKey)
+	if err != nil {
+		return nil, 0, err
+	}
+
+	attr, err := s.hedgedClient.Attributes(ctx, objectKey)
+	if err != nil {
+		return nil, 0, errors.Wrapf(err, "failed to get attributes for %s", objectKey)
+	}
+
+	return reader, attr.Size, err
+}
+
+// List implements chunk.ObjectClient.
+func (s *BlobStorageThanosObjectClient) List(ctx context.Context, prefix, delimiter string) ([]client.StorageObject, []client.StorageCommonPrefix, error) {
+	var storageObjects []client.StorageObject
+	var commonPrefixes []client.StorageCommonPrefix
+	var iterParams []objstore.IterOption
+
+	// If delimiter is empty we want to list all files
+	if delimiter == "" {
+		iterParams = append(iterParams, objstore.WithRecursiveIter)
+	}
+
+	err := s.client.Iter(ctx, prefix, func(objectKey string) error {
+		// CommonPrefixes are keys that have the prefix and have the delimiter
+		// as a suffix
+		if delimiter != "" && strings.HasSuffix(objectKey, delimiter) {
+			commonPrefixes = append(commonPrefixes, client.StorageCommonPrefix(objectKey))
+			return nil
+		}
+		attr, err := s.client.Attributes(ctx, objectKey)
+		if err != nil {
+			return errors.Wrapf(err, "failed to get attributes for %s", objectKey)
+		}
+
+		storageObjects = append(storageObjects, client.StorageObject{
+			Key:        objectKey,
+			ModifiedAt: attr.LastModified,
+		})
+
+		return nil
+	}, iterParams...)
+	if err != nil {
+		return nil, nil, err
+	}
+
+	return storageObjects, commonPrefixes, nil
+}
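// Behavior sketch for List (assuming a bucket holding "a/x", "a/y" and "b"):
//
//	List(ctx, "", "/") // objects: ["b"], common prefixes: ["a/"]
//	List(ctx, "", "")  // objects: ["a/x", "a/y", "b"], no common prefixes
//
// An empty delimiter turns on objstore's WithRecursiveIter, which visits every
// key; any other delimiter keeps the iteration shallow and reports keys that
// end in the delimiter as common prefixes, matching the expectations in the
// test below.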
+// IsObjectNotFoundErr returns true if error means that object is not found. Relevant to GetObject and DeleteObject operations.
+func (s *BlobStorageThanosObjectClient) IsObjectNotFoundErr(err error) bool {
+	return s.client.IsObjNotFoundErr(err)
+}
+
+// TODO(dannyk): implement for client
+func (s *BlobStorageThanosObjectClient) IsRetryableErr(error) bool { return false }
diff --git a/pkg/storage/chunk/client/azure/blob_storage_thanos_object_client_test.go b/pkg/storage/chunk/client/azure/blob_storage_thanos_object_client_test.go
new file mode 100644
index 000000000000..69e5321c7f1b
--- /dev/null
+++ b/pkg/storage/chunk/client/azure/blob_storage_thanos_object_client_test.go
@@ -0,0 +1,122 @@
+package azure
+
+import (
+	"bytes"
+	"context"
+	"sort"
+	"testing"
+
+	"github.com/grafana/loki/pkg/storage/bucket/filesystem"
+	"github.com/grafana/loki/pkg/storage/chunk/client"
+	"github.com/stretchr/testify/require"
+)
+
+func TestAzureThanosObjStore_List(t *testing.T) {
+	tests := []struct {
+		name              string
+		prefix            string
+		delimiter         string
+		storageObjKeys    []string
+		storageCommonPref []client.StorageCommonPrefix
+		wantErr           error
+	}{
+		{
+			"list_top_level_only",
+			"",
+			"/",
+			[]string{"top-level-file-1", "top-level-file-2"},
+			[]client.StorageCommonPrefix{"dir-1/", "dir-2/", "deeply/"},
+			nil,
+		},
+		{
+			"list_all_dir_1",
+			"dir-1",
+			"",
+			[]string{"dir-1/file-1", "dir-1/file-2"},
+			nil,
+			nil,
+		},
+		{
+			"list_recursive",
+			"",
+			"",
+			[]string{
+				"top-level-file-1",
+				"top-level-file-2",
+				"dir-1/file-1",
+				"dir-1/file-2",
+				"dir-2/file-3",
+				"dir-2/file-4",
+				"dir-2/file-5",
+				"deeply/nested/folder/a",
+				"deeply/nested/folder/b",
+				"deeply/nested/folder/c",
+			},
+			nil,
+			nil,
+		},
+		{
+			"unknown_prefix",
+			"test",
+			"",
+			[]string{},
+			nil,
+			nil,
+		},
+		{
+			"only_storage_common_prefix",
+			"deeply/",
+			"/",
+			[]string{},
+			[]client.StorageCommonPrefix{
+				"deeply/nested/",
+			},
+			nil,
+		},
+	}
+
+	for _, tt := range tests {
+		config := filesystem.Config{
+			Directory: t.TempDir(),
+		}
+		newBucket, err := filesystem.NewBucketClient(config)
+		require.NoError(t, err)
+
+		buff := bytes.NewBufferString("foo")
+		require.NoError(t, newBucket.Upload(context.Background(), "top-level-file-1", buff))
+		require.NoError(t, newBucket.Upload(context.Background(), "top-level-file-2", buff))
+		require.NoError(t, newBucket.Upload(context.Background(), "dir-1/file-1", buff))
+		require.NoError(t, newBucket.Upload(context.Background(), "dir-1/file-2", buff))
+		require.NoError(t, newBucket.Upload(context.Background(), "dir-2/file-3", buff))
+		require.NoError(t, newBucket.Upload(context.Background(), "dir-2/file-4", buff))
+		require.NoError(t, newBucket.Upload(context.Background(), "dir-2/file-5", buff))
+		require.NoError(t, newBucket.Upload(context.Background(), "deeply/nested/folder/a", buff))
+		require.NoError(t, newBucket.Upload(context.Background(), "deeply/nested/folder/b", buff))
+		require.NoError(t, newBucket.Upload(context.Background(), "deeply/nested/folder/c", buff))
+
+		azureClient := &BlobStorageThanosObjectClient{}
+		azureClient.client = newBucket
+
+		storageObj, storageCommonPref, err := azureClient.List(context.Background(), tt.prefix, tt.delimiter)
+		if tt.wantErr != nil {
+			require.Equal(t, tt.wantErr.Error(), err.Error())
+			continue
+		}
+
+		keys := []string{}
+		for _, key := range storageObj {
+			keys = append(keys, key.Key)
+		}
+
+		sort.Slice(tt.storageObjKeys, func(i, j int) bool {
+			return tt.storageObjKeys[i] < tt.storageObjKeys[j]
+		})
+		sort.Slice(tt.storageCommonPref, func(i, j int) bool {
+			return tt.storageCommonPref[i] < tt.storageCommonPref[j]
+		})
+
+		require.NoError(t, err)
+		require.Equal(t, tt.storageObjKeys,
keys) + require.Equal(t, tt.storageCommonPref, storageCommonPref) + } +} diff --git a/pkg/storage/factory.go b/pkg/storage/factory.go index 5cefa4137bef..71c061b0a8c7 100644 --- a/pkg/storage/factory.go +++ b/pkg/storage/factory.go @@ -41,6 +41,7 @@ import ( "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper/indexgateway" "github.com/grafana/loki/pkg/util" "github.com/grafana/loki/pkg/util/constants" + util_log "github.com/grafana/loki/pkg/util/log" ) var ( @@ -732,6 +733,9 @@ func internalNewObjectClient(component, name string, cfg Config, clientMetrics C } azureCfg = (azure.BlobStorageConfig)(nsCfg) } + if cfg.ThanosObjStore { + azure.NewBlobStorageThanosObjectClient(context.Background(), cfg.ObjStoreConf, component, util_log.Logger, cfg.Hedging, reg) + } return azure.NewBlobStorage(&azureCfg, clientMetrics.AzureMetrics, cfg.Hedging) case config.StorageTypeSwift: From 42f37ba5e1efa4ecfa2d09ad5e4e19a4a95d5a4e Mon Sep 17 00:00:00 2001 From: Joao Marcal Date: Mon, 27 Nov 2023 11:59:06 +0000 Subject: [PATCH 04/20] Azure CLI review --- pkg/storage/bucket/azure/bucket_client.go | 6 +++++- pkg/storage/bucket/azure/config.go | 19 ++++++++++++------- pkg/storage/bucket/azure/config_test.go | 12 ++++++------ 3 files changed, 23 insertions(+), 14 deletions(-) diff --git a/pkg/storage/bucket/azure/bucket_client.go b/pkg/storage/bucket/azure/bucket_client.go index e6df3721a3de..53112302de76 100644 --- a/pkg/storage/bucket/azure/bucket_client.go +++ b/pkg/storage/bucket/azure/bucket_client.go @@ -12,10 +12,14 @@ func NewBucketClient(cfg Config, name string, logger log.Logger) (objstore.Bucke bucketConfig := azure.Config{ StorageAccountName: cfg.StorageAccountName, StorageAccountKey: cfg.StorageAccountKey.String(), - StorageConnectionString: cfg.ConnectionString.String(), + StorageConnectionString: cfg.StorageConnectionString.String(), ContainerName: cfg.ContainerName, Endpoint: cfg.EndpointSuffix, MaxRetries: cfg.MaxRetries, + UserAssignedID: cfg.UserAssignedID, + PipelineConfig: azure.PipelineConfig{ + MaxRetryDelay: model.Duration(cfg.MaxRetryDelay), + }, HTTPConfig: azure.HTTPConfig{ IdleConnTimeout: model.Duration(cfg.HTTP.IdleConnTimeout), ResponseHeaderTimeout: model.Duration(cfg.HTTP.ResponseHeaderTimeout), diff --git a/pkg/storage/bucket/azure/config.go b/pkg/storage/bucket/azure/config.go index 47027c86c82f..57d90f708f42 100644 --- a/pkg/storage/bucket/azure/config.go +++ b/pkg/storage/bucket/azure/config.go @@ -2,6 +2,7 @@ package azure import ( "flag" + "time" "net/http" @@ -19,12 +20,14 @@ type HTTPConfig struct { // Config holds the config options for an Azure backend type Config struct { - StorageAccountName string `yaml:"account_name"` - StorageAccountKey flagext.Secret `yaml:"account_key"` - ConnectionString flagext.Secret `yaml:"connection_string"` - ContainerName string `yaml:"container_name"` - EndpointSuffix string `yaml:"endpoint_suffix"` - MaxRetries int `yaml:"max_retries"` + StorageAccountName string `yaml:"account_name"` + StorageAccountKey flagext.Secret `yaml:"account_key"` + StorageConnectionString flagext.Secret `yaml:"connection_string"` + ContainerName string `yaml:"container_name"` + EndpointSuffix string `yaml:"endpoint_suffix"` + UserAssignedID string `yaml:"user_assigned_id"` + MaxRetries int `yaml:"max_retries"` + MaxRetryDelay time.Duration `yaml:"max_retry_delay"` HTTP HTTPConfig `yaml:"http"` } @@ -38,9 +41,11 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { func (cfg *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { 
f.StringVar(&cfg.StorageAccountName, prefix+"azure.account-name", "", "Azure storage account name")
 	f.Var(&cfg.StorageAccountKey, prefix+"azure.account-key", "Azure storage account key")
-	f.Var(&cfg.ConnectionString, prefix+"azure.connection-string", "If `connection-string` is set, the values of `account-name` and `endpoint-suffix` values will not be used. Use this method over `account-key` if you need to authenticate via a SAS token. Or if you use the Azurite emulator.")
+	f.Var(&cfg.StorageConnectionString, prefix+"azure.connection-string", "If `connection-string` is set, the values of `account-name` and `endpoint-suffix` values will not be used. Use this method over `account-key` if you need to authenticate via a SAS token. Or if you use the Azurite emulator.")
 	f.StringVar(&cfg.ContainerName, prefix+"azure.container-name", "loki", "Azure storage container name")
 	f.StringVar(&cfg.EndpointSuffix, prefix+"azure.endpoint-suffix", "", "Azure storage endpoint suffix without schema. The account name will be prefixed to this value to create the FQDN")
+	f.StringVar(&cfg.UserAssignedID, prefix+"azure.user-assigned-id", "", "User assigned identity ID to authenticate to the Azure storage account.")
 	f.IntVar(&cfg.MaxRetries, prefix+"azure.max-retries", 20, "Number of retries for recoverable errors")
+	f.DurationVar(&cfg.MaxRetryDelay, prefix+"azure.max-retry-delay", 500*time.Millisecond, "Maximum time to wait before retrying a request.")
 	cfg.HTTP.RegisterFlagsWithPrefix(prefix+"azure.http.", f)
 }
diff --git a/pkg/storage/bucket/azure/config_test.go b/pkg/storage/bucket/azure/config_test.go
index c5486ceb1506..8d9d2e87910d 100644
--- a/pkg/storage/bucket/azure/config_test.go
+++ b/pkg/storage/bucket/azure/config_test.go
@@ -61,12 +61,12 @@ http:
 			max_connections_per_host: 8
 `,
 		expectedConfig: Config{
-			StorageAccountName: "test-account-name",
-			StorageAccountKey:  flagext.SecretWithValue("test-account-key"),
-			ConnectionString:   flagext.SecretWithValue("test-connection-string"),
-			ContainerName:      "test-container-name",
-			EndpointSuffix:     "test-endpoint-suffix",
-			MaxRetries:         1,
+			StorageAccountName:      "test-account-name",
+			StorageAccountKey:       flagext.SecretWithValue("test-account-key"),
+			StorageConnectionString: flagext.SecretWithValue("test-connection-string"),
+			ContainerName:           "test-container-name",
+			EndpointSuffix:          "test-endpoint-suffix",
+			MaxRetries:              1,
 			HTTP: HTTPConfig{
 				Config: http.Config{
 					IdleConnTimeout: 2 * time.Second,

From 40aa2f948d28f56cf44eb50b59a90b8f883d2004 Mon Sep 17 00:00:00 2001
From: Joao Marcal
Date: Thu, 7 Dec 2023 09:56:40 +0000
Subject: [PATCH 05/20] Fixes from testing

---
 pkg/storage/bucket/azure/bucket_client.go | 55 +++++++++++------------
 pkg/storage/bucket/azure/config.go        |  4 +-
 pkg/storage/bucket/azure/config_test.go   |  5 ++-
 pkg/storage/factory.go                    |  1 +
 4 files changed, 34 insertions(+), 31 deletions(-)

diff --git a/pkg/storage/bucket/azure/bucket_client.go b/pkg/storage/bucket/azure/bucket_client.go
index 53112302de76..61da23493f23 100644
--- a/pkg/storage/bucket/azure/bucket_client.go
+++ b/pkg/storage/bucket/azure/bucket_client.go
@@ -5,39 +5,38 @@ import (
 	"github.com/prometheus/common/model"
 	"github.com/thanos-io/objstore"
 	"github.com/thanos-io/objstore/providers/azure"
-	yaml "gopkg.in/yaml.v2"
 )

 func NewBucketClient(cfg Config, name string, logger log.Logger) (objstore.Bucket, error) {
-	bucketConfig := azure.Config{
-		StorageAccountName: cfg.StorageAccountName,
-		StorageAccountKey:  cfg.StorageAccountKey.String(),
-		StorageConnectionString:
cfg.StorageConnectionString.String(), - ContainerName: cfg.ContainerName, - Endpoint: cfg.EndpointSuffix, - MaxRetries: cfg.MaxRetries, - UserAssignedID: cfg.UserAssignedID, - PipelineConfig: azure.PipelineConfig{ - MaxRetryDelay: model.Duration(cfg.MaxRetryDelay), - }, - HTTPConfig: azure.HTTPConfig{ - IdleConnTimeout: model.Duration(cfg.HTTP.IdleConnTimeout), - ResponseHeaderTimeout: model.Duration(cfg.HTTP.ResponseHeaderTimeout), - InsecureSkipVerify: cfg.HTTP.InsecureSkipVerify, - TLSHandshakeTimeout: model.Duration(cfg.HTTP.TLSHandshakeTimeout), - ExpectContinueTimeout: model.Duration(cfg.HTTP.ExpectContinueTimeout), - MaxIdleConns: cfg.HTTP.MaxIdleConns, - MaxIdleConnsPerHost: cfg.HTTP.MaxIdleConnsPerHost, - MaxConnsPerHost: cfg.HTTP.MaxConnsPerHost, - }, + return newBucketClient(cfg, name, logger, azure.NewBucketWithConfig) +} + +func newBucketClient(cfg Config, name string, logger log.Logger, factory func(log.Logger, azure.Config, string) (*azure.Bucket, error)) (objstore.Bucket, error) { + // Start with default config to make sure that all parameters are set to sensible values, especially + // HTTP Config field. + bucketConfig := azure.DefaultConfig + bucketConfig.StorageAccountName = cfg.StorageAccountName + bucketConfig.StorageAccountKey = cfg.StorageAccountKey.String() + bucketConfig.ContainerName = cfg.ContainerName + bucketConfig.MaxRetries = cfg.MaxRetries + bucketConfig.UserAssignedID = cfg.UserAssignedID + bucketConfig.PipelineConfig.MaxRetryDelay = model.Duration(cfg.MaxRetryDelay) + + bucketConfig.HTTPConfig = azure.HTTPConfig{ + IdleConnTimeout: model.Duration(cfg.HTTP.IdleConnTimeout), + ResponseHeaderTimeout: model.Duration(cfg.HTTP.ResponseHeaderTimeout), + InsecureSkipVerify: cfg.HTTP.InsecureSkipVerify, + TLSHandshakeTimeout: model.Duration(cfg.HTTP.TLSHandshakeTimeout), + ExpectContinueTimeout: model.Duration(cfg.HTTP.ExpectContinueTimeout), + MaxIdleConns: cfg.HTTP.MaxIdleConns, + MaxIdleConnsPerHost: cfg.HTTP.MaxIdleConnsPerHost, + MaxConnsPerHost: cfg.HTTP.MaxConnsPerHost, } - // Thanos currently doesn't support passing the config as is, but expects a YAML, - // so we're going to serialize it. - serialized, err := yaml.Marshal(bucketConfig) - if err != nil { - return nil, err + if cfg.Endpoint != "" { + // azure.DefaultConfig has the default Endpoint, overwrite it only if a different one was explicitly provided. + bucketConfig.Endpoint = cfg.Endpoint } - return azure.NewBucket(logger, serialized, name) + return factory(logger, bucketConfig, name) } diff --git a/pkg/storage/bucket/azure/config.go b/pkg/storage/bucket/azure/config.go index 57d90f708f42..dc107d404c49 100644 --- a/pkg/storage/bucket/azure/config.go +++ b/pkg/storage/bucket/azure/config.go @@ -24,7 +24,7 @@ type Config struct { StorageAccountKey flagext.Secret `yaml:"account_key"` StorageConnectionString flagext.Secret `yaml:"connection_string"` ContainerName string `yaml:"container_name"` - EndpointSuffix string `yaml:"endpoint_suffix"` + Endpoint string `yaml:"endpoint_suffix"` UserAssignedID string `yaml:"user_assigned_id"` MaxRetries int `yaml:"max_retries"` MaxRetryDelay time.Duration `yaml:"max_retry_delay"` @@ -43,7 +43,7 @@ func (cfg *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { f.Var(&cfg.StorageAccountKey, prefix+"azure.account-key", "Azure storage account key") f.Var(&cfg.StorageConnectionString, prefix+"azure.connection-string", "If `connection-string` is set, the values of `account-name` and `endpoint-suffix` values will not be used. 
Use this method over `account-key` if you need to authenticate via a SAS token. Or if you use the Azurite emulator.") f.StringVar(&cfg.ContainerName, prefix+"azure.container-name", "loki", "Azure storage container name") - f.StringVar(&cfg.EndpointSuffix, prefix+"azure.endpoint-suffix", "", "Azure storage endpoint suffix without schema. The account name will be prefixed to this value to create the FQDN") + f.StringVar(&cfg.Endpoint, prefix+"azure.endpoint-suffix", "", "Azure storage endpoint suffix without schema. The account name will be prefixed to this value to create the FQDN") f.StringVar(&cfg.UserAssignedID, prefix+"azure.user-assigned-id", "", "User assigned identity ID to authenticate to the Azure storage account.") f.IntVar(&cfg.MaxRetries, prefix+"azure.max-retries", 20, "Number of retries for recoverable errors") f.DurationVar(&cfg.MaxRetryDelay, prefix+"azure.max-retry-delay", 500*time.Millisecond, "Maximum time to wait before retrying a request.") diff --git a/pkg/storage/bucket/azure/config_test.go b/pkg/storage/bucket/azure/config_test.go index 8d9d2e87910d..d1636d636d56 100644 --- a/pkg/storage/bucket/azure/config_test.go +++ b/pkg/storage/bucket/azure/config_test.go @@ -15,6 +15,7 @@ import ( var defaultConfig = Config{ ContainerName: "loki", MaxRetries: 20, + MaxRetryDelay: 500000000, HTTP: HTTPConfig{ Config: http.Config{ IdleConnTimeout: 90 * time.Second, @@ -50,6 +51,7 @@ connection_string: test-connection-string container_name: test-container-name endpoint_suffix: test-endpoint-suffix max_retries: 1 +max_retry_delay: 500000000 http: idle_conn_timeout: 2s response_header_timeout: 3s @@ -65,8 +67,9 @@ http: StorageAccountKey: flagext.SecretWithValue("test-account-key"), StorageConnectionString: flagext.SecretWithValue("test-connection-string"), ContainerName: "test-container-name", - EndpointSuffix: "test-endpoint-suffix", + Endpoint: "test-endpoint-suffix", MaxRetries: 1, + MaxRetryDelay: 500000000, HTTP: HTTPConfig{ Config: http.Config{ IdleConnTimeout: 2 * time.Second, diff --git a/pkg/storage/factory.go b/pkg/storage/factory.go index 71c061b0a8c7..0002593ba0f6 100644 --- a/pkg/storage/factory.go +++ b/pkg/storage/factory.go @@ -734,6 +734,7 @@ func internalNewObjectClient(component, name string, cfg Config, clientMetrics C azureCfg = (azure.BlobStorageConfig)(nsCfg) } if cfg.ThanosObjStore { + clientMetrics.Unregister() azure.NewBlobStorageThanosObjectClient(context.Background(), cfg.ObjStoreConf, component, util_log.Logger, cfg.Hedging, reg) } return azure.NewBlobStorage(&azureCfg, clientMetrics.AzureMetrics, cfg.Hedging) From 679c2a88bea1bea70aa2457a7c5dd537ae41d4cd Mon Sep 17 00:00:00 2001 From: Ashwanth Goli Date: Mon, 21 Oct 2024 17:04:16 +0530 Subject: [PATCH 06/20] clean-up deleted files from main --- pkg/bloomcompactor/tsdb.go | 261 ------------------------------------- 1 file changed, 261 deletions(-) delete mode 100644 pkg/bloomcompactor/tsdb.go diff --git a/pkg/bloomcompactor/tsdb.go b/pkg/bloomcompactor/tsdb.go deleted file mode 100644 index 4c32e27d86da..000000000000 --- a/pkg/bloomcompactor/tsdb.go +++ /dev/null @@ -1,261 +0,0 @@ -package bloomcompactor - -import ( - "context" - "fmt" - "io" - "math" - "path" - "strings" - - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/pkg/errors" - "github.com/prometheus/common/model" - "github.com/prometheus/prometheus/model/labels" - - "github.com/grafana/loki/v3/pkg/chunkenc" - baseStore "github.com/grafana/loki/v3/pkg/storage" - v1 "github.com/grafana/loki/v3/pkg/storage/bloom/v1" - 
"github.com/grafana/loki/v3/pkg/storage/config" - "github.com/grafana/loki/v3/pkg/storage/stores/shipper/indexshipper/storage" - "github.com/grafana/loki/v3/pkg/storage/stores/shipper/indexshipper/tsdb" - "github.com/grafana/loki/v3/pkg/storage/stores/shipper/indexshipper/tsdb/index" - "github.com/grafana/loki/v3/pkg/storage/stores/shipper/indexshipper/tsdb/sharding" - "github.com/grafana/loki/v3/pkg/storage/types" -) - -const ( - gzipExtension = ".gz" -) - -type TSDBStore interface { - UsersForPeriod(ctx context.Context, table config.DayTable) ([]string, error) - ResolveTSDBs(ctx context.Context, table config.DayTable, tenant string) ([]tsdb.SingleTenantTSDBIdentifier, error) - LoadTSDB( - ctx context.Context, - table config.DayTable, - tenant string, - id tsdb.Identifier, - bounds v1.FingerprintBounds, - ) (v1.Iterator[*v1.Series], error) -} - -// BloomTSDBStore is a wrapper around the storage.Client interface which -// implements the TSDBStore interface for this pkg. -type BloomTSDBStore struct { - storage storage.Client - logger log.Logger -} - -func NewBloomTSDBStore(storage storage.Client, logger log.Logger) *BloomTSDBStore { - return &BloomTSDBStore{ - storage: storage, - logger: logger, - } -} - -func (b *BloomTSDBStore) UsersForPeriod(ctx context.Context, table config.DayTable) ([]string, error) { - _, users, err := b.storage.ListFiles(ctx, table.Addr(), true) // bypass cache for ease of testing - return users, err -} - -func (b *BloomTSDBStore) ResolveTSDBs(ctx context.Context, table config.DayTable, tenant string) ([]tsdb.SingleTenantTSDBIdentifier, error) { - indices, err := b.storage.ListUserFiles(ctx, table.Addr(), tenant, true) // bypass cache for ease of testing - if err != nil { - return nil, errors.Wrap(err, "failed to list user files") - } - - ids := make([]tsdb.SingleTenantTSDBIdentifier, 0, len(indices)) - for _, index := range indices { - key := index.Name - if decompress := storage.IsCompressedFile(index.Name); decompress { - key = strings.TrimSuffix(key, gzipExtension) - } - - id, ok := tsdb.ParseSingleTenantTSDBPath(path.Base(key)) - if !ok { - return nil, errors.Errorf("failed to parse single tenant tsdb path: %s", key) - } - - ids = append(ids, id) - - } - return ids, nil -} - -func (b *BloomTSDBStore) LoadTSDB( - ctx context.Context, - table config.DayTable, - tenant string, - id tsdb.Identifier, - bounds v1.FingerprintBounds, -) (v1.Iterator[*v1.Series], error) { - withCompression := id.Name() + gzipExtension - - data, err := b.storage.GetUserFile(ctx, table.Addr(), tenant, withCompression) - if err != nil { - return nil, errors.Wrap(err, "failed to get file") - } - defer data.Close() - - decompressorPool := chunkenc.GetReaderPool(chunkenc.EncGZIP) - decompressor, err := decompressorPool.GetReader(data) - if err != nil { - return nil, errors.Wrap(err, "failed to get decompressor") - } - defer decompressorPool.PutReader(decompressor) - - buf, err := io.ReadAll(decompressor) - if err != nil { - return nil, errors.Wrap(err, "failed to read file") - } - - reader, err := index.NewReader(index.RealByteSlice(buf)) - if err != nil { - return nil, errors.Wrap(err, "failed to create index reader") - } - - idx := tsdb.NewTSDBIndex(reader) - defer func() { - if err := idx.Close(); err != nil { - level.Error(b.logger).Log("msg", "failed to close index", "err", err) - } - }() - - return NewTSDBSeriesIter(ctx, tenant, idx, bounds) -} - -func NewTSDBSeriesIter(ctx context.Context, user string, f sharding.ForSeries, bounds v1.FingerprintBounds) (v1.Iterator[*v1.Series], error) 
{ - // TODO(salvacorts): Create a pool - series := make([]*v1.Series, 0, 100) - - if err := f.ForSeries( - ctx, - user, - bounds, - 0, math.MaxInt64, - func(_ labels.Labels, fp model.Fingerprint, chks []index.ChunkMeta) (stop bool) { - select { - case <-ctx.Done(): - return true - default: - res := &v1.Series{ - Fingerprint: fp, - Chunks: make(v1.ChunkRefs, 0, len(chks)), - } - for _, chk := range chks { - res.Chunks = append(res.Chunks, v1.ChunkRef{ - From: model.Time(chk.MinTime), - Through: model.Time(chk.MaxTime), - Checksum: chk.Checksum, - }) - } - - series = append(series, res) - return false - } - }, - labels.MustNewMatcher(labels.MatchEqual, "", ""), - ); err != nil { - return nil, err - } - - select { - case <-ctx.Done(): - return v1.NewEmptyIter[*v1.Series](), ctx.Err() - default: - return v1.NewCancelableIter[*v1.Series](ctx, v1.NewSliceIter[*v1.Series](series)), nil - } -} - -type TSDBStores struct { - schemaCfg config.SchemaConfig - stores []TSDBStore -} - -func NewTSDBStores( - schemaCfg config.SchemaConfig, - storeCfg baseStore.Config, - clientMetrics baseStore.ClientMetrics, - logger log.Logger, -) (*TSDBStores, error) { - res := &TSDBStores{ - schemaCfg: schemaCfg, - stores: make([]TSDBStore, len(schemaCfg.Configs)), - } - - for i, cfg := range schemaCfg.Configs { - if cfg.IndexType == types.TSDBType { - - c, err := baseStore.NewObjectClient(cfg.ObjectType, "bloom-compactor", storeCfg) - if err != nil { - return nil, errors.Wrap(err, "failed to create object client") - } - res.stores[i] = NewBloomTSDBStore(storage.NewIndexStorageClient(c, cfg.IndexTables.PathPrefix), logger) - } - } - - return res, nil -} - -func (s *TSDBStores) storeForPeriod(table config.DayTime) (TSDBStore, error) { - for i := len(s.schemaCfg.Configs) - 1; i >= 0; i-- { - period := s.schemaCfg.Configs[i] - - if !table.Before(period.From) { - // we have the desired period config - - if s.stores[i] != nil { - // valid: it's of tsdb type - return s.stores[i], nil - } - - // invalid - return nil, errors.Errorf( - "store for period is not of TSDB type (%s) while looking up store for (%v)", - period.IndexType, - table, - ) - } - - } - - return nil, fmt.Errorf( - "there is no store matching no matching period found for table (%v) -- too early", - table, - ) -} - -func (s *TSDBStores) UsersForPeriod(ctx context.Context, table config.DayTable) ([]string, error) { - store, err := s.storeForPeriod(table.DayTime) - if err != nil { - return nil, err - } - - return store.UsersForPeriod(ctx, table) -} - -func (s *TSDBStores) ResolveTSDBs(ctx context.Context, table config.DayTable, tenant string) ([]tsdb.SingleTenantTSDBIdentifier, error) { - store, err := s.storeForPeriod(table.DayTime) - if err != nil { - return nil, err - } - - return store.ResolveTSDBs(ctx, table, tenant) -} - -func (s *TSDBStores) LoadTSDB( - ctx context.Context, - table config.DayTable, - tenant string, - id tsdb.Identifier, - bounds v1.FingerprintBounds, -) (v1.Iterator[*v1.Series], error) { - store, err := s.storeForPeriod(table.DayTime) - if err != nil { - return nil, err - } - - return store.LoadTSDB(ctx, table, tenant, id, bounds) -} From 06f22808168d471b92764e7aa3ef9c35d30f3c23 Mon Sep 17 00:00:00 2001 From: Ashwanth Goli Date: Mon, 21 Oct 2024 18:25:07 +0530 Subject: [PATCH 07/20] config parity --- pkg/storage/bucket/azure/bucket_client.go | 21 ++-- pkg/storage/bucket/azure/config.go | 30 ++--- pkg/storage/bucket/azure/config_test.go | 105 ------------------ .../blob_storage_thanos_object_client.go | 14 ++- pkg/storage/store.go | 1 + 5 
files changed, 29 insertions(+), 142 deletions(-) delete mode 100644 pkg/storage/bucket/azure/config_test.go diff --git a/pkg/storage/bucket/azure/bucket_client.go b/pkg/storage/bucket/azure/bucket_client.go index 1207b71ffd1a..0cd5e6b3bacf 100644 --- a/pkg/storage/bucket/azure/bucket_client.go +++ b/pkg/storage/bucket/azure/bucket_client.go @@ -4,7 +4,6 @@ import ( "net/http" "github.com/go-kit/log" - "github.com/prometheus/common/model" "github.com/thanos-io/objstore" "github.com/thanos-io/objstore/providers/azure" ) @@ -19,26 +18,20 @@ func newBucketClient(cfg Config, name string, logger log.Logger, factory func(lo bucketConfig := azure.DefaultConfig bucketConfig.StorageAccountName = cfg.StorageAccountName bucketConfig.StorageAccountKey = cfg.StorageAccountKey.String() + bucketConfig.StorageConnectionString = cfg.StorageConnectionString.String() bucketConfig.ContainerName = cfg.ContainerName bucketConfig.MaxRetries = cfg.MaxRetries bucketConfig.UserAssignedID = cfg.UserAssignedID - bucketConfig.PipelineConfig.MaxRetryDelay = model.Duration(cfg.MaxRetryDelay) - - bucketConfig.HTTPConfig = azure.HTTPConfig{ - IdleConnTimeout: model.Duration(cfg.HTTP.IdleConnTimeout), - ResponseHeaderTimeout: model.Duration(cfg.HTTP.ResponseHeaderTimeout), - InsecureSkipVerify: cfg.HTTP.InsecureSkipVerify, - TLSHandshakeTimeout: model.Duration(cfg.HTTP.TLSHandshakeTimeout), - ExpectContinueTimeout: model.Duration(cfg.HTTP.ExpectContinueTimeout), - MaxIdleConns: cfg.HTTP.MaxIdleConns, - MaxIdleConnsPerHost: cfg.HTTP.MaxIdleConnsPerHost, - MaxConnsPerHost: cfg.HTTP.MaxConnsPerHost, - } if cfg.Endpoint != "" { // azure.DefaultConfig has the default Endpoint, overwrite it only if a different one was explicitly provided. bucketConfig.Endpoint = cfg.Endpoint } - return factory(logger, bucketConfig, name, nil) + var rt http.RoundTripper + if cfg.Transport != nil { + rt = cfg.Transport + } + + return factory(logger, bucketConfig, name, rt) } diff --git a/pkg/storage/bucket/azure/config.go b/pkg/storage/bucket/azure/config.go index 9edd41f9c3eb..ac8037b6b781 100644 --- a/pkg/storage/bucket/azure/config.go +++ b/pkg/storage/bucket/azure/config.go @@ -2,23 +2,11 @@ package azure import ( "flag" - "time" - "net/http" "github.com/grafana/dskit/flagext" - - bucket_http "github.com/grafana/loki/v3/pkg/storage/bucket/http" ) -// HTTPConfig stores the http.Transport configuration for the blob storage client. 
-type HTTPConfig struct { - bucket_http.Config `yaml:",inline"` - - // Allow upstream callers to inject a round tripper - Transport http.RoundTripper `yaml:"-"` -} - // Config holds the config options for an Azure backend type Config struct { StorageAccountName string `yaml:"account_name"` @@ -26,11 +14,11 @@ type Config struct { StorageConnectionString flagext.Secret `yaml:"connection_string"` ContainerName string `yaml:"container_name"` Endpoint string `yaml:"endpoint_suffix"` - UserAssignedID string `yaml:"user_assigned_id"` MaxRetries int `yaml:"max_retries"` - MaxRetryDelay time.Duration `yaml:"max_retry_delay"` + UserAssignedID string `yaml:"user_assigned_id"` - HTTP HTTPConfig `yaml:"http"` + // Allow upstream callers to inject a round tripper + Transport http.RoundTripper `yaml:"-"` } // RegisterFlags registers the flags for Azure storage @@ -41,12 +29,10 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { // RegisterFlagsWithPrefix registers the flags for Azure storage func (cfg *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { f.StringVar(&cfg.StorageAccountName, prefix+"azure.account-name", "", "Azure storage account name") - f.Var(&cfg.StorageAccountKey, prefix+"azure.account-key", "Azure storage account key") - f.Var(&cfg.StorageConnectionString, prefix+"azure.connection-string", "If `connection-string` is set, the values of `account-name` and `endpoint-suffix` values will not be used. Use this method over `account-key` if you need to authenticate via a SAS token. Or if you use the Azurite emulator.") - f.StringVar(&cfg.ContainerName, prefix+"azure.container-name", "loki", "Azure storage container name") - f.StringVar(&cfg.Endpoint, prefix+"azure.endpoint-suffix", "", "Azure storage endpoint suffix without schema. The account name will be prefixed to this value to create the FQDN") - f.StringVar(&cfg.UserAssignedID, prefix+"azure.user-assigned-id", "", "User assigned identity ID to authenticate to the Azure storage account.") + f.Var(&cfg.StorageAccountKey, prefix+"azure.account-key", "Azure storage account key. If unset, Azure managed identities will be used for authentication instead.") + f.Var(&cfg.StorageConnectionString, prefix+"azure.connection-string", "If `connection-string` is set, the value of `endpoint-suffix` will not be used. Use this method over `account-key` if you need to authenticate via a SAS token. Or if you use the Azurite emulator.") + f.StringVar(&cfg.ContainerName, prefix+"azure.container-name", "", "Azure storage container name") + f.StringVar(&cfg.Endpoint, prefix+"azure.endpoint-suffix", "", "Azure storage endpoint suffix without schema. The account name will be prefixed to this value to create the FQDN. If set to empty string, default endpoint suffix is used.") f.IntVar(&cfg.MaxRetries, prefix+"azure.max-retries", 20, "Number of retries for recoverable errors") - f.DurationVar(&cfg.MaxRetryDelay, prefix+"azure.max-retry-delay", 500*time.Millisecond, "Maximum time to wait before retrying a request.") - cfg.HTTP.RegisterFlagsWithPrefix(prefix+"azure.http", f) + f.StringVar(&cfg.UserAssignedID, prefix+"azure.user-assigned-id", "", "User assigned managed identity. 
If empty, then System assigned identity is used.") } diff --git a/pkg/storage/bucket/azure/config_test.go b/pkg/storage/bucket/azure/config_test.go deleted file mode 100644 index fc91f2b892f5..000000000000 --- a/pkg/storage/bucket/azure/config_test.go +++ /dev/null @@ -1,105 +0,0 @@ -package azure - -import ( - "testing" - "time" - - "github.com/grafana/dskit/flagext" - "github.com/stretchr/testify/require" - yaml "gopkg.in/yaml.v2" - - "github.com/grafana/loki/v3/pkg/storage/bucket/http" -) - -// defaultConfig should match the default flag values defined in RegisterFlagsWithPrefix. -var defaultConfig = Config{ - ContainerName: "loki", - MaxRetries: 20, - MaxRetryDelay: 500000000, - HTTP: HTTPConfig{ - Config: http.Config{ - IdleConnTimeout: 90 * time.Second, - ResponseHeaderTimeout: 2 * time.Minute, - InsecureSkipVerify: false, - TLSHandshakeTimeout: 10 * time.Second, - ExpectContinueTimeout: 1 * time.Second, - MaxIdleConns: 100, - MaxIdleConnsPerHost: 100, - MaxConnsPerHost: 0, - }, - }, -} - -func TestConfig(t *testing.T) { - t.Parallel() - - tests := map[string]struct { - config string - expectedConfig Config - expectedErr error - }{ - "default config": { - config: "", - expectedConfig: defaultConfig, - expectedErr: nil, - }, - "custom config": { - config: ` -account_name: test-account-name -account_key: test-account-key -connection_string: test-connection-string -container_name: test-container-name -endpoint_suffix: test-endpoint-suffix -max_retries: 1 -max_retry_delay: 500000000 -http: - idle_conn_timeout: 2s - response_header_timeout: 3s - insecure_skip_verify: true - tls_handshake_timeout: 4s - expect_continue_timeout: 5s - max_idle_connections: 6 - max_idle_connections_per_host: 7 - max_connections_per_host: 8 -`, - expectedConfig: Config{ - StorageAccountName: "test-account-name", - StorageAccountKey: flagext.SecretWithValue("test-account-key"), - StorageConnectionString: flagext.SecretWithValue("test-connection-string"), - ContainerName: "test-container-name", - Endpoint: "test-endpoint-suffix", - MaxRetries: 1, - MaxRetryDelay: 500000000, - HTTP: HTTPConfig{ - Config: http.Config{ - IdleConnTimeout: 2 * time.Second, - ResponseHeaderTimeout: 3 * time.Second, - InsecureSkipVerify: true, - TLSHandshakeTimeout: 4 * time.Second, - ExpectContinueTimeout: 5 * time.Second, - MaxIdleConns: 6, - MaxIdleConnsPerHost: 7, - MaxConnsPerHost: 8, - }, - }, - }, - expectedErr: nil, - }, - "invalid type": { - config: `max_retries: foo`, - expectedConfig: defaultConfig, - expectedErr: &yaml.TypeError{Errors: []string{"line 1: cannot unmarshal !!str `foo` into int"}}, - }, - } - - for testName, testData := range tests { - t.Run(testName, func(t *testing.T) { - cfg := Config{} - flagext.DefaultValues(&cfg) - - err := yaml.Unmarshal([]byte(testData.config), &cfg) - require.Equal(t, testData.expectedErr, err) - require.Equal(t, testData.expectedConfig, cfg) - }) - } -} diff --git a/pkg/storage/chunk/client/azure/blob_storage_thanos_object_client.go b/pkg/storage/chunk/client/azure/blob_storage_thanos_object_client.go index f5dc9738c482..dc1c837cc05d 100644 --- a/pkg/storage/chunk/client/azure/blob_storage_thanos_object_client.go +++ b/pkg/storage/chunk/client/azure/blob_storage_thanos_object_client.go @@ -43,7 +43,7 @@ func newBlobStorageThanosObjClient(ctx context.Context, cfg bucket.Config, compo return nil, err } - cfg.Azure.HTTP.Transport = hedgedTrasport + cfg.Azure.Transport = hedgedTrasport } return bucket.NewClient(ctx, bucket.Azure, cfg, component, logger) @@ -120,6 +120,18 @@ func (s 
*BlobStorageThanosObjectClient) List(ctx context.Context, prefix, delimi return storageObjects, commonPrefixes, nil } +// GetAttributes returns the attributes of the specified object key from the configured GCS bucket. +func (s *BlobStorageThanosObjectClient) GetAttributes(ctx context.Context, objectKey string) (client.ObjectAttributes, error) { + attr := client.ObjectAttributes{} + thanosAttr, err := s.hedgedClient.Attributes(ctx, objectKey) + if err != nil { + return attr, err + } + + attr.Size = thanosAttr.Size + return attr, nil +} + // IsObjectNotFoundErr returns true if error means that object is not found. Relevant to GetObject and DeleteObject operations. func (s *BlobStorageThanosObjectClient) IsObjectNotFoundErr(err error) bool { return s.client.IsObjNotFoundErr(err) diff --git a/pkg/storage/store.go b/pkg/storage/store.go index 84c0da6d1e57..a8e6a1add323 100644 --- a/pkg/storage/store.go +++ b/pkg/storage/store.go @@ -228,6 +228,7 @@ func (s *LokiStore) chunkClientForPeriod(p config.PeriodConfig) (client.Client, if objectStoreType == "" { objectStoreType = p.IndexType } + var cc congestion.Controller ccCfg := s.cfg.CongestionControl From 8d92fbdd21699df8f9bd566f34195726b3df7007 Mon Sep 17 00:00:00 2001 From: Ashwanth Goli Date: Mon, 21 Oct 2024 18:29:09 +0530 Subject: [PATCH 08/20] add missing methods to azure thanos adapter --- .../chunk/client/azure/blob_storage_thanos_object_client.go | 6 +++++- pkg/storage/factory.go | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pkg/storage/chunk/client/azure/blob_storage_thanos_object_client.go b/pkg/storage/chunk/client/azure/blob_storage_thanos_object_client.go index dc1c837cc05d..a3c05a31756a 100644 --- a/pkg/storage/chunk/client/azure/blob_storage_thanos_object_client.go +++ b/pkg/storage/chunk/client/azure/blob_storage_thanos_object_client.go @@ -58,7 +58,7 @@ func (s *BlobStorageThanosObjectClient) ObjectExists(ctx context.Context, object } // PutObject into the store -func (s *BlobStorageThanosObjectClient) PutObject(ctx context.Context, objectKey string, object io.ReadSeeker) error { +func (s *BlobStorageThanosObjectClient) PutObject(ctx context.Context, objectKey string, object io.Reader) error { return s.client.Upload(ctx, objectKey, object) } @@ -82,6 +82,10 @@ func (s *BlobStorageThanosObjectClient) GetObject(ctx context.Context, objectKey return reader, attr.Size, err } +func (s *BlobStorageThanosObjectClient) GetObjectRange(ctx context.Context, objectKey string, offset, length int64) (io.ReadCloser, error) { + return s.hedgedClient.GetRange(ctx, objectKey, offset, length) +} + // List implements chunk.ObjectClient. 
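 // An empty delimiter lists keys recursively; a non-empty delimiter returns keys
 // that end in it as StorageCommonPrefix entries instead of storage objects.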
func (s *BlobStorageThanosObjectClient) List(ctx context.Context, prefix, delimiter string) ([]client.StorageObject, []client.StorageCommonPrefix, error) { var storageObjects []client.StorageObject diff --git a/pkg/storage/factory.go b/pkg/storage/factory.go index f0d5d172a875..9e3f6cdb8410 100644 --- a/pkg/storage/factory.go +++ b/pkg/storage/factory.go @@ -695,7 +695,7 @@ func internalNewObjectClient(storeName, component string, cfg Config, clientMetr } if cfg.UseThanosObjstore { clientMetrics.Unregister() - azure.NewBlobStorageThanosObjectClient(context.Background(), cfg.ObjectStore, component, util_log.Logger, cfg.Hedging) + return azure.NewBlobStorageThanosObjectClient(context.Background(), cfg.ObjectStore, component, util_log.Logger, cfg.Hedging) } return azure.NewBlobStorage(&azureCfg, clientMetrics.AzureMetrics, cfg.Hedging) From 76c41fecaabf8ab580d6156d7221d4b93a904fff Mon Sep 17 00:00:00 2001 From: Ashwanth Goli Date: Tue, 22 Oct 2024 11:47:29 +0530 Subject: [PATCH 09/20] use objectclient adapter --- pkg/storage/bucket/object_client_adapter.go | 142 +++++++++++++ .../object_client_adapter_test.go} | 13 +- .../blob_storage_thanos_object_client.go | 123 +----------- .../blob_storage_thanos_object_client_test.go | 123 ------------ .../chunk/client/gcp/gcs_object_client.go | 11 +- .../client/gcp/gcs_object_client_test.go | 6 +- .../client/gcp/gcs_thanos_object_client.go | 190 +----------------- 7 files changed, 181 insertions(+), 427 deletions(-) create mode 100644 pkg/storage/bucket/object_client_adapter.go rename pkg/storage/{chunk/client/gcp/gcs_thanos_object_client_test.go => bucket/object_client_adapter_test.go} (92%) delete mode 100644 pkg/storage/chunk/client/azure/blob_storage_thanos_object_client_test.go diff --git a/pkg/storage/bucket/object_client_adapter.go b/pkg/storage/bucket/object_client_adapter.go new file mode 100644 index 000000000000..0bcd97ac51af --- /dev/null +++ b/pkg/storage/bucket/object_client_adapter.go @@ -0,0 +1,142 @@ +package bucket + +import ( + "context" + "io" + "strings" + + "github.com/go-kit/log" + "github.com/go-kit/log/level" + "github.com/pkg/errors" + + "github.com/grafana/loki/v3/pkg/storage/chunk/client" + "github.com/thanos-io/objstore" +) + +type ObjectClientAdapter struct { + bucket, hedgedBucket objstore.Bucket + logger log.Logger + isRetryableErr func(err error) bool +} + +func NewObjectClientAdapter(bucket, hedgedBucket objstore.Bucket, logger log.Logger) *ObjectClientAdapter { + if hedgedBucket == nil { + hedgedBucket = bucket + } + + return &ObjectClientAdapter{ + bucket: bucket, + hedgedBucket: hedgedBucket, + logger: logger, + // default to no retryable errors. Override with WithRetryableErr + isRetryableErr: func(err error) bool { + return false + }, + } +} + +func WithRetryableErrFunc(f func(err error) bool) func(*ObjectClientAdapter) { + return func(o *ObjectClientAdapter) { + o.isRetryableErr = f + } +} + +func (t *ObjectClientAdapter) Stop() { +} + +// ObjectExists checks if a given objectKey exists in the GCS bucket +func (s *ObjectClientAdapter) ObjectExists(ctx context.Context, objectKey string) (bool, error) { + return s.bucket.Exists(ctx, objectKey) +} + +// GetAttributes returns the attributes of the specified object key from the configured GCS bucket. 
+func (s *ObjectClientAdapter) GetAttributes(ctx context.Context, objectKey string) (client.ObjectAttributes, error) {
+	attr := client.ObjectAttributes{}
+	thanosAttr, err := s.hedgedBucket.Attributes(ctx, objectKey)
+	if err != nil {
+		return attr, err
+	}
+
+	attr.Size = thanosAttr.Size
+	return attr, nil
+}
+
+// PutObject puts the specified bytes into the configured GCS bucket at the provided key
+func (s *ObjectClientAdapter) PutObject(ctx context.Context, objectKey string, object io.Reader) error {
+	return s.bucket.Upload(ctx, objectKey, object)
+}
+
+// GetObject returns a reader and the size for the specified object key from the configured GCS bucket.
+// size is set to -1 if it cannot be successfully determined; it is up to the caller to check this value before using it.
+func (s *ObjectClientAdapter) GetObject(ctx context.Context, objectKey string) (io.ReadCloser, int64, error) {
+	reader, err := s.hedgedBucket.Get(ctx, objectKey)
+	if err != nil {
+		return nil, 0, err
+	}
+
+	size, err := objstore.TryToGetSize(reader)
+	if err != nil {
+		size = -1
+		level.Warn(s.logger).Log("msg", "failed to get size of object", "err", err)
+	}
+
+	return reader, size, err
+}
+
+func (s *ObjectClientAdapter) GetObjectRange(ctx context.Context, objectKey string, offset, length int64) (io.ReadCloser, error) {
+	return s.hedgedBucket.GetRange(ctx, objectKey, offset, length)
+}
+
+// List objects with given prefix.
+func (s *ObjectClientAdapter) List(ctx context.Context, prefix, delimiter string) ([]client.StorageObject, []client.StorageCommonPrefix, error) {
+	var storageObjects []client.StorageObject
+	var commonPrefixes []client.StorageCommonPrefix
+	var iterParams []objstore.IterOption
+
+	// If delimiter is empty we want to list all files
+	if delimiter == "" {
+		iterParams = append(iterParams, objstore.WithRecursiveIter)
+	}
+
+	err := s.bucket.Iter(ctx, prefix, func(objectKey string) error {
+		// CommonPrefixes are keys that have the prefix and have the delimiter
+		// as a suffix
+		if delimiter != "" && strings.HasSuffix(objectKey, delimiter) {
+			commonPrefixes = append(commonPrefixes, client.StorageCommonPrefix(objectKey))
+			return nil
+		}
+
+		// TODO: remove this once thanos support IterWithAttributes
+		attr, err := s.bucket.Attributes(ctx, objectKey)
+		if err != nil {
+			return errors.Wrapf(err, "failed to get attributes for %s", objectKey)
+		}
+
+		storageObjects = append(storageObjects, client.StorageObject{
+			Key:        objectKey,
+			ModifiedAt: attr.LastModified,
+		})
+
+		return nil
+	}, iterParams...)
+	if err != nil {
+		return nil, nil, err
+	}
+
+	return storageObjects, commonPrefixes, nil
+}
+
+// DeleteObject deletes the specified object key from the configured GCS bucket.
+func (s *ObjectClientAdapter) DeleteObject(ctx context.Context, objectKey string) error {
+	return s.bucket.Delete(ctx, objectKey)
+}
+
+// IsObjectNotFoundErr returns true if error means that object is not found. Relevant to GetObject and DeleteObject operations.
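+// The check is delegated to the wrapped thanos objstore bucket, so no
+// provider-specific error inspection is needed here.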
+func (s *ObjectClientAdapter) IsObjectNotFoundErr(err error) bool { + return s.bucket.IsObjNotFoundErr(err) +} + +// IsRetryableErr returns true if the request failed due to some retryable server-side scenario +func (s *ObjectClientAdapter) IsRetryableErr(err error) bool { + return s.isRetryableErr(err) +} diff --git a/pkg/storage/chunk/client/gcp/gcs_thanos_object_client_test.go b/pkg/storage/bucket/object_client_adapter_test.go similarity index 92% rename from pkg/storage/chunk/client/gcp/gcs_thanos_object_client_test.go rename to pkg/storage/bucket/object_client_adapter_test.go index d8b824a6e5d0..28fe64e38a08 100644 --- a/pkg/storage/chunk/client/gcp/gcs_thanos_object_client_test.go +++ b/pkg/storage/bucket/object_client_adapter_test.go @@ -1,4 +1,4 @@ -package gcp +package bucket import ( "bytes" @@ -6,13 +6,12 @@ import ( "sort" "testing" - "github.com/stretchr/testify/require" - "github.com/grafana/loki/v3/pkg/storage/bucket/filesystem" "github.com/grafana/loki/v3/pkg/storage/chunk/client" + "github.com/stretchr/testify/require" ) -func TestGCSThanosObjStore_List(t *testing.T) { +func TestAdapter_List(t *testing.T) { tests := []struct { name string prefix string @@ -95,10 +94,10 @@ func TestGCSThanosObjStore_List(t *testing.T) { require.NoError(t, newBucket.Upload(context.Background(), "depply/nested/folder/b", buff)) require.NoError(t, newBucket.Upload(context.Background(), "depply/nested/folder/c", buff)) - gcpClient := &GCSThanosObjectClient{} - gcpClient.client = newBucket + client := NewObjectClientAdapter(newBucket, nil, nil) + client.bucket = newBucket - storageObj, storageCommonPref, err := gcpClient.List(context.Background(), tt.prefix, tt.delimiter) + storageObj, storageCommonPref, err := client.List(context.Background(), tt.prefix, tt.delimiter) if tt.wantErr != nil { require.Equal(t, tt.wantErr.Error(), err.Error()) continue diff --git a/pkg/storage/chunk/client/azure/blob_storage_thanos_object_client.go b/pkg/storage/chunk/client/azure/blob_storage_thanos_object_client.go index a3c05a31756a..4bf213743306 100644 --- a/pkg/storage/chunk/client/azure/blob_storage_thanos_object_client.go +++ b/pkg/storage/chunk/client/azure/blob_storage_thanos_object_client.go @@ -2,11 +2,8 @@ package azure import ( "context" - "io" - "strings" "github.com/go-kit/log" - "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/thanos-io/objstore" @@ -15,25 +12,22 @@ import ( "github.com/grafana/loki/v3/pkg/storage/chunk/client/hedging" ) -type BlobStorageThanosObjectClient struct { - client objstore.Bucket - hedgedClient objstore.Bucket -} - // NewBlobStorageObjectClient makes a new BlobStorage-backed ObjectClient. 
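+// Usage sketch (assumes a populated bucket.Config and hedging.Config):
+//
+//	objClient, err := NewBlobStorageThanosObjectClient(ctx, cfg, "chunks", logger, hedgingCfg)
+//	if err != nil {
+//		return err
+//	}
+//	err = objClient.PutObject(ctx, "loki/example-key", bytes.NewReader(data))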
-func NewBlobStorageThanosObjectClient(ctx context.Context, cfg bucket.Config, component string, logger log.Logger, hedgingCfg hedging.Config) (*BlobStorageThanosObjectClient, error) { - client, err := newBlobStorageThanosObjClient(ctx, cfg, component, logger, false, hedgingCfg) +func NewBlobStorageThanosObjectClient(ctx context.Context, cfg bucket.Config, component string, logger log.Logger, hedgingCfg hedging.Config) (client.ObjectClient, error) { + b, err := newBlobStorageThanosObjClient(ctx, cfg, component, logger, false, hedgingCfg) if err != nil { return nil, err } - hedgedClient, err := newBlobStorageThanosObjClient(ctx, cfg, component, logger, true, hedgingCfg) - if err != nil { - return nil, err + + var hedged objstore.Bucket + if hedgingCfg.At != 0 { + hedged, err = newBlobStorageThanosObjClient(ctx, cfg, component, logger, true, hedgingCfg) + if err != nil { + return nil, err + } } - return &BlobStorageThanosObjectClient{ - client: client, - hedgedClient: hedgedClient, - }, nil + + return bucket.NewObjectClientAdapter(b, hedged, logger), nil } func newBlobStorageThanosObjClient(ctx context.Context, cfg bucket.Config, component string, logger log.Logger, hedging bool, hedgingCfg hedging.Config) (objstore.Bucket, error) { @@ -48,98 +42,3 @@ func newBlobStorageThanosObjClient(ctx context.Context, cfg bucket.Config, compo return bucket.NewClient(ctx, bucket.Azure, cfg, component, logger) } - -// Stop fulfills the chunk.ObjectClient interface -func (s *BlobStorageThanosObjectClient) Stop() {} - -// ObjectExists checks if a given objectKey exists in the AWS bucket -func (s *BlobStorageThanosObjectClient) ObjectExists(ctx context.Context, objectKey string) (bool, error) { - return s.hedgedClient.Exists(ctx, objectKey) -} - -// PutObject into the store -func (s *BlobStorageThanosObjectClient) PutObject(ctx context.Context, objectKey string, object io.Reader) error { - return s.client.Upload(ctx, objectKey, object) -} - -// DeleteObject deletes the specified objectKey from the appropriate BlobStorage bucket -func (s *BlobStorageThanosObjectClient) DeleteObject(ctx context.Context, objectKey string) error { - return s.client.Delete(ctx, objectKey) -} - -// GetObject returns a reader and the size for the specified object key from the configured BlobStorage bucket. -func (s *BlobStorageThanosObjectClient) GetObject(ctx context.Context, objectKey string) (io.ReadCloser, int64, error) { - reader, err := s.hedgedClient.Get(ctx, objectKey) - if err != nil { - return nil, 0, err - } - - attr, err := s.hedgedClient.Attributes(ctx, objectKey) - if err != nil { - return nil, 0, errors.Wrapf(err, "failed to get attributes for %s", objectKey) - } - - return reader, attr.Size, err -} - -func (s *BlobStorageThanosObjectClient) GetObjectRange(ctx context.Context, objectKey string, offset, length int64) (io.ReadCloser, error) { - return s.hedgedClient.GetRange(ctx, objectKey, offset, length) -} - -// List implements chunk.ObjectClient. 
-func (s *BlobStorageThanosObjectClient) List(ctx context.Context, prefix, delimiter string) ([]client.StorageObject, []client.StorageCommonPrefix, error) { - var storageObjects []client.StorageObject - var commonPrefixes []client.StorageCommonPrefix - var iterParams []objstore.IterOption - - // If delimiter is empty we want to list all files - if delimiter == "" { - iterParams = append(iterParams, objstore.WithRecursiveIter) - } - - err := s.client.Iter(ctx, prefix, func(objectKey string) error { - // CommonPrefixes are keys that have the prefix and have the delimiter - // as a suffix - if delimiter != "" && strings.HasSuffix(objectKey, delimiter) { - commonPrefixes = append(commonPrefixes, client.StorageCommonPrefix(objectKey)) - return nil - } - attr, err := s.client.Attributes(ctx, objectKey) - if err != nil { - return errors.Wrapf(err, "failed to get attributes for %s", objectKey) - } - - storageObjects = append(storageObjects, client.StorageObject{ - Key: objectKey, - ModifiedAt: attr.LastModified, - }) - - return nil - - }, iterParams...) - if err != nil { - return nil, nil, err - } - - return storageObjects, commonPrefixes, nil -} - -// GetAttributes returns the attributes of the specified object key from the configured GCS bucket. -func (s *BlobStorageThanosObjectClient) GetAttributes(ctx context.Context, objectKey string) (client.ObjectAttributes, error) { - attr := client.ObjectAttributes{} - thanosAttr, err := s.hedgedClient.Attributes(ctx, objectKey) - if err != nil { - return attr, err - } - - attr.Size = thanosAttr.Size - return attr, nil -} - -// IsObjectNotFoundErr returns true if error means that object is not found. Relevant to GetObject and DeleteObject operations. -func (s *BlobStorageThanosObjectClient) IsObjectNotFoundErr(err error) bool { - return s.client.IsObjNotFoundErr(err) -} - -// TODO(dannyk): implement for client -func (s *BlobStorageThanosObjectClient) IsRetryableErr(error) bool { return false } diff --git a/pkg/storage/chunk/client/azure/blob_storage_thanos_object_client_test.go b/pkg/storage/chunk/client/azure/blob_storage_thanos_object_client_test.go deleted file mode 100644 index fe8455b629f3..000000000000 --- a/pkg/storage/chunk/client/azure/blob_storage_thanos_object_client_test.go +++ /dev/null @@ -1,123 +0,0 @@ -package azure - -import ( - "bytes" - "context" - "sort" - "testing" - - "github.com/stretchr/testify/require" - - "github.com/grafana/loki/v3/pkg/storage/bucket/filesystem" - "github.com/grafana/loki/v3/pkg/storage/chunk/client" -) - -func TestAzureThanosObjStore_List(t *testing.T) { - tests := []struct { - name string - prefix string - delimiter string - storageObjKeys []string - storageCommonPref []client.StorageCommonPrefix - wantErr error - }{ - { - "list_top_level_only", - "", - "/", - []string{"top-level-file-1", "top-level-file-2"}, - []client.StorageCommonPrefix{"dir-1/", "dir-2/", "depply/"}, - nil, - }, - { - "list_all_dir_1", - "dir-1", - "", - []string{"dir-1/file-1", "dir-1/file-2"}, - nil, - nil, - }, - { - "list_recursive", - "", - "", - []string{ - "top-level-file-1", - "top-level-file-2", - "dir-1/file-1", - "dir-1/file-2", - "dir-2/file-3", - "dir-2/file-4", - "dir-2/file-5", - "depply/nested/folder/a", - "depply/nested/folder/b", - "depply/nested/folder/c", - }, - nil, - nil, - }, - { - "unknown_prefix", - "test", - "", - []string{}, - nil, - nil, - }, - { - "only_storage_common_prefix", - "depply/", - "/", - []string{}, - []client.StorageCommonPrefix{ - "depply/nested/", - }, - nil, - }, - } - - for _, tt := range tests 
{ - config := filesystem.Config{ - Directory: t.TempDir(), - } - newBucket, err := filesystem.NewBucketClient(config) - require.NoError(t, err) - - buff := bytes.NewBufferString("foo") - require.NoError(t, newBucket.Upload(context.Background(), "top-level-file-1", buff)) - require.NoError(t, newBucket.Upload(context.Background(), "top-level-file-2", buff)) - require.NoError(t, newBucket.Upload(context.Background(), "dir-1/file-1", buff)) - require.NoError(t, newBucket.Upload(context.Background(), "dir-1/file-2", buff)) - require.NoError(t, newBucket.Upload(context.Background(), "dir-2/file-3", buff)) - require.NoError(t, newBucket.Upload(context.Background(), "dir-2/file-4", buff)) - require.NoError(t, newBucket.Upload(context.Background(), "dir-2/file-5", buff)) - require.NoError(t, newBucket.Upload(context.Background(), "depply/nested/folder/a", buff)) - require.NoError(t, newBucket.Upload(context.Background(), "depply/nested/folder/b", buff)) - require.NoError(t, newBucket.Upload(context.Background(), "depply/nested/folder/c", buff)) - - gcpClient := &BlobStorageThanosObjectClient{} - gcpClient.client = newBucket - - storageObj, storageCommonPref, err := gcpClient.List(context.Background(), tt.prefix, tt.delimiter) - if tt.wantErr != nil { - require.Equal(t, tt.wantErr.Error(), err.Error()) - continue - } - - keys := []string{} - for _, key := range storageObj { - keys = append(keys, key.Key) - } - - sort.Slice(tt.storageObjKeys, func(i, j int) bool { - return tt.storageObjKeys[i] < tt.storageObjKeys[j] - }) - sort.Slice(tt.storageCommonPref, func(i, j int) bool { - return tt.storageCommonPref[i] < tt.storageCommonPref[j] - }) - - require.NoError(t, err) - require.Equal(t, tt.storageObjKeys, keys) - require.Equal(t, tt.storageCommonPref, storageCommonPref) - } -} diff --git a/pkg/storage/chunk/client/gcp/gcs_object_client.go b/pkg/storage/chunk/client/gcp/gcs_object_client.go index 9b05b57404c4..1d44659b3f3c 100644 --- a/pkg/storage/chunk/client/gcp/gcs_object_client.go +++ b/pkg/storage/chunk/client/gcp/gcs_object_client.go @@ -279,7 +279,7 @@ func isContextErr(err error) bool { } // IsStorageTimeoutErr returns true if error means that object cannot be retrieved right now due to server-side timeouts. -func (s *GCSObjectClient) IsStorageTimeoutErr(err error) bool { +func IsStorageTimeoutErr(err error) bool { // TODO(dannyk): move these out to be generic // context errors are all client-side if isContextErr(err) { @@ -315,7 +315,7 @@ func (s *GCSObjectClient) IsStorageTimeoutErr(err error) bool { } // IsStorageThrottledErr returns true if error means that object cannot be retrieved right now due to throttling. 
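 // Promoted alongside IsStorageTimeoutErr from a method to a package-level function
 // so the thanos-backed client can reuse the same classification via bucket.WithRetryableErrFunc.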
-func (s *GCSObjectClient) IsStorageThrottledErr(err error) bool { +func IsStorageThrottledErr(err error) bool { if gerr, ok := err.(*googleapi.Error); ok { // https://cloud.google.com/storage/docs/retry-strategy return gerr.Code == http.StatusTooManyRequests || @@ -325,9 +325,14 @@ func (s *GCSObjectClient) IsStorageThrottledErr(err error) bool { return false } +// IsRetryableErr returns true if the request failed due to some retryable server-side scenario +func IsRetryableErr(err error) bool { + return IsStorageTimeoutErr(err) || IsStorageThrottledErr(err) +} + // IsRetryableErr returns true if the request failed due to some retryable server-side scenario func (s *GCSObjectClient) IsRetryableErr(err error) bool { - return s.IsStorageTimeoutErr(err) || s.IsStorageThrottledErr(err) + return IsRetryableErr(err) } func gcsTransport(ctx context.Context, scope string, insecure bool, http2 bool, serviceAccount flagext.Secret) (http.RoundTripper, error) { diff --git a/pkg/storage/chunk/client/gcp/gcs_object_client_test.go b/pkg/storage/chunk/client/gcp/gcs_object_client_test.go index c885c4c1d780..a0e6313f7ce4 100644 --- a/pkg/storage/chunk/client/gcp/gcs_object_client_test.go +++ b/pkg/storage/chunk/client/gcp/gcs_object_client_test.go @@ -147,8 +147,8 @@ func TestUpstreamRetryableErrs(t *testing.T) { require.NoError(t, err) _, _, err = cli.GetObject(ctx, "foo") - require.Equal(t, tc.isThrottledErr, cli.IsStorageThrottledErr(err)) - require.Equal(t, tc.isTimeoutErr, cli.IsStorageTimeoutErr(err)) + require.Equal(t, tc.isThrottledErr, IsStorageThrottledErr(err)) + require.Equal(t, tc.isTimeoutErr, IsStorageTimeoutErr(err)) }) } } @@ -229,7 +229,7 @@ func TestTCPErrs(t *testing.T) { _, _, err = cli.GetObject(ctx, "foo") require.Error(t, err) - require.Equal(t, tc.retryable, cli.IsStorageTimeoutErr(err)) + require.Equal(t, tc.retryable, IsStorageTimeoutErr(err)) }) } } diff --git a/pkg/storage/chunk/client/gcp/gcs_thanos_object_client.go b/pkg/storage/chunk/client/gcp/gcs_thanos_object_client.go index af0ae55b82cc..1395413cf4ca 100644 --- a/pkg/storage/chunk/client/gcp/gcs_thanos_object_client.go +++ b/pkg/storage/chunk/client/gcp/gcs_thanos_object_client.go @@ -2,54 +2,33 @@ package gcp import ( "context" - "io" - "net" - "net/http" - "strings" "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/thanos-io/objstore" - "google.golang.org/api/googleapi" - amnet "k8s.io/apimachinery/pkg/util/net" "github.com/grafana/loki/v3/pkg/storage/bucket" "github.com/grafana/loki/v3/pkg/storage/chunk/client" "github.com/grafana/loki/v3/pkg/storage/chunk/client/hedging" ) -type GCSThanosObjectClient struct { - client objstore.Bucket - hedgedClient objstore.Bucket - logger log.Logger -} - -func NewGCSThanosObjectClient(ctx context.Context, cfg bucket.Config, component string, logger log.Logger, hedgingCfg hedging.Config) (*GCSThanosObjectClient, error) { - client, err := newGCSThanosObjectClient(ctx, cfg, component, logger, false, hedgingCfg) +func NewGCSThanosObjectClient(ctx context.Context, cfg bucket.Config, component string, logger log.Logger, hedgingCfg hedging.Config) (client.ObjectClient, error) { + b, err := newGCSThanosObjectClient(ctx, cfg, component, logger, false, hedgingCfg) if err != nil { return nil, err } - if hedgingCfg.At == 0 { - return &GCSThanosObjectClient{ - client: client, - hedgedClient: client, - logger: logger, - }, nil - } - - hedgedClient, err := newGCSThanosObjectClient(ctx, cfg, component, 
logger, true, hedgingCfg) - if err != nil { - return nil, err + var hedged objstore.Bucket + if hedgingCfg.At != 0 { + hedged, err = newGCSThanosObjectClient(ctx, cfg, component, logger, true, hedgingCfg) + if err != nil { + return nil, err + } } - return &GCSThanosObjectClient{ - client: client, - hedgedClient: hedgedClient, - logger: logger, - }, nil + o := bucket.NewObjectClientAdapter(b, hedged, logger) + bucket.WithRetryableErrFunc(IsRetryableErr)(o) + return o, nil } func newGCSThanosObjectClient(ctx context.Context, cfg bucket.Config, component string, logger log.Logger, hedging bool, hedgingCfg hedging.Config) (objstore.Bucket, error) { @@ -64,150 +43,3 @@ func newGCSThanosObjectClient(ctx context.Context, cfg bucket.Config, component return bucket.NewClient(ctx, bucket.GCS, cfg, component, logger) } - -func (s *GCSThanosObjectClient) Stop() { -} - -// ObjectExists checks if a given objectKey exists in the GCS bucket -func (s *GCSThanosObjectClient) ObjectExists(ctx context.Context, objectKey string) (bool, error) { - return s.client.Exists(ctx, objectKey) -} - -// GetAttributes returns the attributes of the specified object key from the configured GCS bucket. -func (s *GCSThanosObjectClient) GetAttributes(ctx context.Context, objectKey string) (client.ObjectAttributes, error) { - attr := client.ObjectAttributes{} - thanosAttr, err := s.hedgedClient.Attributes(ctx, objectKey) - if err != nil { - return attr, err - } - - attr.Size = thanosAttr.Size - return attr, nil -} - -// PutObject puts the specified bytes into the configured GCS bucket at the provided key -func (s *GCSThanosObjectClient) PutObject(ctx context.Context, objectKey string, object io.Reader) error { - return s.client.Upload(ctx, objectKey, object) -} - -// GetObject returns a reader and the size for the specified object key from the configured GCS bucket. -// size is set to -1 if it cannot be succefully determined, it is up to the caller to check this value before using it. -func (s *GCSThanosObjectClient) GetObject(ctx context.Context, objectKey string) (io.ReadCloser, int64, error) { - reader, err := s.hedgedClient.Get(ctx, objectKey) - if err != nil { - return nil, 0, err - } - - size, err := objstore.TryToGetSize(reader) - if err != nil { - size = -1 - level.Warn(s.logger).Log("msg", "failed to get size of object", "err", err) - } - - return reader, size, err -} - -func (s *GCSThanosObjectClient) GetObjectRange(ctx context.Context, objectKey string, offset, length int64) (io.ReadCloser, error) { - return s.hedgedClient.GetRange(ctx, objectKey, offset, length) -} - -// List objects with given prefix. 
-func (s *GCSThanosObjectClient) List(ctx context.Context, prefix, delimiter string) ([]client.StorageObject, []client.StorageCommonPrefix, error) { - var storageObjects []client.StorageObject - var commonPrefixes []client.StorageCommonPrefix - var iterParams []objstore.IterOption - - // If delimiter is empty we want to list all files - if delimiter == "" { - iterParams = append(iterParams, objstore.WithRecursiveIter) - } - - err := s.client.Iter(ctx, prefix, func(objectKey string) error { - // CommonPrefixes are keys that have the prefix and have the delimiter - // as a suffix - if delimiter != "" && strings.HasSuffix(objectKey, delimiter) { - commonPrefixes = append(commonPrefixes, client.StorageCommonPrefix(objectKey)) - return nil - } - - // TODO: remove this once thanos support IterWithAttributes - attr, err := s.client.Attributes(ctx, objectKey) - if err != nil { - return errors.Wrapf(err, "failed to get attributes for %s", objectKey) - } - - storageObjects = append(storageObjects, client.StorageObject{ - Key: objectKey, - ModifiedAt: attr.LastModified, - }) - - return nil - }, iterParams...) - if err != nil { - return nil, nil, err - } - - return storageObjects, commonPrefixes, nil -} - -// DeleteObject deletes the specified object key from the configured GCS bucket. -func (s *GCSThanosObjectClient) DeleteObject(ctx context.Context, objectKey string) error { - return s.client.Delete(ctx, objectKey) -} - -// IsObjectNotFoundErr returns true if error means that object is not found. Relevant to GetObject and DeleteObject operations. -func (s *GCSThanosObjectClient) IsObjectNotFoundErr(err error) bool { - return s.client.IsObjNotFoundErr(err) -} - -// IsStorageTimeoutErr returns true if error means that object cannot be retrieved right now due to server-side timeouts. -func (s *GCSThanosObjectClient) IsStorageTimeoutErr(err error) bool { - // TODO(dannyk): move these out to be generic - // context errors are all client-side - if isContextErr(err) { - // Go 1.23 changed the type of the error returned by the http client when a timeout occurs - // while waiting for headers. This is a server side timeout. - return strings.Contains(err.Error(), "Client.Timeout") - } - - // connection misconfiguration, or writing on a closed connection - // do NOT retry; this is not a server-side issue - if errors.Is(err, net.ErrClosed) || amnet.IsConnectionRefused(err) { - return false - } - - // this is a server-side timeout - if isTimeoutError(err) { - return true - } - - // connection closed (closed before established) or reset (closed after established) - // this is a server-side issue - if errors.Is(err, io.EOF) || amnet.IsConnectionReset(err) { - return true - } - - if gerr, ok := err.(*googleapi.Error); ok { - // https://cloud.google.com/storage/docs/retry-strategy - return gerr.Code == http.StatusRequestTimeout || - gerr.Code == http.StatusGatewayTimeout - } - - return false -} - -// IsStorageThrottledErr returns true if error means that object cannot be retrieved right now due to throttling. 
-func (s *GCSThanosObjectClient) IsStorageThrottledErr(err error) bool {
-	if gerr, ok := err.(*googleapi.Error); ok {
-		// https://cloud.google.com/storage/docs/retry-strategy
-		return gerr.Code == http.StatusTooManyRequests ||
-			(gerr.Code/100 == 5) // all 5xx errors are retryable
-	}
-
-	return false
-}
-
-// IsRetryableErr returns true if the request failed due to some retryable server-side scenario
-func (s *GCSThanosObjectClient) IsRetryableErr(err error) bool {
-	return s.IsStorageTimeoutErr(err) || s.IsStorageThrottledErr(err)
-}

From 0557d69e20a8b908d5f66a983ff47a243003dc93 Mon Sep 17 00:00:00 2001
From: Ashwanth Goli
Date: Tue, 22 Oct 2024 12:05:56 +0530
Subject: [PATCH 10/20] lint

---
 pkg/storage/bucket/object_client_adapter.go      | 48 +++++++++----------
 .../bucket/object_client_adapter_test.go         |  2 +-
 pkg/storage/factory.go                           |  1 -
 3 files changed, 25 insertions(+), 26 deletions(-)

diff --git a/pkg/storage/bucket/object_client_adapter.go b/pkg/storage/bucket/object_client_adapter.go
index 0bcd97ac51af..1c7a354950c0 100644
--- a/pkg/storage/bucket/object_client_adapter.go
+++ b/pkg/storage/bucket/object_client_adapter.go
@@ -27,9 +27,9 @@ func NewObjectClientAdapter(bucket, hedgedBucket objstore.Bucket, logger log.Log
 	return &ObjectClientAdapter{
 		bucket:       bucket,
 		hedgedBucket: hedgedBucket,
-		logger:       logger,
-		// default to no retryable errors. Override with WithRetryableErr
-		isRetryableErr: func(err error) bool {
+		logger:       log.With(logger, "component", "bucket_to_object_client_adapter"),
+		// default to no retryable errors. Override with WithRetryableErrFunc
+		isRetryableErr: func(_ error) bool {
 			return false
 		},
 	}
@@ -41,18 +41,18 @@ func WithRetryableErrFunc(f func(err error) bool) func(*ObjectClientAdapter) {
 	}
 }
 
-func (t *ObjectClientAdapter) Stop() {
+func (o *ObjectClientAdapter) Stop() {
 }
 
 // ObjectExists checks if a given objectKey exists in the GCS bucket
-func (s *ObjectClientAdapter) ObjectExists(ctx context.Context, objectKey string) (bool, error) {
-	return s.bucket.Exists(ctx, objectKey)
+func (o *ObjectClientAdapter) ObjectExists(ctx context.Context, objectKey string) (bool, error) {
+	return o.bucket.Exists(ctx, objectKey)
 }
 
 // GetAttributes returns the attributes of the specified object key from the configured GCS bucket.
-func (s *ObjectClientAdapter) GetAttributes(ctx context.Context, objectKey string) (client.ObjectAttributes, error) {
+func (o *ObjectClientAdapter) GetAttributes(ctx context.Context, objectKey string) (client.ObjectAttributes, error) {
 	attr := client.ObjectAttributes{}
-	thanosAttr, err := s.hedgedBucket.Attributes(ctx, objectKey)
+	thanosAttr, err := o.hedgedBucket.Attributes(ctx, objectKey)
 	if err != nil {
 		return attr, err
 	}
@@ -62,14 +62,14 @@ func (s *ObjectClientAdapter) GetAttributes(ctx context.Context, objectKey strin
 	return attr, nil
 }
 
 // PutObject puts the specified bytes into the configured GCS bucket at the provided key
-func (s *ObjectClientAdapter) PutObject(ctx context.Context, objectKey string, object io.Reader) error {
-	return s.bucket.Upload(ctx, objectKey, object)
+func (o *ObjectClientAdapter) PutObject(ctx context.Context, objectKey string, object io.Reader) error {
+	return o.bucket.Upload(ctx, objectKey, object)
 }
 
 // GetObject returns a reader and the size for the specified object key from the configured GCS bucket.
 // size is set to -1 if it cannot be successfully determined; it is up to the caller to check this value before using it.
-func (s *ObjectClientAdapter) GetObject(ctx context.Context, objectKey string) (io.ReadCloser, int64, error) { - reader, err := s.hedgedBucket.Get(ctx, objectKey) +func (o *ObjectClientAdapter) GetObject(ctx context.Context, objectKey string) (io.ReadCloser, int64, error) { + reader, err := o.hedgedBucket.Get(ctx, objectKey) if err != nil { return nil, 0, err } @@ -77,18 +77,18 @@ func (s *ObjectClientAdapter) GetObject(ctx context.Context, objectKey string) ( size, err := objstore.TryToGetSize(reader) if err != nil { size = -1 - level.Warn(s.logger).Log("msg", "failed to get size of object", "err", err) + level.Warn(o.logger).Log("msg", "failed to get size of object", "err", err) } return reader, size, err } -func (s *ObjectClientAdapter) GetObjectRange(ctx context.Context, objectKey string, offset, length int64) (io.ReadCloser, error) { - return s.hedgedBucket.GetRange(ctx, objectKey, offset, length) +func (o *ObjectClientAdapter) GetObjectRange(ctx context.Context, objectKey string, offset, length int64) (io.ReadCloser, error) { + return o.hedgedBucket.GetRange(ctx, objectKey, offset, length) } // List objects with given prefix. -func (s *ObjectClientAdapter) List(ctx context.Context, prefix, delimiter string) ([]client.StorageObject, []client.StorageCommonPrefix, error) { +func (o *ObjectClientAdapter) List(ctx context.Context, prefix, delimiter string) ([]client.StorageObject, []client.StorageCommonPrefix, error) { var storageObjects []client.StorageObject var commonPrefixes []client.StorageCommonPrefix var iterParams []objstore.IterOption @@ -98,7 +98,7 @@ func (s *ObjectClientAdapter) List(ctx context.Context, prefix, delimiter string iterParams = append(iterParams, objstore.WithRecursiveIter) } - err := s.bucket.Iter(ctx, prefix, func(objectKey string) error { + err := o.bucket.Iter(ctx, prefix, func(objectKey string) error { // CommonPrefixes are keys that have the prefix and have the delimiter // as a suffix if delimiter != "" && strings.HasSuffix(objectKey, delimiter) { @@ -107,7 +107,7 @@ func (s *ObjectClientAdapter) List(ctx context.Context, prefix, delimiter string } // TODO: remove this once thanos support IterWithAttributes - attr, err := s.bucket.Attributes(ctx, objectKey) + attr, err := o.bucket.Attributes(ctx, objectKey) if err != nil { return errors.Wrapf(err, "failed to get attributes for %s", objectKey) } @@ -127,16 +127,16 @@ func (s *ObjectClientAdapter) List(ctx context.Context, prefix, delimiter string } // DeleteObject deletes the specified object key from the configured GCS bucket. -func (s *ObjectClientAdapter) DeleteObject(ctx context.Context, objectKey string) error { - return s.bucket.Delete(ctx, objectKey) +func (o *ObjectClientAdapter) DeleteObject(ctx context.Context, objectKey string) error { + return o.bucket.Delete(ctx, objectKey) } // IsObjectNotFoundErr returns true if error means that object is not found. Relevant to GetObject and DeleteObject operations. 
-func (s *ObjectClientAdapter) IsObjectNotFoundErr(err error) bool { - return s.bucket.IsObjNotFoundErr(err) +func (o *ObjectClientAdapter) IsObjectNotFoundErr(err error) bool { + return o.bucket.IsObjNotFoundErr(err) } // IsRetryableErr returns true if the request failed due to some retryable server-side scenario -func (s *ObjectClientAdapter) IsRetryableErr(err error) bool { - return s.isRetryableErr(err) +func (o *ObjectClientAdapter) IsRetryableErr(err error) bool { + return o.isRetryableErr(err) } diff --git a/pkg/storage/bucket/object_client_adapter_test.go b/pkg/storage/bucket/object_client_adapter_test.go index 28fe64e38a08..3443ead78a6d 100644 --- a/pkg/storage/bucket/object_client_adapter_test.go +++ b/pkg/storage/bucket/object_client_adapter_test.go @@ -11,7 +11,7 @@ import ( "github.com/stretchr/testify/require" ) -func TestAdapter_List(t *testing.T) { +func TestObjectClientAdapter_List(t *testing.T) { tests := []struct { name string prefix string diff --git a/pkg/storage/factory.go b/pkg/storage/factory.go index 9e3f6cdb8410..230466fc2557 100644 --- a/pkg/storage/factory.go +++ b/pkg/storage/factory.go @@ -694,7 +694,6 @@ func internalNewObjectClient(storeName, component string, cfg Config, clientMetr azureCfg = (azure.BlobStorageConfig)(nsCfg) } if cfg.UseThanosObjstore { - clientMetrics.Unregister() return azure.NewBlobStorageThanosObjectClient(context.Background(), cfg.ObjectStore, component, util_log.Logger, cfg.Hedging) } return azure.NewBlobStorage(&azureCfg, clientMetrics.AzureMetrics, cfg.Hedging) From a57c1403df2c0e63af89d671da6e3feb04873158 Mon Sep 17 00:00:00 2001 From: Ashwanth Goli Date: Tue, 22 Oct 2024 12:33:22 +0530 Subject: [PATCH 11/20] make format --- pkg/storage/bucket/object_client_adapter.go | 3 ++- pkg/storage/bucket/object_client_adapter_test.go | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pkg/storage/bucket/object_client_adapter.go b/pkg/storage/bucket/object_client_adapter.go index 1c7a354950c0..ca071d206141 100644 --- a/pkg/storage/bucket/object_client_adapter.go +++ b/pkg/storage/bucket/object_client_adapter.go @@ -9,8 +9,9 @@ import ( "github.com/go-kit/log/level" "github.com/pkg/errors" - "github.com/grafana/loki/v3/pkg/storage/chunk/client" "github.com/thanos-io/objstore" + + "github.com/grafana/loki/v3/pkg/storage/chunk/client" ) type ObjectClientAdapter struct { diff --git a/pkg/storage/bucket/object_client_adapter_test.go b/pkg/storage/bucket/object_client_adapter_test.go index 3443ead78a6d..1ce6de26856b 100644 --- a/pkg/storage/bucket/object_client_adapter_test.go +++ b/pkg/storage/bucket/object_client_adapter_test.go @@ -6,9 +6,10 @@ import ( "sort" "testing" + "github.com/stretchr/testify/require" + "github.com/grafana/loki/v3/pkg/storage/bucket/filesystem" "github.com/grafana/loki/v3/pkg/storage/chunk/client" - "github.com/stretchr/testify/require" ) func TestObjectClientAdapter_List(t *testing.T) { From a7da0f431b6ea5adc85dee851c61f70f2a8d3005 Mon Sep 17 00:00:00 2001 From: Ashwanth Goli Date: Tue, 22 Oct 2024 12:40:52 +0530 Subject: [PATCH 12/20] remove gcs comment --- pkg/storage/bucket/object_client_adapter.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/storage/bucket/object_client_adapter.go b/pkg/storage/bucket/object_client_adapter.go index ca071d206141..bf076d1b8249 100644 --- a/pkg/storage/bucket/object_client_adapter.go +++ b/pkg/storage/bucket/object_client_adapter.go @@ -45,12 +45,12 @@ func WithRetryableErrFunc(f func(err error) bool) func(*ObjectClientAdapter) { 
func (o *ObjectClientAdapter) Stop() {
 }
 
-// ObjectExists checks if a given objectKey exists in the GCS bucket
+// ObjectExists checks if a given objectKey exists in the bucket
 func (o *ObjectClientAdapter) ObjectExists(ctx context.Context, objectKey string) (bool, error) {
 	return o.bucket.Exists(ctx, objectKey)
 }
 
-// GetAttributes returns the attributes of the specified object key from the configured GCS bucket.
+// GetAttributes returns the attributes of the specified object key from the configured bucket.
 func (o *ObjectClientAdapter) GetAttributes(ctx context.Context, objectKey string) (client.ObjectAttributes, error) {
 	attr := client.ObjectAttributes{}
 	thanosAttr, err := o.hedgedBucket.Attributes(ctx, objectKey)
@@ -62,12 +62,12 @@ func (o *ObjectClientAdapter) GetAttributes(ctx context.Context, objectKey strin
 	return attr, nil
 }
 
-// PutObject puts the specified bytes into the configured GCS bucket at the provided key
+// PutObject puts the specified bytes into the configured bucket at the provided key
 func (o *ObjectClientAdapter) PutObject(ctx context.Context, objectKey string, object io.Reader) error {
 	return o.bucket.Upload(ctx, objectKey, object)
 }
 
-// GetObject returns a reader and the size for the specified object key from the configured GCS bucket.
+// GetObject returns a reader and the size for the specified object key from the configured bucket.
 // size is set to -1 if it cannot be successfully determined; it is up to the caller to check this value before using it.
 func (o *ObjectClientAdapter) GetObject(ctx context.Context, objectKey string) (io.ReadCloser, int64, error) {
 	reader, err := o.hedgedBucket.Get(ctx, objectKey)
@@ -127,7 +127,7 @@ func (o *ObjectClientAdapter) List(ctx context.Context, prefix, delimiter string
 	return storageObjects, commonPrefixes, nil
 }
 
-// DeleteObject deletes the specified object key from the configured GCS bucket.
+// DeleteObject deletes the specified object key from the configured bucket.
func (o *ObjectClientAdapter) DeleteObject(ctx context.Context, objectKey string) error { return o.bucket.Delete(ctx, objectKey) } From 4337f02f36b59e7e35ed2ca6b3992c341be7dde6 Mon Sep 17 00:00:00 2001 From: Ashwanth Goli Date: Tue, 22 Oct 2024 15:14:36 +0530 Subject: [PATCH 13/20] s3: add support for thanos client --- pkg/storage/bucket/s3/bucket_client.go | 44 ++- pkg/storage/bucket/s3/config.go | 165 +++++++++-- pkg/storage/bucket/s3/config_test.go | 263 +++++++++--------- .../chunk/client/aws/s3_storage_client.go | 11 +- .../client/aws/s3_thanos_object_client.go | 45 +++ pkg/storage/factory.go | 4 + pkg/util/http.go | 9 + 7 files changed, 362 insertions(+), 179 deletions(-) create mode 100644 pkg/storage/chunk/client/aws/s3_thanos_object_client.go diff --git a/pkg/storage/bucket/s3/bucket_client.go b/pkg/storage/bucket/s3/bucket_client.go index 107e18b3c7bb..4d16caf10a44 100644 --- a/pkg/storage/bucket/s3/bucket_client.go +++ b/pkg/storage/bucket/s3/bucket_client.go @@ -4,6 +4,7 @@ import ( "github.com/go-kit/log" "github.com/prometheus/common/model" "github.com/thanos-io/objstore" + "github.com/thanos-io/objstore/exthttp" "github.com/thanos-io/objstore/providers/s3" ) @@ -38,17 +39,28 @@ func newS3Config(cfg Config) (s3.Config, error) { return s3.Config{}, err } + putUserMetadata := map[string]string{} + + if cfg.StorageClass != "" { + putUserMetadata[awsStorageClassHeader] = cfg.StorageClass + } + return s3.Config{ - Bucket: cfg.BucketName, - Endpoint: cfg.Endpoint, - Region: cfg.Region, - AccessKey: cfg.AccessKeyID, - SecretKey: cfg.SecretAccessKey.String(), - SessionToken: cfg.SessionToken.String(), - Insecure: cfg.Insecure, - DisableDualstack: cfg.DisableDualstack, - SSEConfig: sseCfg, - PutUserMetadata: map[string]string{awsStorageClassHeader: cfg.StorageClass}, + Bucket: cfg.BucketName, + Endpoint: cfg.Endpoint, + Region: cfg.Region, + AccessKey: cfg.AccessKeyID, + SecretKey: cfg.SecretAccessKey.String(), + SessionToken: cfg.SessionToken.String(), + Insecure: cfg.Insecure, + PutUserMetadata: putUserMetadata, + SendContentMd5: cfg.SendContentMd5, + SSEConfig: sseCfg, + DisableDualstack: !cfg.DualstackEnabled, + ListObjectsVersion: cfg.ListObjectsVersion, + BucketLookupType: cfg.BucketLookupType, + AWSSDKAuth: cfg.NativeAWSAuthEnabled, + PartSize: cfg.PartSize, HTTPConfig: s3.HTTPConfig{ IdleConnTimeout: model.Duration(cfg.HTTP.IdleConnTimeout), ResponseHeaderTimeout: model.Duration(cfg.HTTP.ResponseHeaderTimeout), @@ -59,6 +71,18 @@ func newS3Config(cfg Config) (s3.Config, error) { MaxIdleConnsPerHost: cfg.HTTP.MaxIdleConnsPerHost, MaxConnsPerHost: cfg.HTTP.MaxConnsPerHost, Transport: cfg.HTTP.Transport, + TLSConfig: exthttp.TLSConfig{ + CAFile: cfg.HTTP.TLSConfig.CAPath, + CertFile: cfg.HTTP.TLSConfig.CertPath, + KeyFile: cfg.HTTP.TLSConfig.KeyPath, + ServerName: cfg.HTTP.TLSConfig.ServerName, + }, + }, + TraceConfig: s3.TraceConfig{ + Enable: cfg.TraceConfig.Enabled, }, + // Enforce signature version 2 if CLI flag is set + SignatureV2: cfg.SignatureVersion == SignatureVersionV2, + STSEndpoint: cfg.STSEndpoint, }, nil } diff --git a/pkg/storage/bucket/s3/config.go b/pkg/storage/bucket/s3/config.go index 32db169f450f..edea882169e9 100644 --- a/pkg/storage/bucket/s3/config.go +++ b/pkg/storage/bucket/s3/config.go @@ -5,22 +5,21 @@ import ( "flag" "fmt" "net/http" + "slices" "strings" + "time" + s3_service "github.com/aws/aws-sdk-go/service/s3" "github.com/grafana/dskit/flagext" + "github.com/grafana/loki/v3/pkg/util" "github.com/minio/minio-go/v7/pkg/encrypt" 
"github.com/pkg/errors" "github.com/thanos-io/objstore/providers/s3" - - bucket_http "github.com/grafana/loki/v3/pkg/storage/bucket/http" - "github.com/grafana/loki/v3/pkg/storage/common/aws" - "github.com/grafana/loki/v3/pkg/util" ) const ( - // Signature Version 2 is being turned off (deprecated) in Amazon S3. Amazon S3 will then only accept API requests that are signed using Signature Version 4. - // https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingAWSSDK.html#UsingAWSSDK-sig2-deprecation SignatureVersionV4 = "v4" + SignatureVersionV2 = "v2" // SSEKMS config type constant to configure S3 server side encryption using KMS // https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingKMSEncryption.html @@ -32,41 +31,102 @@ const ( ) var ( - supportedSignatureVersions = []string{SignatureVersionV4} - supportedSSETypes = []string{SSEKMS, SSES3} - errUnsupportedSignatureVersion = errors.New("unsupported signature version") + supportedSignatureVersions = []string{SignatureVersionV4, SignatureVersionV2} + supportedSSETypes = []string{SSEKMS, SSES3} + supportedStorageClasses = s3_service.ObjectStorageClass_Values() + supportedBucketLookupTypes = thanosS3BucketLookupTypesValues() + + errUnsupportedSignatureVersion = fmt.Errorf("unsupported signature version (supported values: %s)", strings.Join(supportedSignatureVersions, ", ")) errUnsupportedSSEType = errors.New("unsupported S3 SSE type") + errUnsupportedStorageClass = fmt.Errorf("unsupported S3 storage class (supported values: %s)", strings.Join(supportedStorageClasses, ", ")) errInvalidSSEContext = errors.New("invalid S3 SSE encryption context") + errInvalidEndpointPrefix = errors.New("the endpoint must not prefixed with the bucket name") + errInvalidSTSEndpoint = errors.New("sts-endpoint must be a valid url") ) +var thanosS3BucketLookupTypes = map[string]s3.BucketLookupType{ + s3.AutoLookup.String(): s3.AutoLookup, + s3.VirtualHostLookup.String(): s3.VirtualHostLookup, + s3.PathLookup.String(): s3.PathLookup, +} + +func thanosS3BucketLookupTypesValues() (list []string) { + for k := range thanosS3BucketLookupTypes { + list = append(list, k) + } + // sort the list for consistent output in help, where it's used + slices.Sort(list) + return list +} + // HTTPConfig stores the http.Transport configuration for the s3 minio client. type HTTPConfig struct { - bucket_http.Config `yaml:",inline"` + IdleConnTimeout time.Duration `yaml:"idle_conn_timeout" category:"advanced"` + ResponseHeaderTimeout time.Duration `yaml:"response_header_timeout" category:"advanced"` + InsecureSkipVerify bool `yaml:"insecure_skip_verify" category:"advanced"` + TLSHandshakeTimeout time.Duration `yaml:"tls_handshake_timeout" category:"advanced"` + ExpectContinueTimeout time.Duration `yaml:"expect_continue_timeout" category:"advanced"` + MaxIdleConns int `yaml:"max_idle_connections" category:"advanced"` + MaxIdleConnsPerHost int `yaml:"max_idle_connections_per_host" category:"advanced"` + MaxConnsPerHost int `yaml:"max_connections_per_host" category:"advanced"` // Allow upstream callers to inject a round tripper Transport http.RoundTripper `yaml:"-"` + + TLSConfig TLSConfig `yaml:",inline"` +} + +// TLSConfig configures the options for TLS connections. 
+type TLSConfig struct {
+	CAPath     string `yaml:"tls_ca_path" category:"advanced"`
+	CertPath   string `yaml:"tls_cert_path" category:"advanced"`
+	KeyPath    string `yaml:"tls_key_path" category:"advanced"`
+	ServerName string `yaml:"tls_server_name" category:"advanced"`
 }
 
 // RegisterFlagsWithPrefix registers the flags for s3 storage with the provided prefix
 func (cfg *HTTPConfig) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) {
-	cfg.Config.RegisterFlagsWithPrefix(prefix+"s3.", f)
+	f.DurationVar(&cfg.IdleConnTimeout, prefix+"s3.http.idle-conn-timeout", 90*time.Second, "The time an idle connection will remain idle before closing.")
+	f.DurationVar(&cfg.ResponseHeaderTimeout, prefix+"s3.http.response-header-timeout", 2*time.Minute, "The amount of time the client will wait for a server's response headers.")
+	f.BoolVar(&cfg.InsecureSkipVerify, prefix+"s3.http.insecure-skip-verify", false, "If the client connects to S3 via HTTPS and this option is enabled, the client will accept any certificate and hostname.")
+	f.DurationVar(&cfg.TLSHandshakeTimeout, prefix+"s3.tls-handshake-timeout", 10*time.Second, "Maximum time to wait for a TLS handshake. 0 means no limit.")
+	f.DurationVar(&cfg.ExpectContinueTimeout, prefix+"s3.expect-continue-timeout", 1*time.Second, "The time to wait for a server's first response headers after fully writing the request headers if the request has an Expect header. 0 to send the request body immediately.")
+	f.IntVar(&cfg.MaxIdleConns, prefix+"s3.max-idle-connections", 100, "Maximum number of idle (keep-alive) connections across all hosts. 0 means no limit.")
+	f.IntVar(&cfg.MaxIdleConnsPerHost, prefix+"s3.max-idle-connections-per-host", 100, "Maximum number of idle (keep-alive) connections to keep per-host. If 0, a built-in default value is used.")
+	f.IntVar(&cfg.MaxConnsPerHost, prefix+"s3.max-connections-per-host", 0, "Maximum number of connections per host. 0 means no limit.")
+	cfg.TLSConfig.RegisterFlagsWithPrefix(prefix, f)
+}
+
+// RegisterFlagsWithPrefix registers the flags for s3 storage with the provided prefix.
+func (cfg *TLSConfig) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) {
+	f.StringVar(&cfg.CAPath, prefix+"s3.http.tls-ca-path", "", "Path to the CA certificates to validate the server certificate against. If not set, the host's root CA certificates are used.")
+	f.StringVar(&cfg.CertPath, prefix+"s3.http.tls-cert-path", "", "Path to the client certificate, which will be used for authenticating with the server. Also requires the key path to be configured.")
+	f.StringVar(&cfg.KeyPath, prefix+"s3.http.tls-key-path", "", "Path to the key for the client certificate. 
Also requires the client certificate to be configured.") + f.StringVar(&cfg.ServerName, prefix+"s3.http.tls-server-name", "", "Override the expected name on the server certificate.") } // Config holds the config options for an S3 backend type Config struct { - Endpoint string `yaml:"endpoint"` - Region string `yaml:"region"` - BucketName string `yaml:"bucket_name"` - SecretAccessKey flagext.Secret `yaml:"secret_access_key"` - SessionToken flagext.Secret `yaml:"session_token"` - AccessKeyID string `yaml:"access_key_id"` - Insecure bool `yaml:"insecure"` - DisableDualstack bool `yaml:"disable_dualstack"` - SignatureVersion string `yaml:"signature_version"` - StorageClass string `yaml:"storage_class"` + Endpoint string `yaml:"endpoint"` + Region string `yaml:"region"` + BucketName string `yaml:"bucket_name"` + SecretAccessKey flagext.Secret `yaml:"secret_access_key"` + AccessKeyID string `yaml:"access_key_id"` + SessionToken flagext.Secret `yaml:"session_token"` + Insecure bool `yaml:"insecure" category:"advanced"` + SignatureVersion string `yaml:"signature_version" category:"advanced"` + ListObjectsVersion string `yaml:"list_objects_version" category:"advanced"` + BucketLookupType s3.BucketLookupType `yaml:"bucket_lookup_type" category:"advanced"` + DualstackEnabled bool `yaml:"dualstack_enabled" category:"experimental"` + StorageClass string `yaml:"storage_class" category:"experimental"` + NativeAWSAuthEnabled bool `yaml:"native_aws_auth_enabled" category:"experimental"` + PartSize uint64 `yaml:"part_size" category:"experimental"` + SendContentMd5 bool `yaml:"send_content_md5" category:"experimental"` + STSEndpoint string `yaml:"sts_endpoint"` - SSE SSEConfig `yaml:"sse"` - HTTP HTTPConfig `yaml:"http"` + SSE SSEConfig `yaml:"sse"` + HTTP HTTPConfig `yaml:"http"` + TraceConfig TraceConfig `yaml:"trace"` } // RegisterFlags registers the flags for s3 storage with the provided prefix @@ -83,21 +143,36 @@ func (cfg *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { f.StringVar(&cfg.Region, prefix+"s3.region", "", "S3 region. If unset, the client will issue a S3 GetBucketLocation API call to autodetect it.") f.StringVar(&cfg.Endpoint, prefix+"s3.endpoint", "", "The S3 bucket endpoint. It could be an AWS S3 endpoint listed at https://docs.aws.amazon.com/general/latest/gr/s3.html or the address of an S3-compatible service in hostname:port format.") f.BoolVar(&cfg.Insecure, prefix+"s3.insecure", false, "If enabled, use http:// for the S3 endpoint instead of https://. This could be useful in local dev/test environments while using an S3-compatible backend storage, like Minio.") - f.BoolVar(&cfg.DisableDualstack, prefix+"s3.disable-dualstack", false, "Disable forcing S3 dualstack endpoint usage.") f.StringVar(&cfg.SignatureVersion, prefix+"s3.signature-version", SignatureVersionV4, fmt.Sprintf("The signature version to use for authenticating against S3. Supported values are: %s.", strings.Join(supportedSignatureVersions, ", "))) - f.StringVar(&cfg.StorageClass, prefix+"s3.storage-class", aws.StorageClassStandard, "The S3 storage class to use. Details can be found at https://aws.amazon.com/s3/storage-classes/.") + f.StringVar(&cfg.ListObjectsVersion, prefix+"s3.list-objects-version", "", "Use a specific version of the S3 list object API. Supported values are v1 or v2. Default is unset.") + f.StringVar(&cfg.StorageClass, prefix+"s3.storage-class", "", "The S3 storage class to use, not set by default. Details can be found at https://aws.amazon.com/s3/storage-classes/. 
Supported values are: "+strings.Join(supportedStorageClasses, ", "))
+	f.BoolVar(&cfg.NativeAWSAuthEnabled, prefix+"s3.native-aws-auth-enabled", false, "If enabled, it will use the default authentication methods of the AWS SDK for go based on known environment variables and known AWS config files.")
+	f.Uint64Var(&cfg.PartSize, prefix+"s3.part-size", 0, "The minimum file size in bytes used for multipart uploads. If 0, the value is optimally computed for each object.")
+	f.BoolVar(&cfg.SendContentMd5, prefix+"s3.send-content-md5", false, "If enabled, a Content-MD5 header is sent with S3 Put Object requests. Consumes more resources to compute the MD5, but may improve compatibility with object storage services that do not support checksums.")
+	f.Var(newBucketLookupTypeValue(s3.AutoLookup, &cfg.BucketLookupType), prefix+"s3.bucket-lookup-type", fmt.Sprintf("Bucket lookup style type, used to access bucket in S3-compatible service. Default is auto. Supported values are: %s.", strings.Join(supportedBucketLookupTypes, ", ")))
+	f.BoolVar(&cfg.DualstackEnabled, prefix+"s3.dualstack-enabled", true, "When enabled, direct all AWS S3 requests to the dual-stack IPv4/IPv6 endpoint for the configured region.")
+	f.StringVar(&cfg.STSEndpoint, prefix+"s3.sts-endpoint", "", "Endpoint of the AWS Security Token Service, used to access S3 resources with temporary, secure credentials.")
 	cfg.SSE.RegisterFlagsWithPrefix(prefix+"s3.sse.", f)
 	cfg.HTTP.RegisterFlagsWithPrefix(prefix, f)
+	cfg.TraceConfig.RegisterFlagsWithPrefix(prefix+"s3.trace.", f)
 }
 
 // Validate config and returns error on failure
 func (cfg *Config) Validate() error {
-	if !util.StringsContain(supportedSignatureVersions, cfg.SignatureVersion) {
+	if !slices.Contains(supportedSignatureVersions, cfg.SignatureVersion) {
 		return errUnsupportedSignatureVersion
 	}
-
-	if err := aws.ValidateStorageClass(cfg.StorageClass); err != nil {
-		return err
+	if cfg.Endpoint != "" {
+		endpoint := strings.Split(cfg.Endpoint, ".")
+		if cfg.BucketName != "" && endpoint[0] != "" && endpoint[0] == cfg.BucketName {
+			return errInvalidEndpointPrefix
+		}
+	}
+	if cfg.STSEndpoint != "" && !util.IsValidURL(cfg.STSEndpoint) {
+		return errInvalidSTSEndpoint
+	}
+	if !slices.Contains(supportedStorageClasses, cfg.StorageClass) && cfg.StorageClass != "" {
+		return errUnsupportedStorageClass
 	}
 
 	return cfg.SSE.Validate()
@@ -191,3 +266,35 @@ func parseKMSEncryptionContext(data string) (map[string]string, error) {
 	err := errors.Wrap(json.Unmarshal([]byte(data), &decoded), "unable to parse KMS encryption context")
 	return decoded, err
 }
+
+type TraceConfig struct {
+	Enabled bool `yaml:"enabled" category:"advanced"`
+}
+
+func (cfg *TraceConfig) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) {
+	f.BoolVar(&cfg.Enabled, prefix+"enabled", false, "When enabled, low-level S3 HTTP operation information is logged at the debug level.")
+}
+
+// bucketLookupTypeValue is an adapter between s3.BucketLookupType and flag.Value.
+type bucketLookupTypeValue s3.BucketLookupType + +func newBucketLookupTypeValue(value s3.BucketLookupType, p *s3.BucketLookupType) *bucketLookupTypeValue { + *p = value + return (*bucketLookupTypeValue)(p) +} + +func (v *bucketLookupTypeValue) String() string { + if v == nil { + return s3.AutoLookup.String() + } + return s3.BucketLookupType(*v).String() +} + +func (v *bucketLookupTypeValue) Set(s string) error { + t, ok := thanosS3BucketLookupTypes[s] + if !ok { + return fmt.Errorf("unsupported bucket lookup type: %s", s) + } + *v = bucketLookupTypeValue(t) + return nil +} diff --git a/pkg/storage/bucket/s3/config_test.go b/pkg/storage/bucket/s3/config_test.go index 3f32e8f84793..b3477d06dba3 100644 --- a/pkg/storage/bucket/s3/config_test.go +++ b/pkg/storage/bucket/s3/config_test.go @@ -1,127 +1,23 @@ +// SPDX-License-Identifier: AGPL-3.0-only +// Provenance-includes-location: https://github.com/cortexproject/cortex/blob/master/pkg/storage/bucket/s3/config_test.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: The Cortex Authors. + package s3 import ( + "bytes" "encoding/base64" - "fmt" "net/http" - "strings" "testing" - "time" + s3_service "github.com/aws/aws-sdk-go/service/s3" "github.com/grafana/dskit/flagext" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "gopkg.in/yaml.v2" - - bucket_http "github.com/grafana/loki/v3/pkg/storage/bucket/http" - "github.com/grafana/loki/v3/pkg/storage/common/aws" + "gopkg.in/yaml.v3" ) -// defaultConfig should match the default flag values defined in RegisterFlagsWithPrefix. -var defaultConfig = Config{ - SignatureVersion: SignatureVersionV4, - StorageClass: aws.StorageClassStandard, - HTTP: HTTPConfig{ - Config: bucket_http.Config{ - IdleConnTimeout: 90 * time.Second, - ResponseHeaderTimeout: 2 * time.Minute, - InsecureSkipVerify: false, - TLSHandshakeTimeout: 10 * time.Second, - ExpectContinueTimeout: 1 * time.Second, - MaxIdleConns: 100, - MaxIdleConnsPerHost: 100, - MaxConnsPerHost: 0, - }, - }, -} - -func TestConfig(t *testing.T) { - t.Parallel() - - tests := map[string]struct { - config string - expectedConfig Config - expectedErr error - }{ - "default config": { - config: "", - expectedConfig: defaultConfig, - expectedErr: nil, - }, - "custom config": { - config: ` -endpoint: test-endpoint -region: test-region -bucket_name: test-bucket-name -secret_access_key: test-secret-access-key -access_key_id: test-access-key-id -insecure: true -signature_version: test-signature-version -storage_class: test-storage-class -disable_dualstack: true -sse: - type: test-type - kms_key_id: test-kms-key-id - kms_encryption_context: test-kms-encryption-context -http: - idle_conn_timeout: 2s - response_header_timeout: 3s - insecure_skip_verify: true - tls_handshake_timeout: 4s - expect_continue_timeout: 5s - max_idle_connections: 6 - max_idle_connections_per_host: 7 - max_connections_per_host: 8 -`, - expectedConfig: Config{ - Endpoint: "test-endpoint", - Region: "test-region", - BucketName: "test-bucket-name", - SecretAccessKey: flagext.SecretWithValue("test-secret-access-key"), - AccessKeyID: "test-access-key-id", - Insecure: true, - SignatureVersion: "test-signature-version", - StorageClass: "test-storage-class", - DisableDualstack: true, - SSE: SSEConfig{ - Type: "test-type", - KMSKeyID: "test-kms-key-id", - KMSEncryptionContext: "test-kms-encryption-context", - }, - HTTP: HTTPConfig{ - Config: bucket_http.Config{ - IdleConnTimeout: 2 * time.Second, - ResponseHeaderTimeout: 3 * time.Second, - 
InsecureSkipVerify: true, - TLSHandshakeTimeout: 4 * time.Second, - ExpectContinueTimeout: 5 * time.Second, - MaxIdleConns: 6, - MaxIdleConnsPerHost: 7, - MaxConnsPerHost: 8, - }, - }, - }, - expectedErr: nil, - }, - "invalid type": { - config: `insecure: foo`, - expectedConfig: defaultConfig, - expectedErr: &yaml.TypeError{Errors: []string{"line 1: cannot unmarshal !!str `foo` into bool"}}, - }, - } - - for testName, testData := range tests { - t.Run(testName, func(t *testing.T) { - cfg := Config{} - flagext.DefaultValues(&cfg) - - err := yaml.Unmarshal([]byte(testData.config), &cfg) - require.Equal(t, testData.expectedErr, err) - require.Equal(t, testData.expectedConfig, cfg) - }) - } -} - func TestSSEConfig_Validate(t *testing.T) { tests := map[string]struct { setup func() *SSEConfig @@ -169,6 +65,98 @@ func TestSSEConfig_Validate(t *testing.T) { } } +func TestConfig_Validate(t *testing.T) { + tests := map[string]struct { + setup func() *Config + expected error + }{ + "should pass with default config": { + setup: func() *Config { + sseCfg := &SSEConfig{} + flagext.DefaultValues(sseCfg) + cfg := &Config{ + Endpoint: "s3.eu-central-1.amazonaws.com", + BucketName: "mimir-block", + SSE: *sseCfg, + SignatureVersion: SignatureVersionV4, + StorageClass: s3_service.StorageClassStandard, + } + return cfg + }, + }, + "should fail if invalid storage class is set": { + setup: func() *Config { + return &Config{ + SignatureVersion: SignatureVersionV2, + StorageClass: "foo", + } + }, + expected: errUnsupportedStorageClass, + }, + "should pass if valid storage signature version is set": { + setup: func() *Config { + return &Config{ + SignatureVersion: SignatureVersionV4, StorageClass: s3_service.StorageClassStandard, + } + }, + }, + "should fail on invalid endpoint prefix": { + setup: func() *Config { + return &Config{ + Endpoint: "mimir-blocks.s3.eu-central-1.amazonaws.com", + BucketName: "mimir-blocks", + SignatureVersion: SignatureVersionV4, + StorageClass: s3_service.StorageClassStandard, + } + }, + expected: errInvalidEndpointPrefix, + }, + "should pass if native_aws_auth_enabled is set": { + setup: func() *Config { + return &Config{ + SignatureVersion: SignatureVersionV4, + NativeAWSAuthEnabled: true, + } + }, + }, + "should pass with using sts endpoint": { + setup: func() *Config { + sseCfg := &SSEConfig{} + flagext.DefaultValues(sseCfg) + cfg := &Config{ + BucketName: "mimir-block", + SSE: *sseCfg, + SignatureVersion: SignatureVersionV4, + StorageClass: s3_service.StorageClassStandard, + STSEndpoint: "https://sts.eu-central-1.amazonaws.com", + } + return cfg + }, + }, + "should not pass with using sts endpoint as its using an invalid url": { + setup: func() *Config { + sseCfg := &SSEConfig{} + flagext.DefaultValues(sseCfg) + cfg := &Config{ + BucketName: "mimir-block", + SSE: *sseCfg, + SignatureVersion: SignatureVersionV4, + StorageClass: s3_service.StorageClassStandard, + STSEndpoint: "sts.eu-central-1.amazonaws.com", + } + return cfg + }, + expected: errInvalidSTSEndpoint, + }, + } + + for testName, testData := range tests { + t.Run(testName, func(t *testing.T) { + assert.Equal(t, testData.expected, testData.setup().Validate()) + }) + } +} + func TestSSEConfig_BuildMinioConfig(t *testing.T) { tests := map[string]struct { cfg *SSEConfig @@ -225,31 +213,32 @@ func TestParseKMSEncryptionContext(t *testing.T) { assert.Equal(t, expected, actual) } -func TestConfig_Validate(t *testing.T) { - tests := map[string]struct { - cfg Config - expectedErr error - }{ - "should fail if invalid signature 
version is set": {
-			Config{SignatureVersion: "foo"},
-			errUnsupportedSignatureVersion,
-		},
-		"should pass if valid signature version is set": {
-			defaultConfig,
-			nil,
-		},
-		"should fail if invalid storage class is set": {
-			Config{SignatureVersion: SignatureVersionV4, StorageClass: "foo"},
-			fmt.Errorf("unsupported S3 storage class: foo. Supported values: %s", strings.Join(aws.SupportedStorageClasses, ", ")),
-		},
-		"should pass if valid storage signature version is set": {
-			Config{SignatureVersion: SignatureVersionV4, StorageClass: aws.StorageClassStandardInfrequentAccess},
-			nil,
-		},
-	}
+func TestConfigParsesCredentialsInlineWithSessionToken(t *testing.T) {
+	var cfg = Config{}
+	yamlCfg := `
+access_key_id: access key id
+secret_access_key: secret access key
+session_token: session token
+`
+	err := yaml.Unmarshal([]byte(yamlCfg), &cfg)
+	require.NoError(t, err)
+
+	require.Equal(t, cfg.AccessKeyID, "access key id")
+	require.Equal(t, cfg.SecretAccessKey.String(), "secret access key")
+	require.Equal(t, cfg.SessionToken.String(), "session token")
+}
 
-	for name, test := range tests {
-		actual := test.cfg.Validate()
-		assert.Equal(t, test.expectedErr, actual, name)
+func TestConfigRedactsCredentials(t *testing.T) {
+	cfg := Config{
+		AccessKeyID:     "access key id",
+		SecretAccessKey: flagext.SecretWithValue("secret access key"),
+		SessionToken:    flagext.SecretWithValue("session token"),
 	}
+
+	output, err := yaml.Marshal(cfg)
+	require.NoError(t, err)
+
+	require.True(t, bytes.Contains(output, []byte("access key id")))
+	require.False(t, bytes.Contains(output, []byte("secret access key")))
+	require.False(t, bytes.Contains(output, []byte("session token")))
 }
diff --git a/pkg/storage/chunk/client/aws/s3_storage_client.go b/pkg/storage/chunk/client/aws/s3_storage_client.go
index 9ab8c9116339..3e9e368d6e46 100644
--- a/pkg/storage/chunk/client/aws/s3_storage_client.go
+++ b/pkg/storage/chunk/client/aws/s3_storage_client.go
@@ -563,7 +563,7 @@ func isContextErr(err error) bool {
 }
 
 // IsStorageTimeoutErr returns true if error means that object cannot be retrieved right now due to server-side timeouts.
-func (a *S3ObjectClient) IsStorageTimeoutErr(err error) bool {
+func IsStorageTimeoutErr(err error) bool {
 	// TODO(dannyk): move these out to be generic
 	// context errors are all client-side
 	if isContextErr(err) {
@@ -599,7 +599,7 @@ func (a *S3ObjectClient) IsStorageTimeoutErr(err error) bool {
 }
 
 // IsStorageThrottledErr returns true if error means that object cannot be retrieved right now due to throttling.
-func (a *S3ObjectClient) IsStorageThrottledErr(err error) bool {
+func IsStorageThrottledErr(err error) bool {
 	if rerr, ok := err.(awserr.RequestFailure); ok {
 
 		// https://docs.aws.amazon.com/sdkref/latest/guide/feature-retry-behavior.html
@@ -609,6 +609,11 @@
 
 	return false
 }
+
+func IsRetryableErr(err error) bool {
+	return IsStorageTimeoutErr(err) || IsStorageThrottledErr(err)
+}
+
 func (a *S3ObjectClient) IsRetryableErr(err error) bool {
-	return a.IsStorageTimeoutErr(err) || a.IsStorageThrottledErr(err)
+	return IsStorageTimeoutErr(err) || IsStorageThrottledErr(err)
 }
diff --git a/pkg/storage/chunk/client/aws/s3_thanos_object_client.go b/pkg/storage/chunk/client/aws/s3_thanos_object_client.go
new file mode 100644
index 000000000000..1b9ce7859a2e
--- /dev/null
+++ b/pkg/storage/chunk/client/aws/s3_thanos_object_client.go
@@ -0,0 +1,45 @@
+package aws
+
+import (
+	"context"
+
+	"github.com/go-kit/log"
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/thanos-io/objstore"
+
+	"github.com/grafana/loki/v3/pkg/storage/bucket"
+	"github.com/grafana/loki/v3/pkg/storage/chunk/client"
+	"github.com/grafana/loki/v3/pkg/storage/chunk/client/hedging"
+)
+
+func NewS3ThanosObjectClient(ctx context.Context, cfg bucket.Config, component string, logger log.Logger, hedgingCfg hedging.Config) (client.ObjectClient, error) {
+	b, err := newS3ThanosObjectClient(ctx, cfg, component, logger, false, hedgingCfg)
+	if err != nil {
+		return nil, err
+	}
+
+	var hedged objstore.Bucket
+	if hedgingCfg.At != 0 {
+		hedged, err = newS3ThanosObjectClient(ctx, cfg, component, logger, true, hedgingCfg)
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	o := bucket.NewObjectClientAdapter(b, hedged, logger)
+	bucket.WithRetryableErrFunc(IsRetryableErr)(o)
+	return o, nil
+}
+
+func newS3ThanosObjectClient(ctx context.Context, cfg bucket.Config, component string, logger log.Logger, hedging bool, hedgingCfg hedging.Config) (objstore.Bucket, error) {
+	if hedging {
+		hedgedTransport, err := hedgingCfg.RoundTripperWithRegisterer(nil, prometheus.WrapRegistererWithPrefix("loki_", prometheus.DefaultRegisterer))
+		if err != nil {
+			return nil, err
+		}
+
+		cfg.S3.HTTP.Transport = hedgedTransport
+	}
+
+	return bucket.NewClient(ctx, bucket.S3, cfg, component, logger)
+}
diff --git a/pkg/storage/factory.go b/pkg/storage/factory.go
index 230466fc2557..4824c196df34 100644
--- a/pkg/storage/factory.go
+++ b/pkg/storage/factory.go
@@ -650,6 +650,10 @@ func internalNewObjectClient(storeName, component string, cfg Config, clientMetr
 		}
 		s3Cfg = awsCfg.S3Config
 	}
+
+	if cfg.UseThanosObjstore {
+		return aws.NewS3ThanosObjectClient(context.Background(), cfg.ObjectStore, component, util_log.Logger, cfg.Hedging)
+	}
 	return aws.NewS3ObjectClient(s3Cfg, cfg.Hedging)
 
 case types.StorageTypeAlibabaCloud:
diff --git a/pkg/util/http.go b/pkg/util/http.go
index c3c64ea1e3a8..3fdfca6df24f 100644
--- a/pkg/util/http.go
+++ b/pkg/util/http.go
@@ -298,3 +298,12 @@ func FlagFromValues(values url.Values, key string, d bool) bool {
 		return d
 	}
 }
+
+func IsValidURL(endpoint string) bool {
+	u, err := url.Parse(endpoint)
+	if err != nil {
+		return false
+	}
+
+	return u.Scheme != "" && u.Host != ""
+}

From b0326544e620dac91913e1d700e60e4c93bdb7d4 Mon Sep 17 00:00:00 2001
From: Ashwanth Goli
Date: Tue, 22 Oct 2024 15:27:27 +0530
Subject: [PATCH 14/20] make format

---
 pkg/storage/bucket/s3/config.go | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git
a/pkg/storage/bucket/s3/config.go b/pkg/storage/bucket/s3/config.go index edea882169e9..09c3d9ad3a65 100644 --- a/pkg/storage/bucket/s3/config.go +++ b/pkg/storage/bucket/s3/config.go @@ -11,10 +11,11 @@ import ( s3_service "github.com/aws/aws-sdk-go/service/s3" "github.com/grafana/dskit/flagext" - "github.com/grafana/loki/v3/pkg/util" "github.com/minio/minio-go/v7/pkg/encrypt" "github.com/pkg/errors" "github.com/thanos-io/objstore/providers/s3" + + "github.com/grafana/loki/v3/pkg/util" ) const ( From f99e5e7b56e9fcdefa3c7aa97926640824ca2439 Mon Sep 17 00:00:00 2001 From: Ashwanth Date: Thu, 24 Oct 2024 15:37:05 +0530 Subject: [PATCH 15/20] Update pkg/storage/chunk/client/aws/s3_storage_client.go Co-authored-by: Joao Marcal --- pkg/storage/chunk/client/aws/s3_storage_client.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/storage/chunk/client/aws/s3_storage_client.go b/pkg/storage/chunk/client/aws/s3_storage_client.go index 3e9e368d6e46..65817f38c9d9 100644 --- a/pkg/storage/chunk/client/aws/s3_storage_client.go +++ b/pkg/storage/chunk/client/aws/s3_storage_client.go @@ -615,5 +615,5 @@ func IsRetryableErr(err error) bool { } func (a *S3ObjectClient) IsRetryableErr(err error) bool { - return IsStorageTimeoutErr(err) || IsStorageThrottledErr(err) + return IsRetryableErr(err) } From c9b0c10e6f768de85c55597093096c0f6719cc05 Mon Sep 17 00:00:00 2001 From: Ashwanth Goli Date: Thu, 24 Oct 2024 15:41:12 +0530 Subject: [PATCH 16/20] review suggestions --- pkg/storage/bucket/client.go | 7 +++---- pkg/storage/bucket/s3/config.go | 3 +-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/pkg/storage/bucket/client.go b/pkg/storage/bucket/client.go index 0bd8ec4e92e5..06f8d128f850 100644 --- a/pkg/storage/bucket/client.go +++ b/pkg/storage/bucket/client.go @@ -84,10 +84,9 @@ func (cfg *StorageBackendConfig) RegisterFlagsWithPrefix(prefix string, f *flag. } func (cfg *StorageBackendConfig) Validate() error { - // TODO: enable validation when s3 flags are registered - // if err := cfg.S3.Validate(); err != nil { - // return err - //} + if err := cfg.S3.Validate(); err != nil { + return err + } return nil } diff --git a/pkg/storage/bucket/s3/config.go b/pkg/storage/bucket/s3/config.go index 09c3d9ad3a65..9a91cd7b5071 100644 --- a/pkg/storage/bucket/s3/config.go +++ b/pkg/storage/bucket/s3/config.go @@ -20,7 +20,6 @@ import ( const ( SignatureVersionV4 = "v4" - SignatureVersionV2 = "v2" // SSEKMS config type constant to configure S3 server side encryption using KMS // https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingKMSEncryption.html @@ -32,7 +31,7 @@ const ( ) var ( - supportedSignatureVersions = []string{SignatureVersionV4, SignatureVersionV2} + supportedSignatureVersions = []string{SignatureVersionV4} supportedSSETypes = []string{SSEKMS, SSES3} supportedStorageClasses = s3_service.ObjectStorageClass_Values() supportedBucketLookupTypes = thanosS3BucketLookupTypesValues() From 65a0aeb1cd26fef11ecf7601ecb9f73d4f567cfe Mon Sep 17 00:00:00 2001 From: Ashwanth Goli Date: Fri, 25 Oct 2024 10:37:58 +0530 Subject: [PATCH 17/20] fixup! 
review suggestions --- pkg/storage/bucket/s3/bucket_client.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/pkg/storage/bucket/s3/bucket_client.go b/pkg/storage/bucket/s3/bucket_client.go index 4d16caf10a44..5d904d8e5fe9 100644 --- a/pkg/storage/bucket/s3/bucket_client.go +++ b/pkg/storage/bucket/s3/bucket_client.go @@ -81,8 +81,6 @@ func newS3Config(cfg Config) (s3.Config, error) { TraceConfig: s3.TraceConfig{ Enable: cfg.TraceConfig.Enabled, }, - // Enforce signature version 2 if CLI flag is set - SignatureV2: cfg.SignatureVersion == SignatureVersionV2, STSEndpoint: cfg.STSEndpoint, }, nil } From 3cd8596bcf04937b711d8cf91bbe1e1c3dfd5128 Mon Sep 17 00:00:00 2001 From: Ashwanth Goli Date: Fri, 25 Oct 2024 19:52:47 +0530 Subject: [PATCH 18/20] use retryfunc option --- pkg/storage/chunk/client/aws/s3_thanos_object_client.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pkg/storage/chunk/client/aws/s3_thanos_object_client.go b/pkg/storage/chunk/client/aws/s3_thanos_object_client.go index 1b9ce7859a2e..e00ded920d55 100644 --- a/pkg/storage/chunk/client/aws/s3_thanos_object_client.go +++ b/pkg/storage/chunk/client/aws/s3_thanos_object_client.go @@ -26,8 +26,7 @@ func NewS3ThanosObjectClient(ctx context.Context, cfg bucket.Config, component s } } - o := bucket.NewObjectClientAdapter(b, hedged, logger) - bucket.WithRetryableErrFunc(IsRetryableErr)(o) + o := bucket.NewObjectClientAdapter(b, hedged, logger, bucket.WithRetryableErrFunc(IsRetryableErr)) return o, nil } From cf4efc4e47c8b8fa2ead17752f6f823b8a893a32 Mon Sep 17 00:00:00 2001 From: Ashwanth Goli Date: Fri, 25 Oct 2024 20:08:40 +0530 Subject: [PATCH 19/20] fix config_test --- pkg/storage/bucket/s3/config_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/storage/bucket/s3/config_test.go b/pkg/storage/bucket/s3/config_test.go index b3477d06dba3..c2404ca203cd 100644 --- a/pkg/storage/bucket/s3/config_test.go +++ b/pkg/storage/bucket/s3/config_test.go @@ -87,8 +87,8 @@ func TestConfig_Validate(t *testing.T) { "should fail if invalid storage class is set": { setup: func() *Config { return &Config{ - SignatureVersion: SignatureVersionV2, StorageClass: "foo", + SignatureVersion: SignatureVersionV4, } }, expected: errUnsupportedStorageClass, From 4752c12e5faa69d00bc730827f8487f4fcc17c8b Mon Sep 17 00:00:00 2001 From: Ashwanth Goli Date: Fri, 25 Oct 2024 20:14:12 +0530 Subject: [PATCH 20/20] remove signature version. 
not configurable anymore, defaults to v4

---
 pkg/storage/bucket/s3/config.go      | 19 ++++---------
 pkg/storage/bucket/s3/config_test.go | 45 ++++++++++------------------
 2 files changed, 21 insertions(+), 43 deletions(-)

diff --git a/pkg/storage/bucket/s3/config.go b/pkg/storage/bucket/s3/config.go
index 9a91cd7b5071..792f93f752b3 100644
--- a/pkg/storage/bucket/s3/config.go
+++ b/pkg/storage/bucket/s3/config.go
@@ -19,8 +19,6 @@ import (
 )
 
 const (
-	SignatureVersionV4 = "v4"
-
 	// SSEKMS config type constant to configure S3 server side encryption using KMS
 	// https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingKMSEncryption.html
 	SSEKMS = "SSE-KMS"
@@ -31,17 +29,15 @@ const (
 )
 
 var (
-	supportedSignatureVersions = []string{SignatureVersionV4}
 	supportedSSETypes          = []string{SSEKMS, SSES3}
 	supportedStorageClasses    = s3_service.ObjectStorageClass_Values()
 	supportedBucketLookupTypes = thanosS3BucketLookupTypesValues()
 
-	errUnsupportedSignatureVersion = fmt.Errorf("unsupported signature version (supported values: %s)", strings.Join(supportedSignatureVersions, ", "))
-	errUnsupportedSSEType          = errors.New("unsupported S3 SSE type")
-	errUnsupportedStorageClass     = fmt.Errorf("unsupported S3 storage class (supported values: %s)", strings.Join(supportedStorageClasses, ", "))
-	errInvalidSSEContext           = errors.New("invalid S3 SSE encryption context")
-	errInvalidEndpointPrefix       = errors.New("the endpoint must not be prefixed with the bucket name")
-	errInvalidSTSEndpoint          = errors.New("sts-endpoint must be a valid URL")
+	errUnsupportedSSEType      = errors.New("unsupported S3 SSE type")
+	errUnsupportedStorageClass = fmt.Errorf("unsupported S3 storage class (supported values: %s)", strings.Join(supportedStorageClasses, ", "))
+	errInvalidSSEContext       = errors.New("invalid S3 SSE encryption context")
+	errInvalidEndpointPrefix   = errors.New("the endpoint must not be prefixed with the bucket name")
+	errInvalidSTSEndpoint      = errors.New("sts-endpoint must be a valid URL")
 )
 
 var thanosS3BucketLookupTypes = map[string]s3.BucketLookupType{
@@ -114,7 +110,6 @@ type Config struct {
 	AccessKeyID          string              `yaml:"access_key_id"`
 	SessionToken         flagext.Secret      `yaml:"session_token"`
 	Insecure             bool                `yaml:"insecure" category:"advanced"`
-	SignatureVersion     string              `yaml:"signature_version" category:"advanced"`
 	ListObjectsVersion   string              `yaml:"list_objects_version" category:"advanced"`
 	BucketLookupType     s3.BucketLookupType `yaml:"bucket_lookup_type" category:"advanced"`
 	DualstackEnabled     bool                `yaml:"dualstack_enabled" category:"experimental"`
@@ -143,7 +138,6 @@ func (cfg *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) {
 	f.StringVar(&cfg.Region, prefix+"s3.region", "", "S3 region. If unset, the client will issue a S3 GetBucketLocation API call to autodetect it.")
 	f.StringVar(&cfg.Endpoint, prefix+"s3.endpoint", "", "The S3 bucket endpoint. It could be an AWS S3 endpoint listed at https://docs.aws.amazon.com/general/latest/gr/s3.html or the address of an S3-compatible service in hostname:port format.")
 	f.BoolVar(&cfg.Insecure, prefix+"s3.insecure", false, "If enabled, use http:// for the S3 endpoint instead of https://. This could be useful in local dev/test environments while using an S3-compatible backend storage, like Minio.")
-	f.StringVar(&cfg.SignatureVersion, prefix+"s3.signature-version", SignatureVersionV4, fmt.Sprintf("The signature version to use for authenticating against S3. 
Supported values are: %s.", strings.Join(supportedSignatureVersions, ", "))) f.StringVar(&cfg.ListObjectsVersion, prefix+"s3.list-objects-version", "", "Use a specific version of the S3 list object API. Supported values are v1 or v2. Default is unset.") f.StringVar(&cfg.StorageClass, prefix+"s3.storage-class", "", "The S3 storage class to use, not set by default. Details can be found at https://aws.amazon.com/s3/storage-classes/. Supported values are: "+strings.Join(supportedStorageClasses, ", ")) f.BoolVar(&cfg.NativeAWSAuthEnabled, prefix+"s3.native-aws-auth-enabled", false, "If enabled, it will use the default authentication methods of the AWS SDK for go based on known environment variables and known AWS config files.") @@ -159,9 +153,6 @@ func (cfg *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { // Validate config and returns error on failure func (cfg *Config) Validate() error { - if !slices.Contains(supportedSignatureVersions, cfg.SignatureVersion) { - return errUnsupportedSignatureVersion - } if cfg.Endpoint != "" { endpoint := strings.Split(cfg.Endpoint, ".") if cfg.BucketName != "" && endpoint[0] != "" && endpoint[0] == cfg.BucketName { diff --git a/pkg/storage/bucket/s3/config_test.go b/pkg/storage/bucket/s3/config_test.go index c2404ca203cd..078353b68bd8 100644 --- a/pkg/storage/bucket/s3/config_test.go +++ b/pkg/storage/bucket/s3/config_test.go @@ -75,11 +75,10 @@ func TestConfig_Validate(t *testing.T) { sseCfg := &SSEConfig{} flagext.DefaultValues(sseCfg) cfg := &Config{ - Endpoint: "s3.eu-central-1.amazonaws.com", - BucketName: "mimir-block", - SSE: *sseCfg, - SignatureVersion: SignatureVersionV4, - StorageClass: s3_service.StorageClassStandard, + Endpoint: "s3.eu-central-1.amazonaws.com", + BucketName: "mimir-block", + SSE: *sseCfg, + StorageClass: s3_service.StorageClassStandard, } return cfg }, @@ -87,26 +86,17 @@ func TestConfig_Validate(t *testing.T) { "should fail if invalid storage class is set": { setup: func() *Config { return &Config{ - StorageClass: "foo", - SignatureVersion: SignatureVersionV4, + StorageClass: "foo", } }, expected: errUnsupportedStorageClass, }, - "should pass if valid storage signature version is set": { - setup: func() *Config { - return &Config{ - SignatureVersion: SignatureVersionV4, StorageClass: s3_service.StorageClassStandard, - } - }, - }, "should fail on invalid endpoint prefix": { setup: func() *Config { return &Config{ - Endpoint: "mimir-blocks.s3.eu-central-1.amazonaws.com", - BucketName: "mimir-blocks", - SignatureVersion: SignatureVersionV4, - StorageClass: s3_service.StorageClassStandard, + Endpoint: "mimir-blocks.s3.eu-central-1.amazonaws.com", + BucketName: "mimir-blocks", + StorageClass: s3_service.StorageClassStandard, } }, expected: errInvalidEndpointPrefix, @@ -114,7 +104,6 @@ func TestConfig_Validate(t *testing.T) { "should pass if native_aws_auth_enabled is set": { setup: func() *Config { return &Config{ - SignatureVersion: SignatureVersionV4, NativeAWSAuthEnabled: true, } }, @@ -124,11 +113,10 @@ func TestConfig_Validate(t *testing.T) { sseCfg := &SSEConfig{} flagext.DefaultValues(sseCfg) cfg := &Config{ - BucketName: "mimir-block", - SSE: *sseCfg, - SignatureVersion: SignatureVersionV4, - StorageClass: s3_service.StorageClassStandard, - STSEndpoint: "https://sts.eu-central-1.amazonaws.com", + BucketName: "mimir-block", + SSE: *sseCfg, + StorageClass: s3_service.StorageClassStandard, + STSEndpoint: "https://sts.eu-central-1.amazonaws.com", } return cfg }, @@ -138,11 +126,10 @@ func TestConfig_Validate(t 
*testing.T) { sseCfg := &SSEConfig{} flagext.DefaultValues(sseCfg) cfg := &Config{ - BucketName: "mimir-block", - SSE: *sseCfg, - SignatureVersion: SignatureVersionV4, - StorageClass: s3_service.StorageClassStandard, - STSEndpoint: "sts.eu-central-1.amazonaws.com", + BucketName: "mimir-block", + SSE: *sseCfg, + StorageClass: s3_service.StorageClassStandard, + STSEndpoint: "sts.eu-central-1.amazonaws.com", } return cfg },
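
A note for reviewers: the sketch below is not part of the series. It shows, under stated assumptions, how the Thanos-backed S3 object client added in PATCH 13 is expected to be constructed once the thanos objstore path is enabled. The bucket name, endpoint, and component label are hypothetical placeholders, all other bucket.Config fields are left at their zero values, and hedging stays disabled because the zero-value hedging.Config keeps At == 0, which skips the hedged bucket inside NewS3ThanosObjectClient.

```go
package main

import (
	"context"

	"github.com/go-kit/log"

	"github.com/grafana/loki/v3/pkg/storage/bucket"
	"github.com/grafana/loki/v3/pkg/storage/chunk/client/aws"
	"github.com/grafana/loki/v3/pkg/storage/chunk/client/hedging"
)

func main() {
	// Placeholder bucket settings; any field shown in the PATCH 13 diff of
	// pkg/storage/bucket/s3/config.go could be set here instead.
	cfg := bucket.Config{}
	cfg.S3.BucketName = "example-loki-chunks" // hypothetical bucket name
	cfg.S3.Endpoint = "s3.eu-central-1.amazonaws.com"

	// Zero-value hedging config: At == 0, so the second, hedged bucket is
	// never built and all reads go through the primary bucket.
	hedgingCfg := hedging.Config{}

	objClient, err := aws.NewS3ThanosObjectClient(context.Background(), cfg, "example-component", log.NewNopLogger(), hedgingCfg)
	if err != nil {
		panic(err)
	}
	defer objClient.Stop()

	// The returned adapter exposes the usual object client surface, for
	// example an existence check against the Thanos bucket.
	exists, err := objClient.ObjectExists(context.Background(), "index/")
	_, _ = exists, err
}
```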