Skip to content

Commit

Permalink
feat(storage): AWS backend using thanos.io/objstore (#11221)
Browse files Browse the repository at this point in the history
Co-authored-by: Ashwanth Goli <[email protected]>
(cherry picked from commit b872246)
  • Loading branch information
JoaoBraveCoding authored and grafana-delivery-bot[bot] committed Oct 28, 2024
1 parent 51c42e8 commit a09f216
Show file tree
Hide file tree
Showing 8 changed files with 344 additions and 187 deletions.
7 changes: 3 additions & 4 deletions pkg/storage/bucket/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,9 @@ func (cfg *StorageBackendConfig) RegisterFlagsWithPrefix(prefix string, f *flag.
}

func (cfg *StorageBackendConfig) Validate() error {
// TODO: enable validation when s3 flags are registered
// if err := cfg.S3.Validate(); err != nil {
// return err
//}
if err := cfg.S3.Validate(); err != nil {
return err
}

return nil
}
Expand Down
42 changes: 32 additions & 10 deletions pkg/storage/bucket/s3/bucket_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"github.com/go-kit/log"
"github.com/prometheus/common/model"
"github.com/thanos-io/objstore"
"github.com/thanos-io/objstore/exthttp"
"github.com/thanos-io/objstore/providers/s3"
)

Expand Down Expand Up @@ -38,17 +39,28 @@ func newS3Config(cfg Config) (s3.Config, error) {
return s3.Config{}, err
}

putUserMetadata := map[string]string{}

if cfg.StorageClass != "" {
putUserMetadata[awsStorageClassHeader] = cfg.StorageClass
}

return s3.Config{
Bucket: cfg.BucketName,
Endpoint: cfg.Endpoint,
Region: cfg.Region,
AccessKey: cfg.AccessKeyID,
SecretKey: cfg.SecretAccessKey.String(),
SessionToken: cfg.SessionToken.String(),
Insecure: cfg.Insecure,
DisableDualstack: cfg.DisableDualstack,
SSEConfig: sseCfg,
PutUserMetadata: map[string]string{awsStorageClassHeader: cfg.StorageClass},
Bucket: cfg.BucketName,
Endpoint: cfg.Endpoint,
Region: cfg.Region,
AccessKey: cfg.AccessKeyID,
SecretKey: cfg.SecretAccessKey.String(),
SessionToken: cfg.SessionToken.String(),
Insecure: cfg.Insecure,
PutUserMetadata: putUserMetadata,
SendContentMd5: cfg.SendContentMd5,
SSEConfig: sseCfg,
DisableDualstack: !cfg.DualstackEnabled,
ListObjectsVersion: cfg.ListObjectsVersion,
BucketLookupType: cfg.BucketLookupType,
AWSSDKAuth: cfg.NativeAWSAuthEnabled,
PartSize: cfg.PartSize,
HTTPConfig: s3.HTTPConfig{
IdleConnTimeout: model.Duration(cfg.HTTP.IdleConnTimeout),
ResponseHeaderTimeout: model.Duration(cfg.HTTP.ResponseHeaderTimeout),
Expand All @@ -59,6 +71,16 @@ func newS3Config(cfg Config) (s3.Config, error) {
MaxIdleConnsPerHost: cfg.HTTP.MaxIdleConnsPerHost,
MaxConnsPerHost: cfg.HTTP.MaxConnsPerHost,
Transport: cfg.HTTP.Transport,
TLSConfig: exthttp.TLSConfig{
CAFile: cfg.HTTP.TLSConfig.CAPath,
CertFile: cfg.HTTP.TLSConfig.CertPath,
KeyFile: cfg.HTTP.TLSConfig.KeyPath,
ServerName: cfg.HTTP.TLSConfig.ServerName,
},
},
TraceConfig: s3.TraceConfig{
Enable: cfg.TraceConfig.Enabled,
},
STSEndpoint: cfg.STSEndpoint,
}, nil
}
164 changes: 131 additions & 33 deletions pkg/storage/bucket/s3/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,20 @@ import (
"flag"
"fmt"
"net/http"
"slices"
"strings"
"time"

s3_service "github.com/aws/aws-sdk-go/service/s3"
"github.com/grafana/dskit/flagext"
"github.com/minio/minio-go/v7/pkg/encrypt"
"github.com/pkg/errors"
"github.com/thanos-io/objstore/providers/s3"

bucket_http "github.com/grafana/loki/v3/pkg/storage/bucket/http"
"github.com/grafana/loki/v3/pkg/storage/common/aws"
"github.com/grafana/loki/v3/pkg/util"
)

const (
// Signature Version 2 is being turned off (deprecated) in Amazon S3. Amazon S3 will then only accept API requests that are signed using Signature Version 4.
// https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingAWSSDK.html#UsingAWSSDK-sig2-deprecation
SignatureVersionV4 = "v4"

// SSEKMS config type constant to configure S3 server side encryption using KMS
// https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingKMSEncryption.html
SSEKMS = "SSE-KMS"
Expand All @@ -32,41 +29,99 @@ const (
)

var (
supportedSignatureVersions = []string{SignatureVersionV4}
supportedSSETypes = []string{SSEKMS, SSES3}
errUnsupportedSignatureVersion = errors.New("unsupported signature version")
errUnsupportedSSEType = errors.New("unsupported S3 SSE type")
errInvalidSSEContext = errors.New("invalid S3 SSE encryption context")
supportedSSETypes = []string{SSEKMS, SSES3}
supportedStorageClasses = s3_service.ObjectStorageClass_Values()
supportedBucketLookupTypes = thanosS3BucketLookupTypesValues()

errUnsupportedSSEType = errors.New("unsupported S3 SSE type")
errUnsupportedStorageClass = fmt.Errorf("unsupported S3 storage class (supported values: %s)", strings.Join(supportedStorageClasses, ", "))
errInvalidSSEContext = errors.New("invalid S3 SSE encryption context")
errInvalidEndpointPrefix = errors.New("the endpoint must not prefixed with the bucket name")
errInvalidSTSEndpoint = errors.New("sts-endpoint must be a valid url")
)

// thanosS3BucketLookupTypes maps the string form of each bucket lookup type
// to its thanos objstore value. bucketLookupTypeValue.Set uses it to parse
// user input, and its keys are the values listed in the bucket-lookup-type
// flag's help text.
var thanosS3BucketLookupTypes = map[string]s3.BucketLookupType{
s3.AutoLookup.String(): s3.AutoLookup,
s3.VirtualHostLookup.String(): s3.VirtualHostLookup,
s3.PathLookup.String(): s3.PathLookup,
}

// thanosS3BucketLookupTypesValues returns the names of every supported bucket
// lookup type (the keys of thanosS3BucketLookupTypes) in ascending order.
func thanosS3BucketLookupTypesValues() (list []string) {
	for name := range thanosS3BucketLookupTypes {
		list = append(list, name)
	}

	// Map iteration order is random; sorting keeps the flag help text stable.
	slices.Sort(list)

	return list
}

// HTTPConfig stores the http.Transport configuration for the s3 minio client.
// Every field except Transport is exposed in YAML and, via
// RegisterFlagsWithPrefix, as a command-line flag.
type HTTPConfig struct {
// Embedded bucket_http.Config; its fields are inlined into this YAML section.
bucket_http.Config `yaml:",inline"`
// IdleConnTimeout is the time an idle connection remains idle before closing.
IdleConnTimeout time.Duration `yaml:"idle_conn_timeout" category:"advanced"`
// ResponseHeaderTimeout is how long the client waits for the server's response headers.
ResponseHeaderTimeout time.Duration `yaml:"response_header_timeout" category:"advanced"`
// InsecureSkipVerify makes the client accept any certificate and hostname over HTTPS.
InsecureSkipVerify bool `yaml:"insecure_skip_verify" category:"advanced"`
// TLSHandshakeTimeout is the maximum time to wait for a TLS handshake; 0 means no limit.
TLSHandshakeTimeout time.Duration `yaml:"tls_handshake_timeout" category:"advanced"`
// ExpectContinueTimeout is the time to wait for the server's first response
// headers after sending a request with an Expect header; 0 sends the body immediately.
ExpectContinueTimeout time.Duration `yaml:"expect_continue_timeout" category:"advanced"`
// MaxIdleConns caps idle (keep-alive) connections across all hosts; 0 means no limit.
MaxIdleConns int `yaml:"max_idle_connections" category:"advanced"`
// MaxIdleConnsPerHost caps idle (keep-alive) connections per host; 0 selects a built-in default.
MaxIdleConnsPerHost int `yaml:"max_idle_connections_per_host" category:"advanced"`
// MaxConnsPerHost caps the number of connections per host; 0 means no limit.
MaxConnsPerHost int `yaml:"max_connections_per_host" category:"advanced"`

// Allow upstream callers to inject a round tripper
// (excluded from YAML by the "-" tag, so it can only be set programmatically).
Transport http.RoundTripper `yaml:"-"`

// TLSConfig holds the TLS client options; its keys are inlined into this YAML section.
TLSConfig TLSConfig `yaml:",inline"`
}

// TLSConfig configures the options for TLS connections.
type TLSConfig struct {
// CAPath is the path to the CA certificates used to validate the server
// certificate. If not set, the host's root CA certificates are used.
CAPath string `yaml:"tls_ca_path" category:"advanced"`
// CertPath is the path to the client certificate used for authenticating
// with the server. Requires KeyPath to be configured as well.
CertPath string `yaml:"tls_cert_path" category:"advanced"`
// KeyPath is the path to the key for the client certificate. Requires
// CertPath to be configured as well.
KeyPath string `yaml:"tls_key_path" category:"advanced"`
// ServerName overrides the expected name on the server certificate.
ServerName string `yaml:"tls_server_name" category:"advanced"`
}

// RegisterFlagsWithPrefix registers the flags for s3 storage with the provided prefix
//
// Flag names are not uniformly namespaced: the idle-conn-timeout,
// response-header-timeout and insecure-skip-verify flags live under
// "<prefix>s3.http.", while the handshake/expect-continue timeouts and the
// connection limits live directly under "<prefix>s3.". The names are part of
// the user-facing interface, so they are documented here rather than changed.
func (cfg *HTTPConfig) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) {
// Flags of the embedded bucket_http.Config are registered under "<prefix>s3.".
cfg.Config.RegisterFlagsWithPrefix(prefix+"s3.", f)
f.DurationVar(&cfg.IdleConnTimeout, prefix+"s3.http.idle-conn-timeout", 90*time.Second, "The time an idle connection will remain idle before closing.")
f.DurationVar(&cfg.ResponseHeaderTimeout, prefix+"s3.http.response-header-timeout", 2*time.Minute, "The amount of time the client will wait for a servers response headers.")
f.BoolVar(&cfg.InsecureSkipVerify, prefix+"s3.http.insecure-skip-verify", false, "If the client connects to S3 via HTTPS and this option is enabled, the client will accept any certificate and hostname.")
f.DurationVar(&cfg.TLSHandshakeTimeout, prefix+"s3.tls-handshake-timeout", 10*time.Second, "Maximum time to wait for a TLS handshake. 0 means no limit.")
f.DurationVar(&cfg.ExpectContinueTimeout, prefix+"s3.expect-continue-timeout", 1*time.Second, "The time to wait for a server's first response headers after fully writing the request headers if the request has an Expect header. 0 to send the request body immediately.")
f.IntVar(&cfg.MaxIdleConns, prefix+"s3.max-idle-connections", 100, "Maximum number of idle (keep-alive) connections across all hosts. 0 means no limit.")
f.IntVar(&cfg.MaxIdleConnsPerHost, prefix+"s3.max-idle-connections-per-host", 100, "Maximum number of idle (keep-alive) connections to keep per-host. If 0, a built-in default value is used.")
f.IntVar(&cfg.MaxConnsPerHost, prefix+"s3.max-connections-per-host", 0, "Maximum number of connections per host. 0 means no limit.")
// The bare prefix is passed on: TLSConfig adds the "s3.http." segment itself.
cfg.TLSConfig.RegisterFlagsWithPrefix(prefix, f)
}

// RegisterFlagsWithPrefix registers the TLS client flags on f. Every flag is
// named "<prefix>s3.http.tls-*" and defaults to the empty string.
func (cfg *TLSConfig) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) {
	base := prefix + "s3.http."

	// One row per flag: destination field, name suffix and help text.
	options := []struct {
		target *string
		suffix string
		usage  string
	}{
		{&cfg.CAPath, "tls-ca-path", "Path to the CA certificates to validate server certificate against. If not set, the host's root CA certificates are used."},
		{&cfg.CertPath, "tls-cert-path", "Path to the client certificate, which will be used for authenticating with the server. Also requires the key path to be configured."},
		{&cfg.KeyPath, "tls-key-path", "Path to the key for the client certificate. Also requires the client certificate to be configured."},
		{&cfg.ServerName, "tls-server-name", "Override the expected name on the server certificate."},
	}

	for _, opt := range options {
		f.StringVar(opt.target, base+opt.suffix, "", opt.usage)
	}
}

// Config holds the config options for an S3 backend
type Config struct {
Endpoint string `yaml:"endpoint"`
Region string `yaml:"region"`
BucketName string `yaml:"bucket_name"`
SecretAccessKey flagext.Secret `yaml:"secret_access_key"`
SessionToken flagext.Secret `yaml:"session_token"`
AccessKeyID string `yaml:"access_key_id"`
Insecure bool `yaml:"insecure"`
DisableDualstack bool `yaml:"disable_dualstack"`
SignatureVersion string `yaml:"signature_version"`
StorageClass string `yaml:"storage_class"`
Endpoint string `yaml:"endpoint"`
Region string `yaml:"region"`
BucketName string `yaml:"bucket_name"`
SecretAccessKey flagext.Secret `yaml:"secret_access_key"`
AccessKeyID string `yaml:"access_key_id"`
SessionToken flagext.Secret `yaml:"session_token"`
Insecure bool `yaml:"insecure" category:"advanced"`
ListObjectsVersion string `yaml:"list_objects_version" category:"advanced"`
BucketLookupType s3.BucketLookupType `yaml:"bucket_lookup_type" category:"advanced"`
DualstackEnabled bool `yaml:"dualstack_enabled" category:"experimental"`
StorageClass string `yaml:"storage_class" category:"experimental"`
NativeAWSAuthEnabled bool `yaml:"native_aws_auth_enabled" category:"experimental"`
PartSize uint64 `yaml:"part_size" category:"experimental"`
SendContentMd5 bool `yaml:"send_content_md5" category:"experimental"`
STSEndpoint string `yaml:"sts_endpoint"`

SSE SSEConfig `yaml:"sse"`
HTTP HTTPConfig `yaml:"http"`
SSE SSEConfig `yaml:"sse"`
HTTP HTTPConfig `yaml:"http"`
TraceConfig TraceConfig `yaml:"trace"`
}

// RegisterFlags registers the flags for s3 storage with the provided prefix
Expand All @@ -83,21 +138,32 @@ func (cfg *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) {
f.StringVar(&cfg.Region, prefix+"s3.region", "", "S3 region. If unset, the client will issue a S3 GetBucketLocation API call to autodetect it.")
f.StringVar(&cfg.Endpoint, prefix+"s3.endpoint", "", "The S3 bucket endpoint. It could be an AWS S3 endpoint listed at https://docs.aws.amazon.com/general/latest/gr/s3.html or the address of an S3-compatible service in hostname:port format.")
f.BoolVar(&cfg.Insecure, prefix+"s3.insecure", false, "If enabled, use http:// for the S3 endpoint instead of https://. This could be useful in local dev/test environments while using an S3-compatible backend storage, like Minio.")
f.BoolVar(&cfg.DisableDualstack, prefix+"s3.disable-dualstack", false, "Disable forcing S3 dualstack endpoint usage.")
f.StringVar(&cfg.SignatureVersion, prefix+"s3.signature-version", SignatureVersionV4, fmt.Sprintf("The signature version to use for authenticating against S3. Supported values are: %s.", strings.Join(supportedSignatureVersions, ", ")))
f.StringVar(&cfg.StorageClass, prefix+"s3.storage-class", aws.StorageClassStandard, "The S3 storage class to use. Details can be found at https://aws.amazon.com/s3/storage-classes/.")
f.StringVar(&cfg.ListObjectsVersion, prefix+"s3.list-objects-version", "", "Use a specific version of the S3 list object API. Supported values are v1 or v2. Default is unset.")
f.StringVar(&cfg.StorageClass, prefix+"s3.storage-class", "", "The S3 storage class to use, not set by default. Details can be found at https://aws.amazon.com/s3/storage-classes/. Supported values are: "+strings.Join(supportedStorageClasses, ", "))
f.BoolVar(&cfg.NativeAWSAuthEnabled, prefix+"s3.native-aws-auth-enabled", false, "If enabled, it will use the default authentication methods of the AWS SDK for go based on known environment variables and known AWS config files.")
f.Uint64Var(&cfg.PartSize, prefix+"s3.part-size", 0, "The minimum file size in bytes used for multipart uploads. If 0, the value is optimally computed for each object.")
f.BoolVar(&cfg.SendContentMd5, prefix+"s3.send-content-md5", false, "If enabled, a Content-MD5 header is sent with S3 Put Object requests. Consumes more resources to compute the MD5, but may improve compatibility with object storage services that do not support checksums.")
f.Var(newBucketLookupTypeValue(s3.AutoLookup, &cfg.BucketLookupType), prefix+"s3.bucket-lookup-type", fmt.Sprintf("Bucket lookup style type, used to access bucket in S3-compatible service. Default is auto. Supported values are: %s.", strings.Join(supportedBucketLookupTypes, ", ")))
f.BoolVar(&cfg.DualstackEnabled, prefix+"s3.dualstack-enabled", true, "When enabled, direct all AWS S3 requests to the dual-stack IPv4/IPv6 endpoint for the configured region.")
f.StringVar(&cfg.STSEndpoint, prefix+"s3.sts-endpoint", "", "Accessing S3 resources using temporary, secure credentials provided by AWS Security Token Service.")
cfg.SSE.RegisterFlagsWithPrefix(prefix+"s3.sse.", f)
cfg.HTTP.RegisterFlagsWithPrefix(prefix, f)
cfg.TraceConfig.RegisterFlagsWithPrefix(prefix+"s3.trace.", f)
}

// Validate config and returns error on failure
func (cfg *Config) Validate() error {
if !util.StringsContain(supportedSignatureVersions, cfg.SignatureVersion) {
return errUnsupportedSignatureVersion
if cfg.Endpoint != "" {
endpoint := strings.Split(cfg.Endpoint, ".")
if cfg.BucketName != "" && endpoint[0] != "" && endpoint[0] == cfg.BucketName {
return errInvalidEndpointPrefix
}
}

if err := aws.ValidateStorageClass(cfg.StorageClass); err != nil {
return err
if cfg.STSEndpoint != "" && !util.IsValidURL(cfg.STSEndpoint) {
return errInvalidSTSEndpoint
}
if !slices.Contains(supportedStorageClasses, cfg.StorageClass) && cfg.StorageClass != "" {
return errUnsupportedStorageClass
}

return cfg.SSE.Validate()
Expand Down Expand Up @@ -191,3 +257,35 @@ func parseKMSEncryptionContext(data string) (map[string]string, error) {
err := errors.Wrap(json.Unmarshal([]byte(data), &decoded), "unable to parse KMS encryption context")
return decoded, err
}

// TraceConfig holds the tracing options for the S3 client.
type TraceConfig struct {
// Enabled turns on debug-level logging of low-level S3 HTTP operation information.
Enabled bool `yaml:"enabled" category:"advanced"`
}

// RegisterFlagsWithPrefix registers the trace flags on f. The single flag is
// named "<prefix>enabled" and is off by default.
func (cfg *TraceConfig) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) {
	const usage = "When enabled, low-level S3 HTTP operation information is logged at the debug level."

	name := prefix + "enabled"
	f.BoolVar(&cfg.Enabled, name, false, usage)
}

// bucketLookupTypeValue is an adapter between s3.BucketLookupType and flag.Value.
// It is a distinct named type over s3.BucketLookupType so that the String and
// Set methods can be attached to it, while a *s3.BucketLookupType can still be
// converted to *bucketLookupTypeValue and written through in place.
type bucketLookupTypeValue s3.BucketLookupType

// newBucketLookupTypeValue stores value in *p as the default and returns p
// viewed as a *bucketLookupTypeValue, so that later Set calls on the returned
// adapter write straight into the variable p points to.
func newBucketLookupTypeValue(value s3.BucketLookupType, p *s3.BucketLookupType) *bucketLookupTypeValue {
	adapter := (*bucketLookupTypeValue)(p)
	*adapter = bucketLookupTypeValue(value)
	return adapter
}

// String returns the textual form of the current lookup type. A nil receiver
// reports the string of s3.AutoLookup instead of dereferencing.
func (v *bucketLookupTypeValue) String() string {
	lookup := s3.AutoLookup
	if v != nil {
		lookup = s3.BucketLookupType(*v)
	}
	return lookup.String()
}

// Set parses s as a bucket lookup type name and stores the matching value in
// the receiver. It returns an error, leaving the receiver untouched, when s is
// not a key of thanosS3BucketLookupTypes.
func (v *bucketLookupTypeValue) Set(s string) error {
	if lookup, known := thanosS3BucketLookupTypes[s]; known {
		*v = bucketLookupTypeValue(lookup)
		return nil
	}
	return fmt.Errorf("unsupported bucket lookup type: %s", s)
}
Loading

0 comments on commit a09f216

Please sign in to comment.