From 3a508a5b33bfb5bd8547ad9cebd741bb713d6e04 Mon Sep 17 00:00:00 2001 From: Dan Salmon Date: Thu, 2 Jan 2025 21:22:51 -0600 Subject: [PATCH] feat: add wasabi support (#379) --- cmd/regioncheck/digitalocean.go | 57 +++++++++++++ cmd/regioncheck/dreamhost.go | 5 ++ cmd/regioncheck/linode.go | 66 +++++++++++++++ cmd/regioncheck/regioncheck.go | 141 +------------------------------- cmd/regioncheck/scaleway.go | 32 ++++++++ cmd/regioncheck/wasabi.go | 36 ++++++++ provider/providers.go | 46 +++++++++++ provider/providers_test.go | 10 +++ provider/wasabi.go | 117 ++++++++++++++++++++++++++ provider/wasabi_test.go | 30 +++++++ 10 files changed, 400 insertions(+), 140 deletions(-) create mode 100644 cmd/regioncheck/digitalocean.go create mode 100644 cmd/regioncheck/dreamhost.go create mode 100644 cmd/regioncheck/linode.go create mode 100644 cmd/regioncheck/scaleway.go create mode 100644 cmd/regioncheck/wasabi.go create mode 100644 provider/wasabi.go create mode 100644 provider/wasabi_test.go diff --git a/cmd/regioncheck/digitalocean.go b/cmd/regioncheck/digitalocean.go new file mode 100644 index 0000000..76c8e24 --- /dev/null +++ b/cmd/regioncheck/digitalocean.go @@ -0,0 +1,57 @@ +package main + +import ( + "fmt" + "github.com/PuerkitoBio/goquery" + "net/http" + "strings" +) + +// GetRegionsDO fetches regions from the DigitalOcean docs HTML page. +func GetRegionsDO() ([]string, error) { + requestURL := "https://docs.digitalocean.com/platform/regional-availability/" + res, err := http.Get(requestURL) + if err != nil { + return nil, err + } + defer res.Body.Close() + if res.StatusCode != 200 { + return nil, fmt.Errorf("status code error: %d %s", res.StatusCode, res.Status) + } + + doc, err := goquery.NewDocumentFromReader(res.Body) + if err != nil { + return nil, err + } + + regions := []string{} + doc.Find("h2#other-digitalocean-products + table thead tr th").Each(func(_ int, t *goquery.Selection) { + regions = append(regions, t.Text()) + }) + + spacesSupported := []bool{} + doc.Find("h2#other-digitalocean-products + table tbody tr").Each(func(_ int, t *goquery.Selection) { + // For each row, check the first cell for a value of "Spaces" + rowHeader := t.Find("td").First().Text() + if rowHeader == "Spaces" { + // For each cell in the "Spaces" row, check if the contents are not empty - meaning Spaces is supported + t.Find("td").Each(func(_ int, v *goquery.Selection) { + supported := v.Text() != "" + spacesSupported = append(spacesSupported, supported) + }) + } + }) + + supportedRegions := []string{} + for i := 0; i < len(regions); i++ { + if regions[i] == "Product" { + continue + } + if spacesSupported[i] { + supportedRegions = append(supportedRegions, strings.ToLower(regions[i])) + } + } + + // Return slice of region names + return supportedRegions, nil +} diff --git a/cmd/regioncheck/dreamhost.go b/cmd/regioncheck/dreamhost.go new file mode 100644 index 0000000..b258e73 --- /dev/null +++ b/cmd/regioncheck/dreamhost.go @@ -0,0 +1,5 @@ +package main + +func GetRegionsDreamhost() ([]string, error) { + return []string{"us-east-1"}, nil +} diff --git a/cmd/regioncheck/linode.go b/cmd/regioncheck/linode.go new file mode 100644 index 0000000..3d43f2b --- /dev/null +++ b/cmd/regioncheck/linode.go @@ -0,0 +1,66 @@ +package main + +import ( + "compress/gzip" + "fmt" + "github.com/PuerkitoBio/goquery" + "io" + "net/http" + "strings" +) + +// GetRegionsLinode fetches region names from Linode docs HTML page. Linode also provides this info via +// unauthenticated API (https://api.linode.com/v4/regions) but the region names do not include the trailing digit "-1". +func GetRegionsLinode() ([]string, error) { + // Akamai docs return a strange HTTP2 internal error if you don't request HTTP/2 with compression + req, err := http.NewRequest(http.MethodGet, "https://techdocs.akamai.com/cloud-computing/docs/object-storage", nil) + if err != nil { + return nil, err + } + req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:128.0) Gecko/20100101 Firefox/128.0") + req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8") + req.Header.Set("Accept-Language", "en-US,en;q=0.5") + req.Header.Set("Accept-Encoding", "gzip, deflate, br, zstd") + req.Header.Set("Connection", "keep-alive") + + resp, err := http.DefaultClient.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + if resp.StatusCode != 200 { + return nil, fmt.Errorf("status code error: %d %s", resp.StatusCode, resp.Status) + } + + // Check that the server actually sent compressed data + var reader io.ReadCloser + switch resp.Header.Get("Content-Encoding") { + case "gzip": + reader, err = gzip.NewReader(resp.Body) + if err != nil { + return nil, err + } + defer reader.Close() + default: + reader = resp.Body + } + + buf := new(strings.Builder) + _, err = io.Copy(buf, reader) //nolint:gosec + if err != nil { + return nil, err + } + + doc, err := goquery.NewDocumentFromReader(strings.NewReader(buf.String())) + if err != nil { + return nil, err + } + + regions := []string{} + doc.Find(".rdmd-table:nth-of-type(1) tbody tr td:nth-of-type(2)").Each(func(_ int, t *goquery.Selection) { + regions = append(regions, t.Text()) + }) + + return regions, nil +} diff --git a/cmd/regioncheck/regioncheck.go b/cmd/regioncheck/regioncheck.go index ac10a34..85aaf62 100644 --- a/cmd/regioncheck/regioncheck.go +++ b/cmd/regioncheck/regioncheck.go @@ -1,17 +1,10 @@ package main import ( - "compress/gzip" - "fmt" - "github.com/PuerkitoBio/goquery" "github.com/sa7mon/s3scanner/provider" - "io" "log" - "net/http" "os" - "regexp" "sort" - "strings" "sync" ) @@ -28,139 +21,6 @@ func eq(f []string, s []string) bool { return true } -// GetRegionsDO fetches regions from the DigitalOcean docs HTML page. -func GetRegionsDO() ([]string, error) { - requestURL := "https://docs.digitalocean.com/platform/regional-availability/" - res, err := http.Get(requestURL) - if err != nil { - return nil, err - } - defer res.Body.Close() - if res.StatusCode != 200 { - return nil, fmt.Errorf("status code error: %d %s", res.StatusCode, res.Status) - } - - doc, err := goquery.NewDocumentFromReader(res.Body) - if err != nil { - return nil, err - } - - regions := []string{} - doc.Find("h2#other-digitalocean-products + table thead tr th").Each(func(_ int, t *goquery.Selection) { - regions = append(regions, t.Text()) - }) - - spacesSupported := []bool{} - doc.Find("h2#other-digitalocean-products + table tbody tr").Each(func(_ int, t *goquery.Selection) { - // For each row, check the first cell for a value of "Spaces" - rowHeader := t.Find("td").First().Text() - if rowHeader == "Spaces" { - // For each cell in the "Spaces" row, check if the contents are not empty - meaning Spaces is supported - t.Find("td").Each(func(_ int, v *goquery.Selection) { - supported := v.Text() != "" - spacesSupported = append(spacesSupported, supported) - }) - } - }) - - supportedRegions := []string{} - for i := 0; i < len(regions); i++ { - if regions[i] == "Product" { - continue - } - if spacesSupported[i] { - supportedRegions = append(supportedRegions, strings.ToLower(regions[i])) - } - } - - // Return slice of region names - return supportedRegions, nil -} - -func GetRegionsDreamhost() ([]string, error) { - return []string{"us-east-1"}, nil -} - -// GetRegionsLinode fetches region names from Linode docs HTML page. Linode also provides this info via -// unauthenticated API (https://api.linode.com/v4/regions) but the region names do not include the trailing digit "-1". -func GetRegionsLinode() ([]string, error) { - // Akamai docs return a strange HTTP2 internal error if you don't request HTTP/2 with compression - req, err := http.NewRequest(http.MethodGet, "https://techdocs.akamai.com/cloud-computing/docs/object-storage", nil) - if err != nil { - return nil, err - } - req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:128.0) Gecko/20100101 Firefox/128.0") - req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8") - req.Header.Set("Accept-Language", "en-US,en;q=0.5") - req.Header.Set("Accept-Encoding", "gzip, deflate, br, zstd") - req.Header.Set("Connection", "keep-alive") - - resp, err := http.DefaultClient.Do(req) - if err != nil { - return nil, err - } - defer resp.Body.Close() - - if resp.StatusCode != 200 { - return nil, fmt.Errorf("status code error: %d %s", resp.StatusCode, resp.Status) - } - - // Check that the server actually sent compressed data - var reader io.ReadCloser - switch resp.Header.Get("Content-Encoding") { - case "gzip": - reader, err = gzip.NewReader(resp.Body) - if err != nil { - return nil, err - } - defer reader.Close() - default: - reader = resp.Body - } - - buf := new(strings.Builder) - _, err = io.Copy(buf, reader) //nolint:gosec - if err != nil { - return nil, err - } - - doc, err := goquery.NewDocumentFromReader(strings.NewReader(buf.String())) - if err != nil { - return nil, err - } - - regions := []string{} - doc.Find(".rdmd-table:nth-of-type(1) tbody tr td:nth-of-type(2)").Each(func(_ int, t *goquery.Selection) { - regions = append(regions, t.Text()) - }) - - return regions, nil -} - -func GetRegionsScaleway() ([]string, error) { - var re = regexp.MustCompile(`Region: \x60(.+)\x60`) - requestURL := "https://raw.githubusercontent.com/scaleway/docs-content/main/storage/object/how-to/create-a-bucket.mdx" - res, err := http.Get(requestURL) - if err != nil { - return nil, err - } - defer res.Body.Close() - if res.StatusCode != 200 { - return nil, fmt.Errorf("status code error: %d %s", res.StatusCode, res.Status) - } - - bytes, bErr := io.ReadAll(res.Body) - if bErr != nil { - return nil, bErr - } - - var regions []string - for _, a := range re.FindAllSubmatch(bytes, -1) { - regions = append(regions, string(a[1])) - } - return regions, nil -} - func main() { results := map[string][]string{} errors := map[string]error{} @@ -170,6 +30,7 @@ func main() { "dreamhost": GetRegionsDreamhost, "linode": GetRegionsLinode, "scaleway": GetRegionsScaleway, + "wasabi": getRegionsWasabi, } wg := sync.WaitGroup{} diff --git a/cmd/regioncheck/scaleway.go b/cmd/regioncheck/scaleway.go new file mode 100644 index 0000000..cd4ba72 --- /dev/null +++ b/cmd/regioncheck/scaleway.go @@ -0,0 +1,32 @@ +package main + +import ( + "fmt" + "io" + "net/http" + "regexp" +) + +func GetRegionsScaleway() ([]string, error) { + var re = regexp.MustCompile(`Region: \x60(.+)\x60`) + requestURL := "https://raw.githubusercontent.com/scaleway/docs-content/main/storage/object/how-to/create-a-bucket.mdx" + res, err := http.Get(requestURL) + if err != nil { + return nil, err + } + defer res.Body.Close() + if res.StatusCode != 200 { + return nil, fmt.Errorf("status code error: %d %s", res.StatusCode, res.Status) + } + + bytes, bErr := io.ReadAll(res.Body) + if bErr != nil { + return nil, bErr + } + + var regions []string + for _, a := range re.FindAllSubmatch(bytes, -1) { + regions = append(regions, string(a[1])) + } + return regions, nil +} diff --git a/cmd/regioncheck/wasabi.go b/cmd/regioncheck/wasabi.go new file mode 100644 index 0000000..5b4aba9 --- /dev/null +++ b/cmd/regioncheck/wasabi.go @@ -0,0 +1,36 @@ +package main + +import ( + "fmt" + "github.com/PuerkitoBio/goquery" + "net/http" + "strings" +) + +func getRegionsWasabi() ([]string, error) { + requestURL := "https://wasabi.com/company/storage-regions" + res, err := http.Get(requestURL) + if err != nil { + return nil, err + } + defer res.Body.Close() + if res.StatusCode != 200 { + return nil, fmt.Errorf("status code error: %d %s", res.StatusCode, res.Status) + } + + doc, err := goquery.NewDocumentFromReader(res.Body) + if err != nil { + return nil, err + } + + regions := []string{} + doc.Find("main h3~p:has(b)").Contents().Each(func(_ int, t *goquery.Selection) { + if goquery.NodeName(t) == "#text" { + rangeNames := strings.Split(t.Text(), "&") + for _, r := range rangeNames { + regions = append(regions, strings.TrimSpace(r)) + } + } + }) + return regions, nil +} diff --git a/provider/providers.go b/provider/providers.go index efb2452..fe42a8a 100644 --- a/provider/providers.go +++ b/provider/providers.go @@ -6,6 +6,7 @@ import ( "errors" "fmt" "net/http" + "net/url" "time" "github.com/aws/aws-sdk-go-v2/aws" @@ -50,6 +51,9 @@ var ProviderRegions = map[string][]string{ "it-mil-1", "jp-osa-1", "nl-ams-1", "se-sto-1", "us-east-1", "us-iad-1", "us-lax-1", "us-mia-1", "us-ord-1", "us-sea-1", "us-southeast-1"}, "scaleway": {"fr-par", "nl-ams", "pl-waw"}, + "wasabi": {"us-west-1", "us-east-1", "us-east-2", "us-central-1", "ca-central-1", "eu-west-1", "eu-west-2", + "eu-west-3", "eu-central-1", "eu-central-2", "eu-south-1", "ap-northeast-1", "ap-northeast-2", "ap-southeast-2", + "ap-southeast-1"}, } func NewProvider(name string) (StorageProvider, error) { @@ -70,6 +74,8 @@ func NewProvider(name string) (StorageProvider, error) { provider, err = NewProviderLinode() case "scaleway": provider, err = NewProviderScaleway() + case "wasabi": + provider, err = NewProviderWasabi() default: err = fmt.Errorf("unknown provider: %s", name) } @@ -274,3 +280,43 @@ func bucketExists(clients *clientmap.ClientMap, b *bucket.Bucket) (bool, string, } return false, "", nil } + +// bucketExists301 takes a bucket name and checks if it exists. It assumes the server will respond with a 301 status +// and `x-amz-bucket-region` header pointing to the correct region if an incorrect region is specified. +func bucketExists301(client *s3.Client, region string, b *bucket.Bucket) (bool, string, error) { + logFields := log.Fields{ + "bucket_name": b.Name, + "region": region, + "method": "providers.bucketExists301()", + } + + bucketURL, err := url.JoinPath(*client.Options().BaseEndpoint, b.Name) + if err != nil { + return false, "", logErr(logFields, err) + } + req, reqErr := http.NewRequest("HEAD", bucketURL, nil) + if reqErr != nil { + return false, "", logErr(logFields, reqErr) + } + res, resErr := client.Options().HTTPClient.Do(req) + if resErr != nil { + return false, "", logErr(logFields, resErr) + } + + switch res.StatusCode { + case 200: + return true, region, nil + case 301: + return true, res.Header.Get("x-amz-bucket-region"), nil + case 403: + return true, region, nil + case 404: + return false, "", nil + } + return false, "", logErr(logFields, fmt.Errorf("unexpected status code: %d", res.StatusCode)) +} + +func logErr(fields log.Fields, err error) error { + log.WithFields(fields).Error(err.Error()) + return err +} diff --git a/provider/providers_test.go b/provider/providers_test.go index 27efbf1..0cc452a 100644 --- a/provider/providers_test.go +++ b/provider/providers_test.go @@ -58,6 +58,12 @@ func TestMain(m *testing.M) { } providers["scaleway"] = provider + provider, err = NewProviderWasabi() + if err != nil { + panic(err) + } + providers["wasabi"] = provider + code := m.Run() os.Exit(code) } @@ -120,6 +126,7 @@ func Test_StorageProvider_Statics(t *testing.T) { {name: "GCP", provider: providers["gcp"], insecure: false, addressStyle: PathStyle}, {name: "Linode", provider: providers["linode"], insecure: false, addressStyle: VirtualHostStyle}, {name: "Scaleway", provider: providers["scaleway"], insecure: false, addressStyle: PathStyle}, + {name: "Wasabi", provider: providers["wasabi"], insecure: false, addressStyle: PathStyle}, } for _, tt := range tests { @@ -145,6 +152,7 @@ func Test_StorageProvider_BucketExists(t *testing.T) { {name: "GCP", provider: providers["gcp"], goodBucket: bucket.NewBucket("books"), badBucket: bucket.NewBucket("s3scanner-no-exist")}, {name: "Linode", provider: providers["linode"], goodBucket: bucket.NewBucket("vantage"), badBucket: bucket.NewBucket("s3scanner-no-exist")}, {name: "Scaleway", provider: providers["scaleway"], goodBucket: bucket.NewBucket("2017"), badBucket: bucket.NewBucket("s3scanner-no-exist")}, + {name: "Wasabi", provider: providers["wasabi"], goodBucket: bucket.NewBucket("acp"), badBucket: bucket.NewBucket("s3scanner-no-exist")}, } for _, tt := range tests { @@ -177,6 +185,7 @@ func Test_StorageProvider_Enum(t *testing.T) { {name: "GCP", provider: providers["gcp"], goodBucket: bucket.NewBucket("assets"), numObjects: 3}, {name: "Linode", provider: providers["linode"], goodBucket: bucket.NewBucket("vantage"), numObjects: 50}, {name: "Scaleway", provider: providers["scaleway"], goodBucket: bucket.NewBucket("3d-builder"), numObjects: 1}, + {name: "Wasabi", provider: providers["wasabi"], goodBucket: bucket.NewBucket("animals"), numObjects: 102}, } for _, tt := range tests { @@ -210,6 +219,7 @@ func Test_StorageProvider_Scan(t *testing.T) { {name: "GCP", provider: providers["gcp"], bucket: bucket.NewBucket("hatrioua"), permissions: "AuthUsers: [] | AllUsers: []"}, {name: "Linode", provider: providers["linode"], bucket: bucket.NewBucket("vantage"), permissions: "AuthUsers: [] | AllUsers: [READ]"}, {name: "Scaleway", provider: providers["scaleway"], bucket: bucket.NewBucket("3d-builder"), permissions: "AuthUsers: [] | AllUsers: [READ]"}, + {name: "Wasabi", provider: providers["wasabi"], bucket: bucket.NewBucket("acceptance"), permissions: "AuthUsers: [] | AllUsers: [READ, READ_ACP]"}, } for _, tt := range tests { diff --git a/provider/wasabi.go b/provider/wasabi.go new file mode 100644 index 0000000..cb66a7b --- /dev/null +++ b/provider/wasabi.go @@ -0,0 +1,117 @@ +package provider + +import ( + "context" + "errors" + "fmt" + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/service/s3" + "github.com/sa7mon/s3scanner/bucket" + "github.com/sa7mon/s3scanner/provider/clientmap" + "net/http" +) + +type Wasabi struct { + clients *clientmap.ClientMap + existsClient *s3.Client +} + +func (w *Wasabi) Insecure() bool { + return false +} + +func (w *Wasabi) AddressStyle() int { + return PathStyle +} + +func (w *Wasabi) BucketExists(b *bucket.Bucket) (*bucket.Bucket, error) { + b.Provider = w.Name() + exists, region, err := bucketExists301(w.existsClient, "us-east-1", b) + if err != nil { + return b, err + } + if exists { + b.Exists = bucket.BucketExists + b.Region = region + } else { + b.Exists = bucket.BucketNotExist + } + + return b, nil +} + +func (w *Wasabi) Scan(bucket *bucket.Bucket, doDestructiveChecks bool) error { + client := w.clients.Get(bucket.Region, false) + return checkPermissions(client, bucket, doDestructiveChecks) +} + +func (w *Wasabi) Enumerate(b *bucket.Bucket) error { + if b.Exists != bucket.BucketExists { + return errors.New("bucket might not exist") + } + client := w.getRegionClient(b.Region) + enumErr := enumerateListObjectsV2(client, b) + if enumErr != nil { + return enumErr + } + return nil +} + +func (w *Wasabi) getRegionClient(region string) *s3.Client { + return w.clients.Get(region, false) +} + +func (w *Wasabi) newExistsClient() (*s3.Client, error) { + client := &http.Client{ + CheckRedirect: func(_ *http.Request, _ []*http.Request) error { // don't follow redirects + return http.ErrUseLastResponse + }, + Transport: &http.Transport{ + Proxy: http.ProxyFromEnvironment, + }, + } + cfg, err := config.LoadDefaultConfig( + context.TODO(), + config.WithCredentialsProvider(aws.AnonymousCredentials{}), + config.WithHTTPClient(client), + config.WithRegion("auto"), + ) + if err != nil { + return nil, err + } + + cfg.BaseEndpoint = aws.String("https://s3.wasabisys.com") + return s3.NewFromConfig(cfg, func(o *s3.Options) { o.UsePathStyle = true }), nil +} + +func NewProviderWasabi() (*Wasabi, error) { + w := new(Wasabi) + clients, err := w.newClients() + if err != nil { + return w, err + } + w.clients = clients + + c, cErr := w.newExistsClient() + if cErr != nil { + return w, cErr + } + w.existsClient = c + return w, nil +} + +func (w *Wasabi) newClients() (*clientmap.ClientMap, error) { + clients := clientmap.WithCapacity(len(ProviderRegions[w.Name()])) + for _, r := range ProviderRegions[w.Name()] { + client, err := newNonAWSClient(w, fmt.Sprintf("https://s3.%s.wasabisys.com", r)) + if err != nil { + return nil, err + } + clients.Set(r, false, client) + } + + return clients, nil +} + +func (w *Wasabi) Name() string { return "wasabi" } diff --git a/provider/wasabi_test.go b/provider/wasabi_test.go new file mode 100644 index 0000000..bbfc168 --- /dev/null +++ b/provider/wasabi_test.go @@ -0,0 +1,30 @@ +package provider + +import ( + "github.com/sa7mon/s3scanner/bucket" + "github.com/stretchr/testify/assert" + "testing" +) + +func TestWasabi_NewExistsClient(t *testing.T) { + t.Parallel() + w, wErr := NewProviderWasabi() + assert.Nil(t, wErr) + _, err := w.newExistsClient() + assert.Nil(t, err) +} + +func TestWasabi_BucketExists(t *testing.T) { + t.Parallel() + w, _ := NewProviderWasabi() + exists, err := w.BucketExists(&bucket.Bucket{Name: "images"}) + assert.Nil(t, err) + assert.Equal(t, bucket.BucketExists, exists.Exists) + assert.Equal(t, "us-central-1", exists.Region) + + // exists in the default region - check returns a 200 instead of redirect + exists, err = w.BucketExists(&bucket.Bucket{Name: "aedata"}) + assert.Nil(t, err) + assert.Equal(t, bucket.BucketExists, exists.Exists) + assert.Equal(t, "us-east-1", exists.Region) +}