diff --git a/README.md b/README.md
index 05cb0236..6bfb1b4d 100644
--- a/README.md
+++ b/README.md
@@ -56,10 +56,11 @@ version numbers.
 
 One of the following two options must be specified:
 
-* `regexp`: *Optional.* The pattern to match filenames against within S3. The first
-  grouped match is used to extract the version, or if a group is explicitly
-  named `version`, that group is used. At least one capture group must be
-  specified, with parentheses.
+* `regexp`: *Optional.* The forward-slash (`/`) delimited sequence of patterns to
+  match against the sub-directories and filenames of the objects stored within
+  the S3 bucket. The first grouped match is used to extract the version, or if
+  a group is explicitly named `version`, that group is used. At least one
+  capture group must be specified, with parentheses.
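+
+  For example (a hypothetical layout), `releases/app-(.*)\.tgz` would match an
+  object such as `releases/app-1.2.3.tgz` and extract `1.2.3` as its version:
+  the pattern is split on `/`, and each resulting section is matched against
+  one level of the object's path.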
"github.com/concourse/s3-resource/check" @@ -37,11 +37,24 @@ var _ = Describe("Check Command", func() { s3client = &fakes.FakeS3Client{} command = NewCommand(s3client) - s3client.BucketFilesReturns([]string{ - "files/abc-0.0.1.tgz", - "files/abc-2.33.333.tgz", - "files/abc-2.4.3.tgz", - "files/abc-3.53.tgz", + s3client.ChunkedBucketListReturnsOnCall(0, s3resource.BucketListChunk{ + Truncated: false, + ContinuationToken: nil, + CommonPrefixes: []string{"files/abc-3/"}, + Paths: []string{ + "files/abc-0.0.1.tgz", + "files/abc-2.33.333.tgz", + "files/abc-2.4.3.tgz", + "files/abc-3.53.tgz", + }, + }, nil) + s3client.ChunkedBucketListReturnsOnCall(1, s3resource.BucketListChunk{ + Truncated: false, + ContinuationToken: nil, + Paths: []string{ + "files/abc-3/53.tgz", + "files/abc-3/no-magic", + }, }, nil) }) @@ -123,6 +136,16 @@ var _ = Describe("Check Command", func() { Expect(response).To(ConsistOf(s3resource.Version{Path: "files/abc-2.33.333.tgz"})) }) }) + + Context("when the regexp does not contain any magic regexp char", func() { + It("does not explode", func() { + request.Source.Regexp = "files/abc-3/no-magic" + response, err := command.Run(request) + Ω(err).ShouldNot(HaveOccurred()) + + Ω(response).Should(HaveLen(0)) + }) + }) }) Context("when there is a previous version", func() { diff --git a/dockerfiles/alpine/Dockerfile b/dockerfiles/alpine/Dockerfile index 7905fc8b..0e477b3c 100644 --- a/dockerfiles/alpine/Dockerfile +++ b/dockerfiles/alpine/Dockerfile @@ -27,6 +27,7 @@ ARG S3_TESTING_AWS_ROLE_ARN ARG S3_VERSIONED_TESTING_BUCKET ARG S3_TESTING_BUCKET ARG S3_TESTING_REGION +ARG S3_TESTING_NO_LARGE_UPLOAD ARG S3_ENDPOINT ARG TEST_SESSION_TOKEN COPY --from=builder /tests /go-tests diff --git a/fakes/fake_s3client.go b/fakes/fake_s3client.go index 3037fba2..b4bd344a 100644 --- a/fakes/fake_s3client.go +++ b/fakes/fake_s3client.go @@ -36,6 +36,21 @@ type FakeS3Client struct { result1 []string result2 error } + ChunkedBucketListStub func(string, string, *string) (s3resource.BucketListChunk, error) + chunkedBucketListMutex sync.RWMutex + chunkedBucketListArgsForCall []struct { + arg1 string + arg2 string + arg3 *string + } + chunkedBucketListReturns struct { + result1 s3resource.BucketListChunk + result2 error + } + chunkedBucketListReturnsOnCall map[int]struct { + result1 s3resource.BucketListChunk + result2 error + } DeleteFileStub func(string, string) error deleteFileMutex sync.RWMutex deleteFileArgsForCall []struct { @@ -265,6 +280,72 @@ func (fake *FakeS3Client) BucketFilesReturnsOnCall(i int, result1 []string, resu }{result1, result2} } +func (fake *FakeS3Client) ChunkedBucketList(arg1 string, arg2 string, arg3 *string) (s3resource.BucketListChunk, error) { + fake.chunkedBucketListMutex.Lock() + ret, specificReturn := fake.chunkedBucketListReturnsOnCall[len(fake.chunkedBucketListArgsForCall)] + fake.chunkedBucketListArgsForCall = append(fake.chunkedBucketListArgsForCall, struct { + arg1 string + arg2 string + arg3 *string + }{arg1, arg2, arg3}) + stub := fake.ChunkedBucketListStub + fakeReturns := fake.chunkedBucketListReturns + fake.recordInvocation("ChunkedBucketList", []interface{}{arg1, arg2, arg3}) + fake.chunkedBucketListMutex.Unlock() + if stub != nil { + return stub(arg1, arg2, arg3) + } + if specificReturn { + return ret.result1, ret.result2 + } + return fakeReturns.result1, fakeReturns.result2 +} + +func (fake *FakeS3Client) ChunkedBucketListCallCount() int { + fake.chunkedBucketListMutex.RLock() + defer fake.chunkedBucketListMutex.RUnlock() + return 
+            s3client.ChunkedBucketListReturnsOnCall(0, s3resource.BucketListChunk{
+                Truncated:         false,
+                ContinuationToken: nil,
+                CommonPrefixes:    []string{"files/abc-3/"},
+                Paths: []string{
+                    "files/abc-0.0.1.tgz",
+                    "files/abc-2.33.333.tgz",
+                    "files/abc-2.4.3.tgz",
+                    "files/abc-3.53.tgz",
+                },
+            }, nil)
+            s3client.ChunkedBucketListReturnsOnCall(1, s3resource.BucketListChunk{
+                Truncated:         false,
+                ContinuationToken: nil,
+                Paths: []string{
+                    "files/abc-3/53.tgz",
+                    "files/abc-3/no-magic",
+                },
             }, nil)
         })
@@ -123,6 +136,16 @@ var _ = Describe("Check Command", func() {
                 Expect(response).To(ConsistOf(s3resource.Version{Path: "files/abc-2.33.333.tgz"}))
             })
         })
+
+        Context("when the regexp does not contain any magic regexp char", func() {
+            It("does not explode", func() {
+                request.Source.Regexp = "files/abc-3/no-magic"
+                response, err := command.Run(request)
+                Ω(err).ShouldNot(HaveOccurred())
+
+                Ω(response).Should(HaveLen(0))
+            })
+        })
     })
 
     Context("when there is a previous version", func() {
diff --git a/dockerfiles/alpine/Dockerfile b/dockerfiles/alpine/Dockerfile
index 7905fc8b..0e477b3c 100644
--- a/dockerfiles/alpine/Dockerfile
+++ b/dockerfiles/alpine/Dockerfile
@@ -27,6 +27,7 @@ ARG S3_TESTING_AWS_ROLE_ARN
 ARG S3_VERSIONED_TESTING_BUCKET
 ARG S3_TESTING_BUCKET
 ARG S3_TESTING_REGION
+ARG S3_TESTING_NO_LARGE_UPLOAD
 ARG S3_ENDPOINT
 ARG TEST_SESSION_TOKEN
 COPY --from=builder /tests /go-tests
diff --git a/fakes/fake_s3client.go b/fakes/fake_s3client.go
index 3037fba2..b4bd344a 100644
--- a/fakes/fake_s3client.go
+++ b/fakes/fake_s3client.go
@@ -36,6 +36,21 @@ type FakeS3Client struct {
         result1 []string
         result2 error
     }
+    ChunkedBucketListStub        func(string, string, *string) (s3resource.BucketListChunk, error)
+    chunkedBucketListMutex       sync.RWMutex
+    chunkedBucketListArgsForCall []struct {
+        arg1 string
+        arg2 string
+        arg3 *string
+    }
+    chunkedBucketListReturns struct {
+        result1 s3resource.BucketListChunk
+        result2 error
+    }
+    chunkedBucketListReturnsOnCall map[int]struct {
+        result1 s3resource.BucketListChunk
+        result2 error
+    }
     DeleteFileStub        func(string, string) error
     deleteFileMutex       sync.RWMutex
     deleteFileArgsForCall []struct {
@@ -265,6 +280,72 @@ func (fake *FakeS3Client) BucketFilesReturnsOnCall(i int, result1 []string, resu
     }{result1, result2}
 }
 
+func (fake *FakeS3Client) ChunkedBucketList(arg1 string, arg2 string, arg3 *string) (s3resource.BucketListChunk, error) {
+    fake.chunkedBucketListMutex.Lock()
+    ret, specificReturn := fake.chunkedBucketListReturnsOnCall[len(fake.chunkedBucketListArgsForCall)]
+    fake.chunkedBucketListArgsForCall = append(fake.chunkedBucketListArgsForCall, struct {
+        arg1 string
+        arg2 string
+        arg3 *string
+    }{arg1, arg2, arg3})
+    stub := fake.ChunkedBucketListStub
+    fakeReturns := fake.chunkedBucketListReturns
+    fake.recordInvocation("ChunkedBucketList", []interface{}{arg1, arg2, arg3})
+    fake.chunkedBucketListMutex.Unlock()
+    if stub != nil {
+        return stub(arg1, arg2, arg3)
+    }
+    if specificReturn {
+        return ret.result1, ret.result2
+    }
+    return fakeReturns.result1, fakeReturns.result2
+}
+
+func (fake *FakeS3Client) ChunkedBucketListCallCount() int {
+    fake.chunkedBucketListMutex.RLock()
+    defer fake.chunkedBucketListMutex.RUnlock()
+    return len(fake.chunkedBucketListArgsForCall)
+}
+
+func (fake *FakeS3Client) ChunkedBucketListCalls(stub func(string, string, *string) (s3resource.BucketListChunk, error)) {
+    fake.chunkedBucketListMutex.Lock()
+    defer fake.chunkedBucketListMutex.Unlock()
+    fake.ChunkedBucketListStub = stub
+}
+
+func (fake *FakeS3Client) ChunkedBucketListArgsForCall(i int) (string, string, *string) {
+    fake.chunkedBucketListMutex.RLock()
+    defer fake.chunkedBucketListMutex.RUnlock()
+    argsForCall := fake.chunkedBucketListArgsForCall[i]
+    return argsForCall.arg1, argsForCall.arg2, argsForCall.arg3
+}
+
+func (fake *FakeS3Client) ChunkedBucketListReturns(result1 s3resource.BucketListChunk, result2 error) {
+    fake.chunkedBucketListMutex.Lock()
+    defer fake.chunkedBucketListMutex.Unlock()
+    fake.ChunkedBucketListStub = nil
+    fake.chunkedBucketListReturns = struct {
+        result1 s3resource.BucketListChunk
+        result2 error
+    }{result1, result2}
+}
+
+func (fake *FakeS3Client) ChunkedBucketListReturnsOnCall(i int, result1 s3resource.BucketListChunk, result2 error) {
+    fake.chunkedBucketListMutex.Lock()
+    defer fake.chunkedBucketListMutex.Unlock()
+    fake.ChunkedBucketListStub = nil
+    if fake.chunkedBucketListReturnsOnCall == nil {
+        fake.chunkedBucketListReturnsOnCall = make(map[int]struct {
+            result1 s3resource.BucketListChunk
+            result2 error
+        })
+    }
+    fake.chunkedBucketListReturnsOnCall[i] = struct {
+        result1 s3resource.BucketListChunk
+        result2 error
+    }{result1, result2}
+}
+
 func (fake *FakeS3Client) DeleteFile(arg1 string, arg2 string) error {
     fake.deleteFileMutex.Lock()
     ret, specificReturn := fake.deleteFileReturnsOnCall[len(fake.deleteFileArgsForCall)]
@@ -713,6 +794,8 @@ func (fake *FakeS3Client) Invocations() map[string][][]interface{} {
     defer fake.bucketFileVersionsMutex.RUnlock()
     fake.bucketFilesMutex.RLock()
     defer fake.bucketFilesMutex.RUnlock()
+    fake.chunkedBucketListMutex.RLock()
+    defer fake.chunkedBucketListMutex.RUnlock()
     fake.deleteFileMutex.RLock()
     defer fake.deleteFileMutex.RUnlock()
     fake.deleteVersionedFileMutex.RLock()
diff --git a/integration/out_test.go b/integration/out_test.go
index ba67f939..36379d95 100644
--- a/integration/out_test.go
+++ b/integration/out_test.go
@@ -11,14 +11,14 @@ import (
     "github.com/aws/aws-sdk-go/aws"
     "github.com/aws/aws-sdk-go/service/s3"
     "github.com/aws/aws-sdk-go/service/s3/s3manager"
-    "github.com/concourse/s3-resource"
+    s3resource "github.com/concourse/s3-resource"
     "github.com/concourse/s3-resource/out"
     . "github.com/onsi/ginkgo"
     . "github.com/onsi/gomega"
     "github.com/onsi/gomega/gbytes"
     "github.com/onsi/gomega/gexec"
 
-    "github.com/nu7hatch/gouuid"
+    uuid "github.com/nu7hatch/gouuid"
 )
 
 var _ = Describe("out", func() {
@@ -300,6 +300,10 @@ var _ = Describe("out", func() {
 
     Context("with a large file that is multiple of MaxUploadParts", func() {
         BeforeEach(func() {
+            if os.Getenv("S3_TESTING_NO_LARGE_UPLOAD") != "" {
+                Skip("'S3_TESTING_NO_LARGE_UPLOAD' is set, skipping.")
+            }
+
             path := filepath.Join(sourceDir, "large-file-to-upload")
 
             // touch the file
"github.com/onsi/gomega" "github.com/onsi/gomega/gbytes" "github.com/onsi/gomega/gexec" - "github.com/nu7hatch/gouuid" + uuid "github.com/nu7hatch/gouuid" ) var _ = Describe("out", func() { @@ -300,6 +300,10 @@ var _ = Describe("out", func() { Context("with a large file that is multiple of MaxUploadParts", func() { BeforeEach(func() { + if os.Getenv("S3_TESTING_NO_LARGE_UPLOAD") != "" { + Skip("'S3_TESTING_NO_LARGE_UPLOAD' is set, skipping.") + } + path := filepath.Join(sourceDir, "large-file-to-upload") // touch the file diff --git a/s3client.go b/s3client.go index 099140f8..4a3e9d76 100644 --- a/s3client.go +++ b/s3client.go @@ -5,13 +5,14 @@ import ( "encoding/json" "errors" "fmt" - "github.com/aws/aws-sdk-go/aws/credentials/stscreds" "io" "io/ioutil" "os" "strings" "time" + "github.com/aws/aws-sdk-go/aws/credentials/stscreds" + "net/http" "github.com/aws/aws-sdk-go/aws" @@ -28,6 +29,8 @@ type S3Client interface { BucketFiles(bucketName string, prefixHint string) ([]string, error) BucketFileVersions(bucketName string, remotePath string) ([]string, error) + ChunkedBucketList(bucketName string, prefix string, continuationToken *string) (BucketListChunk, error) + UploadFile(bucketName string, remotePath string, localPath string, options UploadFileOptions) (string, error) DownloadFile(bucketName string, remotePath string, versionID string, localPath string) error @@ -149,17 +152,24 @@ func NewAwsConfig( return awsConfig } -func (client *s3client) BucketFiles(bucketName string, prefixHint string) ([]string, error) { - entries, err := client.getBucketContents(bucketName, prefixHint) - - if err != nil { - return []string{}, err - } - - paths := make([]string, 0, len(entries)) - - for _, entry := range entries { - paths = append(paths, *entry.Key) +// BucketFiles returns all the files in bucketName immediately under directoryPrefix +func (client *s3client) BucketFiles(bucketName string, directoryPrefix string) ([]string, error) { + if !strings.HasSuffix(directoryPrefix, "/") { + directoryPrefix = directoryPrefix + "/" + } + var ( + continuationToken *string + truncated bool + paths []string + ) + for continuationToken, truncated = nil, true; truncated; { + s3ListChunk, err := client.ChunkedBucketList(bucketName, directoryPrefix, continuationToken) + if err != nil { + return []string{}, err + } + truncated = s3ListChunk.Truncated + continuationToken = s3ListChunk.ContinuationToken + paths = append(paths, s3ListChunk.Paths...) } return paths, nil } @@ -189,6 +199,47 @@ func (client *s3client) BucketFileVersions(bucketName string, remotePath string) return versions, nil } +type BucketListChunk struct { + Truncated bool + ContinuationToken *string + CommonPrefixes []string + Paths []string +} + +// ChunkedBucketList lists the S3 bucket `bucketName` content's under `prefix` one chunk at a time +// +// The returned `BucketListChunk` contains part of the files and subdirectories +// present in `bucketName` under `prefix`. The files are listed in `Paths` and +// the subdirectories in `CommonPrefixes`. If the returned chunk does not +// include all the files and subdirectories, the `Truncated` flag will be set +// to `true` and the `ContinuationToken` can be used to retrieve the next chunk. 
+func (client *s3client) ChunkedBucketList(bucketName string, prefix string, continuationToken *string) (BucketListChunk, error) {
+    params := &s3.ListObjectsV2Input{
+        Bucket:            aws.String(bucketName),
+        ContinuationToken: continuationToken,
+        Delimiter:         aws.String("/"),
+        Prefix:            aws.String(prefix),
+    }
+    response, err := client.client.ListObjectsV2(params)
+    if err != nil {
+        return BucketListChunk{}, err
+    }
+    commonPrefixes := make([]string, 0, len(response.CommonPrefixes))
+    paths := make([]string, 0, len(response.Contents))
+    for _, commonPrefix := range response.CommonPrefixes {
+        commonPrefixes = append(commonPrefixes, *commonPrefix.Prefix)
+    }
+    for _, path := range response.Contents {
+        paths = append(paths, *path.Key)
+    }
+    return BucketListChunk{
+        Truncated:         *response.IsTruncated,
+        ContinuationToken: response.NextContinuationToken,
+        CommonPrefixes:    commonPrefixes,
+        Paths:             paths,
+    }, nil
+}
+
 func (client *s3client) UploadFile(bucketName string, remotePath string, localPath string, options UploadFileOptions) (string, error) {
     uploader := s3manager.NewUploaderWithClient(client.client)
@@ -398,51 +449,6 @@ func (client *s3client) DeleteFile(bucketName string, remotePath string) error {
     return err
 }
 
-func (client *s3client) getBucketContents(bucketName string, prefix string) (map[string]*s3.Object, error) {
-    bucketContents := map[string]*s3.Object{}
-    marker := ""
-
-    for {
-        listObjectsResponse, err := client.client.ListObjects(&s3.ListObjectsInput{
-            Bucket: aws.String(bucketName),
-            Prefix: aws.String(prefix),
-            Marker: aws.String(marker),
-        })
-
-        if err != nil {
-            return bucketContents, err
-        }
-
-        lastKey := ""
-
-        for _, key := range listObjectsResponse.Contents {
-            bucketContents[*key.Key] = key
-
-            lastKey = *key.Key
-        }
-
-        if *listObjectsResponse.IsTruncated {
-            prevMarker := marker
-            if listObjectsResponse.NextMarker == nil {
-                // From the s3 docs: If response does not include the
-                // NextMarker and it is truncated, you can use the value of the
-                // last Key in the response as the marker in the subsequent
-                // request to get the next set of object keys.
-                marker = lastKey
-            } else {
-                marker = *listObjectsResponse.NextMarker
-            }
-            if marker == prevMarker {
-                return nil, errors.New("Unable to list all bucket objects; perhaps this is a CloudFront S3 bucket that needs its `Query String Forwarding and Caching` set to `Forward all, cache based on all`?")
-            }
-        } else {
-            break
-        }
-    }
-
-    return bucketContents, nil
-}
-
 func (client *s3client) getBucketVersioning(bucketName string) (bool, error) {
     params := &s3.GetBucketVersioningInput{
         Bucket: aws.String(bucketName),
diff --git a/versions/versions.go b/versions/versions.go
index ae871c0e..519ac6b9 100644
--- a/versions/versions.go
+++ b/versions/versions.go
@@ -5,14 +5,10 @@ import (
     "sort"
     "strings"
 
-    "github.com/concourse/s3-resource"
+    s3resource "github.com/concourse/s3-resource"
     "github.com/cppforlife/go-semi-semantic/version"
 )
 
-func Match(paths []string, pattern string) ([]string, error) {
-    return MatchUnanchored(paths, "^"+pattern+"$")
-}
-
 func MatchUnanchored(paths []string, pattern string) ([]string, error) {
     matched := []string{}
 
@@ -101,48 +97,90 @@ type Extraction struct {
     VersionNumber string
 }
 
-const regexpSpecialChars = `\\\*\.\[\]\(\)\{\}\?\|\^\$\+`
-
-func PrefixHint(regex string) string {
-    nonRE := regexp.MustCompile(`\\(?P<chr>[` + regexpSpecialChars + `])|(?P<chr2>[^` + regexpSpecialChars + `])`)
-    re := regexp.MustCompile(`^(` + nonRE.String() + `)*$`)
-
-    validSections := []string{}
-
-    sections := strings.Split(regex, "/")
+// GetMatchingPathsFromBucket gets all the paths in the S3 bucket `bucketName` that match all the sections of `regex`.
+//
+// `regex` is a forward-slash (`/`) delimited list of regular expressions that
+// must match each corresponding sub-directory and file name for the path to
+// be retained.
+//
+// The function walks the file tree stored in the S3 bucket `bucketName` and
+// collects the full paths that match `regex` along the way. It takes care of
+// following only the branches (prefixes, in S3 terms) that match the
+// corresponding section of `regex`.
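+//
+// For example (a hypothetical layout), with `regex` set to
+// `releases/app-(.*)\.tgz`, the literal section `releases` simply extends the
+// search prefix, so only `releases/` is ever listed, and the leaf paths
+// matching the `app-(.*)\.tgz` section, such as `releases/app-1.2.3.tgz`, are
+// retained.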
+func GetMatchingPathsFromBucket(client s3resource.S3Client, bucketName string, regex string) ([]string, error) {
+    type work struct {
+        prefix  string
+        remains []string
+    }
 
-    for _, section := range sections {
-        if re.MatchString(section) {
-            validSections = append(validSections, nonRE.ReplaceAllString(section, "${chr}${chr2}"))
+    specialCharsRE := regexp.MustCompile(`[\\\*\.\[\]\(\)\{\}\?\|\^\$\+]`)
+
+    matchingPaths := []string{}
+    queue := []work{{prefix: "", remains: strings.Split(regex, "/")}}
+    for len(queue) != 0 {
+        prefix := queue[0].prefix
+        remains := queue[0].remains
+        section := remains[0]
+        remains = remains[1:]
+        queue = queue[1:]
+        if !specialCharsRE.MatchString(section) && len(remains) != 0 {
+            // No special char, so the section can only match a single string
+            // and we can just extend the prefix, but only if something
+            // remains, i.e. the section is not a leaf.
+            prefix += section + "/"
+            queue = append(queue, work{prefix: prefix, remains: remains})
+            continue
+        }
+        // List what's under the current prefix and check it against the section
+        var prefixRE *regexp.Regexp
+        if len(remains) != 0 {
+            // We need to look deeper, so the full prefix will end with a /
+            prefixRE = regexp.MustCompile(prefix + section + "/")
         } else {
-            break
+            prefixRE = regexp.MustCompile(prefix + section)
+        }
+        var (
+            continuationToken *string
+            truncated         bool
+        )
+        for continuationToken, truncated = nil, true; truncated; {
+            s3ListChunk, err := client.ChunkedBucketList(bucketName, prefix, continuationToken)
+            if err != nil {
+                return []string{}, err
+            }
+            truncated = s3ListChunk.Truncated
+            continuationToken = s3ListChunk.ContinuationToken
+
+            if len(remains) != 0 {
+                // We need to look deeper, so the full prefix will end with a /
+                for _, commonPrefix := range s3ListChunk.CommonPrefixes {
+                    if prefixRE.MatchString(commonPrefix) {
+                        queue = append(queue, work{prefix: commonPrefix, remains: remains})
+                    }
+                }
+            } else {
+                // We're looking for a leaf
+                for _, path := range s3ListChunk.Paths {
+                    if prefixRE.MatchString(path) {
+                        matchingPaths = append(matchingPaths, path)
+                    }
+                }
+            }
         }
     }
-
-    if len(validSections) == 0 {
-        return ""
-    }
-
-    return strings.Join(validSections, "/") + "/"
+    return matchingPaths, nil
 }
 
 func GetBucketFileVersions(client s3resource.S3Client, source s3resource.Source) Extractions {
-    regexp := source.Regexp
-    hint := PrefixHint(regexp)
+    regex := source.Regexp
 
-    paths, err := client.BucketFiles(source.Bucket, hint)
+    matchingPaths, err := GetMatchingPathsFromBucket(client, source.Bucket, regex)
     if err != nil {
         s3resource.Fatal("listing files", err)
     }
 
-    matchingPaths, err := Match(paths, source.Regexp)
-    if err != nil {
-        s3resource.Fatal("finding matches", err)
-    }
-
     var extractions = make(Extractions, 0, len(matchingPaths))
     for _, path := range matchingPaths {
-        extraction, ok := Extract(path, regexp)
+        extraction, ok := Extract(path, regex)
         if ok {
             extractions = append(extractions, extraction)
diff --git a/versions/versions_test.go b/versions/versions_test.go
index 9653ed70..acf582bc 100644
--- a/versions/versions_test.go
+++ b/versions/versions_test.go
@@ -1,6 +1,10 @@
 package versions_test
 
 import (
+    "errors"
+
+    s3resource "github.com/concourse/s3-resource"
+    "github.com/concourse/s3-resource/fakes"
     "github.com/concourse/s3-resource/versions"
     . "github.com/onsi/ginkgo"
     . "github.com/onsi/gomega"
@@ -12,7 +16,7 @@ var ItMatchesPaths = func(matchFunc MatchFunc) {
     Describe("checking if paths in the bucket should be searched", func() {
         Context("when given an empty list of paths", func() {
             It("returns an empty list of matches", func() {
-                result, err := versions.Match([]string{}, "regex")
+                result, err := matchFunc([]string{}, "regex")
 
                 Ω(err).ShouldNot(HaveOccurred())
                 Ω(result).Should(BeEmpty())
@@ -24,7 +28,7 @@ var ItMatchesPaths = func(matchFunc MatchFunc) {
                 paths := []string{"abc"}
                 regex := "abc"
 
-                result, err := versions.Match(paths, regex)
+                result, err := matchFunc(paths, regex)
                 Ω(err).ShouldNot(HaveOccurred())
                 Ω(result).Should(ConsistOf("abc"))
             })
@@ -33,7 +37,7 @@ var ItMatchesPaths = func(matchFunc MatchFunc) {
                 paths := []string{"abc"}
                 regex := "ad"
 
-                result, err := versions.Match(paths, regex)
+                result, err := matchFunc(paths, regex)
                 Ω(err).ShouldNot(HaveOccurred())
                 Ω(result).Should(BeEmpty())
             })
@@ -42,7 +46,7 @@ var ItMatchesPaths = func(matchFunc MatchFunc) {
                 paths := []string{"abc"}
                 regex := "a.*c"
 
-                result, err := versions.Match(paths, regex)
+                result, err := matchFunc(paths, regex)
                 Ω(err).ShouldNot(HaveOccurred())
                 Ω(result).Should(ConsistOf("abc"))
             })
@@ -51,7 +55,7 @@ var ItMatchesPaths = func(matchFunc MatchFunc) {
                 paths := []string{"abc"}
                 regex := "a(c"
 
-                _, err := versions.Match(paths, regex)
+                _, err := matchFunc(paths, regex)
                 Ω(err).Should(HaveOccurred())
             })
         })
@@ -61,7 +65,7 @@ var ItMatchesPaths = func(matchFunc MatchFunc) {
                 paths := []string{"abc", "bcd"}
                 regex := ".*bc.*"
 
-                result, err := versions.Match(paths, regex)
+                result, err := matchFunc(paths, regex)
                 Ω(err).ShouldNot(HaveOccurred())
                 Ω(result).Should(ConsistOf("abc", "bcd"))
             })
@@ -70,7 +74,7 @@ var ItMatchesPaths = func(matchFunc MatchFunc) {
                 paths := []string{"abc", "def"}
                 regex := "ge.*h"
 
-                result, err := versions.Match(paths, regex)
+                result, err := matchFunc(paths, regex)
                 Ω(err).ShouldNot(HaveOccurred())
                 Ω(result).Should(BeEmpty())
             })
         })
     })
 }
@@ -78,46 +82,8 @@ var ItMatchesPaths = func(matchFunc MatchFunc) {
     })
 }
 
-var _ = Describe("Match", func() {
-    Describe("Match", func() {
-        ItMatchesPaths(versions.Match)
-
-        It("does not contain files that are in some subdirectory that is not explicitly mentioned", func() {
-            paths := []string{"folder/abc", "abc"}
-            regex := "abc"
-
-            result, err := versions.Match(paths, regex)
-            Ω(err).ShouldNot(HaveOccurred())
-            Ω(result).Should(ConsistOf("abc"))
-        })
-    })
-
-    Describe("MatchUnanchored", func() {
-        ItMatchesPaths(versions.MatchUnanchored)
-    })
-})
-
-var _ = Describe("PrefixHint", func() {
-    It("turns a regexp into a limiter for s3", func() {
-        By("having a directory prefix in the simple case")
-        Ω(versions.PrefixHint("hello/(.*).tgz")).Should(Equal("hello/"))
-        Ω(versions.PrefixHint("hello/world-(.*)")).Should(Equal("hello/"))
-        Ω(versions.PrefixHint("hello-world/some-file-(.*)")).Should(Equal("hello-world/"))
-
-        By("not having a prefix if there is no parent directory")
-        Ω(versions.PrefixHint("(.*).tgz")).Should(Equal(""))
-        Ω(versions.PrefixHint("hello-(.*).tgz")).Should(Equal(""))
-
-        By("skipping regexp path names")
-        Ω(versions.PrefixHint("hello/(.*)/what.txt")).Should(Equal("hello/"))
-
-        By("handling escaped regexp characters")
-        Ω(versions.PrefixHint(`hello/cruel\[\\\^\$\.\|\?\*\+\(\)world/fizz-(.*).tgz`)).Should(Equal(`hello/cruel[\^$.|?*+()world/`))
-
-        By("handling regexp-specific escapes")
-        Ω(versions.PrefixHint(`hello/\d{3}/fizz-(.*).tgz`)).Should(Equal(`hello/`))
-        Ω(versions.PrefixHint(`hello/\d/fizz-(.*).tgz`)).Should(Equal(`hello/`))
-    })
+var _ = Describe("MatchUnanchored", func() {
+    ItMatchesPaths(versions.MatchUnanchored)
 })
 
 var _ = Describe("Extract", func() {
@@ -175,3 +141,168 @@ var _ = Describe("Extract", func() {
         })
     })
 })
+
+var _ = Describe("GetMatchingPathsFromBucket", func() {
+    var s3client *fakes.FakeS3Client
+
+    BeforeEach(func() {
+        s3client = &fakes.FakeS3Client{}
+    })
+
+    Context("When the regexp has no '/'", func() {
+        Context("when the regexp has no special char", func() {
+            It("uses only the empty string as prefix", func() {
+                versions.GetMatchingPathsFromBucket(s3client, "bucket", "regexp")
+                Ω(s3client.ChunkedBucketListCallCount()).Should(Equal(1))
+                _, prefix, _ := s3client.ChunkedBucketListArgsForCall(0)
+                Ω(prefix).Should(Equal(""))
+            })
+        })
+        Context("when the regexp has a special char", func() {
+            It("uses only the empty string as prefix", func() {
+                versions.GetMatchingPathsFromBucket(s3client, "bucket", "reg.xp")
+                Ω(s3client.ChunkedBucketListCallCount()).Should(Equal(1))
+                _, prefix, _ := s3client.ChunkedBucketListArgsForCall(0)
+                Ω(prefix).Should(Equal(""))
+            })
+        })
+    })
+
+    Context("When regexp special char appears close to the leaves", func() {
+        It("starts directly with the longest prefix", func() {
+            versions.GetMatchingPathsFromBucket(
+                s3client, "bucket", "regexp/will/appear/only/close/tw?o+/leaves",
+            )
+            Ω(s3client.ChunkedBucketListCallCount()).Should(Equal(1))
+            _, prefix, _ := s3client.ChunkedBucketListArgsForCall(0)
+            Ω(prefix).Should(Equal("regexp/will/appear/only/close/"))
+        })
+
+        It("follows only the matching prefixes", func() {
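+            // Simulated tree: four sibling directories under
+            // `regexp/will/appear/only/close/`; `to/`, `too/` and `two/`
+            // match the `tw?o+` section, so each gets listed, but only
+            // `to/leaves` matches the final section.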
"github.com/onsi/gomega" @@ -12,7 +16,7 @@ var ItMatchesPaths = func(matchFunc MatchFunc) { Describe("checking if paths in the bucket should be searched", func() { Context("when given an empty list of paths", func() { It("returns an empty list of matches", func() { - result, err := versions.Match([]string{}, "regex") + result, err := matchFunc([]string{}, "regex") Ω(err).ShouldNot(HaveOccurred()) Ω(result).Should(BeEmpty()) @@ -24,7 +28,7 @@ var ItMatchesPaths = func(matchFunc MatchFunc) { paths := []string{"abc"} regex := "abc" - result, err := versions.Match(paths, regex) + result, err := matchFunc(paths, regex) Ω(err).ShouldNot(HaveOccurred()) Ω(result).Should(ConsistOf("abc")) }) @@ -33,7 +37,7 @@ var ItMatchesPaths = func(matchFunc MatchFunc) { paths := []string{"abc"} regex := "ad" - result, err := versions.Match(paths, regex) + result, err := matchFunc(paths, regex) Ω(err).ShouldNot(HaveOccurred()) Ω(result).Should(BeEmpty()) }) @@ -42,7 +46,7 @@ var ItMatchesPaths = func(matchFunc MatchFunc) { paths := []string{"abc"} regex := "a.*c" - result, err := versions.Match(paths, regex) + result, err := matchFunc(paths, regex) Ω(err).ShouldNot(HaveOccurred()) Ω(result).Should(ConsistOf("abc")) }) @@ -51,7 +55,7 @@ var ItMatchesPaths = func(matchFunc MatchFunc) { paths := []string{"abc"} regex := "a(c" - _, err := versions.Match(paths, regex) + _, err := matchFunc(paths, regex) Ω(err).Should(HaveOccurred()) }) }) @@ -61,7 +65,7 @@ var ItMatchesPaths = func(matchFunc MatchFunc) { paths := []string{"abc", "bcd"} regex := ".*bc.*" - result, err := versions.Match(paths, regex) + result, err := matchFunc(paths, regex) Ω(err).ShouldNot(HaveOccurred()) Ω(result).Should(ConsistOf("abc", "bcd")) }) @@ -70,7 +74,7 @@ var ItMatchesPaths = func(matchFunc MatchFunc) { paths := []string{"abc", "def"} regex := "ge.*h" - result, err := versions.Match(paths, regex) + result, err := matchFunc(paths, regex) Ω(err).ShouldNot(HaveOccurred()) Ω(result).Should(BeEmpty()) }) @@ -78,46 +82,8 @@ var ItMatchesPaths = func(matchFunc MatchFunc) { }) } -var _ = Describe("Match", func() { - Describe("Match", func() { - ItMatchesPaths(versions.Match) - - It("does not contain files that are in some subdirectory that is not explicitly mentioned", func() { - paths := []string{"folder/abc", "abc"} - regex := "abc" - - result, err := versions.Match(paths, regex) - Ω(err).ShouldNot(HaveOccurred()) - Ω(result).Should(ConsistOf("abc")) - }) - }) - - Describe("MatchUnanchored", func() { - ItMatchesPaths(versions.MatchUnanchored) - }) -}) - -var _ = Describe("PrefixHint", func() { - It("turns a regexp into a limiter for s3", func() { - By("having a directory prefix in the simple case") - Ω(versions.PrefixHint("hello/(.*).tgz")).Should(Equal("hello/")) - Ω(versions.PrefixHint("hello/world-(.*)")).Should(Equal("hello/")) - Ω(versions.PrefixHint("hello-world/some-file-(.*)")).Should(Equal("hello-world/")) - - By("not having a prefix if there is no parent directory") - Ω(versions.PrefixHint("(.*).tgz")).Should(Equal("")) - Ω(versions.PrefixHint("hello-(.*).tgz")).Should(Equal("")) - - By("skipping regexp path names") - Ω(versions.PrefixHint("hello/(.*)/what.txt")).Should(Equal("hello/")) - - By("handling escaped regexp characters") - Ω(versions.PrefixHint(`hello/cruel\[\\\^\$\.\|\?\*\+\(\)world/fizz-(.*).tgz`)).Should(Equal(`hello/cruel[\^$.|?*+()world/`)) - - By("handling regexp-specific escapes") - Ω(versions.PrefixHint(`hello/\d{3}/fizz-(.*).tgz`)).Should(Equal(`hello/`)) - 
+            s3client.ChunkedBucketListReturnsOnCall(0, s3resource.BucketListChunk{
+                Truncated: true,
+                Paths: []string{
+                    "prefix/leaf-0",
+                    "prefix/leaf-1",
+                },
+            }, nil)
+            s3client.ChunkedBucketListReturnsOnCall(1, s3resource.BucketListChunk{
+                Truncated: false,
+                Paths: []string{
+                    "prefix/leaf-2",
+                    "prefix/leaf-3",
+                },
+            }, nil)
+
+            matchingPaths, err := versions.GetMatchingPathsFromBucket(
+                s3client, "bucket", "prefix/leaf-(.*)",
+            )
+            Ω(err).ShouldNot(HaveOccurred())
+            Ω(matchingPaths).Should(ConsistOf(
+                "prefix/leaf-0",
+                "prefix/leaf-1",
+                "prefix/leaf-2",
+                "prefix/leaf-3",
+            ))
+        })
+    })
+
+    Context("When there are too many prefixes for a single request", func() {
+        It("continues requesting more", func() {
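+            // Calls 0 and 1 drain the truncated top-level listing; each of
+            // the four matching prefixes is then listed in turn (calls 2-5).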
"prefix/leaf-3", + }, + }, nil) + + matchingPaths, err := versions.GetMatchingPathsFromBucket( + s3client, "bucket", "prefix/leaf-(.*)", + ) + Ω(err).ShouldNot(HaveOccurred()) + Ω(matchingPaths).Should(ConsistOf( + "prefix/leaf-0", + "prefix/leaf-1", + "prefix/leaf-2", + "prefix/leaf-3", + )) + }) + }) + + Context("When there are too many prefixes for a single request", func() { + It("continues requesting more", func() { + s3client.ChunkedBucketListReturnsOnCall(0, s3resource.BucketListChunk{ + Truncated: true, + CommonPrefixes: []string{ + "prefix-0/", + "prefix-1/", + }, + }, nil) + s3client.ChunkedBucketListReturnsOnCall(1, s3resource.BucketListChunk{ + Truncated: false, + CommonPrefixes: []string{ + "prefix-2/", + "prefix-3/", + }, + }, nil) + s3client.ChunkedBucketListReturnsOnCall(2, s3resource.BucketListChunk{ + Paths: []string{"prefix-0/leaf-0"}, + }, nil) + s3client.ChunkedBucketListReturnsOnCall(3, s3resource.BucketListChunk{ + Paths: []string{"prefix-1/leaf-1"}, + }, nil) + s3client.ChunkedBucketListReturnsOnCall(4, s3resource.BucketListChunk{ + Paths: []string{"prefix-2/leaf-2"}, + }, nil) + s3client.ChunkedBucketListReturnsOnCall(5, s3resource.BucketListChunk{ + Paths: []string{"prefix-3/leaf-3"}, + }, nil) + + matchingPaths, err := versions.GetMatchingPathsFromBucket( + s3client, "bucket", "prefix-\\d+/leaf-(.*)", + ) + Ω(err).ShouldNot(HaveOccurred()) + Ω(matchingPaths).Should(ConsistOf( + "prefix-0/leaf-0", + "prefix-1/leaf-1", + "prefix-2/leaf-2", + "prefix-3/leaf-3", + )) + }) + }) + + Context("When S3 returns an error", func() { + BeforeEach(func() { + s3client.ChunkedBucketListReturns( + s3resource.BucketListChunk{}, + errors.New("S3 failure"), + ) + }) + It("fails", func() { + _, err := versions.GetMatchingPathsFromBucket(s3client, "bucket", "dummy") + Ω(err).Should(HaveOccurred()) + }) + }) +})