From ce972ec2eb88e65d3d1dba2bf3cc4665bfd0e5f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Erik=20Pedersen?= Date: Thu, 17 Oct 2024 08:51:06 +0200 Subject: [PATCH] Add -strip-index-html Fixes #503 --- README.md | 12 ++++- lib/cloudfront.go | 2 +- lib/cloudfront_test.go | 1 + lib/config.go | 2 + lib/deployer.go | 8 ++-- lib/files.go | 18 +++++--- lib/files_test.go | 11 ++++- lib/url.go | 37 +++++++++++++++ lib/url_test.go | 36 +++++++++++++++ main_test.go | 65 ++++++++++++++++++++++++--- testscripts/flag_strip-index-html.txt | 27 +++++++++++ 11 files changed, 201 insertions(+), 18 deletions(-) create mode 100644 testscripts/flag_strip-index-html.txt diff --git a/README.md b/README.md index fd517b8..28bc310 100644 --- a/README.md +++ b/README.md @@ -84,6 +84,8 @@ The list of flags from running `s3deploy -h`: regexp pattern of files to ignore when walking the local directory, repeat flag for multiple patterns, default "^(.*/)?/?.DS_Store$" -source string path of files to upload (default ".") +-strip-index-html + strip index.html from all directories except for the root entry -try trial run, no remote updates -v enable verbose logging @@ -91,8 +93,6 @@ The list of flags from running `s3deploy -h`: number of workers to upload files (default -1) ``` -Note that `-skip-local-dirs` and `-skip-local-files` will match against a relative path from the source directory with Unix-style path separators. The source directory is represented by `.`, the rest starts with a `/`. - The flags can be set in one of (in priority order): 1. As a flag, e.g. `s3deploy -path public/` @@ -110,6 +110,14 @@ max-delete: "${MYVARS_MAX_DELETE@U}" Note the special `@U` (_Unquoute_) syntax for the int field. +#### Skip local files and directories + +The options `-skip-local-dirs` and `-skip-local-files` will match against a relative path from the source directory with Unix-style path separators. The source directory is represented by `.`, the rest starts with a `/`. 
+ +#### Strip index.html + +The option `-strip-index-html` strips `index.html` from all directories except for the root entry, so e.g. `foo/index.html` is stored as `foo/`. This matches the option with (almost) the same name in [hugo deploy](https://gohugo.io/hosting-and-deployment/hugo-deploy/). See this [PR](https://github.com/gohugoio/hugo/pull/12608) for more information. + ### Routes The `.s3deploy.yml` configuration file can also contain one or more routes. A route matches files given a regexp. Each route can apply: diff --git a/lib/cloudfront.go b/lib/cloudfront.go index 5a52738..57d775a 100644 --- a/lib/cloudfront.go +++ b/lib/cloudfront.go @@ -176,7 +176,7 @@ func (c *cloudFrontClient) normalizeInvalidationPaths( var maxlevels int for _, p := range paths { - p = path.Clean(p) + p = pathClean(p) if !strings.HasPrefix(p, "/") { p = "/" + p } diff --git a/lib/cloudfront_test.go b/lib/cloudfront_test.go index 389bc2f..6b16354 100644 --- a/lib/cloudfront_test.go +++ b/lib/cloudfront_test.go @@ -27,6 +27,7 @@ func TestReduceInvalidationPaths(t *testing.T) { c.Assert(client.normalizeInvalidationPaths("", 5, false, "/index.html"), qt.DeepEquals, []string{"/"}) c.Assert(client.normalizeInvalidationPaths("", 5, true, "/a", "/b"), qt.DeepEquals, []string{"/*"}) c.Assert(client.normalizeInvalidationPaths("root", 5, true, "/a", "/b"), qt.DeepEquals, []string{"/root/*"}) + c.Assert(client.normalizeInvalidationPaths("root", 5, false, "/root/b/"), qt.DeepEquals, []string{"/root/b/"}) rootPlusMany := append([]string{"/index.html", "/styles.css"}, createFiles("css", false, 20)...) normalized := client.normalizeInvalidationPaths("", 5, false, rootPlusMany...) 
diff --git a/lib/config.go b/lib/config.go index 2b524b7..a7bace7 100644 --- a/lib/config.go +++ b/lib/config.go @@ -74,6 +74,7 @@ type Config struct { MaxDelete int ACL string PublicReadACL bool + StripIndexHTML bool Verbose bool Silent bool Force bool @@ -283,6 +284,7 @@ func flagsToConfig(f *flag.FlagSet) *Config { f.StringVar(&cfg.ConfigFile, "config", ".s3deploy.yml", "optional config file") f.IntVar(&cfg.MaxDelete, "max-delete", 256, "maximum number of files to delete per deploy") f.BoolVar(&cfg.PublicReadACL, "public-access", false, "DEPRECATED: please set -acl='public-read'") + f.BoolVar(&cfg.StripIndexHTML, "strip-index-html", false, "strip index.html from all directories expect for the root entry") f.StringVar(&cfg.ACL, "acl", "", "provide an ACL for uploaded objects. to make objects public, set to 'public-read'. all possible values are listed here: https://docs.aws.amazon.com/AmazonS3/latest/userguide/acl-overview.html#canned-acl (default \"private\")") f.BoolVar(&cfg.Force, "force", false, "upload even if the etags match") f.Var(&cfg.Ignore, "ignore", "regexp pattern for ignoring files, repeat flag for multiple patterns,") diff --git a/lib/deployer.go b/lib/deployer.go index af52c15..3556138 100644 --- a/lib/deployer.go +++ b/lib/deployer.go @@ -152,7 +152,7 @@ func (d *Deployer) printf(format string, a ...interface{}) { } func (d *Deployer) enqueueUpload(ctx context.Context, f *osFile) { - d.Printf("%s (%s) %s ", f.relPath, f.reason, up) + d.Printf("%s (%s) %s ", f.keyPath, f.reason, up) select { case <-ctx.Done(): case d.filesToUpload <- f: @@ -197,9 +197,9 @@ func (d *Deployer) plan(ctx context.Context) error { up := true reason := reasonNotFound - bucketPath := f.relPath + bucketPath := f.keyPath if d.cfg.BucketPath != "" { - bucketPath = path.Join(d.cfg.BucketPath, bucketPath) + bucketPath = pathJoin(d.cfg.BucketPath, bucketPath) } if remoteFile, ok := remoteFiles[bucketPath]; ok { @@ -274,7 +274,7 @@ func (d *Deployer) walk(ctx context.Context, 
basePath string, files chan<- *osFi return nil } - f, err := newOSFile(d.cfg.fileConf.Routes, d.cfg.BucketPath, rel, abs, info) + f, err := newOSFile(d.cfg, rel, abs, info) if err != nil { return err } diff --git a/lib/files.go b/lib/files.go index 0cb3e9f..20231b1 100644 --- a/lib/files.go +++ b/lib/files.go @@ -15,7 +15,6 @@ import ( "mime" "net/http" "os" - "path" "path/filepath" "regexp" "sync" @@ -55,6 +54,7 @@ type localFile interface { type osFile struct { relPath string + keyPath string // may be different from relPath if StripIndexHTML is set. // Filled when BucketPath is provided. Will store files in a sub-path // of the target file store. @@ -77,9 +77,9 @@ type osFile struct { func (f *osFile) Key() string { if f.targetRoot != "" { - return path.Join(f.targetRoot, f.relPath) + return pathJoin(f.targetRoot, f.keyPath) } - return f.relPath + return f.keyPath } func (f *osFile) UploadReason() uploadReason { @@ -177,7 +177,10 @@ func (f *osFile) shouldThisReplace(other file) (bool, uploadReason) { return false, "" } -func newOSFile(routes routes, targetRoot, relPath, absPath string, fi os.FileInfo) (*osFile, error) { +func newOSFile(cfg *Config, relPath, absPath string, fi os.FileInfo) (*osFile, error) { + targetRoot := cfg.BucketPath + routes := cfg.fileConf.Routes + relPath = filepath.ToSlash(relPath) file, err := os.Open(absPath) @@ -211,7 +214,12 @@ func newOSFile(routes routes, targetRoot, relPath, absPath string, fi os.FileInf mFile = memfile.New(b) } - of := &osFile{route: route, f: mFile, targetRoot: targetRoot, absPath: absPath, relPath: relPath, size: size} + keyPath := relPath + if cfg.StripIndexHTML { + keyPath = trimIndexHTML(keyPath) + } + + of := &osFile{route: route, f: mFile, targetRoot: targetRoot, absPath: absPath, relPath: relPath, keyPath: keyPath, size: size} if err := of.initContentType(peek); err != nil { return nil, err diff --git a/lib/files_test.go b/lib/files_test.go index 96c2267..067be56 100644 --- a/lib/files_test.go +++ 
b/lib/files_test.go @@ -91,5 +91,14 @@ func openTestFile(name string) (*osFile, error) { return nil, err } - return newOSFile(nil, "", relPath, absPath, fi) + args := []string{ + "-bucket=mybucket", + } + + cfg, err := ConfigFromArgs(args) + if err != nil { + return nil, err + } + + return newOSFile(cfg, relPath, absPath, fi) } diff --git a/lib/url.go b/lib/url.go index 314088f..06ad771 100644 --- a/lib/url.go +++ b/lib/url.go @@ -1,5 +1,10 @@ package lib +import ( + "path" + "strings" +) + // [RFC 1738](https://www.ietf.org/rfc/rfc1738.txt) // §2.2 func shouldEscape(c byte) bool { @@ -71,3 +76,35 @@ func pathEscapeRFC1738(s string) string { } return string(t) } + +// Like path.Join, but preserves trailing slash.. +func pathJoin(elem ...string) string { + if len(elem) == 0 { + return "" + } + hadSlash := strings.HasSuffix(elem[len(elem)-1], "/") + p := path.Join(elem...) + if hadSlash { + p += "/" + } + return p +} + +// pathClean works like path.Clean but will always preserve a trailing slash. +func pathClean(p string) string { + hadSlash := strings.HasSuffix(p, "/") + p = path.Clean(p) + if hadSlash && !strings.HasSuffix(p, "/") { + p += "/" + } + return p +} + +// trimIndexHTML remaps paths matching "/index.html" to "/". +func trimIndexHTML(p string) string { + const suffix = "/index.html" + if strings.HasSuffix(p, suffix) { + return p[:len(p)-len(suffix)+1] + } + return p +} diff --git a/lib/url_test.go b/lib/url_test.go index 6af1b11..2be7376 100644 --- a/lib/url_test.go +++ b/lib/url_test.go @@ -32,3 +32,39 @@ func TestPathEscapeRFC1738(t *testing.T) { c.Assert(actual, qt.Equals, tc.expected) } } + +func TestPathJoin(t *testing.T) { + c := qt.New(t) + + testCases := []struct { + elements []string + expected string + }{ + {[]string{"a", "b"}, "a/b"}, + {[]string{"a", "b/"}, "a/b/"}, + {[]string{"/a", "b/"}, "/a/b/"}, + } + + for _, tc := range testCases { + actual := pathJoin(tc.elements...) 
+ c.Assert(actual, qt.Equals, tc.expected) + } +} + +func TestPathClean(t *testing.T) { + c := qt.New(t) + + testCases := []struct { + in string + expected string + }{ + {"/path/", "/path/"}, + {"/path/./", "/path/"}, + {"/path", "/path"}, + } + + for _, tc := range testCases { + actual := pathClean(tc.in) + c.Assert(actual, qt.Equals, tc.expected) + } +} diff --git a/main_test.go b/main_test.go index a0ba278..f66ae51 100644 --- a/main_test.go +++ b/main_test.go @@ -7,6 +7,7 @@ package main import ( "bytes" + "context" "fmt" "net/http" "os" @@ -14,6 +15,9 @@ import ( "strings" "testing" + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/credentials" + "github.com/aws/aws-sdk-go-v2/service/s3" "github.com/oklog/ulid/v2" "github.com/rogpeppe/go-internal/testscript" @@ -24,7 +28,6 @@ const s3IntegrationTestHttpRoot = "http://s3deployintegrationtest.s3-website.eu- func TestIntegration(t *testing.T) { if os.Getenv("S3DEPLOY_TEST_KEY") == "" { t.Skip("S3DEPLOY_TEST_KEY not set") - } p := commonTestScriptsParam p.Dir = "testscripts" @@ -39,7 +42,6 @@ func TestUnfinished(t *testing.T) { p := commonTestScriptsParam p.Dir = "testscripts/unfinished" testscript.Run(t, p) - } func TestMain(m *testing.M) { @@ -57,21 +59,75 @@ func TestMain(m *testing.M) { ) } +const ( + testBucket = "s3deployintegrationtest" + testRegion = "eu-north-1" +) + func setup(env *testscript.Env) error { env.Setenv("S3DEPLOY_TEST_KEY", os.Getenv("S3DEPLOY_TEST_KEY")) env.Setenv("S3DEPLOY_TEST_SECRET", os.Getenv("S3DEPLOY_TEST_SECRET")) - env.Setenv("S3DEPLOY_TEST_BUCKET", "s3deployintegrationtest") - env.Setenv("S3DEPLOY_TEST_REGION", "eu-north-1") + env.Setenv("S3DEPLOY_TEST_BUCKET", testBucket) + env.Setenv("S3DEPLOY_TEST_REGION", testRegion) env.Setenv("S3DEPLOY_TEST_URL", s3IntegrationTestHttpRoot) env.Setenv("S3DEPLOY_TEST_ID", strings.ToLower(ulid.Make().String())) return nil } +func gtKeySecret(ts *testscript.TestScript) (string, string) { + key := ts.Getenv("S3DEPLOY_TEST_KEY") 
+ secret := ts.Getenv("S3DEPLOY_TEST_SECRET") + if key == "" || secret == "" { + ts.Fatalf("S3DEPLOY_TEST_KEY and S3DEPLOY_TEST_SECRET must be set") + } + return key, secret +} + var commonTestScriptsParam = testscript.Params{ Setup: func(env *testscript.Env) error { return setup(env) }, Cmds: map[string]func(ts *testscript.TestScript, neg bool, args []string){ + "s3get": func(ts *testscript.TestScript, neg bool, args []string) { + key := args[0] + testKey, testSecret := gtKeySecret(ts) + config := aws.Config{ + Region: testRegion, + Credentials: credentials.NewStaticCredentialsProvider(testKey, testSecret, os.Getenv("AWS_SESSION_TOKEN")), + } + + client := s3.NewFromConfig(config) + + obj, err := client.GetObject( + context.Background(), + &s3.GetObjectInput{ + Bucket: aws.String(testBucket), + Key: aws.String(key), + }, + ) + if err != nil { + ts.Fatalf("failed to get object: %v", err) + } + defer obj.Body.Close() + var buf bytes.Buffer + if _, err := buf.ReadFrom(obj.Body); err != nil { + ts.Fatalf("failed to read object: %v", err) + } + var ( + contentEncoding string + contentType string + ) + if obj.ContentEncoding != nil { + contentEncoding = *obj.ContentEncoding + } + if obj.ContentType != nil { + contentType = *obj.ContentType + } + fmt.Fprintf(ts.Stdout(), "s3get %s: ContentEncoding: %s ContentType: %s %s\n", key, contentEncoding, contentType, buf.String()) + for k, v := range obj.Metadata { + fmt.Fprintf(ts.Stdout(), "s3get metadata: %s: %s\n", k, v) + } + }, // head executes HTTP HEAD on the given URL and prints the response status code and // headers to stdout. @@ -91,7 +147,6 @@ var commonTestScriptsParam = testscript.Params{ } sort.Strings(headers) fmt.Fprintf(ts.Stdout(), "Headers: %s", strings.Join(headers, ";")) - }, // append appends to a file with a leaading newline. 
diff --git a/testscripts/flag_strip-index-html.txt b/testscripts/flag_strip-index-html.txt new file mode 100644 index 0000000..20b87c4 --- /dev/null +++ b/testscripts/flag_strip-index-html.txt @@ -0,0 +1,27 @@ +env AWS_ACCESS_KEY_ID=$S3DEPLOY_TEST_KEY +env AWS_SECRET_ACCESS_KEY=$S3DEPLOY_TEST_SECRET + +s3deploy -bucket $S3DEPLOY_TEST_BUCKET -region $S3DEPLOY_TEST_REGION -path $S3DEPLOY_TEST_ID -acl 'public-read' -source=public/ -strip-index-html + +stdout 'Deleted 0 of 0, uploaded 3, skipped 0.*100% changed' +stdout 'foo/ \(not found\) ↑ index.html \(not found\)' + +head /$S3DEPLOY_TEST_ID/index.html +stdout 'Status: 200' +s3get $S3DEPLOY_TEST_ID/foo/ +stdout 's3get.*/foo/:.*ContentType: text/html.*foo' +s3get $S3DEPLOY_TEST_ID/bar/ +stdout 's3get.*/bar/:.*ContentType: text/html.*bar' +s3get $S3DEPLOY_TEST_ID/index.html +stdout 's3get.*/index.html:.*ContentType: text/html.*root' + +# Repeat the same command without any changes +s3deploy -bucket $S3DEPLOY_TEST_BUCKET -region $S3DEPLOY_TEST_REGION -path $S3DEPLOY_TEST_ID -source=public/ -strip-index-html +stdout 'uploaded 0.*\(0% changed' + +-- public/index.html -- +root +-- public/foo/index.html -- +deliberately no HTML in foo +-- public/bar/index.html -- +

bar

\ No newline at end of file