diff --git a/README.md b/README.md index 54461bff..b06c03b5 100644 --- a/README.md +++ b/README.md @@ -82,7 +82,7 @@ Places the following files in the destination: #### Parameters -*None.* +* `unpack`: *Optional.* If true and the file is an archive (tar, gzipped tar, other gzipped file, or zip), unpack the file. Gzipped tarballs will be both ungzipped and untarred. ### `out`: Upload an object to the bucket. diff --git a/in/archive.go b/in/archive.go new file mode 100644 index 00000000..08e5af4e --- /dev/null +++ b/in/archive.go @@ -0,0 +1,76 @@ +package in + +import ( + "bufio" + "errors" + "fmt" + "io" + "os" + "os/exec" + "strings" + + "bitbucket.org/taruti/mimemagic" +) + +var archiveMimetypes = []string{ + "application/x-gzip", + "application/gzip", + "application/x-tar", + "application/zip", +} + +func mimetype(r *bufio.Reader) (string, error) { + bs, err := r.Peek(512) + if err != nil && err != io.EOF { + return "", err + } + + if len(bs) == 0 { + return "", errors.New("cannot determine mimetype from empty bytes") + } + + return mimemagic.Match("", bs), nil +} + +func archiveMimetype(filename string) string { + f, err := os.Open(filename) + if err != nil { + return "" + } + defer f.Close() + + mime, err := mimetype(bufio.NewReader(f)) + if err != nil { + return "" + } + + for i := range archiveMimetypes { + if strings.HasPrefix(mime, archiveMimetypes[i]) { + return archiveMimetypes[i] + } + } + + return "" +} + +func inflate(mime, path, destination string) error { + var cmd *exec.Cmd + + switch mime { + case "application/zip": + cmd = exec.Command("unzip", "-P", "", "-d", destination, path) + defer os.Remove(path) + + case "application/x-tar": + cmd = exec.Command("tar", "xf", path, "-C", destination) + defer os.Remove(path) + + case "application/gzip", "application/x-gzip": + cmd = exec.Command("gunzip", path) + + default: + return fmt.Errorf("don't know how to extract %s", mime) + } + + return cmd.Run() +} diff --git a/in/in_command.go b/in/in_command.go index df8ec203..c7846aad 100644 --- a/in/in_command.go +++ b/in/in_command.go @@ -45,106 +45,86 @@ func (command *InCommand) Run(destinationDir string, request InRequest) (InRespo return InResponse{}, errors.New(message) } - err := command.createDirectory(destinationDir) + err := os.MkdirAll(destinationDir, 0755) if err != nil { return InResponse{}, err } - if request.Source.Regexp != "" { - return command.inByRegex(destinationDir, request) - } else { - return command.inByVersionedFile(destinationDir, request) - } -} + var remotePath string + var versionNumber string + var versionID string -func (command *InCommand) inByRegex(destinationDir string, request InRequest) (InResponse, error) { - if request.Version.Path == "" { - return InResponse{}, ErrMissingPath - } + if request.Source.Regexp != "" { + if request.Version.Path == "" { + return InResponse{}, ErrMissingPath + } - remotePath := request.Version.Path + remotePath = request.Version.Path - extraction, ok := versions.Extract(remotePath, request.Source.Regexp) - if !ok { - return InResponse{}, fmt.Errorf("regex does not match provided version: %#v", request.Version) + extraction, ok := versions.Extract(remotePath, request.Source.Regexp) + if !ok { + return InResponse{}, fmt.Errorf("regex does not match provided version: %#v", request.Version) + } - } - - err := command.writeVersionFile(extraction.VersionNumber, destinationDir) - if err != nil { - return InResponse{}, err + versionNumber = extraction.VersionNumber + } else { + remotePath = request.Source.VersionedFile + versionNumber = request.Version.VersionID + versionID = request.Version.VersionID } err = command.downloadFile( request.Source.Bucket, remotePath, - "", + versionID, destinationDir, path.Base(remotePath), ) - if err != nil { - return InResponse{}, err - } - url := command.urlProvider.GetURL(request, remotePath) - err = command.writeURLFile( - destinationDir, - url, - ) if err != nil { return InResponse{}, err } - return InResponse{ - Version: s3resource.Version{ - Path: remotePath, - }, - Metadata: command.metadata(remotePath, request.Source.Private, url), - }, nil -} - -func (command *InCommand) inByVersionedFile(destinationDir string, request InRequest) (InResponse, error) { + if request.Params.Unpack { + destinationPath := filepath.Join(destinationDir, path.Base(remotePath)) + mime := archiveMimetype(destinationPath) + if mime == "" { + return InResponse{}, fmt.Errorf("not an archive: %s", destinationPath) + } + + err = extractArchive(mime, destinationPath) + if err != nil { + return InResponse{}, err + } + } - err := command.writeVersionFile(request.Version.VersionID, destinationDir) - if err != nil { + url := command.urlProvider.GetURL(request, remotePath) + if err = command.writeURLFile(destinationDir, url); err != nil { return InResponse{}, err } - remotePath := request.Source.VersionedFile - - err = command.downloadFile( - request.Source.Bucket, - remotePath, - request.Version.VersionID, - destinationDir, - path.Base(remotePath), - ) - + err = command.writeVersionFile(versionNumber, destinationDir) if err != nil { return InResponse{}, err } - url := command.urlProvider.GetURL(request, remotePath) - err = command.writeURLFile( - destinationDir, - url, - ) + metadata := command.metadata(remotePath, request.Source.Private, url) - if err != nil { - return InResponse{}, err + if versionID == "" { + return InResponse{ + Version: s3resource.Version{ + Path: remotePath, + }, + Metadata: metadata, + }, nil } return InResponse{ Version: s3resource.Version{ - VersionID: request.Version.VersionID, + VersionID: versionID, }, - Metadata: command.metadata(remotePath, request.Source.Private, url), + Metadata: metadata, }, nil - -} - -func (command *InCommand) createDirectory(destDir string) error { - return os.MkdirAll(destDir, 0755) } func (command *InCommand) writeURLFile(destDir string, url string) error { @@ -185,3 +165,34 @@ func (command *InCommand) metadata(remotePath string, private bool, url string) return metadata } + +func extractArchive(mime, filename string) error { + destDir := filepath.Dir(filename) + + err := inflate(mime, filename, destDir) + if err != nil { + return fmt.Errorf("failed to extract archive: %s", err) + } + + if mime == "application/gzip" || mime == "application/x-gzip" { + fileInfos, err := ioutil.ReadDir(destDir) + if err != nil { + return fmt.Errorf("failed to read dir: %s", err) + } + + if len(fileInfos) != 1 { + return fmt.Errorf("%d files found after gunzip; expected 1", len(fileInfos)) + } + + filename = filepath.Join(destDir, fileInfos[0].Name()) + mime = archiveMimetype(filename) + if mime == "application/x-tar" { + err = inflate(mime, filename, destDir) + if err != nil { + return fmt.Errorf("failed to extract archive: %s", err) + } + } + } + + return nil +} diff --git a/in/in_command_test.go b/in/in_command_test.go index ac7db481..2b75b838 100644 --- a/in/in_command_test.go +++ b/in/in_command_test.go @@ -1,9 +1,16 @@ package in_test import ( + "archive/tar" + "archive/zip" + "compress/gzip" + "io" "io/ioutil" + "log" "os" + "path" "path/filepath" + "strings" . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" @@ -34,10 +41,10 @@ var _ = Describe("In Command", func() { request = InRequest{ Source: s3resource.Source{ Bucket: "bucket-name", - Regexp: "files/a-file-(.*).tgz", + Regexp: "files/a-file-(.*)", }, Version: s3resource.Version{ - Path: "files/a-file-1.3.tgz", + Path: "files/a-file-1.3", }, } @@ -74,7 +81,7 @@ var _ = Describe("In Command", func() { Context("when there is an existing version in the request", func() { BeforeEach(func() { - request.Version.Path = "files/a-file-1.3.tgz" + request.Version.Path = "files/a-file-1.3" }) It("downloads the existing version of the file", func() { @@ -85,9 +92,9 @@ var _ = Describe("In Command", func() { bucketName, remotePath, versionID, localPath := s3client.DownloadFileArgsForCall(0) Ω(bucketName).Should(Equal("bucket-name")) - Ω(remotePath).Should(Equal("files/a-file-1.3.tgz")) + Ω(remotePath).Should(Equal("files/a-file-1.3")) Ω(versionID).Should(BeEmpty()) - Ω(localPath).Should(Equal(filepath.Join(destDir, "a-file-1.3.tgz"))) + Ω(localPath).Should(Equal(filepath.Join(destDir, "a-file-1.3"))) }) It("creates a 'url' file that contains the URL", func() { @@ -104,7 +111,7 @@ var _ = Describe("In Command", func() { bucketName, remotePath, private, versionID := s3client.URLArgsForCall(0) Ω(bucketName).Should(Equal("bucket-name")) - Ω(remotePath).Should(Equal("files/a-file-1.3.tgz")) + Ω(remotePath).Should(Equal("files/a-file-1.3")) Ω(private).Should(Equal(false)) Ω(versionID).Should(BeEmpty()) }) @@ -129,7 +136,7 @@ var _ = Describe("In Command", func() { Ω(s3client.URLCallCount()).Should(Equal(1)) bucketName, remotePath, private, versionID := s3client.URLArgsForCall(0) Ω(bucketName).Should(Equal("bucket-name")) - Ω(remotePath).Should(Equal("files/a-file-1.3.tgz")) + Ω(remotePath).Should(Equal("files/a-file-1.3")) Ω(private).Should(Equal(true)) Ω(versionID).Should(BeEmpty()) }) @@ -153,7 +160,7 @@ var _ = Describe("In Command", func() { response, err := command.Run(destDir, request) Ω(err).ShouldNot(HaveOccurred()) - Ω(response.Version.Path).Should(Equal("files/a-file-1.3.tgz")) + Ω(response.Version.Path).Should(Equal("files/a-file-1.3")) }) It("has metadata about the file", func() { @@ -161,7 +168,7 @@ var _ = Describe("In Command", func() { Ω(err).ShouldNot(HaveOccurred()) Ω(response.Metadata[0].Name).Should(Equal("filename")) - Ω(response.Metadata[0].Value).Should(Equal("a-file-1.3.tgz")) + Ω(response.Metadata[0].Value).Should(Equal("a-file-1.3")) Ω(response.Metadata[1].Name).Should(Equal("url")) Ω(response.Metadata[1].Value).Should(Equal("http://google.com")) @@ -192,8 +199,294 @@ var _ = Describe("In Command", func() { _, err := command.Run(destDir, request) Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring("regex does not match provided version")) - Expect(err.Error()).To(ContainSubstring("files/a-file-1.3.tgz")) + Expect(err.Error()).To(ContainSubstring("files/a-file-1.3")) + }) + }) + + Context("when params is configured to unpack the file", func() { + BeforeEach(func() { + request.Params.Unpack = true + }) + + Context("when the file is a tarball", func() { + BeforeEach(func() { + s3client.DownloadFileStub = func(bucketName string, remotePath string, versionID string, localPath string) error { + src := filepath.Join(tmpPath, "some-file") + + err := ioutil.WriteFile(src, []byte("some-contents"), os.ModePerm) + Expect(err).NotTo(HaveOccurred()) + + err = createTarball([]string{src}, tmpPath, localPath) + Expect(err).NotTo(HaveOccurred()) + + _, err = os.Stat(localPath) + Expect(err).NotTo(HaveOccurred()) + + return nil + } + }) + + It("extracts the tarball", func() { + _, err := command.Run(destDir, request) + Expect(err).NotTo(HaveOccurred()) + + bs, err := ioutil.ReadFile(filepath.Join(destDir, "some-file")) + Expect(err).NotTo(HaveOccurred()) + + Expect(bs).To(Equal([]byte("some-contents"))) + }) + }) + + Context("when the file is a zip", func() { + BeforeEach(func() { + s3client.DownloadFileStub = func(bucketName string, remotePath string, versionID string, localPath string) error { + inDir, err := ioutil.TempDir(tmpPath, "zip-dir") + Expect(err).NotTo(HaveOccurred()) + + err = ioutil.WriteFile(path.Join(inDir, "some-file"), []byte("some-contents"), os.ModePerm) + Expect(err).NotTo(HaveOccurred()) + + err = zipit(path.Join(inDir, "/"), localPath, "") + Expect(err).NotTo(HaveOccurred()) + + return nil + } + }) + + It("unzips the zip", func() { + _, err := command.Run(destDir, request) + Expect(err).NotTo(HaveOccurred()) + + bs, err := ioutil.ReadFile(filepath.Join(destDir, "some-file")) + Expect(err).NotTo(HaveOccurred()) + + Expect(bs).To(Equal([]byte("some-contents"))) + }) + }) + + Context("when the file is gzipped", func() { + BeforeEach(func() { + request.Version.Path = "files/a-file-1.3.gz" + request.Source.Regexp = "files/a-file-(.*).gz" + + s3client.DownloadFileStub = func(bucketName string, remotePath string, versionID string, localPath string) error { + f, err := os.Create(localPath) + Expect(err).NotTo(HaveOccurred()) + + zw := gzip.NewWriter(f) + + _, err = zw.Write([]byte("some-contents")) + Expect(err).NotTo(HaveOccurred()) + + Expect(zw.Close()).NotTo(HaveOccurred()) + Expect(f.Close()).NotTo(HaveOccurred()) + + return nil + } + }) + + It("gunzips the gzip", func() { + _, err := command.Run(destDir, request) + Expect(err).NotTo(HaveOccurred()) + + bs, err := ioutil.ReadFile(filepath.Join(destDir, "a-file-1.3")) + Expect(err).NotTo(HaveOccurred()) + + Expect(string(bs)).To(Equal("some-contents")) + }) + }) + + Context("when the file is a gzipped tarball", func() { + BeforeEach(func() { + request.Version.Path = "files/a-file-1.3.tgz" + request.Source.Regexp = "files/a-file-(.*).tgz" + + s3client.DownloadFileStub = func(bucketName string, remotePath string, versionID string, localPath string) error { + err := os.MkdirAll(filepath.Join(tmpPath, "some-dir"), os.ModePerm) + Expect(err).NotTo(HaveOccurred()) + + someFile1 := filepath.Join(tmpPath, "some-dir", "some-file") + + err = ioutil.WriteFile(someFile1, []byte("some-contents"), os.ModePerm) + Expect(err).NotTo(HaveOccurred()) + + someFile2 := filepath.Join(tmpPath, "some-file") + + err = ioutil.WriteFile(someFile2, []byte("some-other-contents"), os.ModePerm) + Expect(err).NotTo(HaveOccurred()) + + tarPath := filepath.Join(tmpPath, "some-tar") + err = createTarball([]string{someFile1, someFile2}, tmpPath, tarPath) + Expect(err).NotTo(HaveOccurred()) + + _, err = os.Stat(tarPath) + Expect(err).NotTo(HaveOccurred()) + + tarf, err := os.Open(tarPath) + Expect(err).NotTo(HaveOccurred()) + + f, err := os.Create(localPath) + Expect(err).NotTo(HaveOccurred()) + + zw := gzip.NewWriter(f) + + _, err = io.Copy(zw, tarf) + Expect(err).NotTo(HaveOccurred()) + + Expect(zw.Close()).NotTo(HaveOccurred()) + Expect(f.Close()).NotTo(HaveOccurred()) + + return nil + } + }) + + It("extracts the gzipped tarball", func() { + _, err := command.Run(destDir, request) + Expect(err).NotTo(HaveOccurred()) + + Expect(filepath.Join(destDir, "some-dir", "some-file")).To(BeARegularFile()) + + bs, err := ioutil.ReadFile(filepath.Join(destDir, "some-dir", "some-file")) + Expect(err).NotTo(HaveOccurred()) + Expect(bs).To(Equal([]byte("some-contents"))) + + bs, err = ioutil.ReadFile(filepath.Join(destDir, "some-file")) + Expect(err).NotTo(HaveOccurred()) + Expect(bs).To(Equal([]byte("some-other-contents"))) + }) + }) + + Context("when the file is not an archive", func() { + BeforeEach(func() { + s3client.DownloadFileStub = func(bucketName string, remotePath string, versionID string, localPath string) error { + err := ioutil.WriteFile(localPath, []byte("some-contents"), os.ModePerm) + Expect(err).NotTo(HaveOccurred()) + + return nil + } + }) + + It("returns an error", func() { + _, err := command.Run(destDir, request) + Expect(err).To(HaveOccurred()) + }) }) }) }) }) + +func addFileToTar(tw *tar.Writer, tarPath, path string) error { + file, err := os.Open(path) + if err != nil { + return err + } + + stat, err := file.Stat() + if err != nil { + return err + } + + err = tw.WriteHeader(&tar.Header{ + Name: tarPath, + Size: stat.Size(), + Mode: int64(stat.Mode()), + ModTime: stat.ModTime(), + }) + if err != nil { + return err + } + + _, err = io.Copy(tw, file) + if err != nil { + return err + } + + return file.Close() +} + +func createTarball(paths []string, basePath string, destination string) error { + file, err := os.Create(destination) + if err != nil { + log.Fatalln(err) + } + + tw := tar.NewWriter(file) + + for _, path := range paths { + tarPath, err := filepath.Rel(basePath, path) + if err != nil { + return err + } + err = addFileToTar(tw, tarPath, path) + if err != nil { + return err + } + } + + err = tw.Close() + if err != nil { + return err + } + + return file.Close() +} + +// Thanks to Svett Ralchev +// http://blog.ralch.com/tutorial/golang-working-with-zip/ +func zipit(source, target, prefix string) error { + zipfile, err := os.Create(target) + if err != nil { + return err + } + + archive := zip.NewWriter(zipfile) + + err = filepath.Walk(source, func(path string, info os.FileInfo, err error) error { + if path == source { + return nil + } + + if err != nil { + return err + } + + header, err := zip.FileInfoHeader(info) + if err != nil { + return err + } + + header.Name = strings.TrimPrefix(path, source+string(os.PathSeparator)) + + if info.IsDir() { + header.Name += string(os.PathSeparator) + } else { + header.Method = zip.Deflate + } + + writer, err := archive.CreateHeader(header) + if err != nil { + return err + } + + if info.IsDir() { + return nil + } + + file, err := os.Open(path) + if err != nil { + return err + } + + if _, err = io.Copy(writer, file); err != nil { + return err + } + + return file.Close() + }) + + if err = archive.Close(); err != nil { + return err + } + + return zipfile.Close() +} diff --git a/in/models.go b/in/models.go index 1aaf5b69..c5ef98e9 100644 --- a/in/models.go +++ b/in/models.go @@ -5,6 +5,11 @@ import "github.com/concourse/s3-resource" type InRequest struct { Source s3resource.Source `json:"source"` Version s3resource.Version `json:"version"` + Params Params `json:"params"` +} + +type Params struct { + Unpack bool `json:"unpack"` } type InResponse struct {