From 11f7a7fa1d7b0b1302e62a7aa8c2dea1458ad6a6 Mon Sep 17 00:00:00 2001 From: Kris Hicks Date: Thu, 29 Jun 2017 12:06:44 -0700 Subject: [PATCH] Add unpack option to InRequest params If the InRequest is configured with `Params{Unpack: true}`, unpack the archive after downloading it. The original archive is not stored. In other words, if you have an archive, `some-archive.tgz`, with a file `some-file` in it, the destination directory after downloading and unpacking will contain just `some-file`. --- README.md | 2 +- in/archive.go | 76 +++++++++++ in/in_command.go | 54 +++++++- in/in_command_test.go | 293 ++++++++++++++++++++++++++++++++++++++++++ in/models.go | 5 + 5 files changed, 424 insertions(+), 6 deletions(-) create mode 100644 in/archive.go diff --git a/README.md b/README.md index 54461bff..b06c03b5 100644 --- a/README.md +++ b/README.md @@ -82,7 +82,7 @@ Places the following files in the destination: #### Parameters -*None.* +* `unpack`: *Optional.* If true and the file is an archive (tar, gzipped tar, other gzipped file, or zip), unpack the file. Gzipped tarballs will be both ungzipped and untarred. ### `out`: Upload an object to the bucket. diff --git a/in/archive.go b/in/archive.go new file mode 100644 index 00000000..08e5af4e --- /dev/null +++ b/in/archive.go @@ -0,0 +1,76 @@ +package in + +import ( + "bufio" + "errors" + "fmt" + "io" + "os" + "os/exec" + "strings" + + "bitbucket.org/taruti/mimemagic" +) + +var archiveMimetypes = []string{ + "application/x-gzip", + "application/gzip", + "application/x-tar", + "application/zip", +} + +func mimetype(r *bufio.Reader) (string, error) { + bs, err := r.Peek(512) + if err != nil && err != io.EOF { + return "", err + } + + if len(bs) == 0 { + return "", errors.New("cannot determine mimetype from empty bytes") + } + + return mimemagic.Match("", bs), nil +} + +func archiveMimetype(filename string) string { + f, err := os.Open(filename) + if err != nil { + return "" + } + defer f.Close() + + mime, err := mimetype(bufio.NewReader(f)) + if err != nil { + return "" + } + + for i := range archiveMimetypes { + if strings.HasPrefix(mime, archiveMimetypes[i]) { + return archiveMimetypes[i] + } + } + + return "" +} + +func inflate(mime, path, destination string) error { + var cmd *exec.Cmd + + switch mime { + case "application/zip": + cmd = exec.Command("unzip", "-P", "", "-d", destination, path) + defer os.Remove(path) + + case "application/x-tar": + cmd = exec.Command("tar", "xf", path, "-C", destination) + defer os.Remove(path) + + case "application/gzip", "application/x-gzip": + cmd = exec.Command("gunzip", path) + + default: + return fmt.Errorf("don't know how to extract %s", mime) + } + + return cmd.Run() +} diff --git a/in/in_command.go b/in/in_command.go index f5c28d80..c7846aad 100644 --- a/in/in_command.go +++ b/in/in_command.go @@ -73,11 +73,6 @@ func (command *InCommand) Run(destinationDir string, request InRequest) (InRespo versionID = request.Version.VersionID } - err = command.writeVersionFile(versionNumber, destinationDir) - if err != nil { - return InResponse{}, err - } - err = command.downloadFile( request.Source.Bucket, remotePath, @@ -90,11 +85,29 @@ func (command *InCommand) Run(destinationDir string, request InRequest) (InRespo return InResponse{}, err } + if request.Params.Unpack { + destinationPath := filepath.Join(destinationDir, path.Base(remotePath)) + mime := archiveMimetype(destinationPath) + if mime == "" { + return InResponse{}, fmt.Errorf("not an archive: %s", destinationPath) + } + + err = extractArchive(mime, destinationPath) + if err != nil { + return InResponse{}, err + } + } + url := command.urlProvider.GetURL(request, remotePath) if err = command.writeURLFile(destinationDir, url); err != nil { return InResponse{}, err } + err = command.writeVersionFile(versionNumber, destinationDir) + if err != nil { + return InResponse{}, err + } + metadata := command.metadata(remotePath, request.Source.Private, url) if versionID == "" { @@ -152,3 +165,34 @@ func (command *InCommand) metadata(remotePath string, private bool, url string) return metadata } + +func extractArchive(mime, filename string) error { + destDir := filepath.Dir(filename) + + err := inflate(mime, filename, destDir) + if err != nil { + return fmt.Errorf("failed to extract archive: %s", err) + } + + if mime == "application/gzip" || mime == "application/x-gzip" { + fileInfos, err := ioutil.ReadDir(destDir) + if err != nil { + return fmt.Errorf("failed to read dir: %s", err) + } + + if len(fileInfos) != 1 { + return fmt.Errorf("%d files found after gunzip; expected 1", len(fileInfos)) + } + + filename = filepath.Join(destDir, fileInfos[0].Name()) + mime = archiveMimetype(filename) + if mime == "application/x-tar" { + err = inflate(mime, filename, destDir) + if err != nil { + return fmt.Errorf("failed to extract archive: %s", err) + } + } + } + + return nil +} diff --git a/in/in_command_test.go b/in/in_command_test.go index 69196845..2b75b838 100644 --- a/in/in_command_test.go +++ b/in/in_command_test.go @@ -1,9 +1,16 @@ package in_test import ( + "archive/tar" + "archive/zip" + "compress/gzip" + "io" "io/ioutil" + "log" "os" + "path" "path/filepath" + "strings" . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" @@ -195,5 +202,291 @@ var _ = Describe("In Command", func() { Expect(err.Error()).To(ContainSubstring("files/a-file-1.3")) }) }) + + Context("when params is configured to unpack the file", func() { + BeforeEach(func() { + request.Params.Unpack = true + }) + + Context("when the file is a tarball", func() { + BeforeEach(func() { + s3client.DownloadFileStub = func(bucketName string, remotePath string, versionID string, localPath string) error { + src := filepath.Join(tmpPath, "some-file") + + err := ioutil.WriteFile(src, []byte("some-contents"), os.ModePerm) + Expect(err).NotTo(HaveOccurred()) + + err = createTarball([]string{src}, tmpPath, localPath) + Expect(err).NotTo(HaveOccurred()) + + _, err = os.Stat(localPath) + Expect(err).NotTo(HaveOccurred()) + + return nil + } + }) + + It("extracts the tarball", func() { + _, err := command.Run(destDir, request) + Expect(err).NotTo(HaveOccurred()) + + bs, err := ioutil.ReadFile(filepath.Join(destDir, "some-file")) + Expect(err).NotTo(HaveOccurred()) + + Expect(bs).To(Equal([]byte("some-contents"))) + }) + }) + + Context("when the file is a zip", func() { + BeforeEach(func() { + s3client.DownloadFileStub = func(bucketName string, remotePath string, versionID string, localPath string) error { + inDir, err := ioutil.TempDir(tmpPath, "zip-dir") + Expect(err).NotTo(HaveOccurred()) + + err = ioutil.WriteFile(path.Join(inDir, "some-file"), []byte("some-contents"), os.ModePerm) + Expect(err).NotTo(HaveOccurred()) + + err = zipit(path.Join(inDir, "/"), localPath, "") + Expect(err).NotTo(HaveOccurred()) + + return nil + } + }) + + It("unzips the zip", func() { + _, err := command.Run(destDir, request) + Expect(err).NotTo(HaveOccurred()) + + bs, err := ioutil.ReadFile(filepath.Join(destDir, "some-file")) + Expect(err).NotTo(HaveOccurred()) + + Expect(bs).To(Equal([]byte("some-contents"))) + }) + }) + + Context("when the file is gzipped", func() { + BeforeEach(func() { + request.Version.Path = "files/a-file-1.3.gz" + request.Source.Regexp = "files/a-file-(.*).gz" + + s3client.DownloadFileStub = func(bucketName string, remotePath string, versionID string, localPath string) error { + f, err := os.Create(localPath) + Expect(err).NotTo(HaveOccurred()) + + zw := gzip.NewWriter(f) + + _, err = zw.Write([]byte("some-contents")) + Expect(err).NotTo(HaveOccurred()) + + Expect(zw.Close()).NotTo(HaveOccurred()) + Expect(f.Close()).NotTo(HaveOccurred()) + + return nil + } + }) + + It("gunzips the gzip", func() { + _, err := command.Run(destDir, request) + Expect(err).NotTo(HaveOccurred()) + + bs, err := ioutil.ReadFile(filepath.Join(destDir, "a-file-1.3")) + Expect(err).NotTo(HaveOccurred()) + + Expect(string(bs)).To(Equal("some-contents")) + }) + }) + + Context("when the file is a gzipped tarball", func() { + BeforeEach(func() { + request.Version.Path = "files/a-file-1.3.tgz" + request.Source.Regexp = "files/a-file-(.*).tgz" + + s3client.DownloadFileStub = func(bucketName string, remotePath string, versionID string, localPath string) error { + err := os.MkdirAll(filepath.Join(tmpPath, "some-dir"), os.ModePerm) + Expect(err).NotTo(HaveOccurred()) + + someFile1 := filepath.Join(tmpPath, "some-dir", "some-file") + + err = ioutil.WriteFile(someFile1, []byte("some-contents"), os.ModePerm) + Expect(err).NotTo(HaveOccurred()) + + someFile2 := filepath.Join(tmpPath, "some-file") + + err = ioutil.WriteFile(someFile2, []byte("some-other-contents"), os.ModePerm) + Expect(err).NotTo(HaveOccurred()) + + tarPath := filepath.Join(tmpPath, "some-tar") + err = createTarball([]string{someFile1, someFile2}, tmpPath, tarPath) + Expect(err).NotTo(HaveOccurred()) + + _, err = os.Stat(tarPath) + Expect(err).NotTo(HaveOccurred()) + + tarf, err := os.Open(tarPath) + Expect(err).NotTo(HaveOccurred()) + + f, err := os.Create(localPath) + Expect(err).NotTo(HaveOccurred()) + + zw := gzip.NewWriter(f) + + _, err = io.Copy(zw, tarf) + Expect(err).NotTo(HaveOccurred()) + + Expect(zw.Close()).NotTo(HaveOccurred()) + Expect(f.Close()).NotTo(HaveOccurred()) + + return nil + } + }) + + It("extracts the gzipped tarball", func() { + _, err := command.Run(destDir, request) + Expect(err).NotTo(HaveOccurred()) + + Expect(filepath.Join(destDir, "some-dir", "some-file")).To(BeARegularFile()) + + bs, err := ioutil.ReadFile(filepath.Join(destDir, "some-dir", "some-file")) + Expect(err).NotTo(HaveOccurred()) + Expect(bs).To(Equal([]byte("some-contents"))) + + bs, err = ioutil.ReadFile(filepath.Join(destDir, "some-file")) + Expect(err).NotTo(HaveOccurred()) + Expect(bs).To(Equal([]byte("some-other-contents"))) + }) + }) + + Context("when the file is not an archive", func() { + BeforeEach(func() { + s3client.DownloadFileStub = func(bucketName string, remotePath string, versionID string, localPath string) error { + err := ioutil.WriteFile(localPath, []byte("some-contents"), os.ModePerm) + Expect(err).NotTo(HaveOccurred()) + + return nil + } + }) + + It("returns an error", func() { + _, err := command.Run(destDir, request) + Expect(err).To(HaveOccurred()) + }) + }) + }) }) }) + +func addFileToTar(tw *tar.Writer, tarPath, path string) error { + file, err := os.Open(path) + if err != nil { + return err + } + + stat, err := file.Stat() + if err != nil { + return err + } + + err = tw.WriteHeader(&tar.Header{ + Name: tarPath, + Size: stat.Size(), + Mode: int64(stat.Mode()), + ModTime: stat.ModTime(), + }) + if err != nil { + return err + } + + _, err = io.Copy(tw, file) + if err != nil { + return err + } + + return file.Close() +} + +func createTarball(paths []string, basePath string, destination string) error { + file, err := os.Create(destination) + if err != nil { + log.Fatalln(err) + } + + tw := tar.NewWriter(file) + + for _, path := range paths { + tarPath, err := filepath.Rel(basePath, path) + if err != nil { + return err + } + err = addFileToTar(tw, tarPath, path) + if err != nil { + return err + } + } + + err = tw.Close() + if err != nil { + return err + } + + return file.Close() +} + +// Thanks to Svett Ralchev +// http://blog.ralch.com/tutorial/golang-working-with-zip/ +func zipit(source, target, prefix string) error { + zipfile, err := os.Create(target) + if err != nil { + return err + } + + archive := zip.NewWriter(zipfile) + + err = filepath.Walk(source, func(path string, info os.FileInfo, err error) error { + if path == source { + return nil + } + + if err != nil { + return err + } + + header, err := zip.FileInfoHeader(info) + if err != nil { + return err + } + + header.Name = strings.TrimPrefix(path, source+string(os.PathSeparator)) + + if info.IsDir() { + header.Name += string(os.PathSeparator) + } else { + header.Method = zip.Deflate + } + + writer, err := archive.CreateHeader(header) + if err != nil { + return err + } + + if info.IsDir() { + return nil + } + + file, err := os.Open(path) + if err != nil { + return err + } + + if _, err = io.Copy(writer, file); err != nil { + return err + } + + return file.Close() + }) + + if err = archive.Close(); err != nil { + return err + } + + return zipfile.Close() +} diff --git a/in/models.go b/in/models.go index 1aaf5b69..c5ef98e9 100644 --- a/in/models.go +++ b/in/models.go @@ -5,6 +5,11 @@ import "github.com/concourse/s3-resource" type InRequest struct { Source s3resource.Source `json:"source"` Version s3resource.Version `json:"version"` + Params Params `json:"params"` +} + +type Params struct { + Unpack bool `json:"unpack"` } type InResponse struct {