diff --git a/cmd/cli/cli/arch_hdlr.go b/cmd/cli/cli/arch_hdlr.go
index 0b8208a0b0..b5086ecb57 100644
--- a/cmd/cli/cli/arch_hdlr.go
+++ b/cmd/cli/cli/arch_hdlr.go
@@ -98,7 +98,7 @@ var (
 			verboseFlag,
 			yesFlag,
 			unitsFlag,
-			inclSrcDirNameFlag,
+			archSrcDirNameFlag,
 			skipVerCksumFlag,
 			continueOnErrorFlag, // TODO: revisit
 		),
@@ -346,7 +346,7 @@ func putApndArchHandler(c *cli.Context) (err error) {
 		}
 	}

-	incl := flagIsSet(c, inclSrcDirNameFlag)
+	incl := flagIsSet(c, archSrcDirNameFlag)
 	switch {
 	case len(a.src.fdnames) > 0:
 		// a) csv of files and/or directories (names) from the first arg, e.g. "f1[,f2...]" dst-bucket[/prefix]
@@ -369,7 +369,7 @@ func putApndArchHandler(c *cli.Context) (err error) {
 			debug.Assert(srcpath == "", srcpath)
 			srcpath = a.pt.Prefix
 		}
-		fobjs, err := lsFobj(c, srcpath, "" /*trim pref*/, a.archpath /*append pref*/, &ndir, a.src.recurs, incl)
+		fobjs, err := lsFobj(srcpath, "" /*trim pref*/, a.archpath /*append pref*/, &ndir, a.src.recurs, incl, false)
 		if err != nil {
 			return err
 		}
diff --git a/cmd/cli/cli/const.go b/cmd/cli/cli/const.go
index 70b40d634a..5cf54c12df 100644
--- a/cmd/cli/cli/const.go
+++ b/cmd/cli/cli/const.go
@@ -935,9 +935,13 @@ var (
 		Name:  "include-src-bck",
 		Usage: "prefix the names of archived files with the source bucket name",
 	}
-	inclSrcDirNameFlag = cli.BoolFlag{
+	archSrcDirNameFlag = cli.BoolFlag{
 		Name:  "include-src-dir",
-		Usage: "prefix the names of archived files with the (root) source directory (omitted by default)",
+		Usage: "prefix the names of archived files with the (root) source directory",
+	}
+	putSrcDirNameFlag = cli.BoolFlag{
+		Name:  "include-src-dir",
+		Usage: "prefix destination object names with the source directory",
 	}
 	// 'ais archive put': conditional APPEND
 	archAppendOrPutFlag = cli.BoolFlag{
diff --git a/cmd/cli/cli/object.go b/cmd/cli/cli/object.go
index 4b17eaa426..21f96a05a2 100644
--- a/cmd/cli/cli/object.go
+++ b/cmd/cli/cli/object.go
@@ -106,7 +106,7 @@ func verbList(c *cli.Context, wop wop, fnames []string, bck cmn.Bck, appendPref
 		recurs = flagIsSet(c, recursFlag)
 	)
 	for _, n := range fnames {
-		fobjs, err := lsFobj(c, n, "", appendPref, &ndir, recurs, incl)
+		fobjs, err := lsFobj(n, "", appendPref, &ndir, recurs, incl, false)
 		if err != nil {
 			return err
 		}
@@ -123,7 +123,7 @@ func verbRange(c *cli.Context, wop wop, pt *cos.ParsedTemplate, bck cmn.Bck, tri
 	)
 	pt.InitIter()
 	for n, hasNext := pt.Next(); hasNext; n, hasNext = pt.Next() {
-		fobjs, err := lsFobj(c, n, trimPref, appendPref, &ndir, recurs, incl)
+		fobjs, err := lsFobj(n, trimPref, appendPref, &ndir, recurs, incl, false)
 		if err != nil {
 			return err
 		}
@@ -147,7 +147,7 @@ func concatObject(c *cli.Context, bck cmn.Bck, objName string, fileNames []strin
 		recurs = flagIsSet(c, recursFlag)
 	)
 	for i, fileName := range fileNames {
-		fobjs, err := lsFobj(c, fileName, "", "", &ndir, recurs, false /*incl src dir*/)
+		fobjs, err := lsFobj(fileName, "", "", &ndir, recurs, false /*incl src dir*/, false)
 		if err != nil {
 			return err
 		}
diff --git a/cmd/cli/cli/object_hdlr.go b/cmd/cli/cli/object_hdlr.go
index d423496b07..46383d7272 100644
--- a/cmd/cli/cli/object_hdlr.go
+++ b/cmd/cli/cli/object_hdlr.go
@@ -121,6 +121,7 @@ var (
 		concurrencyFlag,
 		dryRunFlag,
 		recursFlag,
+		putSrcDirNameFlag,
 		verboseFlag,
 		yesFlag,
 		continueOnErrorFlag,
@@ -328,7 +329,7 @@ func putHandler(c *cli.Context) error {
 		}
 	}

 	// 2. multi-file list & range
-	incl := flagIsSet(c, inclSrcDirNameFlag)
+	incl := flagIsSet(c, putSrcDirNameFlag)
 	switch {
 	case len(a.src.fdnames) > 0:
 		if len(a.src.fdnames) > 1 {
@@ -373,7 +374,7 @@ func putHandler(c *cli.Context) error {
 		if ok := warnMultiSrcDstPrefix(c, &a, fmt.Sprintf("from '%s%s'", srcpath, s)); !ok {
 			return nil
 		}
-		fobjs, err := lsFobj(c, srcpath, "", a.dst.oname, &ndir, a.src.recurs, incl)
+		fobjs, err := lsFobj(srcpath, "", a.dst.oname, &ndir, a.src.recurs, incl, false)
 		if err != nil {
 			return err
 		}
diff --git a/cmd/cli/cli/walk.go b/cmd/cli/cli/walk.go
index 3bfd09d164..275e8ea570 100644
--- a/cmd/cli/cli/walk.go
+++ b/cmd/cli/cli/walk.go
@@ -13,7 +13,6 @@ import (
 	"github.com/NVIDIA/aistore/cmn"
 	"github.com/NVIDIA/aistore/cmn/cos"
 	"github.com/NVIDIA/aistore/cmn/debug"
-	"github.com/urfave/cli"
 )

 // walk locally accessible files and directories; handle file/dir matching wildcards and patterns
@@ -102,64 +101,83 @@ func listRecurs(path, trimPref, appendPref, pattern string) (fobjs, error) {
 // - source path that may contain wildcard(s)
 // - (trimPref, appendPref) combo to influence destination naming
 // - recursive, etc.
-// Returns:
+// OUT:
 // - a slice of matching triplets: {source fname or dirname, destination name, size in bytes}
-func lsFobj(c *cli.Context, path, trimPref, appendPref string, ndir *int, recurs, incl bool) (fobjs, error) {
-	var (
-		pattern    = cos.WildcardMatchAll // default pattern: entire directory
-		finfo, err = os.Stat(path)
-	)
-	debug.Assert(trimPref == "" || strings.HasPrefix(path, trimPref))
-
-	// single file (uses cases: reg file, --template, --list)
-	if err == nil && !finfo.IsDir() {
-		if trimPref == "" {
-			// [convention] trim _everything_ leaving only the base, unless (below)
-			trimPref = filepath.Dir(path)
-			if incl {
-				// --include-source-(root)-dir: retain the last snippet
-				trimPref = filepath.Dir(trimPref)
-			}
-		}
-		fo := fobj{
-			dstName: appendPref + trimPrefix(path, trimPref),
-			path:    path,
-			size:    finfo.Size(),
+func lsFobj(srcpath, trimPref, appendPref string, ndir *int, recurs, incl, globbed bool) (fobjs fobjs, _ error) {
+	// 1. fstat ok
+	finfo, err := os.Stat(srcpath)
+	if err == nil {
+		if finfo.IsDir() {
+			return _lsDir(srcpath, trimPref, appendPref, cos.WildcardMatchAll, ndir, recurs, incl)
 		}
-		return []fobj{fo}, nil
+		return _lsFil(finfo, srcpath, trimPref, appendPref, incl)
 	}
-	if err != nil {
-		// expecting the base to be a filename-matching pattern (wildcard)
-		pattern = filepath.Base(path)
-		if isPattern(pattern) {
-			warn := fmt.Sprintf("%q is not a directory and does not appear to be a shell filename matching pattern (%q)",
-				path, pattern)
-			actionWarn(c, warn)
+	if globbed {
+		return nil, &errDoesNotExist{what: "srcpath", name: srcpath}
+	}
+	// 2. glob
+	const fmte = "%q is not a directory and does not appear to be a filename-matching pattern"
+	all, e := filepath.Glob(srcpath)
+	if e != nil {
+		return nil, fmt.Errorf(fmte+": %v", srcpath, e)
+	}
+
+	// no matches? extract basename and use it as a pattern to list the parent directory
+	if len(all) == 0 {
+		pattern := filepath.Base(srcpath)
+		if !isPattern(pattern) {
+			return nil, fmt.Errorf(fmte, srcpath)
 		}
-		path = filepath.Dir(path)
-		finfo, err = os.Stat(path)
-		if err != nil {
-			return nil, &errDoesNotExist{what: "path", name: path}
+		parent := filepath.Dir(srcpath)
+		if _, err := os.Stat(parent); err != nil {
+			return nil, &errDoesNotExist{what: "path", name: parent}
 		}
-		if !finfo.IsDir() {
-			return nil, fmt.Errorf("%q is not a directory", path)
+		return _lsDir(parent, trimPref, appendPref, pattern, ndir, recurs, incl)
+	}
+
+	// 3. append all
+	for _, src := range all {
+		fob, err := lsFobj(src, trimPref, appendPref, ndir, recurs, incl, true)
+		if err != nil {
+			return nil, fmt.Errorf("nested failure to ls %q: [%v]", src, err)
 		}
+		fobjs = append(fobjs, fob...)
 	}
+	return fobjs, nil
+}

+func _lsDir(srcpath, trimPref, appendPref, pattern string, ndir *int, recurs, incl bool) (fobjs, error) {
 	*ndir++
 	// [convention] ditto
 	if trimPref == "" {
-		trimPref = path
+		trimPref = srcpath
 		if incl {
-			trimPref = strings.TrimSuffix(path, filepath.Base(path))
+			trimPref = strings.TrimSuffix(srcpath, filepath.Base(srcpath))
 		}
 	}
 	f := listDir
 	if recurs {
 		f = listRecurs
 	}
-	return f(path, trimPref, appendPref, pattern)
+	return f(srcpath, trimPref, appendPref, pattern)
+}
+
+func _lsFil(finfo os.FileInfo, srcpath, trimPref, appendPref string, incl bool) (fobjs, error) {
+	if trimPref == "" {
+		// [convention] trim _everything_ leaving only the base, unless (below)
+		trimPref = filepath.Dir(srcpath)
+		if incl {
+			// --include-source-(root)-dir: retain the last snippet
+			trimPref = filepath.Dir(trimPref)
+		}
+	}
+	fo := fobj{
+		dstName: appendPref + trimPrefix(srcpath, trimPref),
+		path:    srcpath,
+		size:    finfo.Size(),
+	}
+	return []fobj{fo}, nil
 }

 func groupByExt(files []fobj) (int64, map[string]counter) {
diff --git a/cmd/cli/cli/yap.go b/cmd/cli/cli/yap.go
index a634942ec5..c090494f60 100644
--- a/cmd/cli/cli/yap.go
+++ b/cmd/cli/cli/yap.go
@@ -187,13 +187,8 @@ func (a *putargs) parse(c *cli.Context, emptyDstOnameOK bool) (err error) {
 		return err
 	}

-	const efmt = "too many arguments: '%s'"
-	var hint = fmt.Sprintf("(hint: wildcards must be in single or double quotes, see %s for details)", qflprn(cli.HelpFlag))
-	l := c.NArg()
-	if l > 4 {
-		return fmt.Errorf(efmt+" ...\n%s\n", strings.Join(c.Args()[2:4], " "), hint)
-	}
-	return fmt.Errorf(efmt+"\n%s\n", strings.Join(c.Args()[2:], " "), hint)
+	hint := fmt.Sprintf("(hint: wildcards must be in single or double quotes, see %s for details)", qflprn(cli.HelpFlag))
+	return fmt.Errorf("too many arguments: '%s'\n"+hint, strings.Join(c.Args(), " "))
 }

 func (*archbck) verb() string { return "ARCHIVE" }
diff --git a/docs/cli/object.md b/docs/cli/object.md
index 39e132d1db..77bf168575 100644
--- a/docs/cli/object.md
+++ b/docs/cli/object.md
@@ -44,7 +44,8 @@ ls promote concat evict mv cat
 - [Put a range of files](#put-a-range-of-files)
 - [Put a list of files](#put-a-list-of-files)
 - [Dry-Run option](#dry-run-option)
-- [Put multiple directories](#put-multiple-directories)
+- [Put multiple directories using Bash range notation](#put-multiple-directories-using-bash-range-notation)
+- [Put multiple directories using filename-matching pattern (wildcard)](#put-multiple-directories-using-filename-matching-pattern-wildcard)
 - [Put multiple directories with the `--skip-vc` option](#put-multiple-directories-with-the-skip-vc-option)
 - [APPEND object](#append-object)
 - [Delete object](#delete-object)
@@ -513,6 +514,7 @@ OPTIONS:
    --conc value        limits number of concurrent put requests and number of concurrent shards created (default: 10)
    --dry-run           preview the results without really running the action
    --recursive, -r     recursive operation
+   --include-src-dir   prefix destination object names with the source directory
    --verbose, -v       verbose output
    --yes, -y           assume 'yes' to all questions
    --cont-on-err       keep running archiving xaction (job) in presence of errors in any given multi-object transaction
@@ -853,16 +855,20 @@ $ ais put "~/dir/test{0..2}{0..2}.txt" ais://mybucket -y
 9 objects put into "ais://mybucket" bucket
 ```
-### Example 2. PUT a range of files into virtial directory
+### Example 2. PUT a range of files into a virtual directory

 Same as above but in addition destination object names will have additional prefix `subdir/` (notice the trailing `/`)

 In other words, this PUT in effect creates a **virtual directory** inside destination `ais://mybucket`

 ```bash
-# prep test files
+# first, prepare test files
 $ for d1 in {0..2}; do for d2 in {0..2}; do echo "0" > ~/dir/test${d1}${d2}.txt; done; done
+```
+
+Next, PUT:
+```console
 $ ais put "~/dir/test{0..2}{0..2}.txt" ais://mybucket/subdir/ -y
 ```
@@ -963,15 +969,69 @@ PUT /tmp/w/111 -> ais://nnn/fff111

 > Note: to PUT files into a virtual destination directory, use trailing '/', e.g.: `ais put ais://nnn/fff/ ...`

-## Put multiple directories
+## Put multiple directories using Bash range notation

-Put multiple directories into the cluster with range syntax.
+First, let's generate some files and directories (strictly for illustration purposes):

 ```bash
-$ for d1 in {0..10}; do mkdir dir$d1 && for d2 in {0..2}; do echo "0" > dir$d1/test${d2}.txt; done; done
-$ ais put "dir{0..10}" ais://mybucket -y
-33 objects put into "ais://mybucket" bucket
-# PUT "/home/user/dir0/test0.txt" => b/dir0/test0.txt and 32 more
+$ for d1 in {0..10}; do mkdir /tmp/testdir_$d1 && for d2 in {0..2}; do echo "0" > /tmp/testdir_$d1/test${d2}.txt; done; done
+```
+
+Next, PUT them all in one shot (notice quotation marks!):
+
+```bash
+$ ais put "/tmp/testdir_{0..10}" ais://nnn
+Files to upload:
+EXTENSION   COUNT   SIZE
+.txt        33      66B
+TOTAL       33      66B
+
+PUT 33 files (11 directories, non-recursive) => ais://nnn? [Y/N]:
+```
+
+Let's now take a look at the result - and observe a PROBLEM:
+
+```console
+$ ais ls ais://nnn --summary
+NAME        PRESENT   OBJECTS   SIZE (apparent, objects, remote)   USAGE(%)
+ais://nnn   yes       3 0       112.01KiB 6B 0B                    0%
+```
+
+So yes, the problem is that, by default, destination object names are _sourced_ from the source file basenames.
+
+In this example, we happen to have only **3** basenames: `test0.txt`, `test1.txt`, and `test2.txt`.
+
+The **workaround** is to include the respective parent directories in the destination naming:
+
+> As always, see `ais put --help` for usage examples and more options.
+
+```console
+$ ais put "/tmp/testdir_{0..10}" ais://nnn --include-src-dir
+Files to upload:
+EXTENSION   COUNT   SIZE
+.txt        33      66B
+TOTAL       33      66B
+
+PUT 33 files (11 directories, non-recursive) => ais://nnn? [Y/N]: y
+Done
+
+$ ais ls ais://nnn --summary
+NAME        PRESENT   OBJECTS   SIZE (apparent, objects, remote)   USAGE(%)
+ais://nnn   yes       33 0      320.06KiB 66B 0B                   0%
+```
+
+## Put multiple directories using filename-matching pattern (wildcard)
+
+Same as above, but **note** the alternative syntax, which may be more conventional:
+
+```bash
+$ ais put "/tmp/testdir_*" ais://nnn --include-src-dir
+Files to upload:
+EXTENSION   COUNT   SIZE
+.txt        33      66B
+TOTAL       33      66B
+
+PUT 33 files (11 directories, non-recursive) => ais://nnn? [Y/N]:
 ```

 ## Put multiple directories with the `--skip-vc` option
@@ -979,8 +1039,11 @@
 > The `--skip-vc` option allows AIS to skip loading existing object's metadata to perform metadata-associated processing (such as comparing source and destination checksums, for instance). In certain scenarios (e.g., massive uploading of new files that cannot be present in the bucket) this can help reduce PUT latency.

 ```bash
-$ for d1 in {0..10}; do mkdir dir$d1 && for d2 in {0..2}; do echo "0" > dir$d1/test${d2}.txt; done; done
-$ ais put "dir{0..10}" ais://mybucket -y
+## prepare testing content
+$ for d1 in {0..10}; do mkdir /tmp/testdir_$d1 && for d2 in {0..2}; do echo "0" > /tmp/testdir_$d1/test${d2}.txt; done; done
+
+## PUT
+$ ais put "/tmp/testdir_{0..10}" ais://mybucket -y --skip-vc
 Files to upload:
 EXTENSION   COUNT   SIZE
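+
+## Illustrative note (not from the original docs; naming behavior inferred from the
+## --include-src-dir usage text and the _lsDir/_lsFil trim-prefix logic above):
+##   without --include-src-dir:  /tmp/testdir_0/test0.txt  =>  ais://mybucket/test0.txt
+##   with    --include-src-dir:  /tmp/testdir_0/test0.txt  =>  ais://mybucket/testdir_0/test0.txt
+## The flag can also be combined with the --skip-vc option shown here, e.g.:
+##   $ ais put "/tmp/testdir_{0..10}" ais://mybucket -y --skip-vc --include-src-dir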