Skip to content

Commit

Permalink
CLI: PUT and archive files from multiple matching directories
Browse files Browse the repository at this point in the history
* `GLOB`alize
* PUT: add back `--include-src-dir` option

Signed-off-by: Alex Aizman <[email protected]>
  • Loading branch information
alex-aizman committed Oct 1, 2024
1 parent 7cf1546 commit 16edff7
Show file tree
Hide file tree
Showing 7 changed files with 149 additions and 68 deletions.
6 changes: 3 additions & 3 deletions cmd/cli/cli/arch_hdlr.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ var (
verboseFlag,
yesFlag,
unitsFlag,
inclSrcDirNameFlag,
archSrcDirNameFlag,
skipVerCksumFlag,
continueOnErrorFlag, // TODO: revisit
),
Expand Down Expand Up @@ -346,7 +346,7 @@ func putApndArchHandler(c *cli.Context) (err error) {
}
}

incl := flagIsSet(c, inclSrcDirNameFlag)
incl := flagIsSet(c, archSrcDirNameFlag)
switch {
case len(a.src.fdnames) > 0:
// a) csv of files and/or directories (names) from the first arg, e.g. "f1[,f2...]" dst-bucket[/prefix]
Expand All @@ -369,7 +369,7 @@ func putApndArchHandler(c *cli.Context) (err error) {
debug.Assert(srcpath == "", srcpath)
srcpath = a.pt.Prefix
}
fobjs, err := lsFobj(c, srcpath, "" /*trim pref*/, a.archpath /*append pref*/, &ndir, a.src.recurs, incl)
fobjs, err := lsFobj(srcpath, "" /*trim pref*/, a.archpath /*append pref*/, &ndir, a.src.recurs, incl, false)
if err != nil {
return err
}
Expand Down
8 changes: 6 additions & 2 deletions cmd/cli/cli/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -935,9 +935,13 @@ var (
Name: "include-src-bck",
Usage: "prefix the names of archived files with the source bucket name",
}
inclSrcDirNameFlag = cli.BoolFlag{
archSrcDirNameFlag = cli.BoolFlag{
Name: "include-src-dir",
Usage: "prefix the names of archived files with the (root) source directory (omitted by default)",
Usage: "prefix the names of archived files with the (root) source directory",
}
putSrcDirNameFlag = cli.BoolFlag{
Name: "include-src-dir",
Usage: "prefix destination object names with the source directory",
}
// 'ais archive put': conditional APPEND
archAppendOrPutFlag = cli.BoolFlag{
Expand Down
6 changes: 3 additions & 3 deletions cmd/cli/cli/object.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ func verbList(c *cli.Context, wop wop, fnames []string, bck cmn.Bck, appendPref
recurs = flagIsSet(c, recursFlag)
)
for _, n := range fnames {
fobjs, err := lsFobj(c, n, "", appendPref, &ndir, recurs, incl)
fobjs, err := lsFobj(n, "", appendPref, &ndir, recurs, incl, false)
if err != nil {
return err
}
Expand All @@ -123,7 +123,7 @@ func verbRange(c *cli.Context, wop wop, pt *cos.ParsedTemplate, bck cmn.Bck, tri
)
pt.InitIter()
for n, hasNext := pt.Next(); hasNext; n, hasNext = pt.Next() {
fobjs, err := lsFobj(c, n, trimPref, appendPref, &ndir, recurs, incl)
fobjs, err := lsFobj(n, trimPref, appendPref, &ndir, recurs, incl, false)
if err != nil {
return err
}
Expand All @@ -147,7 +147,7 @@ func concatObject(c *cli.Context, bck cmn.Bck, objName string, fileNames []strin
recurs = flagIsSet(c, recursFlag)
)
for i, fileName := range fileNames {
fobjs, err := lsFobj(c, fileName, "", "", &ndir, recurs, false /*incl src dir*/)
fobjs, err := lsFobj(fileName, "", "", &ndir, recurs, false /*incl src dir*/, false)
if err != nil {
return err
}
Expand Down
5 changes: 3 additions & 2 deletions cmd/cli/cli/object_hdlr.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ var (
concurrencyFlag,
dryRunFlag,
recursFlag,
putSrcDirNameFlag,
verboseFlag,
yesFlag,
continueOnErrorFlag,
Expand Down Expand Up @@ -328,7 +329,7 @@ func putHandler(c *cli.Context) error {
}

// 2. multi-file list & range
incl := flagIsSet(c, inclSrcDirNameFlag)
incl := flagIsSet(c, putSrcDirNameFlag)
switch {
case len(a.src.fdnames) > 0:
if len(a.src.fdnames) > 1 {
Expand Down Expand Up @@ -373,7 +374,7 @@ func putHandler(c *cli.Context) error {
if ok := warnMultiSrcDstPrefix(c, &a, fmt.Sprintf("from '%s%s'", srcpath, s)); !ok {
return nil
}
fobjs, err := lsFobj(c, srcpath, "", a.dst.oname, &ndir, a.src.recurs, incl)
fobjs, err := lsFobj(srcpath, "", a.dst.oname, &ndir, a.src.recurs, incl, false)
if err != nil {
return err
}
Expand Down
98 changes: 58 additions & 40 deletions cmd/cli/cli/walk.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ import (
"github.com/NVIDIA/aistore/cmn"
"github.com/NVIDIA/aistore/cmn/cos"
"github.com/NVIDIA/aistore/cmn/debug"
"github.com/urfave/cli"
)

// walk locally accessible files and directories; handle file/dir matching wildcards and patterns
Expand Down Expand Up @@ -102,64 +101,83 @@ func listRecurs(path, trimPref, appendPref, pattern string) (fobjs, error) {
// - source path that may contain wildcard(s)
// - (trimPref, appendPref) combo to influence destination naming
// - recursive, etc.
// Returns:
// OUT:
// - a slice of matching triplets: {source fname or dirname, destination name, size in bytes}
func lsFobj(c *cli.Context, path, trimPref, appendPref string, ndir *int, recurs, incl bool) (fobjs, error) {
var (
pattern = cos.WildcardMatchAll // default pattern: entire directory
finfo, err = os.Stat(path)
)
debug.Assert(trimPref == "" || strings.HasPrefix(path, trimPref))

// single file (uses cases: reg file, --template, --list)
if err == nil && !finfo.IsDir() {
if trimPref == "" {
// [convention] trim _everything_ leaving only the base, unless (below)
trimPref = filepath.Dir(path)
if incl {
// --include-source-(root)-dir: retain the last snippet
trimPref = filepath.Dir(trimPref)
}
}
fo := fobj{
dstName: appendPref + trimPrefix(path, trimPref),
path: path,
size: finfo.Size(),
func lsFobj(srcpath, trimPref, appendPref string, ndir *int, recurs, incl, globbed bool) (fobjs fobjs, _ error) {
// 1. fstat ok
finfo, err := os.Stat(srcpath)
if err == nil {
if finfo.IsDir() {
return _lsDir(srcpath, trimPref, appendPref, cos.WildcardMatchAll, ndir, recurs, incl)
}
return []fobj{fo}, nil
return _lsFil(finfo, srcpath, trimPref, appendPref, incl)
}

if err != nil {
// expecting the base to be a filename-matching pattern (wildcard)
pattern = filepath.Base(path)
if isPattern(pattern) {
warn := fmt.Sprintf("%q is not a directory and does not appear to be a shell filename matching pattern (%q)",
path, pattern)
actionWarn(c, warn)
if globbed {
return nil, &errDoesNotExist{what: "srcpath", name: srcpath}
}
// 2. glob
const fmte = "%q is not a directory and does not appear to be a filename-matching pattern"
all, e := filepath.Glob(srcpath)
if e != nil {
return nil, fmt.Errorf(fmte+": %v", srcpath, e)
}

// no matches? extract basename and use it as a pattern to list the parent directory
if len(all) == 0 {
pattern := filepath.Base(srcpath)
if !isPattern(pattern) {
return nil, fmt.Errorf(fmte, srcpath)
}
path = filepath.Dir(path)
finfo, err = os.Stat(path)
if err != nil {
return nil, &errDoesNotExist{what: "path", name: path}
parent := filepath.Dir(srcpath)
if _, err := os.Stat(parent); err != nil {
return nil, &errDoesNotExist{what: "path", name: parent}
}
if !finfo.IsDir() {
return nil, fmt.Errorf("%q is not a directory", path)
return _lsDir(parent, trimPref, appendPref, pattern, ndir, recurs, incl)
}

// 3. append all
for _, src := range all {
fob, err := lsFobj(src, trimPref, appendPref, ndir, recurs, incl, true)
if err != nil {
return nil, fmt.Errorf("nested failure to ls %q: [%v]", src, err)
}
fobjs = append(fobjs, fob...)
}
return fobjs, nil
}

func _lsDir(srcpath, trimPref, appendPref, pattern string, ndir *int, recurs, incl bool) (fobjs, error) {
*ndir++
// [convention] ditto
if trimPref == "" {
trimPref = path
trimPref = srcpath
if incl {
trimPref = strings.TrimSuffix(path, filepath.Base(path))
trimPref = strings.TrimSuffix(srcpath, filepath.Base(srcpath))
}
}
f := listDir
if recurs {
f = listRecurs
}
return f(path, trimPref, appendPref, pattern)
return f(srcpath, trimPref, appendPref, pattern)
}

// _lsFil handles the single regular-file case: it produces a one-element
// result describing the file at srcpath.
//
//   - finfo:      result of os.Stat(srcpath); only its Size() is used here
//   - srcpath:    path of the source file
//   - trimPref:   prefix to remove from srcpath when forming the destination
//     name; when empty, it is derived from srcpath (see below)
//   - appendPref: string prepended to the (trimmed) destination name
//   - incl:       when trimPref is empty, keep the file's immediate parent
//     directory in the destination name
//
// The destination name is appendPref + trimPrefix(srcpath, <effective prefix>);
// presumably trimPrefix strips that prefix (helper defined elsewhere - confirm).
func _lsFil(finfo os.FileInfo, srcpath, trimPref, appendPref string, incl bool) (fobjs, error) {
	prefix := trimPref
	if prefix == "" {
		// [convention] with no explicit prefix, drop the entire parent path
		// so that only the basename remains...
		prefix = filepath.Dir(srcpath)
		if incl {
			// ...unless --include-src-dir is set: go one level up and thus
			// retain the last directory component
			prefix = filepath.Dir(prefix)
		}
	}
	return []fobj{{
		dstName: appendPref + trimPrefix(srcpath, prefix),
		path:    srcpath,
		size:    finfo.Size(),
	}}, nil
}

func groupByExt(files []fobj) (int64, map[string]counter) {
Expand Down
9 changes: 2 additions & 7 deletions cmd/cli/cli/yap.go
Original file line number Diff line number Diff line change
Expand Up @@ -187,13 +187,8 @@ func (a *putargs) parse(c *cli.Context, emptyDstOnameOK bool) (err error) {
return err
}

const efmt = "too many arguments: '%s'"
var hint = fmt.Sprintf("(hint: wildcards must be in single or double quotes, see %s for details)", qflprn(cli.HelpFlag))
l := c.NArg()
if l > 4 {
return fmt.Errorf(efmt+" ...\n%s\n", strings.Join(c.Args()[2:4], " "), hint)
}
return fmt.Errorf(efmt+"\n%s\n", strings.Join(c.Args()[2:], " "), hint)
hint := fmt.Sprintf("(hint: wildcards must be in single or double quotes, see %s for details)", qflprn(cli.HelpFlag))
return fmt.Errorf("too many arguments: '%s'\n"+hint, strings.Join(c.Args(), " "))
}

func (*archbck) verb() string { return "ARCHIVE" }
Expand Down
85 changes: 74 additions & 11 deletions docs/cli/object.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ ls promote concat evict mv cat
- [Put a range of files](#put-a-range-of-files)
- [Put a list of files](#put-a-list-of-files)
- [Dry-Run option](#dry-run-option)
- [Put multiple directories](#put-multiple-directories)
- [Put multiple directories using Bash range notation](#put-multiple-directories-using-bash-range-notation)
- [Put multiple directories using filename-matching pattern (wildcard)](#put-multiple-directories-using-filename-matching-pattern-wildcard)
- [Put multiple directories with the `--skip-vc` option](#put-multiple-directories-with-the-skip-vc-option)
- [APPEND object](#append-object)
- [Delete object](#delete-object)
Expand Down Expand Up @@ -513,6 +514,7 @@ OPTIONS:
--conc value limits number of concurrent put requests and number of concurrent shards created (default: 10)
--dry-run preview the results without really running the action
--recursive, -r recursive operation
--include-src-dir prefix destination object names with the source directory
--verbose, -v verbose output
--yes, -y assume 'yes' to all questions
--cont-on-err keep running archiving xaction (job) in presence of errors in a any given multi-object transaction
Expand Down Expand Up @@ -853,16 +855,20 @@ $ ais put "~/dir/test{0..2}{0..2}.txt" ais://mybucket -y
9 objects put into "ais://mybucket" bucket
```

### Example 2. PUT a range of files into virtial directory
### Example 2. PUT a range of files into a virtual directory

Same as above but in addition destination object names will have additional prefix `subdir/` (notice the trailing `/`)

In other words, this PUT in effect creates a **virtual directory** inside destination `ais://mybucket`

```bash
# prep test files
# first, prepare test files
$ for d1 in {0..2}; do for d2 in {0..2}; do echo "0" > ~/dir/test${d1}${d2}.txt; done; done
```

Next, PUT:

```console
$ ais put "~/dir/test{0..2}{0..2}.txt" ais://mybucket/subdir/ -y
```

Expand Down Expand Up @@ -963,24 +969,81 @@ PUT /tmp/w/111 -> ais://nnn/fff111

> Note: to PUT files into a virtual destination directory, use trailing '/', e.g.: `ais put ais://nnn/fff/ ...`
## Put multiple directories
## Put multiple directories using Bash range notation

Put multiple directories into the cluster with range syntax.
First, let's generate some files and directories (strictly for illustration purposes):

```bash
$ for d1 in {0..10}; do mkdir dir$d1 && for d2 in {0..2}; do echo "0" > dir$d1/test${d2}.txt; done; done
$ ais put "dir{0..10}" ais://mybucket -y
33 objects put into "ais://mybucket" bucket
# PUT "/home/user/dir0/test0.txt" => b/dir0/test0.txt and 32 more
$ for d1 in {0..10}; do mkdir /tmp/testdir_$d1 && for d2 in {0..2}; do echo "0" > /tmp/testdir_$d1/test${d2}.txt; done; done
```

Next, PUT them all in one shot (notice quotation marks!):

```bash
$ ais put "/tmp/testdir_{0..10}" ais://nnn
Files to upload:
EXTENSION COUNT SIZE
.txt 33 66B
TOTAL 33 66B

PUT 33 files (11 directories, non-recursive) => ais://nnn? [Y/N]:
```

Let's now take a look at the result - and observe a PROBLEM:

```console
$ ais ls ais://nnn --summary
NAME PRESENT OBJECTS SIZE (apparent, objects, remote) USAGE(%)
ais://nnn yes 3 0 112.01KiB 6B 0B 0%
```

So yes, the problem is that, by default, destination object names are _sourced_ from the source file basenames.

In this example, we happen to have only **3** basenames: `test0.txt`, `test1.txt`, and `test2.txt`.

The **workaround** is to include respective parent directories in the destination naming:

> As always, see `ais put --help` for usage examples and more options.
```console
$ ais put "/tmp/testdir_{0..10}" ais://nnn --include-src-dir
Files to upload:
EXTENSION COUNT SIZE
.txt 33 66B
TOTAL 33 66B

PUT 33 files (11 directories, non-recursive) => ais://nnn? [Y/N]: y
Done

$ ais ls ais://nnn --summary
NAME PRESENT OBJECTS SIZE (apparent, objects, remote) USAGE(%)
ais://nnn yes 33 0 320.06KiB 66B 0B 0%
```

## Put multiple directories using filename-matching pattern (wildcard)

Same as above, but **note** the alternative - and arguably more conventional - wildcard syntax:

```bash
$ ais put "/tmp/testdir_*" ais://nnn --include-src-dir
Files to upload:
EXTENSION COUNT SIZE
.txt 33 66B
TOTAL 33 66B

PUT 33 files (11 directories, non-recursive) => ais://nnn? [Y/N]:
```

## Put multiple directories with the `--skip-vc` option

> The `--skip-vc` option allows AIS to skip loading existing object's metadata to perform metadata-associated processing (such as comparing source and destination checksums, for instance). In certain scenarios (e.g., massive uploading of new files that cannot be present in the bucket) this can help reduce PUT latency.
```bash
$ for d1 in {0..10}; do mkdir dir$d1 && for d2 in {0..2}; do echo "0" > dir$d1/test${d2}.txt; done; done
$ ais put "dir{0..10}" ais://mybucket -y --skip-vc
## prepare testing content
$ for d1 in {0..10}; do mkdir /tmp/testdir_$d1 && for d2 in {0..2}; do echo "0" > /tmp/testdir_$d1/test${d2}.txt; done; done

## PUT
$ ais put "/tmp/testdir_{0..10}" ais://mybucket -y --skip-vc

Files to upload:
EXTENSION COUNT SIZE
Expand Down

0 comments on commit 16edff7

Please sign in to comment.