From a42df022ec97301d65dedfc2c11d40ba6533eb3d Mon Sep 17 00:00:00 2001
From: Edmund Higham
Date: Thu, 30 Jan 2025 13:28:51 -0500
Subject: [PATCH] [hailctl] batch submit fixes

CHANGELOG: Fix many issues, including (hail#14274), with hailctl batch submit introduced in 0.2.127.

Fixes #14274, Replaces #14351 (authored by @jigold)
---
 build.yaml                                     |  27 +-
 hail/python/hailtop/aiotools/fs/fs.py          |   3 +
 hail/python/hailtop/hailctl/batch/cli.py       |  54 +++-
 hail/python/hailtop/hailctl/batch/submit.py    | 285 ++++++++++++------
 .../test/hailtop/hailctl/batch/__init__.py     |   0
 .../test/hailtop/hailctl/batch/test_submit.py  | 191 ++++++++++++
 6 files changed, 444 insertions(+), 116 deletions(-)
 create mode 100644 hail/python/test/hailtop/hailctl/batch/__init__.py
 create mode 100644 hail/python/test/hailtop/hailctl/batch/test_submit.py

diff --git a/build.yaml b/build.yaml
index 327582ddff9..18a54f305d4 100644
--- a/build.yaml
+++ b/build.yaml
@@ -1027,6 +1027,7 @@ steps:
         export HAIL_DOCTEST_DATA_DIR=$(realpath ./data)
         export HAIL_TEST_STORAGE_URI={{ global.test_storage_uri }}/{{ token }}
         export PYSPARK_SUBMIT_ARGS="--driver-memory 6g pyspark-shell"
+
         python3 -m pytest \
           -Werror:::hail -Werror:::hailtop -Werror::ResourceWarning \
           --log-cli-level=INFO \
@@ -1037,6 +1038,7 @@ steps:
           --durations=50 \
           --ignore=test/hailtop/batch/ \
           --ignore=test/hailtop/inter_cloud \
+          --ignore=test/hailtop/hailctl/batch \
          --timeout=120 \
           test
     inputs:
@@ -2995,7 +2997,8 @@ steps:
           --instafail \
           --durations=50 \
           --timeout=360 \
-          /io/test/hailtop/batch/
+          /io/test/hailtop/batch/ /io/test/hailtop/hailctl/batch
+
     inputs:
       - from: /repo/hail/python/pytest.ini
         to: /io/pytest.ini
@@ -3060,9 +3063,7 @@ steps:
        BATCH_ID=$(hailctl batch submit simple_hail.py --name=test-hailctl-batch-submit --files=foo -o json | jq '.id')
         STATUS=$(hailctl batch wait -o json $BATCH_ID)
         STATE=$(echo $STATUS | jq -jr '.state')
-        if [ "$STATE" == "success" ]; then
-          exit 0;
-        else
+        if [ "$STATE" != "success" ]; then
           echo $STATUS;
           exit 1;
         fi
@@ -3081,21 +3082,19 @@ steps:
         BATCH_ID=$(hailctl batch submit --name=test-hailctl-batch-submit --files=foo -o json hail_with_args.py 100 | jq '.id')
         STATUS=$(hailctl batch wait -o json $BATCH_ID)
         STATE=$(echo $STATUS | jq -jr '.state')
-        if [ "$STATE" == "success" ]; then
-          exit 0;
-        else
+        if [ "$STATE" != "success" ]; then
           echo $STATUS;
           exit 1;
         fi
-        cat >file.sh <<EOF
+        cat > file.sh << 'EOF'
         set -ex
-        cat foo
+        cat foo/baz.txt
         echo "Hello World!"
         EOF
-        BATCH_ID=$(hailctl batch submit --name=test-hailctl-batch-submit --files=foo -o json file.sh | jq '.id')
+        BATCH_ID=$(hailctl batch submit --name=test-hailctl-batch-submit --files=foo -o json --image-name ubuntu:22.04 file.sh | jq '.id')
         STATUS=$(hailctl batch wait -o json $BATCH_ID)
         STATE=$(echo $STATUS | jq -jr '.state')
         if [ "$STATE" == "success" ]; then
@@ -3114,12 +3113,10 @@ steps:
         echo "Hello World!
$1 $2" EOF - BATCH_ID=$(hailctl batch submit --name=test-hailctl-batch-submit --files=foo -o json file-with-args.sh abc 123 | jq '.id') + BATCH_ID=$(hailctl batch submit --name=test-hailctl-batch-submit --files=foo -o json --image-name ubuntu:22.04 file-with-args.sh abc 123 | jq '.id') STATUS=$(hailctl batch wait -o json $BATCH_ID) STATE=$(echo $STATUS | jq -jr '.state') - if [ "$STATE" == "success" ]; then - exit 0; - else + if [ "$STATE" != "success" ]; then echo $STATUS; exit 1; fi @@ -3978,8 +3975,6 @@ steps: dependsOn: - ci_utils_image - default_ns - scopes: - - deploy - kind: runImage name: test_gcp_ar_cleanup_policies resources: diff --git a/hail/python/hailtop/aiotools/fs/fs.py b/hail/python/hailtop/aiotools/fs/fs.py index 5b3830023c2..6aa97dfe362 100644 --- a/hail/python/hailtop/aiotools/fs/fs.py +++ b/hail/python/hailtop/aiotools/fs/fs.py @@ -247,6 +247,9 @@ def with_new_path_components(self, *parts: str) -> "AsyncFSURL": def __str__(self) -> str: pass + def __truediv__(self, part: str) -> 'AsyncFSURL': + return self.with_new_path_components(part) + class AsyncFS(abc.ABC): FILE = "file" diff --git a/hail/python/hailtop/hailctl/batch/cli.py b/hail/python/hailtop/hailctl/batch/cli.py index c10650f02f1..2a5086a1741 100644 --- a/hail/python/hailtop/hailctl/batch/cli.py +++ b/hail/python/hailtop/hailctl/batch/cli.py @@ -1,15 +1,14 @@ import asyncio -import json from enum import Enum from typing import Annotated as Ann from typing import Any, Dict, List, Optional, cast +import orjson import typer from typer import Argument as Arg from typer import Option as Opt from . import billing, list_batches -from . import submit as _submit from .batch_cli_utils import ( ExtendedOutputFormat, ExtendedOutputFormatOption, @@ -131,7 +130,7 @@ def wait( quiet = quiet or output != StructuredFormatPlusText.TEXT out = batch.wait(disable_progress_bar=quiet) if output == StructuredFormatPlusText.JSON: - print(json.dumps(out)) + print(orjson.dumps(out).decode('utf-8')) else: print(out) @@ -158,26 +157,61 @@ def job(batch_id: int, job_id: int, output: StructuredFormatOption = StructuredF @app.command(context_settings={"allow_extra_args": True, "ignore_unknown_options": True}) def submit( ctx: typer.Context, - script: str, + script: Ann[str, Arg(help='File to execute')], arguments: Ann[ - Optional[List[str]], Arg(help='You should use -- if you want to pass option-like arguments through.') + Optional[List[str]], + Arg(help='You should use -- if you want to pass option-like arguments through.'), ] = None, files: Ann[ - Optional[List[str]], Opt(help='Files or directories to add to the working directory of the job.') + Optional[List[str]], + Opt(help='Extra files or folders to add to the working directory of the job.'), ] = None, name: Ann[str, Opt(help='The name of the batch.')] = '', image_name: Ann[Optional[str], Opt(help='Name of Docker image for the job (default: hailgenetics/hail)')] = None, output: StructuredFormatPlusTextOption = StructuredFormatPlusText.TEXT, + wait: Ann[bool, Opt(help='Wait for the batch to complete.')] = False, ): - """Submit a batch with a single job that runs SCRIPT with the arguments ARGUMENTS. + """Submit a batch with a single job that runs SCRIPT, optionally with ARGUMENTS. - If you wish to pass option-like arguments you should use "--". 
+    Use '--' to pass additional arguments and switches to SCRIPT:
+      $ hailctl batch submit SCRIPT -- ARGUMENT --option VALUE
+    Specify a docker image to use for the job:
+      $ hailctl batch submit --image-name docker.io/image SCRIPT
+    Add additional files to your job using the --files SRC[:DST] option as follows:
-    $ hailctl batch submit --image-name docker.io/image my_script.py -- some-argument --animal dog
+    Copy the local working directory to the working directory of the job:
+      $ hailctl batch submit --files .
+      $ hailctl batch submit --files .:.
+
+    Copy a local file or folder into the working directory of the job:
+      $ hailctl batch submit SCRIPT --files a-file-or-folder
+
+    Copy a local file or folder to a path relative to the working directory of the job:
+      $ hailctl batch submit SCRIPT --files foo:bar
+
+    Copy a local file or folder into a specific absolute path on the worker:
+      $ hailctl batch submit SCRIPT --files $HOME/foo:/path/to/bar
+
+    Notes
+    -----
+    1. SCRIPTs ending in '.py' are invoked with `python3`; all other scripts are invoked with `bash`.
+    2. File options are applied left-to-right.
+    3. If DST does not exist, SRC will be copied to DST, otherwise
+       if DST is a file, DST will be overwritten by SRC if SRC is a file, otherwise
+       if DST is a folder, SRC will be copied into DST as DST/$(basename SRC), otherwise
+       an error will be raised.
+    4. Only SRC paths may contain environment variables.
     """
-    asyncio.run(_submit.submit(name, image_name, files or [], output, script, [*(arguments or []), *ctx.args]))
+    from .submit import HailctlBatchSubmitError  # pylint: disable=import-outside-toplevel
+    from .submit import submit as _submit  # pylint: disable=import-outside-toplevel
+
+    try:
+        asyncio.run(_submit(name, image_name, files or [], output, script, [*(arguments or []), *ctx.args], wait))
+    except HailctlBatchSubmitError as err:
+        print(err.message)
+        raise typer.Exit(err.exit_code)
 
 
 @app.command('init', help='Initialize a Hail Batch environment.')
diff --git a/hail/python/hailtop/hailctl/batch/submit.py b/hail/python/hailtop/hailctl/batch/submit.py
index 234ec62ac52..3a660a0e036 100644
--- a/hail/python/hailtop/hailctl/batch/submit.py
+++ b/hail/python/hailtop/hailctl/batch/submit.py
@@ -1,108 +1,213 @@
 import os
-import re
-from shlex import quote as shq
-from typing import Tuple
+import shlex
+from contextlib import AsyncExitStack
+from pathlib import Path
+from typing import Any, Generator, List, NoReturn, Optional, Set, Tuple, Union
 
 import orjson
-
-from hailtop import __pip_version__
-
-FILE_REGEX = re.compile(r'(?P<src>[^:]+)(:(?P<dest>.+))?')
-
-
-async def submit(name, image_name, files, output, script, arguments):
-    import hailtop.batch as hb  # pylint: disable=import-outside-toplevel
-    from hailtop.aiotools.copy import copy_from_dict  # pylint: disable=import-outside-toplevel
-    from hailtop.config import (  # pylint: disable=import-outside-toplevel
-        get_deploy_config,
-        get_remote_tmpdir,
-        get_user_config_path,
-    )
-    from hailtop.utils import (  # pylint: disable=import-outside-toplevel
-        secret_alnum_string,
-        unpack_comma_delimited_inputs,
+import typer
+
+from hailtop import yamlx
+from hailtop.aiotools.copy import copy_from_dict
+from hailtop.aiotools.fs import AsyncFSURL
+from hailtop.aiotools.router_fs import RouterAsyncFS
+from hailtop.batch import Batch, ServiceBackend
+from hailtop.batch.job import BashJob
+from hailtop.config import (
+    get_deploy_config,
+    get_remote_tmpdir,
+    get_user_config_path,
+)
+from hailtop.utils import secret_alnum_string
+from hailtop.version import __pip_version__
+
+from .batch_cli_utils import StructuredFormatPlusTextOption
+
+
+class HailctlBatchSubmitError(Exception):
+    def __init__(self, message: str, exit_code: int):
+        self.message = message
+        self.exit_code = exit_code
+
+
+async def submit(
+    name: str,
+    image_name: Optional[str],
+    files_options: List[str],
+    output: StructuredFormatPlusTextOption,
+    script: str,
+    arguments: List[str],
+    wait: bool,
+):
+    async with AsyncExitStack() as exitstack:
+        fs = RouterAsyncFS()
+        exitstack.push_async_callback(fs.close)
+
+        remote_tmpdir = get_remote_tmpdir('hailctl batch submit')
+        remote_tmpdir = fs.parse_url(remote_tmpdir) / secret_alnum_string()
+
+        backend = ServiceBackend()
+        exitstack.push_async_callback(backend._async_close)
+
+        b = Batch(name=name, backend=backend)
+        j = b.new_bash_job()
+        j.image(image_name or os.environ.get('HAIL_GENETICS_HAIL_IMAGE', f'hailgenetics/hail:{__pip_version__}'))
+        j.env('HAIL_QUERY_BACKEND', 'batch')
+
+        # The knowledge of why the current working directory is mirrored onto the
+        # worker has been lost to the sands of time. Some speculate that a user's
+        # code didn't work because it relied on the state of the local filesystem.
+        # Nonetheless, we continue the fine work of our forebears like good sheep.
+        remote_working_dir = __real_absolute_local_path('.', strict=True)
+        working_dir_shq = shq(str(remote_working_dir))
+        j.command(f'mkdir -p {working_dir_shq} && pushd {working_dir_shq}')
+
+        script_path = __real_absolute_local_path(script, strict=True)
+        xfers = [(script_path, script_path)]
+        xfers += [parse_files_to_src_dest(files) for files in files_options]
+        await transfer_files_options_files_into_job(xfers, remote_working_dir, remote_tmpdir, b, j)
+        await transfer_user_config_into_job(b, j, remote_tmpdir)
+
+        command = 'python3' if str(script_path).endswith('.py') else 'bash'
+        script_arguments = " ".join(shq(x) for x in arguments)
+        j.command(f'{command} {script_path} {script_arguments}')
+
+        quiet = output != 'text'
+        batch_handle = await b._async_run(wait=False, disable_progress_bar=quiet)
+        assert batch_handle
+
+        # You run into all sorts of problems mixing `async` calls with those
+        # that internally use `async_to_blocking`. Until we split async/blocking
+        # clients, use the "private" `_async_batch` property.
+        async_batch = batch_handle._async_batch
+
+        if output == 'text':
+            deploy_config = get_deploy_config()
+            url = deploy_config.external_url('batch', f'/batches/{async_batch.id}/jobs/1')
+            print(f'Submitted batch {async_batch.id}, see {url}')
+        else:
+            assert output == 'json'
+            print(orjson.dumps({'id': async_batch.id}).decode('utf-8'))
+
+        if wait:
+            out = await async_batch.wait(disable_progress_bar=quiet)
+            try:
+                out['log'] = (await async_batch.get_job_log(1))['main']
+            except Exception:
+                out['log'] = 'Could not retrieve job log.'
+
+            print(yamlx.dump(out) if output == 'text' else orjson.dumps(out).decode('utf-8'))
+
+            if out['state'] != 'success':
+                raise typer.Exit(1)
+
+
+async def transfer_files_options_files_into_job(
+    src_dst_pairs: List[Tuple[Path, Optional[Path]]],
+    remote_working_dir: Path,
+    remote_tmpdir: AsyncFSURL,
+    b: Batch,
+    j: BashJob,
+) -> None:
+    if non_existing_files := [str(src) for src, _ in src_dst_pairs if not src.exists()]:
+        non_existing_files_str = '- ' + '\n- '.join(non_existing_files)
+        raise HailctlBatchSubmitError(f'Some --files did not exist:\n{non_existing_files_str}', 1)
+
+    src_dst_staging_triplets = [
+        (src, dst, str(remote_tmpdir / 'in' / str(src).lstrip('/')))
+        for src, dst in generate_file_xfers(src_dst_pairs, remote_working_dir)
+    ]
+
+    await copy_from_dict(files=[{'from': str(src), 'to': staging} for src, _, staging in src_dst_staging_triplets])
+
+    parents = [dst.parent for _, dst, _ in src_dst_staging_triplets]
+    parents.sort(key=lambda p: len(p.parents), reverse=True)
+    mkdirs = set()
+    for folder in parents:
+        if folder not in mkdirs:
+            j.command(f'mkdir -p {shq(folder)}')
+            mkdirs.add(folder)
+            mkdirs.update(folder.parents)
+
+    for _, dst, staging in src_dst_staging_triplets:
+        in_file = await b._async_read_input(staging)
+        j.command(f'ln -s {shq(in_file)} {shq(dst)}')
+
+
+async def transfer_user_config_into_job(b: Batch, j: BashJob, remote_tmpdir: AsyncFSURL) -> None:
+    if (user_config_path := get_user_config_path()).exists():
+        staging = str(remote_tmpdir / user_config_path.name)
+        await copy_from_dict(files=[{'from': str(user_config_path), 'to': str(staging)}])
+        file = await b._async_read_input(staging)
+        j.command(f'mkdir -p $HOME/.config/hail && ln -s {file} $HOME/.config/hail/config.ini')
+
+
+def parse_files_to_src_dest(file: str) -> Tuple[Path, Optional[Path]]:
+    def raise_value_error(msg: str) -> NoReturn:
+        raise ValueError(f'Invalid file specification {file}: {msg}.')
+
+    try:
+        from_, *to_ = file.split(':')
+    except ValueError:
+        raise_value_error('Must have the form "src" or "src:dst"')
+
+    src = (
+        __real_absolute_local_path(from_, strict=False)  # defer strictness checks
+        if len(from_) != 0  # src is non-empty
+        else raise_value_error('Must have a "src" defined')
     )
-    files = unpack_comma_delimited_inputs(files)
-    user_config = str(get_user_config_path())
-
-    quiet = output != 'text'
-
-    remote_tmpdir = get_remote_tmpdir('hailctl batch submit')
-
-    tmpdir_path_prefix = secret_alnum_string()
-
-    def cloud_prefix(path):
-        path = path.lstrip('/')
-        return f'{remote_tmpdir}/{tmpdir_path_prefix}/{path}'
-
-    def file_input_to_src_dest(file: str) -> Tuple[str, str, str]:
-        match = FILE_REGEX.match(file)
-        if match is None:
-            raise ValueError(f'invalid file specification {file}. Must have the form "src" or "src:dest"')
-
-        result = match.groupdict()
-
-        src = result.get('src')
-        if src is None:
-            raise ValueError(f'invalid file specification {file}. Must have a "src" defined.')
-        src = os.path.abspath(os.path.expanduser(src))
-        src = src.rstrip('/')
-
-        dest = result.get('dest')
-        if dest is not None:
-            dest = os.path.abspath(os.path.expanduser(dest))
-        else:
-            dest = os.getcwd()
+    dst = (
+        None
+        if len(to_) == 0  # dst = []
+        else (p := to_[0], Path(p) / src.name if p.endswith('/') else Path(p))[-1]
+        if len(to_) == 1  # dst = [folder]
+        else raise_value_error('specify at most one "dst".')
+    )
 
-        cloud_file = cloud_prefix(src)
+    return src, dst
 
-        return (src, dest, cloud_file)
 
-    backend = hb.ServiceBackend()
-    b = hb.Batch(name=name, backend=backend)
-    j = b.new_bash_job()
-    j.image(image_name or os.environ.get('HAIL_GENETICS_HAIL_IMAGE', f'hailgenetics/hail:{__pip_version__}'))
+def generate_file_xfers(
+    src_dst: List[Tuple[Path, Optional[Path]]],
+    absolute_remote_cwd: Path,
+) -> Generator[Tuple[Path, Path], None, None]:
+    # Try to generate a set of deterministic SRC -> DST copy instructions
+    # given a set of source paths, SRC, and optional destination paths, DST.
-    local_files_to_cloud_files = []
+    visited: Set[Path] = set()
+    known_paths: Set[Path] = {Path('/'), absolute_remote_cwd}
+    q = list(reversed(src_dst))
 
-    for file in files:
-        src, dest, cloud_file = file_input_to_src_dest(file)
-        local_files_to_cloud_files.append({'from': src, 'to': cloud_file})
-        in_file = b.read_input(cloud_file)
-        j.command(f'mkdir -p {os.path.dirname(dest)}; ln -s {in_file} {dest}')
+    while len(q) != 0:
+        src, dst = q.pop()
 
-    script_src, _, script_cloud_file = file_input_to_src_dest(script)
-    user_config_src, _, user_config_cloud_file = file_input_to_src_dest(user_config)
+        if dst is None:
+            dst = absolute_remote_cwd
 
-    await copy_from_dict(files=local_files_to_cloud_files)
-    await copy_from_dict(
-        files=[
-            {'from': script_src, 'to': script_cloud_file},
-            {'from': user_config_src, 'to': user_config_cloud_file},
-        ]
-    )
+        if not dst.is_absolute():
+            dst = absolute_remote_cwd / dst
 
-    script_file = b.read_input(script_cloud_file)
-    config_file = b.read_input(user_config_cloud_file)
+        if src.is_dir():
+            q += [(src / name, dst / name) for name in os.listdir(src)]
+            continue
 
-    j.env('HAIL_QUERY_BACKEND', 'batch')
+        # src is a file
+        if src not in visited:
+            if dst in known_paths:
+                dst = dst / src.name
+            visited.add(src)
+            known_paths.add(dst)
+            known_paths.update(dst.parents)
+            yield src, dst
 
-    command = 'python3' if script.endswith('.py') else 'bash'
-    script_arguments = " ".join(shq(x) for x in arguments)
-    j.command(f'mkdir -p $HOME/.config/hail && ln -s {config_file} $HOME/.config/hail/config.ini')
-    j.command(f'cd {os.getcwd()}')
-    j.command(f'{command} {script_file} {script_arguments}')
-    batch_handle = await b._async_run(wait=False, disable_progress_bar=quiet)
-    assert batch_handle
 
+# Note well, friends:
+# This uses the local environment to support paths with variables like $HOME or $XDG_ directories.
+# Consequently, it is inappropriate to resolve paths on the worker with this function.
+def __real_absolute_local_path(path: Union[str, Path], *, strict: bool) -> Path:
+    return Path(os.path.expandvars(path)).expanduser().resolve(strict=strict)
 
-    if output == 'text':
-        deploy_config = get_deploy_config()
-        url = deploy_config.external_url('batch', f'/batches/{batch_handle.id}/jobs/1')
-        print(f'Submitted batch {batch_handle.id}, see {url}')
-    else:
-        assert output == 'json'
-        print(orjson.dumps({'id': batch_handle.id}).decode('utf-8'))
 
-    await backend.async_close()
+def shq(p: Any) -> str:
+    return shlex.quote(str(p))
diff --git a/hail/python/test/hailtop/hailctl/batch/__init__.py b/hail/python/test/hailtop/hailctl/batch/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/hail/python/test/hailtop/hailctl/batch/test_submit.py b/hail/python/test/hailtop/hailctl/batch/test_submit.py
new file mode 100644
index 00000000000..913a9bfded7
--- /dev/null
+++ b/hail/python/test/hailtop/hailctl/batch/test_submit.py
@@ -0,0 +1,191 @@
+import os
+from contextlib import contextmanager
+from pathlib import Path
+from typing import Union
+
+import pytest
+from typer.testing import CliRunner
+
+from hailtop.hailctl.batch import cli
+
+
+@pytest.fixture
+def runner():
+    yield CliRunner(mix_stderr=False)
+
+
+def write_script(dir: Union[str, Path], file_to_echo: str):
+    with open(f'{dir}/test_job.py', 'w') as f:
+        f.write(f'print(open("{file_to_echo}").read())')
+
+
+def write_hello(filename: str):
+    os.makedirs(os.path.dirname(filename), exist_ok=True)
+    with open(filename, 'w') as f:
+        f.write('hello\n')
+
+
+@contextmanager
+def tmp_cwd(path: Path):
+    cwd = os.getcwd()
+    path.mkdir(parents=True, exist_ok=True)
+    os.chdir(path)
+    try:
+        yield str(path)
+    finally:
+        os.chdir(cwd)
+
+
+@pytest.fixture(name='tmp_cwd')
+def tmp_cwd_fixture(tmp_path):
+    with tmp_cwd(tmp_path) as cd:
+        yield cd
+
+
+@pytest.mark.timeout(5 * 60)  # image pulling is very slow
+def test_file_with_no_dest(runner: CliRunner, tmp_cwd):
+    write_hello(f'{tmp_cwd}/hello.txt')
+    write_script(tmp_cwd, 'hello.txt')
+    res = runner.invoke(
+        cli.app,
+        ['submit', '--wait', '--files', 'hello.txt', 'test_job.py'],
+        catch_exceptions=False,
+    )
+    assert res.exit_code == 0, repr((res.output, res.stdout, res.stderr, res.exception))
+
+
+@pytest.mark.timeout(5 * 60)  # image pulling is very slow
+@pytest.mark.parametrize('files', ['.', '.:.', '$PWD', '$PWD:.'])
+def test_copy_cwd(runner: CliRunner, tmp_cwd, files):
+    write_hello(f'{tmp_cwd}/hello.txt')
+    write_script(tmp_cwd, 'hello.txt')
+    res = runner.invoke(
+        cli.app,
+        ['submit', '--wait', '--files', files, 'test_job.py'],
+        catch_exceptions=False,
+    )
+    assert res.exit_code == 0, repr((res.output, res.stdout, res.stderr, res.exception))
+
+
+@pytest.mark.timeout(5 * 60)  # image pulling is very slow
+def test_file_in_current_dir(runner: CliRunner, tmp_cwd):
+    write_hello(f'{tmp_cwd}/hello.txt')
+    write_script(tmp_cwd, '/hello.txt')
+    res = runner.invoke(
+        cli.app,
+        ['submit', '--wait', '--files', 'hello.txt:/', 'test_job.py'],
+        catch_exceptions=False,
+    )
+    assert res.exit_code == 0, repr((res.output, res.stdout, res.stderr, res.exception))
+
+
+@pytest.mark.timeout(5 * 60)  # image pulling is very slow
+def test_copy_file_as(runner: CliRunner, tmp_cwd):
+    write_hello(f'{tmp_cwd}/hello.txt')
+    write_script(tmp_cwd, '/child')
+    res = runner.invoke(
+        cli.app,
+        ['submit', '--wait', '--files', 'hello.txt:/child', 'test_job.py'],
+        catch_exceptions=False,
+    )
+    assert res.exit_code == 0, repr((res.output, res.stdout, res.stderr, res.exception))
+
+
+@pytest.mark.timeout(5 * 60)  # image pulling is very slow
+def test_file_mount_in_child_dir(runner: CliRunner, tmp_cwd):
+    write_hello(f'{tmp_cwd}/hello.txt')
+    write_script(tmp_cwd, '/child/hello.txt')
+    res = runner.invoke(
+        cli.app,
+        ['submit', '--wait', '--files', 'hello.txt:/child/', 'test_job.py'],
+        catch_exceptions=False,
+    )
+    assert res.exit_code == 0, repr((res.output, res.stdout, res.stderr, res.exception))
+
+
+@pytest.mark.timeout(5 * 60)  # image pulling is very slow
+def test_file_mount_in_child_dir_to_root_dir(runner: CliRunner, tmp_cwd):
+    write_hello(f'{tmp_cwd}/child/hello.txt')
+    write_script(tmp_cwd, '/hello.txt')
+    res = runner.invoke(
+        cli.app,
+        ['submit', '--wait', '--files', 'child/hello.txt:/', 'test_job.py'],
+        catch_exceptions=False,
+    )
+    assert res.exit_code == 0, repr((res.output, res.stdout, res.stderr, res.exception))
+
+
+@pytest.mark.timeout(5 * 60)  # image pulling is very slow
+def test_mount_multiple_files(runner: CliRunner, tmp_cwd):
+    write_hello(f'{tmp_cwd}/child/hello1.txt')
+    write_hello(f'{tmp_cwd}/child/hello2.txt')
+    write_script(tmp_cwd, '/hello1.txt')
+    res = runner.invoke(
+        cli.app,
+        [
+            'submit',
+            '--wait',
+            '--files',
+            'child/hello1.txt:/',
+            '--files',
+            'child/hello2.txt:/',
+            'test_job.py',
+        ],
+        catch_exceptions=False,
+    )
+    assert res.exit_code == 0, repr((res.output, res.stdout, res.stderr, res.exception))
+
+
+@pytest.mark.timeout(5 * 60)  # image pulling is very slow
+def test_dir_mount_in_child_dir_to_child_dir(runner: CliRunner, tmp_cwd):
+    write_hello(f'{tmp_cwd}/child/hello1.txt')
+    write_hello(f'{tmp_cwd}/child/hello2.txt')
+    write_script(tmp_cwd, '/child/hello1.txt')
+    res = runner.invoke(
+        cli.app,
+        ['submit', '--wait', '--files', 'child/:/child/', 'test_job.py'],
+        catch_exceptions=False,
+    )
+    assert res.exit_code == 0, repr((res.output, res.stdout, res.stderr, res.exception))
+
+
+@pytest.mark.timeout(5 * 60)  # image pulling is very slow
+def test_file_outside_curdir(runner: CliRunner, tmp_path):
+    with tmp_cwd(tmp_path / 'working'):
+        write_hello(f'{tmp_path}/hello.txt')
+        write_script(tmp_path, '/hello.txt')
+        res = runner.invoke(
+            cli.app,
+            ['submit', '--wait', '--files', f'{tmp_path}/hello.txt:/', '../test_job.py'],
+            catch_exceptions=False,
+        )
+        assert res.exit_code == 0, repr((res.output, res.stdout, res.stderr, res.exception))
+
+
+@pytest.mark.timeout(5 * 60)  # image pulling is very slow
+def test_dir_outside_curdir(runner: CliRunner, tmp_path):
+    with tmp_cwd(tmp_path / 'working'):
+        write_hello(f'{tmp_path}/hello1.txt')
+        write_hello(f'{tmp_path}/hello2.txt')
+        write_script(tmp_path, '/foo/hello1.txt')
+        res = runner.invoke(
+            cli.app,
+            ['submit', '--wait', '--files', f'{tmp_path}/:/foo/', '../test_job.py'],
+            catch_exceptions=False,
+        )
+        assert res.exit_code == 0, repr((res.output, res.stdout, res.stderr, res.exception))
+
+
+@pytest.mark.timeout(5 * 60)  # image pulling is very slow
+def test_mounting_dir_to_root_dir_fails(runner: CliRunner, tmp_path):
+    write_hello(f'{tmp_path}/hello1.txt')
+    write_hello(f'{tmp_path}/hello2.txt')
+
+    write_script(tmp_path, '/hello1.txt')
+
+    with pytest.raises(ValueError, match='cannot mount a directory to "/"'):
+        runner.invoke(
+            cli.app,
+            ['submit', '--wait', '--files', f'{tmp_path}/:/', '../test_job.py'],
+            catch_exceptions=False,
+        )
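
A short, self-contained sketch (not part of the patch) of the --files SRC[:DST] resolution rules documented in the new submit help text. `resolve_dst` is a hypothetical helper, and the paths below are made up; it only mirrors, under simplified assumptions, what generate_file_xfers does for a single regular file: a missing or relative DST is resolved against the job's working directory, and copying into a folder that is already known to exist keeps the source basename.

    from pathlib import PurePosixPath
    from typing import Optional, Set


    def resolve_dst(
        src: PurePosixPath,
        dst: Optional[PurePosixPath],
        remote_cwd: PurePosixPath,
        existing_dirs: Set[PurePosixPath],
    ) -> PurePosixPath:
        # No DST: the file lands in the job's working directory under its own name.
        if dst is None:
            dst = remote_cwd
        # A relative DST is interpreted against the job's working directory.
        if not dst.is_absolute():
            dst = remote_cwd / dst
        # Copying into a folder that already exists keeps the source file's basename.
        if dst in existing_dirs:
            dst = dst / src.name
        return dst


    cwd = PurePosixPath('/io/cwd')
    dirs = {PurePosixPath('/'), cwd}
    assert resolve_dst(PurePosixPath('/local/hello.txt'), None, cwd, dirs) == cwd / 'hello.txt'
    assert resolve_dst(PurePosixPath('/local/foo'), PurePosixPath('bar'), cwd, dirs) == cwd / 'bar'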