Skip to content

Commit

Permalink
[hailctl] batch submit fixes
Browse files Browse the repository at this point in the history
CHANGELOG: Fix many issues with `hailctl batch submit` introduced in 0.2.127, including (hail#14274).
Fixes #14274. Replaces #14351 (authored by @jigold).
  • Loading branch information
ehigham committed Feb 7, 2025
1 parent f961f8f commit a42df02
Show file tree
Hide file tree
Showing 6 changed files with 444 additions and 116 deletions.
27 changes: 11 additions & 16 deletions build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1027,6 +1027,7 @@ steps:
export HAIL_DOCTEST_DATA_DIR=$(realpath ./data)
export HAIL_TEST_STORAGE_URI={{ global.test_storage_uri }}/{{ token }}
export PYSPARK_SUBMIT_ARGS="--driver-memory 6g pyspark-shell"
python3 -m pytest \
-Werror:::hail -Werror:::hailtop -Werror::ResourceWarning \
--log-cli-level=INFO \
Expand All @@ -1037,6 +1038,7 @@ steps:
--durations=50 \
--ignore=test/hailtop/batch/ \
--ignore=test/hailtop/inter_cloud \
--ignore=test/hailtop/hailctl/batch \
--timeout=120 \
test
inputs:
Expand Down Expand Up @@ -2995,7 +2997,8 @@ steps:
--instafail \
--durations=50 \
--timeout=360 \
/io/test/hailtop/batch/
/io/test/hailtop/batch/ /io/test/hailtop/hailctl/batch
inputs:
- from: /repo/hail/python/pytest.ini
to: /io/pytest.ini
Expand Down Expand Up @@ -3060,9 +3063,7 @@ steps:
BATCH_ID=$(hailctl batch submit simple_hail.py --name=test-hailctl-batch-submit --files=foo -o json | jq '.id')
STATUS=$(hailctl batch wait -o json $BATCH_ID)
STATE=$(echo $STATUS | jq -jr '.state')
if [ "$STATE" == "success" ]; then
exit 0;
else
if [ "$STATE" != "success" ]; then
echo $STATUS;
exit 1;
fi
Expand All @@ -3081,21 +3082,19 @@ steps:
BATCH_ID=$(hailctl batch submit --name=test-hailctl-batch-submit --files=foo -o json hail_with_args.py 100 | jq '.id')
STATUS=$(hailctl batch wait -o json $BATCH_ID)
STATE=$(echo $STATUS | jq -jr '.state')
if [ "$STATE" == "success" ]; then
exit 0;
else
if [ "$STATE" != "success" ]; then
echo $STATUS;
exit 1;
fi
cat >file.sh <<EOF
cat > file.sh << 'EOF'
set -ex
cat foo
cat foo/baz.txt
echo "Hello World!"
EOF
BATCH_ID=$(hailctl batch submit --name=test-hailctl-batch-submit --files=foo -o json file.sh | jq '.id')
BATCH_ID=$(hailctl batch submit --name=test-hailctl-batch-submit --files=foo -o json --image-name ubuntu:22.04 file.sh | jq '.id')
STATUS=$(hailctl batch wait -o json $BATCH_ID)
STATE=$(echo $STATUS | jq -jr '.state')
if [ "$STATE" == "success" ]; then
Expand All @@ -3114,12 +3113,10 @@ steps:
echo "Hello World! $1 $2"
EOF
BATCH_ID=$(hailctl batch submit --name=test-hailctl-batch-submit --files=foo -o json file-with-args.sh abc 123 | jq '.id')
BATCH_ID=$(hailctl batch submit --name=test-hailctl-batch-submit --files=foo -o json --image-name ubuntu:22.04 file-with-args.sh abc 123 | jq '.id')
STATUS=$(hailctl batch wait -o json $BATCH_ID)
STATE=$(echo $STATUS | jq -jr '.state')
if [ "$STATE" == "success" ]; then
exit 0;
else
if [ "$STATE" != "success" ]; then
echo $STATUS;
exit 1;
fi
Expand Down Expand Up @@ -3978,8 +3975,6 @@ steps:
dependsOn:
- ci_utils_image
- default_ns
scopes:
- deploy
- kind: runImage
name: test_gcp_ar_cleanup_policies
resources:
Expand Down
3 changes: 3 additions & 0 deletions hail/python/hailtop/aiotools/fs/fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,9 @@ def with_new_path_components(self, *parts: str) -> "AsyncFSURL":
def __str__(self) -> str:
pass

def __truediv__(self, part: str) -> 'AsyncFSURL':
return self.with_new_path_components(part)


class AsyncFS(abc.ABC):
FILE = "file"
Expand Down
54 changes: 44 additions & 10 deletions hail/python/hailtop/hailctl/batch/cli.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
import asyncio
import json
from enum import Enum
from typing import Annotated as Ann
from typing import Any, Dict, List, Optional, cast

import orjson
import typer
from typer import Argument as Arg
from typer import Option as Opt

from . import billing, list_batches
from . import submit as _submit
from .batch_cli_utils import (
ExtendedOutputFormat,
ExtendedOutputFormatOption,
Expand Down Expand Up @@ -131,7 +130,7 @@ def wait(
quiet = quiet or output != StructuredFormatPlusText.TEXT
out = batch.wait(disable_progress_bar=quiet)
if output == StructuredFormatPlusText.JSON:
print(json.dumps(out))
print(orjson.dumps(out).decode('utf-8'))
else:
print(out)

Expand All @@ -158,26 +157,61 @@ def job(batch_id: int, job_id: int, output: StructuredFormatOption = StructuredF
@app.command(context_settings={"allow_extra_args": True, "ignore_unknown_options": True})
def submit(
ctx: typer.Context,
script: str,
script: Ann[str, Arg(help='File to execute')],
arguments: Ann[
Optional[List[str]], Arg(help='You should use -- if you want to pass option-like arguments through.')
Optional[List[str]],
Arg(help='You should use -- if you want to pass option-like arguments through.'),
] = None,
files: Ann[
Optional[List[str]], Opt(help='Files or directories to add to the working directory of the job.')
Optional[List[str]],
Opt(help='Extra files or folders to add to the working directory of the job.'),
] = None,
name: Ann[str, Opt(help='The name of the batch.')] = '',
image_name: Ann[Optional[str], Opt(help='Name of Docker image for the job (default: hailgenetics/hail)')] = None,
output: StructuredFormatPlusTextOption = StructuredFormatPlusText.TEXT,
wait: Ann[bool, Opt(help='Wait for the batch to complete.')] = False,
):
"""Submit a batch with a single job that runs SCRIPT with the arguments ARGUMENTS.
"""Submit a batch with a single job that runs SCRIPT, optionally with ARGUMENTS.
If you wish to pass option-like arguments you should use "--". For example:
Use '--' to pass additional arguments and switches to SCRIPT:
$ hailctl batch submit SCRIPT -- ARGUMENT --option VALUE
Specify a docker image to use for the job:
$ hailctl batch submit --image-name docker.io/image SCRIPT
Add additional files to your job using the --files SRC[:DST] option as follows:
$ hailctl batch submit --image-name docker.io/image my_script.py -- some-argument --animal dog
Copy the local working directory to the working directory of the job:
$ hailctl batch submit --files .
$ hailctl batch submit --files .:.
Copy a local file or folder into the working directory of the job:
$ hailctl batch submit SCRIPT --files a-file-or-folder
Copy a local file or folder to a path relative to the working directory of the job:
$ hailctl batch submit SCRIPT --files foo:bar
Copy a local file or folder into a specific absolute path on the worker:
$ hailctl batch submit SCRIPT --files $HOME/foo:/path/to/bar
Notes
-----
1. SCRIPTs ending in '.py' will be invoked with `python3` and `bash` otherwise.
1. File options are applied left-to-right.
2. If DST does not exist, SRC will be copied to DST, otherwise
if DST is a file, DST will be overwritten by SRC if SRC is a file, otherwise
if DST is a folder, SRC will be copied into DST as DST/$(basename SRC), otherwise
an error will be raised.
3. Only SRC paths may contain environment variables
"""
asyncio.run(_submit.submit(name, image_name, files or [], output, script, [*(arguments or []), *ctx.args]))
from .submit import HailctlBatchSubmitError # pylint: disable=import-outside-toplevel
from .submit import submit as _submit # pylint: disable=import-outside-toplevel

try:
asyncio.run(_submit(name, image_name, files or [], output, script, [*(arguments or []), *ctx.args], wait))
except HailctlBatchSubmitError as err:
print(err.message)
raise typer.Exit(err.exit_code)


@app.command('init', help='Initialize a Hail Batch environment.')
Expand Down
Loading

0 comments on commit a42df02

Please sign in to comment.