Skip to content

Commit

Permalink
[hailctl] batch submit fixes
Browse files Browse the repository at this point in the history
CHANGELOG: Fix many issues, including (hail#14274), with hailctl batch submit introduced in 0.2.127.
Fixes hail-is#14274, Replaces hail-is#14351 (authored by @jigold)
  • Loading branch information
ehigham committed Feb 14, 2025
1 parent f961f8f commit 460103d
Show file tree
Hide file tree
Showing 6 changed files with 506 additions and 122 deletions.
27 changes: 11 additions & 16 deletions build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1027,6 +1027,7 @@ steps:
export HAIL_DOCTEST_DATA_DIR=$(realpath ./data)
export HAIL_TEST_STORAGE_URI={{ global.test_storage_uri }}/{{ token }}
export PYSPARK_SUBMIT_ARGS="--driver-memory 6g pyspark-shell"
python3 -m pytest \
-Werror:::hail -Werror:::hailtop -Werror::ResourceWarning \
--log-cli-level=INFO \
Expand All @@ -1037,6 +1038,7 @@ steps:
--durations=50 \
--ignore=test/hailtop/batch/ \
--ignore=test/hailtop/inter_cloud \
--ignore=test/hailtop/hailctl/batch \
--timeout=120 \
test
inputs:
Expand Down Expand Up @@ -2995,7 +2997,8 @@ steps:
--instafail \
--durations=50 \
--timeout=360 \
/io/test/hailtop/batch/
/io/test/hailtop/batch/ /io/test/hailtop/hailctl/batch
inputs:
- from: /repo/hail/python/pytest.ini
to: /io/pytest.ini
Expand Down Expand Up @@ -3060,9 +3063,7 @@ steps:
BATCH_ID=$(hailctl batch submit simple_hail.py --name=test-hailctl-batch-submit --files=foo -o json | jq '.id')
STATUS=$(hailctl batch wait -o json $BATCH_ID)
STATE=$(echo $STATUS | jq -jr '.state')
if [ "$STATE" == "success" ]; then
exit 0;
else
if [ "$STATE" != "success" ]; then
echo $STATUS;
exit 1;
fi
Expand All @@ -3081,21 +3082,19 @@ steps:
BATCH_ID=$(hailctl batch submit --name=test-hailctl-batch-submit --files=foo -o json hail_with_args.py 100 | jq '.id')
STATUS=$(hailctl batch wait -o json $BATCH_ID)
STATE=$(echo $STATUS | jq -jr '.state')
if [ "$STATE" == "success" ]; then
exit 0;
else
if [ "$STATE" != "success" ]; then
echo $STATUS;
exit 1;
fi
cat >file.sh <<EOF
cat > file.sh << 'EOF'
set -ex
cat foo
cat foo/baz.txt
echo "Hello World!"
EOF
BATCH_ID=$(hailctl batch submit --name=test-hailctl-batch-submit --files=foo -o json file.sh | jq '.id')
BATCH_ID=$(hailctl batch submit --name=test-hailctl-batch-submit --files=foo -o json --image-name ubuntu:22.04 file.sh | jq '.id')
STATUS=$(hailctl batch wait -o json $BATCH_ID)
STATE=$(echo $STATUS | jq -jr '.state')
if [ "$STATE" == "success" ]; then
Expand All @@ -3114,12 +3113,10 @@ steps:
echo "Hello World! $1 $2"
EOF
BATCH_ID=$(hailctl batch submit --name=test-hailctl-batch-submit --files=foo -o json file-with-args.sh abc 123 | jq '.id')
BATCH_ID=$(hailctl batch submit --name=test-hailctl-batch-submit --files=foo -o json --image-name ubuntu:22.04 file-with-args.sh abc 123 | jq '.id')
STATUS=$(hailctl batch wait -o json $BATCH_ID)
STATE=$(echo $STATUS | jq -jr '.state')
if [ "$STATE" == "success" ]; then
exit 0;
else
if [ "$STATE" != "success" ]; then
echo $STATUS;
exit 1;
fi
Expand Down Expand Up @@ -3978,8 +3975,6 @@ steps:
dependsOn:
- ci_utils_image
- default_ns
scopes:
- deploy
- kind: runImage
name: test_gcp_ar_cleanup_policies
resources:
Expand Down
3 changes: 3 additions & 0 deletions hail/python/hailtop/aiotools/fs/fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,9 @@ def with_new_path_components(self, *parts: str) -> "AsyncFSURL":
def __str__(self) -> str:
pass

def __truediv__(self, part: str) -> 'AsyncFSURL':
    """Implement the ``/`` operator for URLs.

    ``url / part`` is shorthand for ``url.with_new_path_components(part)``;
    the result of that call is returned unchanged.
    """
    extended = self.with_new_path_components(part)
    return extended


class AsyncFS(abc.ABC):
FILE = "file"
Expand Down
119 changes: 103 additions & 16 deletions hail/python/hailtop/hailctl/batch/cli.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
import asyncio
import json
from enum import Enum
from typing import Annotated as Ann
from typing import Any, Dict, List, Optional, cast

import orjson
import typer
from typer import Argument as Arg
from typer import Option as Opt

from hailtop import __pip_version__

from . import billing, list_batches
from . import submit as _submit
from .batch_cli_utils import (
ExtendedOutputFormat,
ExtendedOutputFormatOption,
Expand Down Expand Up @@ -131,7 +132,7 @@ def wait(
quiet = quiet or output != StructuredFormatPlusText.TEXT
out = batch.wait(disable_progress_bar=quiet)
if output == StructuredFormatPlusText.JSON:
print(json.dumps(out))
print(orjson.dumps(out).decode('utf-8'))
else:
print(out)

Expand All @@ -155,31 +156,117 @@ def job(batch_id: int, job_id: int, output: StructuredFormatOption = StructuredF
print(f"Job with ID {job_id} on batch {batch_id} not found")


@app.command('init', help='Initialize a Hail Batch environment.')
def initialize(verbose: Ann[bool, Opt('--verbose', '-v', help='Print gcloud commands being executed')] = False):
    # CLI entry point for `init`: the actual work lives in the coroutine
    # `async_basic_initialize`, which receives the `verbose` flag as-is.
    init_coro = async_basic_initialize(verbose=verbose)
    # Run the coroutine to completion on a fresh event loop.
    asyncio.run(init_coro)


@app.command(context_settings={"allow_extra_args": True, "ignore_unknown_options": True})
def submit(
ctx: typer.Context,
script: str,
script: Ann[str, Arg(help='File to execute', show_default=False)],
name: Ann[str, Opt(help='The name of the batch.')] = '',
image: Ann[Optional[str], Opt(help='Name of Docker image for the job')] = f'hailgenetics/hail:{__pip_version__}',
arguments: Ann[
Optional[List[str]], Arg(help='You should use -- if you want to pass option-like arguments through.')
Optional[List[str]],
Arg(help='You should use -- if you want to pass option-like arguments through.'),
] = None,
files: Ann[
Optional[List[str]], Opt(help='Files or directories to add to the working directory of the job.')
Optional[List[str]],
Opt(help='Extra files or folders to add to the working directory of the job.'),
] = None,
name: Ann[str, Opt(help='The name of the batch.')] = '',
image_name: Ann[Optional[str], Opt(help='Name of Docker image for the job (default: hailgenetics/hail)')] = None,
output: StructuredFormatPlusTextOption = StructuredFormatPlusText.TEXT,
wait: Ann[bool, Opt(help='Wait for the batch to complete.')] = False,
):
"""Submit a batch with a single job that runs SCRIPT with the arguments ARGUMENTS.
"""Submit a batch with a single job that runs SCRIPT, optionally with ARGUMENTS.
If you wish to pass option-like arguments you should use "--". For example:
Use '--' to pass additional arguments and switches to SCRIPT:
$ hailctl batch submit [OPTIONS] SCRIPT [-- ARGUMENTS]
$ hailctl batch submit --image-name docker.io/image my_script.py -- some-argument --animal dog
"""
asyncio.run(_submit.submit(name, image_name, files or [], output, script, [*(arguments or []), *ctx.args]))
Specify a docker image to use for the job:
@app.command('init', help='Initialize a Hail Batch environment.')
def initialize(verbose: Ann[bool, Opt('--verbose', '-v', help='Print gcloud commands being executed')] = False):
    # CLI entry point for `init`: the actual work lives in the coroutine
    # `async_basic_initialize`, which receives the `verbose` flag as-is.
    init_coro = async_basic_initialize(verbose=verbose)
    # Run the coroutine to completion on a fresh event loop.
    asyncio.run(init_coro)
$ hailctl batch submit SCRIPT --image docker.io/image
Specify the name of the batch to submit:
$ hailctl batch submit SCRIPT --name my-batch
Add additional files to your job using the --files SRC[:DST] option as follows:
Copy the local working directory to the working directory of the job:
$ hailctl batch submit SCRIPT --files .
$ hailctl batch submit SCRIPT --files .:.
Copy a local file or folder into the working directory of the job:
$ hailctl batch submit SCRIPT --files a-file-or-folder
Copy a local file or folder `src` to a relative or absolute path on the worker:
$ hailctl batch submit SCRIPT --files src:dst
Copy a local file or folder to a specific absolute path on the worker:
$ hailctl batch submit SCRIPT --files $HOME/foo:/path/to/bar
Copy the result of globbing a local folder SRC with PATTERN into DST on the worker:
$ hailctl batch submit SCRIPT --files src/[pattern]:dst
Notes
-----
SCRIPTs ending in '.py' will be invoked with `python3`, or as an executable otherwise.
Relative DST paths are relative to the worker's working directory
If DST does not exist, SRC will be copied to DST, otherwise
If DST is a file, DST will be overwritten by SRC if SRC is a file, otherwise
If DST is a folder, SRC will be copied into DST as DST/$(basename SRC), otherwise
An error will be raised.
Environment variables are permitted in SRC paths only
Recursive glob patterns are not supported
"""
from .submit import HailctlBatchSubmitError # pylint: disable=import-outside-toplevel
from .submit import submit as _submit # pylint: disable=import-outside-toplevel

try:
asyncio.run(_submit(name, image, files or [], output, script, [*(arguments or []), *ctx.args], wait))
except HailctlBatchSubmitError as err:
print(err.message)
raise typer.Exit(err.exit_code)
Loading

0 comments on commit 460103d

Please sign in to comment.