From dd1d97855424a6200edee891966aca16e8c5883a Mon Sep 17 00:00:00 2001 From: Ada Bohm Date: Fri, 16 Aug 2024 18:32:47 +0200 Subject: [PATCH] Docs updated and small style improvements --- CHANGELOG.md | 4 + crates/hyperqueue/src/bin/hq.rs | 10 +- crates/hyperqueue/src/client/commands/job.rs | 3 +- crates/hyperqueue/src/client/output/cli.rs | 4 +- docs/jobs/jobs.md | 4 +- docs/jobs/openjobs.md | 98 ++++++++++++++++++++ mkdocs.yml | 1 + tests/test_job.py | 9 ++ 8 files changed, 124 insertions(+), 9 deletions(-) create mode 100644 docs/jobs/openjobs.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 19dc3d143..6c837aff1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Dev +## New features + +* Open jobs. You may dynamically submit into an existing job. See [Open jobs documentation](https://it4innovations.github.io/hyperqueue/stable/jobs/openjobs/) + ## Fixes * HQ should no longer crash while printing job info when a failed task does not have any workers diff --git a/crates/hyperqueue/src/bin/hq.rs b/crates/hyperqueue/src/bin/hq.rs index 849ad5d4b..f4516fede 100644 --- a/crates/hyperqueue/src/bin/hq.rs +++ b/crates/hyperqueue/src/bin/hq.rs @@ -85,17 +85,17 @@ async fn command_submit_job_file( async fn command_job_list(gsettings: &GlobalSettings, opts: JobListOpts) -> anyhow::Result<()> { let mut connection = get_client_session(gsettings.server_directory()).await?; - let filter = if opts.filter.is_empty() { + let (filter, show_open) = if opts.filter.is_empty() { if opts.all { - vec![] + (vec![], true) } else { - vec![Status::Waiting, Status::Running, Status::Opened] + (vec![Status::Waiting, Status::Running, Status::Opened], true) } } else { - opts.filter + (opts.filter, false) }; - output_job_list(gsettings, &mut connection, filter).await + output_job_list(gsettings, &mut connection, filter, show_open).await } async fn command_job_summary(gsettings: &GlobalSettings) -> anyhow::Result<()> { diff --git a/crates/hyperqueue/src/client/commands/job.rs 
b/crates/hyperqueue/src/client/commands/job.rs index 406b51098..e871d50c5 100644 --- a/crates/hyperqueue/src/client/commands/job.rs +++ b/crates/hyperqueue/src/client/commands/job.rs @@ -117,6 +117,7 @@ pub async fn output_job_list( gsettings: &GlobalSettings, session: &mut ClientSession, job_filters: Vec, + show_open: bool, ) -> anyhow::Result<()> { let message = FromClientMessage::JobInfo(JobInfoRequest { selector: IdSelector::All, @@ -128,7 +129,7 @@ pub async fn output_job_list( if !job_filters.is_empty() { response .jobs - .retain(|j| job_filters.contains(&job_status(j))); + .retain(|j| (show_open && j.is_open) || job_filters.contains(&job_status(j))); } response.jobs.sort_unstable_by_key(|j| j.id); gsettings diff --git a/crates/hyperqueue/src/client/output/cli.rs b/crates/hyperqueue/src/client/output/cli.rs index 25595de22..c8299e035 100644 --- a/crates/hyperqueue/src/client/output/cli.rs +++ b/crates/hyperqueue/src/client/output/cli.rs @@ -472,7 +472,7 @@ impl Output for CliOutput { self.print_horizontal_table(rows, header); if has_opened { - println!("* = Jobs with opened session") + println!("* = Open jobs") } if job_count != total_jobs { @@ -1004,7 +1004,7 @@ impl Output for CliOutput { fn print_job_open(&self, job_id: JobId) { println!( - "Job {} opened", + "Job {} is open.", job_id.to_string().color(colored::Color::Green), ); } diff --git a/docs/jobs/jobs.md b/docs/jobs/jobs.md index 786cd4440..27ea1e6ff 100644 --- a/docs/jobs/jobs.md +++ b/docs/jobs/jobs.md @@ -248,7 +248,9 @@ matches from the following list of rules: 2. If at least one task has not been `completed` yet, then job state is `Waiting`. 3. If at least one task is `Failed`, then job state is `Failed`. 4. If at least one task is `Canceled`, then job state is `Canceled`. -5. All tasks have to be `Finished`, therefore the job state will also be `Finished`. +5. If all tasks are finished and job is open (see [Open Jobs](openjobs.md)), then job state is `Opened`. +6. 
Remaining case: all tasks are `Finished` and job is closed, then job state is `Finished`. + ## Cancelling jobs You can prematurely terminate a submitted job that haven't been completed yet by *cancelling* it using the `hq job cancel` diff --git a/docs/jobs/openjobs.md b/docs/jobs/openjobs.md new file mode 100644 index 000000000..daac1da8a --- /dev/null +++ b/docs/jobs/openjobs.md @@ -0,0 +1,98 @@ +# Open jobs + +By default, a job is a set of tasks that are created atomically during a submit, and no other task can be added to the job. +We call this job *closed*. In contrast, HQ allows you to create an *open* job that allows new tasks to be submitted as long as it is open. + +## Opening a job + +A job can be opened by the following command: + +```commandline +$ hq job open +``` + +If opening was successful, the following message is printed: + +``` +Job <ID> is open. +``` + +If you want to get just the ID without any additional text, you can open a job as follows: + +```commandline +$ hq --output-mode=quiet job open +``` + +Note: In the list of jobs, an open job is marked with "*" before the ID. + +## Submitting tasks into open jobs + +A submit to an open job is the same as a normal submit, except that you must specify the job you are submitting to with the `--job` argument. You may submit multiple times into the same job. Tasks are scheduled to the workers immediately when they are received by the server. + +``` +$ hq submit --job <JOB_ID> ... other submit args ... +$ hq submit --job <JOB_ID> ... other submit args ... +$ hq submit --job <JOB_ID> ... other submit args ... +``` + +## Task Ids + +All tasks in one job share the same task ID space. When you do not specify task IDs, HQ automatically assigns the smallest ID that is bigger than any existing task ID. + +```commandline +$ hq job open +$ hq submit --job <JOB_ID> -- hostname # Task ID is 0 +$ hq submit --job <JOB_ID> -- hostname # Task ID is 1 + +# Task IDs are 2, 3, 4 ... 
+$ hq submit --job <JOB_ID> --each-line='test.txt' -- do-something +``` + +If you are explicitly specifying task IDs, it is an error if a task ID is reused: + +```commandline +$ hq submit --job <JOB_ID> -- hostname # Task ID is 0 + +# This is OK +$ hq submit --job <JOB_ID> --array 10-20 -- hostname + +# This fails: Task IDs 0, 10, 11, and 12 already exist +$ hq submit --job <JOB_ID> --array 0-12 -- hostname +``` + +## Job name and `--max-fails` + +The job's name and the configuration option `--max-fails` are properties of the job. They can be set when the job is opened and they cannot be changed later. The submit options `--name` and `--max-fails` are ignored if you are submitting into an open job. + +```commandline +# Configuring the job's name and max fails +$ hq job open --name=MyOpenJob --max-fails=10 + +# Option --max-fails is ignored here +$ hq submit --job <JOB_ID> --max-fails=5 ... +``` + +## Submit file into open job + +Submitting a job definition file into an open job works in a similar way as a normal submit; you just need to add the `--job` parameter. + +```commandline +$ hq job submit-file --job <JOB_ID> job-definition.toml +``` + +## Closing job + +You can close a job by calling: + +```commandline +$ hq job close <JOB_ID> +``` + +When a job is closed, you are not allowed to submit any more tasks to the job. +It has no effect on tasks already submitted to the job; they continue to be processed as usual. + +Closing an already closed job results in an error. + +Leaving jobs open has no overhead, but it does affect the semantics of job completion. +A job is considered completed when all tasks have been completed and the job is *closed*. +Therefore, `hq job wait ...` will wait until all tasks of the selected jobs are complete and the jobs are closed. 
diff --git a/mkdocs.yml b/mkdocs.yml index 19745866b..7511a763e 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -31,6 +31,7 @@ nav: - Output Streaming: jobs/streaming.md - Directives: jobs/directives.md - Job Definition File: jobs/jobfile.md + - Open jobs: jobs/openjobs.md - Multinode Tasks: jobs/multinode.md - CLI: - Shortcuts: cli/shortcuts.md diff --git a/tests/test_job.py b/tests/test_job.py index 50e640883..163daa211 100644 --- a/tests/test_job.py +++ b/tests/test_job.py @@ -1322,6 +1322,15 @@ def test_attach_to_open_job_array(hq_env: HqEnv): assert os.path.isfile(filename) == (task_id in ids) +def test_job_list_keep_open_jobs(hq_env: HqEnv): + hq_env.start_server() + hq_env.command(["job", "open"]) + hq_env.command(["job", "submit", "--job=1", "--", "hostname"]) + hq_env.command(["job", "cancel", "1"]) + table = hq_env.command(["job", "list"], as_table=True) + assert len(table) == 1 + + def test_attach_to_open_job_consecutive(hq_env: HqEnv): hq_env.start_server() hq_env.command(["job", "open"])