diff --git a/CHANGELOG.md b/CHANGELOG.md index d46e62535..94b6c4937 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ * Enable passing of empty `stdout`/`stderr` to Python function tasks in the Python API (https://github.com/It4innovations/hyperqueue/issues/691). +* `hq alloc add --name <name>` will now correctly use the passed `<name>` to name allocations submitted to Slurm/PBS. # v0.18.0 diff --git a/crates/hyperqueue/src/server/autoalloc/queue/common.rs b/crates/hyperqueue/src/server/autoalloc/queue/common.rs index 521668871..6b5bc02a7 100644 --- a/crates/hyperqueue/src/server/autoalloc/queue/common.rs +++ b/crates/hyperqueue/src/server/autoalloc/queue/common.rs @@ -63,6 +63,14 @@ pub fn create_allocation_dir( Ok(dir) } +/// Creates a name for an external allocation, based on the allocation counter +/// and an optional name prefix. +pub fn format_allocation_name(name: Option<String>, queue_id: u32, allocation_id: u64) -> String { + let mut name = name.unwrap_or_else(|| format!("hq-{queue_id}")); + name.push_str(&format!("-{allocation_id}")); + name +} + /// Submits a script into PBS/Slurm and creates debug information in the given allocation `directory`. 
pub async fn submit_script( script: String, diff --git a/crates/hyperqueue/src/server/autoalloc/queue/pbs.rs b/crates/hyperqueue/src/server/autoalloc/queue/pbs.rs index cc6732fca..66422a5dd 100644 --- a/crates/hyperqueue/src/server/autoalloc/queue/pbs.rs +++ b/crates/hyperqueue/src/server/autoalloc/queue/pbs.rs @@ -11,8 +11,8 @@ use crate::common::manager::info::ManagerType; use crate::common::manager::pbs::{format_pbs_duration, parse_pbs_datetime}; use crate::common::utils::time::local_to_system_time; use crate::server::autoalloc::queue::common::{ - build_worker_args, check_command_output, create_allocation_dir, create_command, submit_script, - wrap_worker_cmd, ExternalHandler, + build_worker_args, check_command_output, create_allocation_dir, create_command, + format_allocation_name, submit_script, wrap_worker_cmd, ExternalHandler, }; use crate::server::autoalloc::queue::{ AllocationExternalStatus, AllocationStatusMap, AllocationSubmissionResult, QueueHandler, @@ -64,7 +64,7 @@ impl QueueHandler for PbsHandler { let script = build_pbs_submit_script( worker_count, timelimit, - &format!("hq-alloc-{queue_id}"), + &format_allocation_name(name, queue_id, allocation_num), &directory.join("stdout").display().to_string(), &directory.join("stderr").display().to_string(), &queue_info.additional_args.join(" "), diff --git a/crates/hyperqueue/src/server/autoalloc/queue/slurm.rs b/crates/hyperqueue/src/server/autoalloc/queue/slurm.rs index 4aa670254..4fc664582 100644 --- a/crates/hyperqueue/src/server/autoalloc/queue/slurm.rs +++ b/crates/hyperqueue/src/server/autoalloc/queue/slurm.rs @@ -14,8 +14,8 @@ use crate::common::manager::slurm::{ }; use crate::common::utils::time::local_to_system_time; use crate::server::autoalloc::queue::common::{ - build_worker_args, create_allocation_dir, create_command, submit_script, wrap_worker_cmd, - ExternalHandler, + build_worker_args, create_allocation_dir, create_command, format_allocation_name, + submit_script, wrap_worker_cmd, 
ExternalHandler, }; use crate::server::autoalloc::queue::{ common, AllocationExternalStatus, AllocationStatusMap, AllocationSubmissionResult, @@ -67,7 +67,7 @@ impl QueueHandler for SlurmHandler { let script = build_slurm_submit_script( worker_count, timelimit, - &format!("hq-alloc-{queue_id}"), + &format_allocation_name(name, queue_id, allocation_num), &working_dir.join("stdout").display().to_string(), &working_dir.join("stderr").display().to_string(), &queue_info.additional_args.join(" "), diff --git a/tests/autoalloc/test_autoalloc.py b/tests/autoalloc/test_autoalloc.py index 485942849..f3df5ed89 100644 --- a/tests/autoalloc/test_autoalloc.py +++ b/tests/autoalloc/test_autoalloc.py @@ -221,7 +221,7 @@ def test_pbs_queue_qsub_args(hq_env: HqEnv): pbs_args = extract_script_args(data, "#PBS") assert pbs_args == [ "-l select=1", - "-N hq-alloc-1", + "-N hq-1-1", f"-o {join(dirname(qsub_script_path), 'stdout')}", f"-e {join(dirname(qsub_script_path), 'stderr')}", "-l walltime=00:03:00", @@ -246,10 +246,10 @@ def test_slurm_queue_sbatch_args(hq_env: HqEnv): sbatch_script_path = queue.get() with open(sbatch_script_path) as f: data = f.read() - pbs_args = extract_script_args(data, "#SBATCH") - assert pbs_args == [ + slurm_args = extract_script_args(data, "#SBATCH") + assert slurm_args == [ "--nodes=1", - "--job-name=hq-alloc-1", + "--job-name=hq-1-1", f"--output={join(dirname(sbatch_script_path), 'stdout')}", f"--error={join(dirname(sbatch_script_path), 'stderr')}", "--time=00:03:00", @@ -908,6 +908,29 @@ def test_external_slurm_submit_multiple_workers(cluster_hq_env: HqEnv, slurm_cre wait_for_job_state(cluster_hq_env, 1, "FINISHED") +def test_slurm_allocation_name(hq_env: HqEnv): + queue = ManagerQueue() + handler = ExtractSubmitScriptPath(queue, SlurmManager()) + + def check_name(path: str, name: str): + with open(path) as f: + data = f.read() + slurm_args = extract_script_args(data, "#SBATCH") + for arg in slurm_args: + if "--job-name=" in arg: + assert 
arg[len("--job-name=") :] == name + return + raise Exception(f"Slurm name {name} not found in {path}") + + with MockJobManager(hq_env, adapt_slurm(handler)): + hq_env.start_server() + prepare_tasks(hq_env) + + add_queue(hq_env, manager="slurm", name="foo", backlog=2) + check_name(queue.get(), "foo-1") + check_name(queue.get(), "foo-2") + + def wait_for_alloc(hq_env: HqEnv, state: str, allocation_id: str, timeout=DEFAULT_TIMEOUT): """ Wait until an allocation has the given `state`. diff --git a/tests/autoalloc/utils.py b/tests/autoalloc/utils.py index e70cbc6a3..2b1b9e860 100644 --- a/tests/autoalloc/utils.py +++ b/tests/autoalloc/utils.py @@ -25,7 +25,7 @@ def extract_script_commands(script: str) -> str: def add_queue( hq_env: HqEnv, manager: ManagerType, - name: Optional[str] = "foo", + name: Optional[str] = None, backlog=1, workers_per_alloc=1, additional_worker_args: List[str] = None,