Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial Job Sizing Infrastructure #488

Merged
merged 19 commits into from
Jan 30, 2025
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add JobSize.size_from_jobs_sims_blocks
Added a class method to `gempyor.batch.JobSize` class that is slightly
more convenient to use from a CLI script. Provides a good home for
future code related to GH-360. Corresponding tests and docs.
TimothyWillard committed Jan 27, 2025
commit 82544b3f0cdf58e66be16102f840f47d8e4049a3
85 changes: 85 additions & 0 deletions flepimop/gempyor_pkg/src/gempyor/batch.py
Original file line number Diff line number Diff line change
@@ -9,6 +9,8 @@


from dataclasses import dataclass
import math
from typing import Literal


@dataclass(frozen=True, slots=True)
@@ -38,3 +40,86 @@ def __post_init__(self) -> None:
f"but instead was given '{val}'."
)
)

@classmethod
def size_from_jobs_sims_blocks(
cls,
jobs: int | None,
simulations: int | None,
blocks: int | None,
iterations_per_slot: int | None,
slots: int | None,
subpops: int | None,
batch_system: Literal["aws", "local", "slurm"],
) -> "JobSize":
"""
Infer a job size from several explicit and implicit parameters.
Args:
jobs: An explicit number of jobs.
simulations: An explicit number of simulations per a block.
blocks: An explicit number of blocks per a job.
iterations_per_slot: A total number of iterations per a job, which is
simulations times blocks. Required if `simulations` or `blocks` is
not given.
slots: An implicit number of slots to use for the job. Required if `jobs`
is not given.
subpops: The number of subpopulations being considered in this job. Affects
the inferred simulations per a job on AWS. Required if `simulations`
and `blocks` are not given.
batch_size: The system the job is being sized for. Affects the inferred
simulations per a job.
Returns:
A job size instance with either the explicit or inferred job sizing.
Examples:
>>> JobSize.size_from_jobs_sims_blocks(1, 2, 3, None, None, None, "local")
JobSize(jobs=1, simulations=2, blocks=3)
>>> JobSize.size_from_jobs_sims_blocks(
... None, None, None, 100, 10, 25, "local"
... )
JobSize(jobs=10, simulations=100, blocks=1)
>>> JobSize.size_from_jobs_sims_blocks(None, None, 4, 100, 10, 25, "local")
JobSize(jobs=10, simulations=25, blocks=4)
Raises:
ValueError: If `iterations_per_slot` is `None` and either `simulations` or
`blocks` is `None`.
ValueError: If `jobs` and `slots` are both `None`.
ValueError: If `simulations`, `blocks`, and `subpops` are all `None`.
"""
if iterations_per_slot is None and (simulations is None or blocks is None):
raise ValueError(
(
"If simulations and blocks are not all explicitly "
"provided then an iterations per slot must be given."
)
)

jobs = slots if jobs is None else jobs
if jobs is None:
raise ValueError(
"If jobs is not explicitly provided, it must be given via slots."
)

if simulations is None:
if blocks is None:
if subpops is None:
raise ValueError(
(
"If simulations and blocks are not explicitly "
"provided, then a subpops must be given."
)
)
if batch_system == "aws":
simulations = 5 * math.ceil(max(60 - math.sqrt(subpops), 10) / 5)
else:
simulations = iterations_per_slot
else:
simulations = math.ceil(iterations_per_slot / blocks)

if blocks is None:
blocks = math.ceil(iterations_per_slot / simulations)

return cls(jobs=jobs, simulations=simulations, blocks=blocks)
78 changes: 77 additions & 1 deletion flepimop/gempyor_pkg/tests/batch/test_job_size_class.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from typing import Literal
from itertools import product
from typing import Generator, Literal

import pytest

@@ -29,3 +30,78 @@ def test_less_than_one_value_error(
),
):
JobSize(**kwargs)


@pytest.mark.parametrize(("simulations", "blocks"), [(None, None), (1, None), (None, 1)])
def test_size_from_jobs_sims_blocks_iteration_value_error(
    simulations: int | None, blocks: int | None
) -> None:
    """A missing iterations per slot is rejected unless both counts are explicit."""
    expected_message = (
        "^If simulations and blocks are not all explicitly "
        "provided then an iterations per slot must be given.$"
    )
    with pytest.raises(ValueError, match=expected_message):
        JobSize.size_from_jobs_sims_blocks(1, simulations, blocks, None, 1, 1, "aws")


def test_size_from_jobs_sims_blocks_slots_value_error() -> None:
    """Leaving both jobs and slots unset is rejected with a `ValueError`."""
    expected_message = (
        "^If jobs is not explicitly provided, it must be given via slots.$"
    )
    with pytest.raises(ValueError, match=expected_message):
        JobSize.size_from_jobs_sims_blocks(None, 1, 1, 1, None, 1, "aws")


def test_size_from_jobs_sims_blocks_subpops_value_error() -> None:
    """Leaving simulations, blocks and subpops all unset is rejected."""
    expected_message = (
        "^If simulations and blocks are not explicitly "
        "provided, then a subpops must be given.$"
    )
    with pytest.raises(ValueError, match=expected_message):
        JobSize.size_from_jobs_sims_blocks(1, None, None, 1, 1, None, "aws")


def generate_size_from_jobs_sims_blocks(
*args: int | None,
) -> Generator[tuple[int | None, ...], None, None]:
for combo in product(args, repeat=6):
jobs, simulations, blocks, iterations_per_slot, slots, subpops = combo
if iterations_per_slot is None and (simulations is None or blocks is None):
continue
elif jobs is None and slots is None:
continue
elif simulations is None and blocks is None and subpops is None:
continue
yield combo


@pytest.mark.parametrize("combo", generate_size_from_jobs_sims_blocks(None, 1, 10))
def test_size_from_jobs_sims_blocks_output(combo: tuple[int | None, ...]) -> None:
    """
    Check the sizing produced for one valid combination on every batch system.

    Explicit values must be passed through unchanged and inferred values must
    be positive integers. Local and slurm sizing must match exactly, and the
    job count must agree between local and AWS.
    """
    jobs, simulations, blocks, iterations_per_slot, slots, subpops = combo
    explicit_values = {"jobs": jobs, "simulations": simulations, "blocks": blocks}
    sized = {}
    for system in ("aws", "local", "slurm"):
        result = JobSize.size_from_jobs_sims_blocks(
            jobs, simulations, blocks, iterations_per_slot, slots, subpops, system
        )
        for field_name, given in explicit_values.items():
            actual = getattr(result, field_name)
            if given is not None:
                assert actual == given
            else:
                assert isinstance(actual, int) and actual > 0
        sized[system] = result
    assert sized["local"] == sized["slurm"]
    assert sized["local"].jobs == sized["aws"].jobs