Skip to content

Commit

Permalink
correct resolution of global eval override of task or sample sandboxes (#904)
Browse files Browse the repository at this point in the history

* correct resolution of global eval override of task or sample sandboxes

* change param order
  • Loading branch information
jjallaire authored Nov 26, 2024
1 parent db263a3 commit 7ec902f
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 5 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
- Log viewer: metadata which contains images will now render the images.
- Log viewer: show custom tool call views in messages display.
- Bugfix: Correctly read and forward image detail property.
- Bugfix: Correct resolution of global eval override of task or sample sandboxes.

## v0.3.47 (18 November 2024)

Expand Down
2 changes: 2 additions & 0 deletions src/inspect_ai/_eval/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,7 @@ async def eval_async(
tasks=task_batch,
parallel=parallel,
eval_config=eval_config,
eval_sandbox=sandbox,
recorder=recorder,
model_args=model_args,
epochs_reducer=epochs_reducer,
Expand All @@ -407,6 +408,7 @@ async def eval_async(
tasks=resolved_tasks,
parallel=parallel,
eval_config=eval_config,
eval_sandbox=sandbox,
recorder=recorder,
model_args=model_args,
epochs_reducer=epochs_reducer,
Expand Down
17 changes: 13 additions & 4 deletions src/inspect_ai/_eval/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,13 @@
from inspect_ai.scorer._reducer import ScoreReducer, reducer_log_names
from inspect_ai.scorer._reducer.registry import validate_reducer
from inspect_ai.solver._solver import Solver, SolverSpec
from inspect_ai.util._sandbox.environment import TaskCleanup, TaskInit
from inspect_ai.util._sandbox.environment import (
SandboxEnvironmentSpec,
SandboxEnvironmentType,
TaskCleanup,
TaskInit,
resolve_sandbox_environment,
)
from inspect_ai.util._sandbox.registry import registry_find_sandboxenv

from .loader import (
Expand All @@ -44,6 +50,7 @@ async def eval_run(
tasks: list[ResolvedTask],
parallel: int,
eval_config: EvalConfig,
eval_sandbox: SandboxEnvironmentType | None,
recorder: Recorder,
model_args: dict[str, Any],
epochs_reducer: list[ScoreReducer] | None = None,
Expand All @@ -66,7 +73,7 @@ async def eval_run(
if has_sandbox:
cleanup = eval_config.sandbox_cleanup is not False
shutdown_sandbox_environments = await startup_sandbox_environments(
tasks, cleanup
resolve_sandbox_environment(eval_sandbox), tasks, cleanup
)

# resolve solver and solver spec
Expand Down Expand Up @@ -319,14 +326,16 @@ async def worker() -> None:


async def startup_sandbox_environments(
tasks: list[ResolvedTask], cleanup: bool
eval_sandbox: SandboxEnvironmentSpec | None,
tasks: list[ResolvedTask],
cleanup: bool,
) -> Callable[[], Awaitable[None]]:
# find unique sandboxenvs
sandboxenvs: Set[TaskSandboxEnvironment] = set()
for task in tasks:
# resolve each sample and add to sandboxenvs
for sample in task.task.dataset:
sandbox = resolve_sandbox_for_task(task.task, sample)
sandbox = resolve_sandbox_for_task(eval_sandbox, task.task, sample)
if sandbox is not None and sandbox not in sandboxenvs:
sandboxenvs.add(sandbox)

Expand Down
4 changes: 3 additions & 1 deletion src/inspect_ai/_eval/task/sandbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,10 +104,12 @@ class TaskSandboxEnvironment(NamedTuple):


def resolve_sandbox_for_task(
eval_sandbox: SandboxEnvironmentSpec | None,
task: Task,
sample: Sample,
) -> TaskSandboxEnvironment | None:
sandbox = resolve_sandbox(task.sandbox, sample)
# eval_sandbox overrides task or sample sandbox
sandbox = eval_sandbox or resolve_sandbox(task.sandbox, sample)
if sandbox is not None:
return TaskSandboxEnvironment(sandbox, task_run_dir(task))
else:
Expand Down

0 comments on commit 7ec902f

Please sign in to comment.