Skip to content

Commit

Permalink
eval_set does not support zero retries (#467)
Browse files Browse the repository at this point in the history
* add failing test

* fix

* naming

---------

Co-authored-by: jjallaire <[email protected]>
  • Loading branch information
art-dsit and jjallaire authored Sep 20, 2024
1 parent 081b29b commit 0e45169
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 2 deletions.
2 changes: 1 addition & 1 deletion src/inspect_ai/_eval/evalset.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,7 @@ def task_to_failed_log(task: ResolvedTask) -> Log:
retry=retry_if_not_result(all_evals_succeeded),
retry_error_callback=return_last_value,
reraise=True,
stop=stop_after_attempt(retry_attempts or 10),
stop=stop_after_attempt(10 if retry_attempts is None else retry_attempts),
wait=wait_exponential(retry_wait or 30, max=(60 * 60)),
before_sleep=before_sleep,
before=before,
Expand Down
14 changes: 13 additions & 1 deletion tests/test_eval_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from copy import deepcopy
from pathlib import Path

from test_helpers.utils import failing_solver, failing_task
from test_helpers.utils import failing_solver, failing_task, failing_task_deterministic

from inspect_ai import Task, task
from inspect_ai._eval.evalset import (
Expand Down Expand Up @@ -219,3 +219,15 @@ def test_eval_set_s3(mock_s3) -> None:
)
assert success
assert logs[0].status == "success"


def test_eval_zero_retries() -> None:
    # Regression test for #467: retry_attempts=0 must mean "no retries",
    # not "fall back to the default of 10". With a deterministic failure
    # scripted into the second sample, a single pass cannot succeed, so
    # the eval set must report failure rather than retrying.
    with tempfile.TemporaryDirectory() as log_dir:
        succeeded, _logs = eval_set(
            tasks=failing_task_deterministic([True, False]),
            log_dir=log_dir,
            retry_attempts=0,
            retry_wait=0.1,
            model="mockllm/model",
        )
        assert not succeeded
26 changes: 26 additions & 0 deletions tests/test_helpers/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import sys
from pathlib import Path
from random import random
from typing import Sequence

import pytest

Expand Down Expand Up @@ -166,6 +167,31 @@ def failing_task(rate=0.5, samples=1) -> Task:
)


@solver
def failing_solver_deterministic(should_fail: Sequence[bool]):
    """Solver whose failures follow a fixed boolean script.

    Each invocation of the returned solve function consumes the next
    entry of *should_fail*: a True entry raises ValueError, a False
    entry returns the state unchanged. Once the script is exhausted,
    the next call raises StopIteration (intentional: the test should
    never run more samples than were scripted).
    """
    it = iter(should_fail)

    async def solve(state: TaskState, generate: Generate):
        # next(it) is the idiomatic spelling of it.__next__()
        if next(it):
            raise ValueError("Eval failed!")
        return state

    return solve


@task
def failing_task_deterministic(should_fail: Sequence[bool]) -> Task:
    """Task with one sample per entry of *should_fail*.

    The samples are solved by failing_solver_deterministic(should_fail),
    so each sample's success or failure is scripted by the corresponding
    boolean in *should_fail*.
    """
    # one identical sample per scripted outcome — a comprehension over
    # the sequence itself, rather than an index loop with append
    dataset: list[Sample] = [
        Sample(input="Say hello", target="hello") for _ in should_fail
    ]
    return Task(
        dataset=dataset,
        plan=[failing_solver_deterministic(should_fail), generate()],
        scorer=match(),
    )


def ensure_test_package_installed():
try:
import inspect_package # type: ignore # noqa: F401
Expand Down

0 comments on commit 0e45169

Please sign in to comment.