From 0e4516949f207661e9f7d4e3fc60d8ec176825a4 Mon Sep 17 00:00:00 2001 From: art-dsit <153507562+art-dsit@users.noreply.github.com> Date: Fri, 20 Sep 2024 16:45:22 +0100 Subject: [PATCH] eval_set does not support zero retries (#467) * add failing test * fix * naming --------- Co-authored-by: jjallaire --- src/inspect_ai/_eval/evalset.py | 2 +- tests/test_eval_set.py | 14 +++++++++++++- tests/test_helpers/utils.py | 26 ++++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 2 deletions(-) diff --git a/src/inspect_ai/_eval/evalset.py b/src/inspect_ai/_eval/evalset.py index a87a84f0b..29f7773eb 100644 --- a/src/inspect_ai/_eval/evalset.py +++ b/src/inspect_ai/_eval/evalset.py @@ -366,7 +366,7 @@ def task_to_failed_log(task: ResolvedTask) -> Log: retry=retry_if_not_result(all_evals_succeeded), retry_error_callback=return_last_value, reraise=True, - stop=stop_after_attempt(retry_attempts or 10), + stop=stop_after_attempt(10 if retry_attempts is None else retry_attempts), wait=wait_exponential(retry_wait or 30, max=(60 * 60)), before_sleep=before_sleep, before=before, diff --git a/tests/test_eval_set.py b/tests/test_eval_set.py index 6f74ac2f2..82b3195dc 100644 --- a/tests/test_eval_set.py +++ b/tests/test_eval_set.py @@ -3,7 +3,7 @@ from copy import deepcopy from pathlib import Path -from test_helpers.utils import failing_solver, failing_task +from test_helpers.utils import failing_solver, failing_task, failing_task_deterministic from inspect_ai import Task, task from inspect_ai._eval.evalset import ( @@ -219,3 +219,15 @@ def test_eval_set_s3(mock_s3) -> None: ) assert success assert logs[0].status == "success" + + +def test_eval_zero_retries() -> None: + with tempfile.TemporaryDirectory() as log_dir: + success, logs = eval_set( + tasks=failing_task_deterministic([True, False]), + log_dir=log_dir, + retry_attempts=0, + retry_wait=0.1, + model="mockllm/model", + ) + assert not success diff --git a/tests/test_helpers/utils.py b/tests/test_helpers/utils.py index 599e1018a..2865de768 100644 --- a/tests/test_helpers/utils.py +++ b/tests/test_helpers/utils.py @@ -4,6 +4,7 @@ import sys from pathlib import Path from random import random +from typing import Sequence import pytest @@ -166,6 +167,31 @@ def failing_task(rate=0.5, samples=1) -> Task: ) +@solver +def failing_solver_deterministic(should_fail: Sequence[bool]): + it = iter(should_fail) + + async def solve(state: TaskState, generate: Generate): + should_fail_this_time = it.__next__() + if should_fail_this_time: + raise ValueError("Eval failed!") + return state + + return solve + + +@task +def failing_task_deterministic(should_fail: Sequence[bool]) -> Task: + dataset: list[Sample] = [] + for _ in range(0, len(should_fail)): + dataset.append(Sample(input="Say hello", target="hello")) + return Task( + dataset=dataset, + plan=[failing_solver_deterministic(should_fail), generate()], + scorer=match(), + ) + + def ensure_test_package_installed(): try: import inspect_package # type: ignore # noqa: F401