diff --git a/CHANGELOG.md b/CHANGELOG.md
index d2a119e7d..125b868d1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,7 @@
 - Add optional `content` parameter to `ModelOutput.for_tool_call()`.
 - Display total samples in Inspect View
 - Prune `sample_reductions` when returning eval logs with `header_only=True`.
+- For simple matching scorers, only include explanation if it differs from answer.
 
 ## v0.3.39 (3 October 2024)
 
diff --git a/src/inspect_ai/scorer/_common.py b/src/inspect_ai/scorer/_common.py
index 9de9021ec..a5d39a4d5 100644
--- a/src/inspect_ai/scorer/_common.py
+++ b/src/inspect_ai/scorer/_common.py
@@ -20,13 +20,17 @@ async def score(state: TaskState, target: Target) -> Score:
         for value in target:
             answer, matched = match(state.output.completion, value)
             if matched:
-                return Score(
-                    value=CORRECT, answer=answer, explanation=state.output.completion
+                explanation = (
+                    state.output.completion
+                    if state.output.completion != answer
+                    else None
                 )
+                return Score(value=CORRECT, answer=answer, explanation=explanation)
 
-        return Score(
-            value=INCORRECT, answer=answer, explanation=state.output.completion
+        explanation = (
+            state.output.completion if state.output.completion != answer else None
         )
+        return Score(value=INCORRECT, answer=answer, explanation=explanation)
 
     return score
 