Improve F1 scorer to accept an answer extraction function
dragonstyle committed Sep 4, 2024
1 parent 51676ab commit 09c4351
Showing 1 changed file with 11 additions and 4 deletions: src/inspect_ai/scorer/_classification.py
--- a/src/inspect_ai/scorer/_classification.py
+++ b/src/inspect_ai/scorer/_classification.py
@@ -1,6 +1,6 @@
 import re
 import string
-from typing import List
+from typing import Callable, List
 
 from inspect_ai.solver._task_state import TaskState
 
@@ -11,19 +11,26 @@
 
 
 @scorer(metrics=[mean(), stderr()])
-def f1() -> Scorer:
+def f1(
+    answer_fn: Callable[[str], str] | None = None,
+) -> Scorer:
     """Scorer which produces an F1 score
 
     Computes the `F1` score for the answer (which balances recall and precision by taking their harmonic mean).
     """
 
     async def score(state: TaskState, target: Target) -> Score:
         # Get generated answer and extract relevant answer text
        answer = state.output.completion
+        answer = (
+            answer_fn(state.output.completion) if answer_fn else state.output.completion
+        )
         targets = target.target
 
         f1_score = max_f1_score(answer, targets)
-        return Score(value=f1_score, answer=answer)
+        return Score(
+            value=f1_score,
+            answer=answer,
+        )
 
     return score
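The new `answer_fn` parameter lets callers strip surrounding text (for example chain-of-thought or an "ANSWER:" prefix) from the model completion before F1 is computed. A minimal usage sketch, assuming `f1` is exported from `inspect_ai.scorer`; the `extract_answer` helper and its "ANSWER:" convention are illustrative, not part of this commit:

```python
from inspect_ai.scorer import f1


def extract_answer(completion: str) -> str:
    # Hypothetical helper: keep only the text after an "ANSWER:" marker,
    # falling back to the full completion when no marker is present.
    _, _, answer = completion.partition("ANSWER:")
    return answer.strip() if answer else completion.strip()


# Score completions on F1 after extracting the final answer text.
scorer = f1(answer_fn=extract_answer)
```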

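For reference, the docstring's "harmonic mean of recall and precision" typically refers to token-overlap F1 in QA-style scoring. The sketch below illustrates that general computation; it is an assumption about what `max_f1_score` does per target, not code from this commit:

```python
from collections import Counter


def token_f1(answer: str, target: str) -> float:
    # Token-overlap F1: precision and recall over shared tokens,
    # combined via their harmonic mean.
    answer_tokens = answer.lower().split()
    target_tokens = target.lower().split()
    if not answer_tokens or not target_tokens:
        return float(answer_tokens == target_tokens)
    common = sum((Counter(answer_tokens) & Counter(target_tokens)).values())
    if common == 0:
        return 0.0
    precision = common / len(answer_tokens)
    recall = common / len(target_tokens)
    return 2 * precision * recall / (precision + recall)


# max_f1_score(answer, targets) would then plausibly take the best score
# across targets: max(token_f1(answer, t) for t in targets)
```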
