From 189602bdaa48fcbae4f209c101cfb7d4d424ca3a Mon Sep 17 00:00:00 2001 From: jjallaire-aisi Date: Tue, 6 Aug 2024 08:00:18 -0400 Subject: [PATCH] improved metrics value_to_float string conversion (#196) Co-authored-by: aisi-inspect <166920645+aisi-inspect@users.noreply.github.com> --- CHANGELOG.md | 1 + src/inspect_ai/scorer/_metric.py | 31 ++++++++++++++++++++--------- tests/scorer/test_value_to_float.py | 31 +++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 9 deletions(-) create mode 100644 tests/scorer/test_value_to_float.py diff --git a/CHANGELOG.md b/CHANGELOG.md index fdc380779..a856f4711 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## Unreleased +- Improved metrics `value_to_float` string conversion (handle numbers, "true", "false", etc.) - Log viewer: Ctrl/Cmd+F to find text when running in VS Code. - Set Claude default `max_tokens` to 4096 diff --git a/src/inspect_ai/scorer/_metric.py b/src/inspect_ai/scorer/_metric.py index 856b9fb2f..99e6dd119 100644 --- a/src/inspect_ai/scorer/_metric.py +++ b/src/inspect_ai/scorer/_metric.py @@ -120,12 +120,16 @@ def value_to_float( ) -> ValueToFloat: """Create a ValueToFloat function. - Create a ValueToFloat function that maps string values of - the form "C", "I", "P", and "N" to 1, 0, 0.5, and 0 - (respectively). Note that those are the default literal - values, but they can be customized. Numeric values are - cast to float. Arrays and dictionaries give a warning - and return 0. + Create a ValueToFloat function that maps scalar values of + different types into floats. For strings, common boolean + representations (e.g. 'yes', 'no', 'true', 'false') are + mapped to 1 and 0. In addition, the specified correct, + incorrect, partial, and noanswer values (by default "C", + "I", "P", and "N") are mapped to 1, 0, 0.5, and 0 (respectively). Note that + those are the default literal values, but they can be + customized. Strings with only numbers are converted, and + numeric values are cast to float. 
Arrays and dictionaries + give a warning and return 0. Args: correct (Value): Value that represents a correct answer (1) @@ -146,9 +150,18 @@ def to_float(value: Value) -> float: return 0.5 elif value == incorrect or value == noanswer: return 0 - else: - logger.warning(f"Unable to convert value to float: {value}") - return 0 + elif isinstance(value, str): + value = value.lower() + if value in ["yes", "true"]: + return 1.0 + elif value in ["no", "false"]: + return 0.0 + elif value.replace(".", "").isnumeric(): + return float(value) + + # couldn't extract a value + logger.warning(f"Unable to convert value to float: {value}") + return 0.0 return to_float diff --git a/tests/scorer/test_value_to_float.py b/tests/scorer/test_value_to_float.py new file mode 100644 index 000000000..0644b2f28 --- /dev/null +++ b/tests/scorer/test_value_to_float.py @@ -0,0 +1,31 @@ +from inspect_ai.scorer import CORRECT, PARTIAL, value_to_float + + +def test_value_to_float_numbers(): + fn = value_to_float() + assert fn(1) == 1.0 + assert fn(0.5) == 0.5 + assert fn(True) == 1.0 + assert fn(False) == 0 + + +def test_value_to_float_strings(): + fn = value_to_float() + assert fn("1.0") == 1.0 + assert fn("0.5") == 0.5 + assert fn("0") == 0 + assert fn("yes") == 1.0 + assert fn("No") == 0.0 + assert fn(CORRECT) == 1.0 + assert fn(PARTIAL) == 0.5 + + +def test_value_to_float_custom(): + fn = value_to_float(correct="correct", incorrect="incorrect") + assert fn("correct") == 1.0 + assert fn("incorrect") == 0 + + +def test_value_to_float_invalid(): + fn = value_to_float() + assert fn("foo") == 0.0