Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Made judge response processing more robust. #491

Merged
merged 1 commit into from
Jan 20, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 22 additions & 7 deletions src/lighteval/tasks/extended/mix_eval/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import logging
import re

import numpy as np
Expand All @@ -37,6 +38,9 @@
from lighteval.tasks.requests import Doc


logger = logging.getLogger(__name__)


def mixeval_freeform_prompt(line, task_name: str = ""):
prompt = construct_prompt_freeform(line)
return Doc(
Expand Down Expand Up @@ -71,19 +75,30 @@ def mixeval_multichoice_prompt(line, task_name: str = ""):


def process_judge_response(x: str) -> int:
    """Extract the single-digit score from a flow-judge response.

    The score is expected inside ``<score>`` / ``</score>`` tags, as one digit
    with exactly one whitespace character on each side of it.

    Args:
        x: Raw text returned by the judge.

    Returns:
        The digit as an int, or 0 when no score tag is found or when the
        response cannot be searched at all (for example, it is not a string).
    """
    try:
        search = re.search(r"<score>\s(\d)\s</score>", x)
        return int(search.group(1)) if search else 0
    except Exception as e:
        # Deliberately best-effort: one malformed judge response should be
        # scored 0 and reported, not abort the whole evaluation.
        logger.warning("Error processing judge response for flow: %s", e)
        return 0


def process_judge_response_multichoice_gpt(x: str) -> int:
    """Extract the binary verdict from a GPT multichoice judge response.

    The verdict is expected as ``[[0]]`` or ``[[1]]`` anywhere in the text;
    the first occurrence wins.

    Args:
        x: Raw text returned by the judge.

    Returns:
        0 or 1 as found in the response, or 0 when no verdict is present or
        when the response cannot be searched at all (for example, it is not a
        string).
    """
    try:
        search = re.search(r"\[\[([01])\]\]", x)
        return int(search.group(1)) if search else 0
    except Exception as e:
        # Deliberately best-effort: one malformed judge response should be
        # scored 0 and reported, not abort the whole evaluation.
        logger.warning("Error processing judge response for multichoice GPT: %s", e)
        return 0


def process_judge_response_freeform_gpt(x: str) -> float:
    """Extract the decimal score from a GPT freeform judge response.

    The score is expected as ``[[d.d]]`` (one digit, a literal dot, one
    digit) anywhere in the text; the first occurrence wins.

    Args:
        x: Raw text returned by the judge.

    Returns:
        The score as a float, or 0.0 when no score is present or when the
        response cannot be searched at all (for example, it is not a string).
    """
    try:
        # The dot is escaped on purpose: an unescaped "." matches any
        # character, so "[[105]]" would be read as 105.0 and "[[1_5]]" as 15.0.
        search = re.search(r"\[\[(\d\.\d)\]\]", x)
        return float(search.group(1)) if search else 0.0
    except Exception as e:
        # Deliberately best-effort: one malformed judge response should be
        # scored 0 and reported, not abort the whole evaluation.
        logger.warning("Error processing judge response for freeform GPT: %s", e)
        return 0.0


llm_judge_mixeval_multichoice_flow_judge = SampleLevelMetricGrouping(
Expand Down
Loading