added endpoint through openai lib
clefourrier committed Jul 25, 2024
1 parent 66ed7a2 commit fd9dc34
Showing 3 changed files with 43 additions and 20 deletions.
src/lighteval/metrics/llm_as_judge.py (19 changes: 11 additions & 8 deletions)
@@ -31,19 +31,20 @@
 from lighteval.utils import NO_OPENAI_ERROR_MSG, is_openai_available


-class JudgeOpenAI:
+class JudgeEndpoint:
     """
-    A class representing a judge for evaluating answers using the OpenAI API.
+    A class representing a judge for evaluating answers using the OpenAI API or the Inference Endpoints API.
     Args:
-        model (str): The name of the OpenAI model to use.
+        model (str): The name of the model to use.
         seed (int): The seed value for generating random responses.
         temperature (float): The temperature value for controlling the randomness of the responses.
         templates_path (str): The path to the JSON file containing the templates for prompts.
+        api_key (str): The API key to use to create/connect to the endpoint
     Attributes:
-        client: An instance of the OpenAI client.
-        model (str): The name of the OpenAI model.
+        client: An instance of the endpoint client.
+        model (str): The name of the endpoint model.
         seed (int): The seed value, passed to the API when generating responses.
         temperature (float): The temperature value, passed to the API when generating responses.
         templates (dict): A dictionary containing the templates for prompts.
@@ -63,15 +64,17 @@ class JudgeOpenAI:
     def __init__(
         self,
         model: str,
+        url: str,
         seed: int,
         temperature: float,
         templates_path: str,
-        openai_api_key: str,
+        api_key: str,
         multi_turn: bool = False,
     ):
         self.client = None  # loaded lazily
-        self.openai_api_key = openai_api_key
+        self.api_key = api_key
         self.model = model
+        self.url = url  # None for Open AI, value for Inference endpoint
         self.seed = seed
         self.temperature = temperature
         self.multi_turn = multi_turn
@@ -118,7 +121,7 @@ def evaluate_answer(

             from openai import OpenAI

-            self.client = OpenAI(api_key=self.openai_api_key)
+            self.client = OpenAI(base_url=self.url, api_key=self.api_key)

         prompts = [
             self.__get_prompts_single_turn(
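
The core of this change is that the openai client accepts a base_url, so the same judge code can talk either to the hosted OpenAI API or to any OpenAI-compatible server such as a TGI-backed Inference Endpoint. Below is a minimal sketch of that pattern, not part of the diff; the local URL, port, and environment variables are illustrative assumptions.

import os

from openai import OpenAI

# Hosted OpenAI: leave base_url unset and authenticate with an OpenAI key.
openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Self-hosted judge (e.g. TGI serving an OpenAI-compatible route): set base_url
# and authenticate with an HF token. The URL below is a placeholder.
endpoint_client = OpenAI(base_url="http://localhost:3000/v1", api_key=os.getenv("HF_TOKEN"))

response = endpoint_client.chat.completions.create(
    model="tgi",  # generic model name accepted by TGI's OpenAI-compatible API
    messages=[{"role": "user", "content": "Rate the following answer from 1 to 10: ..."}],
    temperature=0.0,
    seed=42,
)
print(response.choices[0].message.content)
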
src/lighteval/metrics/metrics.py (19 changes: 17 additions & 2 deletions)
@@ -234,7 +234,22 @@ class Metrics(Enum):
         category=MetricCategory.LLM_AS_JUDGE_MULTI_TURN,
         use_case=MetricUseCase.SUMMARIZATION,
         sample_level_fn=JudgeLLM(
-            judge_model_name="gpt-3.5-turbo",
+            judge_model_name_or_url="gpt-3.5-turbo",
             template_path=os.path.join(os.path.dirname(__file__), "judge_prompts.jsonl"),
             multi_turn=True,
         ).compute,
+        corpus_level_fn={
+            "single_turn": np.mean,
+            "multi_turn": np.mean,
+        },
+    )
+    llm_judge_multi_turn_local_endpoint = SampleLevelMetricGrouping(
+        metric_name=["single_turn", "multi_turn"],
+        higher_is_better=True,
+        category=MetricCategory.LLM_AS_JUDGE_MULTI_TURN,
+        use_case=MetricUseCase.SUMMARIZATION,
+        sample_level_fn=JudgeLLM(
+            judge_model_name_or_url="http://localhost:3000/v1",  # replace with your endpoint url if needed
+            template_path=os.path.join(os.path.dirname(__file__), "judge_prompts.jsonl"),
+            multi_turn=True,
+        ).compute,
@@ -249,7 +264,7 @@ class Metrics(Enum):
         category=MetricCategory.LLM_AS_JUDGE,
         use_case=MetricUseCase.SUMMARIZATION,
         sample_level_fn=JudgeLLM(
-            judge_model_name="gpt-3.5-turbo",
+            judge_model_name_or_url="gpt-3.5-turbo",
             template_path=os.path.join(os.path.dirname(__file__), "judge_prompts.jsonl"),
             multi_turn=False,
         ).compute,
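
The new llm_judge_multi_turn_local_endpoint metric is hard-wired to http://localhost:3000/v1. If the judge runs elsewhere, a custom task module could define an analogous metric following the same pattern; this is a hypothetical sketch, with the endpoint URL and template path as placeholders and import paths assumed to match this version of the repository.

import numpy as np

from lighteval.metrics.metrics_sample import JudgeLLM
from lighteval.metrics.utils import MetricCategory, MetricUseCase, SampleLevelMetricGrouping

# Hypothetical metric grouping pointing at a remote OpenAI-compatible endpoint.
llm_judge_multi_turn_remote_endpoint = SampleLevelMetricGrouping(
    metric_name=["single_turn", "multi_turn"],
    higher_is_better=True,
    category=MetricCategory.LLM_AS_JUDGE_MULTI_TURN,
    use_case=MetricUseCase.SUMMARIZATION,
    sample_level_fn=JudgeLLM(
        judge_model_name_or_url="https://my-judge.endpoints.huggingface.cloud/v1",  # placeholder URL
        template_path="src/lighteval/metrics/judge_prompts.jsonl",  # path to the judge prompt templates
        multi_turn=True,
    ).compute,
    corpus_level_fn={
        "single_turn": np.mean,
        "multi_turn": np.mean,
    },
)
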
src/lighteval/metrics/metrics_sample.py (25 changes: 15 additions & 10 deletions)
@@ -40,7 +40,7 @@
 from lighteval.metrics.imports.bert_scorer import BERTScorer
 from lighteval.metrics.imports.data_stats_metric import DataStatsMetric
 from lighteval.metrics.imports.summac import SummaCZS
-from lighteval.metrics.llm_as_judge import JudgeOpenAI
+from lighteval.metrics.llm_as_judge import JudgeEndpoint
 from lighteval.metrics.normalizations import remove_braces, remove_braces_and_strip
 from lighteval.tasks.requests import Doc
 from lighteval.utils import as_list
@@ -622,21 +622,26 @@ def edit_similarity(self, s1, s2):


 class JudgeLLM:
-    available_models = ["gpt-3.5-turbo", "gpt-4o", "gpt-4-turbo", "gpt-4"]
+    available_models_openai = ["gpt-3.5-turbo", "gpt-4o", "gpt-4-turbo", "gpt-4"]

-    def __init__(self, judge_model_name: str, template_path: str, multi_turn: bool = False):
-        if judge_model_name not in self.available_models:
-            raise ValueError(f"{judge_model_name} not in available models for llm as a judge metric")
+    def __init__(self, judge_model_name_or_url: str, template_path: str, multi_turn: bool = False):
+        if judge_model_name_or_url in self.available_models_openai:
+            API_KEY = os.getenv("OPENAI_API_KEY")
+            url = None
+            model = judge_model_name_or_url
+        else:
+            API_KEY = os.getenv("HF_TOKEN")
+            url = judge_model_name_or_url
+            model = "tgi"

-        OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
         self.multi_turn = multi_turn

-        self.judge = JudgeOpenAI(
-            model=judge_model_name,
+        self.judge = JudgeEndpoint(
+            model=model,
+            url=url,
             seed=42,
             temperature=0.0,
             templates_path=template_path,
-            openai_api_key=OPENAI_API_KEY,
+            api_key=API_KEY,
             multi_turn=multi_turn,
         )
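
Taken together, the constructor now dispatches on the value of judge_model_name_or_url: a name listed in available_models_openai selects the hosted OpenAI API (key taken from OPENAI_API_KEY), and any other string is treated as an OpenAI-compatible endpoint URL (model "tgi", key taken from HF_TOKEN). A usage sketch, with illustrative paths and URLs:

from lighteval.metrics.metrics_sample import JudgeLLM

# Known OpenAI model name -> url=None, authenticated with OPENAI_API_KEY.
openai_judge = JudgeLLM(
    judge_model_name_or_url="gpt-4o",
    template_path="src/lighteval/metrics/judge_prompts.jsonl",
    multi_turn=False,
)

# Any other string is treated as an endpoint URL -> model="tgi", authenticated with HF_TOKEN.
endpoint_judge = JudgeLLM(
    judge_model_name_or_url="http://localhost:3000/v1",
    template_path="src/lighteval/metrics/judge_prompts.jsonl",
    multi_turn=True,
)
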
