
Commit

updated inference endpoint system
clefourrier committed Jul 25, 2024
1 parent fd9dc34 commit 4067ee4
Showing 4 changed files with 11 additions and 12 deletions.
src/lighteval/metrics/llm_as_judge.py (1 change: 1 addition & 0 deletions)
@@ -37,6 +37,7 @@ class JudgeEndpoint:
     Args:
         model (str): The name of the model to use.
+        url (str): Endpoint to go to (open ai or inference endpoint)
         seed (int): The seed value for generating random responses.
         temperature (float): The temperature value for controlling the randomness of the responses.
         templates_path (str): The path to the JSON file containing the templates for prompts.
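For context, a minimal sketch of how the documented arguments fit together; the constructor signature is not visible in this hunk, so the keyword names below are taken from the docstring and are an assumption:

    from lighteval.metrics.llm_as_judge import JudgeEndpoint

    # Sketch only: keyword names copied from the docstring above; the real signature may differ.
    judge = JudgeEndpoint(
        model="gpt-3.5-turbo",                 # name of the judge model
        url=None,                              # None -> OpenAI API; otherwise an inference endpoint URL
        seed=42,                               # seed for generating responses
        temperature=0.0,                       # randomness of the judge's responses
        templates_path="judge_prompts.jsonl",  # JSON file with the prompt templates
    )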
src/lighteval/metrics/metrics.py (10 changes: 5 additions & 5 deletions)
@@ -228,13 +228,13 @@ class Metrics(Enum):
         corpus_level_fn=np.mean,
         higher_is_better=True,
     )
-    llm_judge_multi_turn_openai = SampleLevelMetricGrouping(
+    llm_judge_multi_turn_gpt3p5 = SampleLevelMetricGrouping(
         metric_name=["single_turn", "multi_turn"],
         higher_is_better=True,
         category=MetricCategory.LLM_AS_JUDGE_MULTI_TURN,
         use_case=MetricUseCase.SUMMARIZATION,
         sample_level_fn=JudgeLLM(
-            judge_model_name_or_url="gpt-3.5-turbo",
+            judge_model_name="gpt-3.5-turbo",
             template_path=os.path.join(os.path.dirname(__file__), "judge_prompts.jsonl"),
             multi_turn=True,
         ).compute,
@@ -243,13 +243,13 @@ class Metrics(Enum):
             "multi_turn": np.mean,
         },
     )
-    llm_judge_multi_turn_local_endpoint = SampleLevelMetricGrouping(
+    llm_judge_multi_turn_llama3_405 = SampleLevelMetricGrouping(
         metric_name=["single_turn", "multi_turn"],
         higher_is_better=True,
         category=MetricCategory.LLM_AS_JUDGE_MULTI_TURN,
         use_case=MetricUseCase.SUMMARIZATION,
         sample_level_fn=JudgeLLM(
-            judge_model_name_or_url="http://localhost:3000/v1",  # replace with your endpoint url if needed
+            judge_model_name="meta-llama/Meta-Llama-3.1-405B-Instruct-FP8",
             template_path=os.path.join(os.path.dirname(__file__), "judge_prompts.jsonl"),
             multi_turn=True,
         ).compute,
@@ -264,7 +264,7 @@ class Metrics(Enum):
         category=MetricCategory.LLM_AS_JUDGE,
         use_case=MetricUseCase.SUMMARIZATION,
         sample_level_fn=JudgeLLM(
-            judge_model_name_or_url="gpt-3.5-turbo",
+            judge_model_name="gpt-3.5-turbo",
             template_path=os.path.join(os.path.dirname(__file__), "judge_prompts.jsonl"),
             multi_turn=False,
         ).compute,
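The net effect of these three hunks is that a judge is now selected by model name alone: llm_judge_multi_turn_gpt3p5 keeps the GPT-3.5 judge, while llm_judge_multi_turn_llama3_405 replaces the previous self-hosted endpoint with Llama 3.1 405B behind the Hugging Face inference API (see the metrics_sample.py hunk below for the routing). A small sketch of the credentials each backend reads, based on the environment variables used in that hunk:

    import os

    # Sketch only: which token each judge metric is expected to read.
    os.environ.setdefault("OPENAI_API_KEY", "sk-...")  # GPT-3.5 judge metrics
    os.environ.setdefault("HF_TOKEN", "hf_...")        # llm_judge_multi_turn_llama3_405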
src/lighteval/metrics/metrics_sample.py (10 changes: 4 additions & 6 deletions)
@@ -624,19 +624,17 @@ def edit_similarity(self, s1, s2):
 class JudgeLLM:
     available_models_openai = ["gpt-3.5-turbo", "gpt-4o", "gpt-4-turbo", "gpt-4"]
 
-    def __init__(self, judge_model_name_or_url: str, template_path: str, multi_turn: bool = False):
-        if judge_model_name_or_url in self.available_models_openai:
+    def __init__(self, judge_model_name: str, template_path: str, multi_turn: bool = False):
+        if judge_model_name in self.available_models_openai:
             API_KEY = os.getenv("OPENAI_API_KEY")
             url = None
-            model = judge_model_name_or_url
         else:
             API_KEY = os.getenv("HF_TOKEN")
-            url = judge_model_name_or_url
-            model = "tgi"
+            url = "https://api-inference.huggingface.co/v1/"
 
         self.multi_turn = multi_turn
         self.judge = JudgeEndpoint(
-            model=model,
+            model=judge_model_name,
             url=url,
             seed=42,
             temperature=0.0,
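A short usage sketch of the new routing, assuming JudgeLLM is imported from lighteval.metrics.metrics_sample and that the caller resolves the template path (metrics.py above builds it with os.path.join):

    from lighteval.metrics.metrics_sample import JudgeLLM

    # Known OpenAI model name -> url=None, authenticated with OPENAI_API_KEY.
    openai_judge = JudgeLLM(
        judge_model_name="gpt-3.5-turbo",
        template_path="judge_prompts.jsonl",  # placeholder path for this sketch
        multi_turn=True,
    )

    # Any other model name -> https://api-inference.huggingface.co/v1/, authenticated with HF_TOKEN.
    hf_judge = JudgeLLM(
        judge_model_name="meta-llama/Meta-Llama-3.1-405B-Instruct-FP8",
        template_path="judge_prompts.jsonl",
        multi_turn=True,
    )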
src/lighteval/tasks/extended/mt_bench/main.py (2 changes: 1 addition & 1 deletion)
@@ -55,7 +55,7 @@ def mt_bench_prompt(line, task_name: str = None):
     evaluation_splits=["train"],
     few_shots_split="",
     few_shots_select="random",
-    metric=["llm_judge_multi_turn_openai"],
+    metric=["llm_judge_multi_turn_gpt3p5", "llm_judge_multi_turn_llama3_405"],
     generation_size=1024,
     stop_sequence=[],
 )
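With both metric names listed, an MT-Bench run now scores every sample with the GPT-3.5 judge and the Llama 3.1 405B judge, so the two backends can be compared on the same generations; trimming the list back to a single entry should be enough if only one of the API keys is available.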
