Commit

fixed evaluate() in judge.py to handle None
Jonathan committed May 30, 2024
1 parent 667c712 commit a9f2492
Showing 2 changed files with 13 additions and 10 deletions.
21 changes: 12 additions & 9 deletions llm_eval/judge.py
@@ -21,15 +21,16 @@ def extract_output_by_name(self, name, value):
else:
return value


def evaluate(self):
client = OpenAI(
organization=self.config['judge']['organization'],
project=self.config['judge']['project'],
api_key=self.config['judge']['api_key']
organization=self.config['judge']['organization'],
project=self.config['judge']['project'],
api_key=self.config['judge']['api_key']
)

for index, row in self.dataset.iterrows():
# flip a coin to determine which model is participant 1 or 2
# Flip a coin to determine which model is participant 1 or 2
flip = random.randint(0, 1)
if flip == 0:
comp1_model = row['model1']
@@ -43,7 +44,7 @@ def evaluate(self):
comp2_value = row['comp1.value']

entries = f"""Ok here is the instruction that we provided to both participants:
Welcome to our customer service analysis tool. You will be provided with transcripts of conversations between customers and service agents. Your task is to follow the instruction and output a response from each conversation. Focus on provided concise outputs that could be useful for follow-up actions and ensure that your outputs are directly relevant to the discussed topics. This prompt is meant to ensure that you understand the essence of the customer's concerns and can articulate it succinctly in a structured format that is easy for both human and machine processing. Continue with this approach for the upcoming conversations.
Welcome to our customer service analysis tool. You will be provided with transcripts of conversations between customers and service agents. Your task is to follow the instruction and output a response from each conversation. Focus on providing concise outputs that could be useful for follow-up actions and ensure that your outputs are directly relevant to the discussed topics. This prompt is meant to ensure that you understand the essence of the customer's concerns and can articulate it succinctly in a structured format that is easy for both human and machine processing. Continue with this approach for the upcoming conversations.
{row['instruction']}
@@ -57,7 +58,7 @@ def evaluate(self):
"""
messages = [
{"role": "system", "content": "You are going to pick an answer from two different participants based on an instruction. You should pick the entry that follows instructions the best."},
{"role": "user", "content": entries }
{"role": "user", "content": entries}
]
completion = client.chat.completions.create(model="gpt-4o", messages=messages)
completion_content = completion.choices[0].message.content
@@ -70,16 +71,18 @@ def evaluate(self):
elif line.startswith('explanation:'):
explanation = line.split('explanation:')[1].strip()

# Check if preference is None before using it
if preference is None:
print(f"Preference is None for index {index}")
continue

# Save the results to the dataframe
if "1" in preference:
self.dataset.at[index, 'preference'] = comp1_model
preference = comp1_model
elif "2" in preference:
self.dataset.at[index, 'preference'] = comp2_model
preference = comp2_model
elif "tie" in preference:
self.dataset.at[index, 'preference'] = "tie"
preference = "tie"

self.dataset.at[index, 'explanation'] = explanation
print(f"preference: {preference}\nexplanation: {explanation}")
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "llm-eval"
version = "0.5.14"
version = "0.5.15"
authors = [
{name = "Jonathan Eisenzopf", email = "[email protected]"},
]
