
Commit

drafting
riship committed Jan 24, 2025
2 parents f860da7 + 2689f15 commit bd35260
Showing 1 changed file with 14 additions and 0 deletions.
14 changes: 14 additions & 0 deletions examples/llm/tech_qa.py
@@ -229,13 +229,27 @@ def eval(question: str, pred: str, correct_answer: str):
return llm_judge.score(question, pred, correct_answer)

scores = []
eval_tuples = []
for test_batch in tqdm(test_loader, desc="Testing"):
    preds = inference_step(model, test_batch)
    for question, pred, label in zip(test_batch.question, preds,
                                     test_batch.label):
        eval_tuples.append((question, pred, label))
# tqdm over individual (question, pred, label) tuples rather than over
# test_loader: with only 11 test questions and a default eval batch size
# of 16, the loader yields a single mini-batch, so a per-batch progress bar
# would show no progress.
for question, pred, label in tqdm(eval_tuples, desc="Eval"):
    scores.append(eval(question, pred, label))
avg_scores = sum(scores) / len(scores)
print("Avg marlin accuracy=", avg_scores)

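For context, the loop above follows a two-phase pattern: run batched inference over the test loader, collect (question, prediction, label) tuples, then score each tuple with an LLM judge and average the scores. Below is a minimal, self-contained sketch of that pattern; the dummy loader, the fake inference function, and the exact-match judge are hypothetical stand-ins for tech_qa.py's test_loader, inference_step, and llm_judge.score, not the repository's actual implementations.

from tqdm import tqdm

def dummy_judge(question, pred, correct_answer):
    # Hypothetical stand-in for the LLM judge: 1.0 on a case-insensitive
    # exact match, 0.0 otherwise. The real example delegates to
    # llm_judge.score(question, pred, correct_answer).
    return float(pred.strip().lower() == correct_answer.strip().lower())

def evaluate(test_batches, inference_step, judge):
    # Phase 1: batched inference, collecting one tuple per test question.
    eval_tuples = []
    for batch in tqdm(test_batches, desc="Testing"):
        preds = inference_step(batch)
        eval_tuples.extend(zip(batch["question"], preds, batch["label"]))
    # Phase 2: score each question individually so the progress bar ticks
    # per example even when the loader yields a single mini-batch.
    scores = [judge(q, p, a) for q, p, a in tqdm(eval_tuples, desc="Eval")]
    return sum(scores) / len(scores)

if __name__ == "__main__":
    batches = [{"question": ["q1", "q2"], "label": ["a1", "a2"]}]
    fake_inference = lambda batch: ["a1", "wrong answer"]
    print("Avg accuracy=", evaluate(batches, fake_inference, dummy_judge))

Averaging per-example judge scores keeps the reported metric independent of how the questions happen to be batched.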