
Commit 5884d59

Fix use_wandb in ast eval, responses file deletion, wandb artifacts renaming (ShishirPatil#115)
- Fix `use_wandb` to be `args.use_wandb`
- Delete the existing responses file if one is found (otherwise data would be appended to it); a minimal sketch of this follows below
- Fix W&B artifacts logging
- Only append to the table if the line from the jsonl is not None
- Enforce artifact naming conventions (logging will error if `:` is present)
1 parent a1f4589 commit 5884d59
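
A minimal sketch of the delete-before-append behavior described in the second bullet, with a hypothetical path standing in for `args.output_file`:

```python
import os

output_file = "responses.jsonl"  # hypothetical path standing in for args.output_file

# Responses are appended line by line during generation, so a leftover file
# from a previous run would pollute the new results; remove it up front.
if os.path.exists(output_file):
    print(f"Existing responses file found at: {output_file}, deleting it ...")
    os.remove(output_file)
```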

File tree

2 files changed: +18, -9 lines

- eval/eval-scripts/ast_eval_hf.py (+4, -3)
- eval/get_llm_responses.py (+14, -6)


eval/eval-scripts/ast_eval_hf.py (+4, -3)
@@ -161,14 +161,15 @@ def main(args):
         else:
             pass
 
-    if use_wandb:
+    if args.use_wandb:
+        import wandb
         if args.wandb_run_id is not None:
             wandb.init(project=args.wandb_project, entity=args.wandb_entity, id=args.wandb_run_id, resume="must")
         else:
             wandb.init(project=args.wandb_project, entity=args.wandb_entity)
 
-        wandb.summary['final_functionality_accuracy': total_correct / len(llm_responses)]
-        wandb.summary['final_hallucination': total_hallucination/len(llm_responses)]
+        wandb.summary['final_functionality_accuracy'] = total_correct / len(llm_responses)
+        wandb.summary['final_hallucination'] = total_hallucination/len(llm_responses)
 
     print('Final Functionality accuracy: ', total_correct / len(llm_responses))
     print('Final hallucination: ', total_hallucination/len(llm_responses))
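
The summary fix in this hunk is behavioral, not cosmetic: `wandb.summary['key': value]` is parsed as an item lookup with a `slice('key', value)` object rather than an assignment, so no metric is recorded, and the bare `use_wandb` guard referenced a name the fixed code now reads from `args`. A minimal sketch of the corrected pattern, using placeholder counters and a hypothetical offline project name so it runs without a W&B account:

```python
import wandb

# Placeholder counters standing in for the script's totals (hypothetical values).
total_correct, total_hallucination, num_responses = 42, 3, 100

# Offline mode so the sketch runs without a W&B account.
run = wandb.init(project="gorilla-eval-sketch", mode="offline")

# Old form: wandb.summary['key': value] is an item lookup with a
# slice('key', value) object, not an assignment, so nothing is stored.
# Fixed form: plain item assignment on the run summary.
wandb.summary['final_functionality_accuracy'] = total_correct / num_responses
wandb.summary['final_hallucination'] = total_hallucination / num_responses

run.finish()
```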

eval/get_llm_responses.py (+14, -6)
@@ -13,6 +13,8 @@
 # limitations under the License.
 
 import argparse
+import re
+import os
 import sys
 import json
 import openai
@@ -132,6 +134,10 @@ def callback_with_lock(result, output_file):
         questions.append(json.loads(line)["text"])
         question_ids.append(json.loads(line)["question_id"])
 
+    if os.path.exists(args.output_file):
+        print(f"\nExisting responses file found at: {args.output_file}, deleting it ...\n")
+        os.remove(args.output_file)
+
     file_write_lock = mp.Lock()
     with mp.Pool(1) as pool:
         results = []
@@ -148,11 +154,11 @@ def callback_with_lock(result, output_file):
     end_time = time.time()
     elapsed_time = end_time - start_time
     print("Total time used: ", elapsed_time)
-
+
     if args.use_wandb:
         print("\nSaving all responses to Weights & Biases...\n")
-
         wandb.summary["elapsed_time_s"] = elapsed_time
+        wandb.log({"elapsed_time_s":elapsed_time})
 
         line_count = 0
         with open(args.output_file, 'r') as file:
@@ -161,16 +167,18 @@ def callback_with_lock(result, output_file):
 
                 if i == 0:
                     tbl = wandb.Table(columns=list(data.keys()))
-                tbl.add_data(*list(data.values()))
-                line_count+=1
+                if data is not None:
+                    tbl.add_data(*list(data.values()))
+                    line_count+=1
 
         # Log the Tale to W&B
         wandb.log({"llm_eval_responses": tbl})
         wandb.summary["response_count"] = line_count
 
         # Also log results file as W&B Artifact
+        artifact_model_name = re.sub(r'[^a-zA-Z0-9-_.]', '-', args.model)
         wandb.log_artifact(args.output_file,
-                           name=f"{args.api_name}-{args._model}-eval-results",
-                           type=f"eval-results",
+                           name=f"{args.api_name}-{artifact_model_name}-eval-responses",
+                           type=f"eval-responses",
                            aliases=[f"{line_count}-responses"]
                            )
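
W&B artifact names accept only alphanumeric characters, dashes, underscores, and dots, and `:` is reserved as the version/alias separator, which is why unsanitized model identifiers make `log_artifact` error. A minimal sketch of the renaming logic above, with a hypothetical model string and output file, run offline so no W&B account is needed:

```python
import re
import wandb

# Hypothetical stand-ins for args.model, args.api_name, and args.output_file.
model = "gpt-3.5-turbo:ft-demo"
api_name = "openai"
output_file = "responses.jsonl"

# Create a tiny placeholder responses file so log_artifact has something to stage.
with open(output_file, "w") as f:
    f.write('{"question_id": 0, "text": "placeholder"}\n')

# Same character class as the diff: anything outside [a-zA-Z0-9-_.] becomes a dash,
# so ':' and '/' in model identifiers are kept out of the artifact name.
artifact_model_name = re.sub(r'[^a-zA-Z0-9-_.]', '-', model)
print(artifact_model_name)  # gpt-3.5-turbo-ft-demo

# Offline mode: the run and artifact are staged locally, no account required.
run = wandb.init(project="gorilla-eval-sketch", mode="offline")
run.log_artifact(output_file,
                 name=f"{api_name}-{artifact_model_name}-eval-responses",
                 type="eval-responses",
                 aliases=["1-responses"])
run.finish()
```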
