Eval cleanup (#75)
* Fix server bugs & Improve readme (#74)

* Update README.md

* Update README.md

* rm

* update

* update

* update

---------

Co-authored-by: Lianmin Zheng <[email protected]>
infwinston and merrymercy authored Mar 30, 2023
1 parent 0934f63 commit d01ad24
Showing 4 changed files with 21 additions and 91 deletions.
1 change: 1 addition & 0 deletions README.md
@@ -57,6 +57,7 @@ python3 -m fastchat.serve.gradio_web_server

## Evaluation

+Check [evaluation](fastchat/eval) for an end-to-end evaluation pipeline based on GPT-4.

## Fine-tuning
### Data
9 changes: 6 additions & 3 deletions fastchat/eval/README.md
@@ -9,16 +9,19 @@ This directory contains end-to-end pipelines for AI-enhanced evaluation.
Make sure you have set up the OpenAI API key in your environment. Then run:

```bash
-python qa_baseline_gpt35.py --question table/question.jsonl --output table/answer/awswer_gpt35.jsonl
+python qa_baseline_gpt35.py --question table/question.jsonl --output table/answer/answer_gpt35.jsonl
```
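
If the `OPENAI_API_KEY` variable is not already set in your shell, export it before running the command above. A minimal sketch; the key value is a placeholder:

```bash
# Placeholder value -- substitute your own OpenAI API key.
export OPENAI_API_KEY="YOUR_API_KEY"
```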

### Bard

Unfortunately, Bard has not released a public API yet. You may have to enter the answers manually, or use a third-party project that interfaces with Bard.

-### Vicuna
+### Vicuna and others

-TODO: add instructions
+To generate answers with Vicuna or other models, specify the path to the model checkpoint and run:
+```bash
+python model_qa.py --model-name /model/path --question-file table/question.jsonl --answer-file table/answer/answer.jsonl
+```
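
For reference, `model_qa.py` reads each line of the question file as a JSON object with `question_id`, `text`, and `category` keys (see the read loop in the diff further down). A minimal sketch that creates a one-question file in that shape; the question text is a made-up example:

```bash
# Hypothetical one-line question file with the keys model_qa.py reads
# ("question_id", "text", "category" per JSON line).
cat > table/question.jsonl <<'EOF'
{"question_id": 1, "text": "What are the main benefits of regular exercise?", "category": "generic"}
EOF
```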

## Evaluate Answers Automatically

68 changes: 0 additions & 68 deletions fastchat/eval/eval_qa_chatgpt.py

This file was deleted.

34 changes: 14 additions & 20 deletions fastchat/eval/eval.py → fastchat/eval/model_qa.py
@@ -4,12 +4,11 @@
import os
import json
from tqdm import tqdm
-import ray
+import shortuuid

from fastchat.conversation import default_conversation
from fastchat.utils import disable_torch_init

-@ray.remote(num_gpus=1)
@torch.inference_mode()
def eval_model(model_name, questions_file, answers_file):
    # Model
@@ -20,11 +19,11 @@ def eval_model(model_name, questions_file, answers_file):
        torch_dtype=torch.float16).cuda()


-    qa_file = open(os.path.expanduser(questions_file), "r")
+    ques_file = open(os.path.expanduser(questions_file), "r")
    ans_file = open(os.path.expanduser(answers_file), "w")
-    for i, line in enumerate(tqdm(qa_file)):
-        idx = json.loads(line)["id"]
-        qs = json.loads(line)["question"]
+    for i, line in enumerate(tqdm(ques_file)):
+        idx = json.loads(line)["question_id"]
+        qs = json.loads(line)["text"]
        cat = json.loads(line)["category"]
        conv = default_conversation.copy()
        conv.append_message(conv.roles[0], qs)
@@ -43,25 +42,20 @@
        index = outputs.index(conv.sep, len(prompt))

        outputs = outputs[len(prompt) + len(conv.roles[1]) + 2:index].strip()
-        ans_file.write(json.dumps({"id": idx, "answer": outputs, "category": cat}) + "\n")
+        ans_id = shortuuid.uuid()
+        ans_file.write(json.dumps({"question_id": idx,
+                                   "text": outputs,
+                                   "answer_id": ans_id,
+                                   "model_id": model_name,
+                                   "metadata": {}}) + "\n")
        ans_file.flush()
    ans_file.close()

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--model-name", type=str, default="facebook/opt-350m")
-    parser.add_argument("--questions-file", type=str, default="mini_evals/qa.jsonl")
-    parser.add_argument("--answers-file", type=str, default="answers.jsonl")
+    parser.add_argument("--question-file", type=str, default="table/question.jsonl")
+    parser.add_argument("--answer-file", type=str, default="answer.jsonl")
    args = parser.parse_args()

-    ray.init()
-    handle = []
-    for i in range(1, 5):
-        model_name = args.model_name
-        model_name.replace('~/', '')
-        print(model_name)
-        question_file = f'mini_evals/qa_v2-{i}.jsonl'
-        answers_file = f'answers/v4/answers-v2-{i}.jsonl'
-        handle.append(eval_model.remote(model_name, question_file, answers_file))
-
-    results = ray.get(handle)
+    eval_model(args.model_name, args.question_file, args.answer_file)
