
Commit 5884d59

Fix use_wandb in ast eval, responses file deletion, wandb artifacts renaming (ShishirPatil#115)
- Fix `use_wandb` to be `args.use_wandb`
- Delete the existing responses file if one is found (otherwise data would be appended to it); a minimal sketch of this follows below
- Fix W&B artifacts logging
- Only append to the table if the line from the jsonl is not None
- Enforce artifact naming conventions (logging will error if `:` is present)
1 parent a1f4589 commit 5884d59
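
A minimal sketch of the delete-before-append behavior described in the second bullet, with a hypothetical path standing in for `args.output_file`:

```python
import os

output_file = "responses.jsonl"  # hypothetical path standing in for args.output_file

# Responses are appended line by line during generation, so a leftover file
# from a previous run would pollute the new results; remove it up front.
if os.path.exists(output_file):
    print(f"Existing responses file found at: {output_file}, deleting it ...")
    os.remove(output_file)
```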

File tree

2 files changed: +18, -9 lines

- eval/eval-scripts/ast_eval_hf.py (+4, -3)
- eval/get_llm_responses.py (+14, -6)


eval/eval-scripts/ast_eval_hf.py (+4, -3)
@@ -161,14 +161,15 @@ def main(args):
         else:
             pass
 
-    if use_wandb:
+    if args.use_wandb:
+        import wandb
         if args.wandb_run_id is not None:
             wandb.init(project=args.wandb_project, entity=args.wandb_entity, id=args.wandb_run_id, resume="must")
         else:
             wandb.init(project=args.wandb_project, entity=args.wandb_entity)
 
-        wandb.summary['final_functionality_accuracy': total_correct / len(llm_responses)]
-        wandb.summary['final_hallucination': total_hallucination/len(llm_responses)]
+        wandb.summary['final_functionality_accuracy'] = total_correct / len(llm_responses)
+        wandb.summary['final_hallucination'] = total_hallucination/len(llm_responses)
 
     print('Final Functionality accuracy: ', total_correct / len(llm_responses))
     print('Final hallucination: ', total_hallucination/len(llm_responses))
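
The summary fix in this hunk is behavioral, not cosmetic: `wandb.summary['key': value]` is parsed as an item lookup with a `slice('key', value)` object rather than an assignment, so no metric is recorded, and the bare `use_wandb` guard referenced a name the fixed code now reads from `args`. A minimal sketch of the corrected pattern, using placeholder counters and a hypothetical offline project name so it runs without a W&B account:

```python
import wandb

# Placeholder counters standing in for the script's totals (hypothetical values).
total_correct, total_hallucination, num_responses = 42, 3, 100

# Offline mode so the sketch runs without a W&B account.
run = wandb.init(project="gorilla-eval-sketch", mode="offline")

# Old form: wandb.summary['key': value] is an item lookup with a
# slice('key', value) object, not an assignment, so nothing is stored.
# Fixed form: plain item assignment on the run summary.
wandb.summary['final_functionality_accuracy'] = total_correct / num_responses
wandb.summary['final_hallucination'] = total_hallucination / num_responses

run.finish()
```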

eval/get_llm_responses.py (+14, -6)
@@ -13,6 +13,8 @@
 # limitations under the License.
 
 import argparse
+import re
+import os
 import sys
 import json
 import openai
@@ -132,6 +134,10 @@ def callback_with_lock(result, output_file):
         questions.append(json.loads(line)["text"])
         question_ids.append(json.loads(line)["question_id"])
 
+    if os.path.exists(args.output_file):
+        print(f"\nExisting responses file found at: {args.output_file}, deleting it ...\n")
+        os.remove(args.output_file)
+
     file_write_lock = mp.Lock()
     with mp.Pool(1) as pool:
         results = []
@@ -148,11 +154,11 @@ def callback_with_lock(result, output_file):
     end_time = time.time()
     elapsed_time = end_time - start_time
     print("Total time used: ", elapsed_time)
-
+
     if args.use_wandb:
         print("\nSaving all responses to Weights & Biases...\n")
-
         wandb.summary["elapsed_time_s"] = elapsed_time
+        wandb.log({"elapsed_time_s":elapsed_time})
 
         line_count = 0
         with open(args.output_file, 'r') as file:
@@ -161,16 +167,18 @@ def callback_with_lock(result, output_file):
 
                 if i == 0:
                     tbl = wandb.Table(columns=list(data.keys()))
-                tbl.add_data(*list(data.values()))
-                line_count+=1
+                if data is not None:
+                    tbl.add_data(*list(data.values()))
+                    line_count+=1
 
         # Log the Tale to W&B
         wandb.log({"llm_eval_responses": tbl})
         wandb.summary["response_count"] = line_count
 
         # Also log results file as W&B Artifact
+        artifact_model_name = re.sub(r'[^a-zA-Z0-9-_.]', '-', args.model)
         wandb.log_artifact(args.output_file,
-                           name=f"{args.api_name}-{args._model}-eval-results",
-                           type=f"eval-results",
+                           name=f"{args.api_name}-{artifact_model_name}-eval-responses",
+                           type=f"eval-responses",
                            aliases=[f"{line_count}-responses"]
                            )
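
W&B artifact names accept only alphanumeric characters, dashes, underscores, and dots, and `:` is reserved as the version/alias separator, which is why unsanitized model identifiers make `log_artifact` error. A minimal sketch of the renaming logic above, with a hypothetical model string and output file, run offline so no W&B account is needed:

```python
import re
import wandb

# Hypothetical stand-ins for args.model, args.api_name, and args.output_file.
model = "gpt-3.5-turbo:ft-demo"
api_name = "openai"
output_file = "responses.jsonl"

# Create a tiny placeholder responses file so log_artifact has something to stage.
with open(output_file, "w") as f:
    f.write('{"question_id": 0, "text": "placeholder"}\n')

# Same character class as the diff: anything outside [a-zA-Z0-9-_.] becomes a dash,
# so ':' and '/' in model identifiers are kept out of the artifact name.
artifact_model_name = re.sub(r'[^a-zA-Z0-9-_.]', '-', model)
print(artifact_model_name)  # gpt-3.5-turbo-ft-demo

# Offline mode: the run and artifact are staged locally, no account required.
run = wandb.init(project="gorilla-eval-sketch", mode="offline")
run.log_artifact(output_file,
                 name=f"{api_name}-{artifact_model_name}-eval-responses",
                 type="eval-responses",
                 aliases=["1-responses"])
run.finish()
```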
