diff --git a/show_result.py b/show_result.py index 21c5eb9..e3e2449 100644 --- a/show_result.py +++ b/show_result.py @@ -150,7 +150,7 @@ def get_battles_from_judgment(bench_name, parser.add_argument("--markdown-control-only", action="store_true") args = parser.parse_args() print(args) - assert not args.load_bootstrap or (args.load_battles and args.load_bootstrap), "If loading prexisting bootstrapping data, you must also load preexisting battles." + assert not args.load_bootstrap or (args.load_battles and args.load_bootstrap), "If loading preexisting bootstrapping data, you must also load preexisting battles." assert sum([args.style_control, args.length_control_only, args.markdown_control_only]) < 2, "You can only control one of the three: length, markdown, or both style." answer_dir = os.path.join("data", args.bench_name, "model_answer") @@ -260,4 +260,4 @@ def get_battles_from_judgment(bench_name, col_list[-2], col_list[-1] = col_list[-1], col_list[-2] stats = stats.loc[:,col_list] stats['date'] = date_str[:4] + '-' + date_str[4:6] + '-' + date_str[6:] - stats.to_csv(f"leaderboard/arena_hard_leaderboard_{date_str}.csv", index=False) \ No newline at end of file + stats.to_csv(f"leaderboard/arena_hard_leaderboard_{date_str}.csv", index=False)