diff --git a/code/evaluate_llama2.py b/code/evaluate_llama2.py index b39c452..3eaf935 100644 --- a/code/evaluate_llama2.py +++ b/code/evaluate_llama2.py @@ -106,7 +106,9 @@ def hook_fn(layer, input, output): # Compute scores rouge = evaluate.load('rouge') results_generated = rouge.compute(predictions=predictions_generated, references=references) - results_generated["time"] = e2e_inference_time + results_generated["time-total"] = e2e_inference_time + results_generated["time-per-token"] = e2e_inference_time_norm + print("Results: ",results_generated)