Fix an attempt to add a string value to a numerical value #447

Merged
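
Summary of the fix: in llm_bench, the per-iteration latency fields in iter_data can hold the empty-string placeholder '' when the benchmark hook produced no data for that iteration. The averaging code then tries to add '' to a running numeric total, which raises a TypeError. A minimal sketch of the failure and of the guard this PR applies (values hypothetical):

    iter_data = {'first_token_latency': ''}  # placeholder: no hook data collected

    avg_1st_token_latency = 0
    try:
        avg_1st_token_latency += iter_data['first_token_latency']
    except TypeError as err:
        print(err)  # unsupported operand type(s) for +=: 'int' and 'str'

    # The patched code treats '' as 0 before accumulating:
    avg_1st_token_latency += iter_data['first_token_latency'] if iter_data['first_token_latency'] != '' else 0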
13 changes: 9 additions & 4 deletions llm_bench/python/utils/metrics_print.py
@@ -43,13 +43,17 @@ def print_metrics(
             f"[{iter_str}] First token latency: {iter_data['first_token_latency']:.2f} ms/{latency_unit}, "
             f"other tokens latency: {iter_data['other_tokens_avg_latency']:.2f} ms/{latency_unit}, len of tokens: {len(tms)} * {batch_size}",
         )
+    else:
+        log.warning(f'[{iter_str}] No hook data output for first token latency and other tokens latency')
     if len(tms_infer) > 0:
         iter_data['first_token_infer_latency'] = tms_infer[0] * 1000 if len(tms_infer) > 0 else -1
         iter_data['other_tokens_infer_avg_latency'] = sum(tms_infer[1:]) / (len(tms_infer) - 1) * 1000 if len(tms_infer) > 1 else -1
         log.info(
             f"[{iter_str}] First infer latency: {iter_data['first_token_infer_latency']:.2f} ms/infer, "
             f"other infers latency: {iter_data['other_tokens_infer_avg_latency']:.2f} ms/infer, inference count: {len(tms_infer)}",
         )
+    else:
+        log.warning(f'[{iter_str}] No hook data output for first infer latency and other infers latency')
     if stable_diffusion is not None:
         print_stable_diffusion_infer_latency(iter_str, iter_data, stable_diffusion)
     output_str = ''
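
The hunk above adds else branches so that an iteration with no hook data logs a warning instead of passing silently while the latency fields keep their non-numeric placeholders. Reduced to a standalone sketch (the surrounding function is simplified; only the guard structure is taken from the diff):

    import logging as log

    def report_token_latencies(iter_str, iter_data, tms):
        # Only compute and log latencies when the hook produced timestamps.
        if len(tms) > 0:
            iter_data['first_token_latency'] = tms[0] * 1000
            log.info(f"[{iter_str}] First token latency: {iter_data['first_token_latency']:.2f} ms/token")
        else:
            # New in this PR: make the missing data visible instead of silent.
            log.warning(f'[{iter_str}] No hook data output for first token latency and other tokens latency')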
@@ -118,15 +122,16 @@ def output_avg_statis_tokens(prompt_dict, prompt_idx_list, iter_data_list, batch
             if iter_data['iteration'] == 0:
                 continue
             if iter_data['prompt_idx'] == p_idx:
-                avg_1st_token_latency += iter_data['first_token_latency']
-                avg_2nd_tokens_latency += iter_data['other_tokens_avg_latency']
-                avg_input_size += iter_data['input_size']
+                avg_1st_token_latency += iter_data['first_token_latency'] if iter_data['first_token_latency'] != '' else 0
+                avg_2nd_tokens_latency += iter_data['other_tokens_avg_latency'] if iter_data['other_tokens_avg_latency'] != '' else 0
+                avg_input_size += iter_data['input_size'] if iter_data['input_size'] != '' else 0
                 index_num = index_num + 1
         if index_num > 0:
             avg_1st_token_latency = avg_1st_token_latency / index_num
             avg_2nd_tokens_latency = avg_2nd_tokens_latency / index_num
             avg_input_size = int(avg_input_size / index_num)
-            avg_2nd_token_tput = (1 / avg_2nd_tokens_latency) * batch_size * 1000
+            if avg_2nd_tokens_latency > 0:
+                avg_2nd_token_tput = (1 / avg_2nd_tokens_latency) * batch_size * 1000
             latency_unit = 'token'
             if batch_size > 1:
                 latency_unit = '{}tokens'.format(batch_size)
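
For reference, the patched averaging logic as a self-contained sketch (field names and the '' convention come from the diff; the sample data is hypothetical):

    def average_first_token_latency(iter_data_list, p_idx=0):
        # Accumulate only numeric entries; '' marks iterations with no hook data.
        total, count = 0, 0
        for iter_data in iter_data_list:
            if iter_data['iteration'] == 0:  # skip the warm-up iteration
                continue
            if iter_data['prompt_idx'] != p_idx:
                continue
            total += iter_data['first_token_latency'] if iter_data['first_token_latency'] != '' else 0
            count += 1
        return total / count if count > 0 else -1

    # Hypothetical sample: one warm-up row, one row without hook data.
    data = [
        {'iteration': 0, 'prompt_idx': 0, 'first_token_latency': 12.5},
        {'iteration': 1, 'prompt_idx': 0, 'first_token_latency': 10.0},
        {'iteration': 2, 'prompt_idx': 0, 'first_token_latency': ''},
    ]
    print(average_first_token_latency(data))  # 5.0 -- '' counts as 0

Note the design choice mirrored from the patch: a missing value counts as zero rather than being excluded from the denominator, so iterations without hook data pull the average down. The added avg_2nd_tokens_latency > 0 check likewise keeps the throughput computation from dividing by zero when every matched iteration lacked hook data.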