Logging: Move metrics to gen logging
The metrics logging didn't have a place in the generation function, so it has been moved into common/gen_logging.

Signed-off-by: kingbri <[email protected]>
kingbri1 committed Mar 14, 2024
Commit 2ebefe8 (parent: 1ec8eb9)
Showing 2 changed files with 43 additions and 30 deletions.
backends/exllamav2/model.py (35 changes: 5 additions & 30 deletions)
@@ -20,7 +20,7 @@
 from typing import List, Optional, Union
 
 from backends.exllamav2.grammar import ExLlamaV2Grammar
-from common.gen_logging import log_generation_params, log_prompt, log_response
+from common.gen_logging import log_generation_params, log_metrics, log_prompt, log_response
 from common.templating import (
     PromptTemplate,
     find_template_from_model,
@@ -969,35 +969,10 @@ def generate_gen(self, prompt: str, **kwargs):
         # Print response
         log_response(full_response)
 
+        # Print metrics
         elapsed_time = last_chunk_time - start_time
         context_len = None if ids is None else context_len
+
-        initial_response = (
-            f"Metrics: {generated_tokens} tokens generated in "
-            f"{round(elapsed_time, 2)} seconds"
-        )
-        itemization = []
-        extra_parts = []
-
-        # Add tokens per second
-        tokens_per_second = (
-            "Indeterminate"
-            if elapsed_time == 0
-            else round(generated_tokens / elapsed_time, 2)
-        )
-        itemization.append(f"{tokens_per_second} T/s")
-
-        # Add context (original token count)
-        if ids is not None:
-            itemization.append(f"context {context_len} tokens")
-
-            if context_len > self.config.max_seq_len:
-                extra_parts.append("<-- Not accurate (truncated)")
-
-        # Print output
-        logger.info(
-            initial_response
-            + " ("
-            + ", ".join(itemization)
-            + ") "
-            + " ".join(extra_parts)
+        log_metrics(
+            generated_tokens, elapsed_time, context_len, self.config.max_seq_len
         )
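
Because the helper now lives in common/gen_logging, any backend that tracks token and timing counts can emit the same metrics line. A minimal sketch, assuming the caller already has those values on hand (the variable names and numbers below are illustrative, not part of this commit):

from common.gen_logging import log_metrics

# Illustrative values; a real backend measures these during generation.
generated_tokens = 64     # tokens produced by the model
elapsed_time = 1.6        # seconds spent generating
context_len = 900         # prompt token count, or None if unknown
max_seq_len = 4096        # model's configured maximum sequence length

log_metrics(generated_tokens, elapsed_time, context_len, max_seq_len)
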
common/gen_logging.py (38 changes: 38 additions & 0 deletions)
@@ -67,3 +67,41 @@ def log_response(response: str):
     if PREFERENCES.prompt:
         formatted_response = "\n" + response
         logger.info(f"Response: {formatted_response if response else 'Empty'}\n")
+
+
+def log_metrics(
+    generated_tokens: int,
+    elapsed_time: float,
+    context_len: Optional[int],
+    max_seq_len: int,
+):
+    initial_response = (
+        f"Metrics: {generated_tokens} tokens generated in "
+        f"{round(elapsed_time, 2)} seconds"
+    )
+    itemization = []
+    extra_parts = []
+
+    # Add tokens per second
+    tokens_per_second = (
+        "Indeterminate"
+        if elapsed_time == 0
+        else round(generated_tokens / elapsed_time, 2)
+    )
+    itemization.append(f"{tokens_per_second} T/s")
+
+    # Add context (original token count)
+    if context_len:
+        itemization.append(f"context {context_len} tokens")
+
+        if context_len > max_seq_len:
+            extra_parts.append("<-- Not accurate (truncated)")
+
+    # Print output
+    logger.info(
+        initial_response
+        + " ("
+        + ", ".join(itemization)
+        + ") "
+        + " ".join(extra_parts)
+    )
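
For a sense of what log_metrics prints, here is a hypothetical call with made-up numbers (the exact prefix and formatting depend on how the project's logger is configured):

# Hypothetical values chosen so both the T/s item and the truncation note appear.
log_metrics(
    generated_tokens=200,
    elapsed_time=5.0,
    context_len=5000,
    max_seq_len=4096,
)
# Expected message, give or take logger decoration:
# Metrics: 200 tokens generated in 5.0 seconds (40.0 T/s, context 5000 tokens) <-- Not accurate (truncated)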
