Commit acea9bc

Added logging
tgaddair committed Mar 21, 2024
1 parent d5f1921 commit acea9bc
Showing 1 changed file with 3 additions and 2 deletions.
server/lorax_server/models/flash_causal_lm.py: 3 additions & 2 deletions
@@ -784,11 +784,12 @@ def warmup(self, batch: FlashCausalLMBatch, max_new_tokens: int):
             )
 
             with warmup_mode():
                 logger.info("Warming up to max_total_tokens: {}", max_new_tokens)
-                for i in range(max_new_tokens):
+                with tqdm(total=max_new_tokens, desc="Warmup to max_total_tokens") as pbar:
+                    for _ in range(max_new_tokens):
                         _, batch = self.generate_token(batch, is_warmup=True)
-                    logger.info("Warmed up to token {}", i)
+                        pbar.update(1)
                 logger.info("Finished generating warmup tokens")
         except RuntimeError as e:
             if "CUDA out of memory" in str(e) or isinstance(e, torch.cuda.OutOfMemoryError):
                 raise RuntimeError(
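The change swaps the noisy per-token log line for tqdm's manual-update idiom: create a bar with a known total, then tick it once per step. Below is a minimal, self-contained sketch of that pattern; generate_one_token is a hypothetical placeholder standing in for self.generate_token(batch, is_warmup=True), and max_new_tokens is set to an arbitrary value for the demo.

import time

from tqdm import tqdm


def generate_one_token() -> None:
    # Hypothetical stand-in for self.generate_token(batch, is_warmup=True);
    # sleeps briefly so the progress bar is visible when run standalone.
    time.sleep(0.01)


max_new_tokens = 100
with tqdm(total=max_new_tokens, desc="Warmup to max_total_tokens") as pbar:
    for _ in range(max_new_tokens):
        generate_one_token()
        pbar.update(1)  # advance the bar by one warmup token
print("Finished generating warmup tokens")

Using tqdm as a context manager also guarantees the bar is closed even if an iteration raises, which matters here because the surrounding code catches CUDA out-of-memory errors during warmup.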
