From 75af974c8802e3e471943563970a9690365751a5 Mon Sep 17 00:00:00 2001
From: kingbri
Date: Thu, 19 Sep 2024 22:05:56 -0400
Subject: [PATCH] Model: Raise an error if the context length is too large

The dynamic generator already raised a not-so-helpful exception which
basically said not to exceed the max sequence length. Instead of allowing
possible undefined behavior, error out with a clear message.

Signed-off-by: kingbri
---
 backends/exllamav2/model.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/backends/exllamav2/model.py b/backends/exllamav2/model.py
index 15c51c1..5d783d1 100644
--- a/backends/exllamav2/model.py
+++ b/backends/exllamav2/model.py
@@ -1228,10 +1228,9 @@ async def generate_gen(
         # The first index will always be the positive prompt
         context_len = input_ids[0].size(dim=-1)
         if context_len > self.config.max_seq_len:
-            logger.warning(
+            raise ValueError(
                 f"Context length {context_len} is greater than max_seq_len "
-                f"{self.config.max_seq_len}. Generation is truncated and "
-                "metrics may not be accurate."
+                f"{self.config.max_seq_len}"
             )
 
         # Automatically set max_tokens to fill up the context
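
For reference, below is a minimal standalone sketch of the check this patch
introduces. It assumes PyTorch tensors for input_ids (exllamav2 operates on
torch tensors, but the surrounding generate_gen scaffolding is omitted); the
ModelConfig class and check_context_len function are hypothetical stand-ins
for illustration, not part of the actual backends/exllamav2/model.py API.

import torch


class ModelConfig:
    """Hypothetical stand-in for the backend config; only max_seq_len matters here."""

    def __init__(self, max_seq_len: int):
        self.max_seq_len = max_seq_len


def check_context_len(input_ids: list, config: ModelConfig) -> int:
    # The first index will always be the positive prompt (same as the patch)
    context_len = input_ids[0].size(dim=-1)

    # After this patch, an oversized prompt raises instead of warning, so the
    # caller gets a clear error rather than silently truncated generation.
    if context_len > config.max_seq_len:
        raise ValueError(
            f"Context length {context_len} is greater than max_seq_len "
            f"{config.max_seq_len}"
        )
    return context_len


# Usage: a 4096-token prompt against a 2048-token limit now errors out.
prompt = torch.zeros(1, 4096, dtype=torch.long)
try:
    check_context_len([prompt], ModelConfig(max_seq_len=2048))
except ValueError as exc:
    print(exc)  # Context length 4096 is greater than max_seq_len 2048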