diff --git a/backends/exllamav2/model.py b/backends/exllamav2/model.py
index 15c51c1..5d783d1 100644
--- a/backends/exllamav2/model.py
+++ b/backends/exllamav2/model.py
@@ -1228,10 +1228,9 @@ async def generate_gen(
         # The first index will always be the positive prompt
         context_len = input_ids[0].size(dim=-1)
         if context_len > self.config.max_seq_len:
-            logger.warning(
+            raise ValueError(
                 f"Context length {context_len} is greater than max_seq_len "
-                f"{self.config.max_seq_len}. Generation is truncated and "
-                "metrics may not be accurate."
+                f"{self.config.max_seq_len}"
             )
 
         # Automatically set max_tokens to fill up the context