Skip to content

Commit

Permalink
Merge pull request #243 from DocShotgun/chunk-size-fix
Browse files: browse the repository at this point in the history
Enforce chunk_size as multiple of 256
  • Loading branch information
bdashore3 authored Nov 18, 2024
2 parents bd9e78e + 37cc701 commit a69f860
Showing 1 changed file with 13 additions and 2 deletions.
15 changes: 13 additions & 2 deletions backends/exllamav2/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,9 +322,20 @@ async def create(cls, model_directory: pathlib.Path, quiet=False, **kwargs):
if num_experts_override:
self.config.num_experts_per_token = kwargs.get("num_experts_per_token")

# Make sure chunk size is >= 16 and <= max seq length
# Make sure chunk size is >= 256, keep near or below max seq len
user_chunk_size = unwrap(kwargs.get("chunk_size"), 2048)
chunk_size = sorted((16, user_chunk_size, self.config.max_seq_len))[1]
chunk_size = sorted((256, user_chunk_size, self.config.max_seq_len))[1]
chunk_remainder = chunk_size % 256
if chunk_remainder != 0:
rounded_chunk_size = int(256 * ((chunk_size - chunk_remainder) / 256 + 1))

logger.warning(
f"The given chunk size ({chunk_size}) is "
"not a multiple of 256.\n"
"Overriding chunk_size with an overestimated value of "
f"{rounded_chunk_size} tokens."
)
chunk_size = rounded_chunk_size
self.config.max_input_len = chunk_size
self.config.max_attention_size = chunk_size**2

Expand Down

0 comments on commit a69f860

Please sign in to comment.