Skip to content

Commit

Permalink
Model: Remove dev wheel setting checks
Browse files Browse the repository at this point in the history
Removes the tensor parallelism (TP) and DRY sampler support checks, since both features are now available in the stable ExLlamaV2 release.

Signed-off-by: kingbri <[email protected]>
  • Loading branch information
kingbri1 committed Sep 15, 2024
1 parent 2d22183 commit 2a41910
Showing 1 changed file with 5 additions and 27 deletions.
32 changes: 5 additions & 27 deletions backends/exllamav2/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
ExLlamaV2Cache_Q4,
ExLlamaV2Cache_Q6,
ExLlamaV2Cache_Q8,
ExLlamaV2Cache_TP,
ExLlamaV2Tokenizer,
ExLlamaV2Lora,
)
Expand Down Expand Up @@ -55,14 +56,6 @@
from common.transformers_utils import GenerationConfig, HuggingFaceConfig
from common.utils import coalesce, unwrap

# Dynamic imports
try:
from exllamav2 import ExLlamaV2Cache_TP

has_tp = True
except ImportError:
has_tp = False


class ExllamaV2Container:
"""The model container class for ExLlamaV2 models."""
Expand Down Expand Up @@ -197,17 +190,10 @@ async def create(cls, model_directory: pathlib.Path, quiet=False, **kwargs):
else:
# Set tensor parallel
if use_tp:
if has_tp:
self.use_tp = True
self.use_tp = True

# TP has its own autosplit loader
self.gpu_split_auto = False
else:
# TODO: Remove conditional with exl2 v0.1.9 release
logger.warning(
"Tensor parallelism is not supported in the "
"current ExllamaV2 version."
)
# TP has its own autosplit loader
self.gpu_split_auto = False

# Enable manual GPU split if provided
if gpu_split:
Expand Down Expand Up @@ -703,7 +689,7 @@ def create_cache(
):
"""Utility function to create a model cache."""

if has_tp and use_tp:
if use_tp:
return ExLlamaV2Cache_TP(
model,
base=cache_class,
Expand Down Expand Up @@ -967,14 +953,6 @@ def check_unsupported_settings(self, **kwargs):
Meant for dev wheels!
"""

if unwrap(kwargs.get("dry_allowed_length"), 0) > 0 and not hasattr(
ExLlamaV2Sampler.Settings, "dry_multiplier"
):
logger.warning(
"DRY sampling is not supported by the currently "
"installed ExLlamaV2 version."
)

return kwargs

async def generate_gen(
Expand Down

0 comments on commit 2a41910

Please sign in to comment.