Skip to content

Commit

Permalink
Tree: Remove fasttensors
Browse files Browse the repository at this point in the history
The fasttensors option is now a no-op in upstream, so it is removed here.

Signed-off-by: kingbri <[email protected]>
  • Loading branch information
kingbri1 committed Sep 30, 2024
1 parent 6726014 commit 126a444
Show file tree
Hide file tree
Showing 4 changed files with 0 additions and 14 deletions.
3 changes: 0 additions & 3 deletions backends/exllamav2/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,9 +251,6 @@ async def create(cls, model_directory: pathlib.Path, quiet=False, **kwargs):
else:
self.config.scale_alpha_value = rope_alpha

# Enable fasttensors loading if present
self.config.fasttensors = unwrap(kwargs.get("fasttensors"), False)

# Set max batch size to the config override
self.max_batch_size = unwrap(kwargs.get("max_batch_size"))

Expand Down
7 changes: 0 additions & 7 deletions common/config_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,13 +290,6 @@ class ModelConfig(BaseConfigModel):
),
ge=1,
)
fasttensors: Optional[bool] = Field(
False,
description=(
"Enables fasttensors to possibly increase model loading speeds "
"(default: False)."
),
)

_metadata: Metadata = PrivateAttr(Metadata())
model_config = ConfigDict(protected_namespaces=())
Expand Down
3 changes: 0 additions & 3 deletions config_sample.yml
Original file line number Diff line number Diff line change
Expand Up @@ -135,9 +135,6 @@ model:
# WARNING: Don't set this unless you know what you're doing!
num_experts_per_token:

# Enables fasttensors to possibly increase model loading speeds (default: False).
fasttensors: false

# Options for draft models (speculative decoding)
# This will use more VRAM!
draft_model:
Expand Down
1 change: 0 additions & 1 deletion endpoints/core/types/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,6 @@ class ModelLoadRequest(BaseModel):
chunk_size: Optional[int] = None
prompt_template: Optional[str] = None
num_experts_per_token: Optional[int] = None
fasttensors: Optional[bool] = None

# Non-config arguments
draft: Optional[DraftModelLoadRequest] = None
Expand Down

0 comments on commit 126a444

Please sign in to comment.