Model: Remove override_base_seq_len
DocShotgun committed Oct 30, 2024
1 parent 7d18d2e commit 603760c
Showing 5 changed files with 0 additions and 33 deletions.
5 changes: 0 additions & 5 deletions backends/exllamav2/model.py
@@ -220,11 +220,6 @@ async def create(cls, model_directory: pathlib.Path, quiet=False, **kwargs):
          # Hardcode max output length to 16
          self.config.max_output_len = 16

-        # Then override the base_seq_len if present
-        override_base_seq_len = kwargs.get("override_base_seq_len")
-        if override_base_seq_len:
-            self.config.max_seq_len = override_base_seq_len
-
          # Grab the base model's sequence length before overrides for
          # rope calculations
          base_seq_len = self.config.max_seq_len
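Note on the surrounding context: the retained comment says base_seq_len is grabbed for rope calculations. As a point of reference, here is a minimal sketch of an NTK-style rope-alpha heuristic of the kind that value typically feeds; the function name and quadratic coefficients are illustrative assumptions, not code from this diff:

# Illustrative sketch only: an NTK-aware rope-alpha heuristic driven by the
# ratio of the requested context length to the model's base sequence length.
# The quadratic coefficients are a commonly cited fit, assumed here.
def calculate_rope_alpha(base_seq_len: int, target_seq_len: int) -> float:
    ratio = target_seq_len / base_seq_len
    if ratio <= 1.0:
        return 1.0  # no rope extension needed at or below the native context
    return -0.13436 + 0.80541 * ratio + 0.28833 * ratio**2

For example, extending a 4096-token base model to 8192 tokens (ratio 2.0) yields an alpha of roughly 2.63.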
6 changes: 0 additions & 6 deletions colab/TabbyAPI_Colab_Example.ipynb
@@ -92,7 +92,6 @@
     "# @markdown ---\n",
     "# @markdown Model parameters:\n",
     "ContextSize = 4096 # @param {type:\"integer\"}\n",
-    "OverrideBaseSeqLen = 4096 # @param {type:\"integer\"}\n",
     "RopeScale = 1.0 # @param {type:\"number\"}\n",
     "RopeAlpha = 1.0 # @param {type:\"number\"}\n",
     "NumExpertsPerToken = 2 # @param {type:\"integer\"}\n",
@@ -169,11 +168,6 @@
     "    # Fetched from the model's base sequence length in config.json by default\n",
     "    max_seq_len: {ContextSize}\n",
     "\n",
-    "    # Overrides base model context length (default: None)\n",
-    "    # WARNING: Don't set this unless you know what you're doing!\n",
-    "    # Only use this if the model's base sequence length in config.json is incorrect (ex. Mistral/Mixtral models)\n",
-    "    override_base_seq_len: {OverrideBaseSeqLen}\n",
-    "\n",
     "    # Automatically allocate resources to GPUs (default: True)\n",
     "    gpu_split_auto: True\n",
     "\n",
10 changes: 0 additions & 10 deletions common/config_models.py
@@ -176,16 +176,6 @@ class ModelConfig(BaseConfigModel):
         ),
         ge=0,
     )
-    override_base_seq_len: Optional[int] = Field(
-        None,
-        description=(
-            "Overrides base model context length (default: Empty).\n"
-            "WARNING: Don't set this unless you know what you're doing!\n"
-            "Again, do NOT use this for configuring context length, "
-            "use max_seq_len above ^"
-        ),
-        ge=0,
-    )
     tensor_parallel: Optional[bool] = Field(
         False,
         description=(
5 changes: 0 additions & 5 deletions config_sample.yml
@@ -69,11 +69,6 @@ model:
   # Fetched from the model's base sequence length in config.json by default.
   max_seq_len:

-  # Overrides base model context length (default: Empty).
-  # WARNING: Don't set this unless you know what you're doing!
-  # Again, do NOT use this for configuring context length, use max_seq_len above ^
-  override_base_seq_len:
-
   # Load model with tensor parallelism.
   # Falls back to autosplit if GPU split isn't provided.
   # This ignores the gpu_split_auto value.
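With the key gone from config_sample.yml, max_seq_len is the only context-length knob left in the model section. A minimal sketch of reading the trimmed config, assuming a local config_sample.yml and the standard PyYAML package:

# Hypothetical sketch: after this commit, override_base_seq_len is no longer
# a recognized key in the model section of the sample config.
import yaml

with open("config_sample.yml") as f:
    config = yaml.safe_load(f)

model_cfg = config.get("model", {})
max_seq_len = model_cfg.get("max_seq_len")  # None -> fall back to config.json
print(f"max_seq_len: {max_seq_len}")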
7 changes: 0 additions & 7 deletions endpoints/core/types/model.py
@@ -82,13 +82,6 @@ class ModelLoadRequest(BaseModel):
         default=None,
         examples=[4096],
     )
-    override_base_seq_len: Optional[int] = Field(
-        description=(
-            "Overrides the model's base sequence length. " "Leave blank if unsure"
-        ),
-        default=None,
-        examples=[4096],
-    )
     cache_size: Optional[int] = Field(
         description=("Number in tokens, must be greater than or equal to max_seq_len"),
         default=None,
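For API consumers, a load request now simply omits the removed field. A hedged example of the request after this change; the /v1/model/load route, port, model-name field, and x-admin-key header are assumptions about a typical deployment, not shown in this diff:

# Hedged example: loading a model after this change. Route, port, and auth
# header are assumptions; only max_seq_len tracks ModelLoadRequest above.
import requests

payload = {
    "model_name": "my-model",  # hypothetical model directory name
    "max_seq_len": 4096,       # the supported way to set context length
    # "override_base_seq_len": 4096,  # removed; the server no longer accepts it
}
resp = requests.post(
    "http://localhost:5000/v1/model/load",
    json=payload,
    headers={"x-admin-key": "YOUR_ADMIN_KEY"},  # hypothetical admin key
)
print(resp.status_code)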
