From 362b8d5818bbe3fa46128f596d952bfb6fd9d349 Mon Sep 17 00:00:00 2001 From: Jake <84923604+SecretiveShell@users.noreply.github.com> Date: Thu, 5 Sep 2024 18:04:56 +0100 Subject: [PATCH 01/51] config is now backed by pydantic (WIP) - add models for config options - add function to regenerate config.yml - replace references to config with pydantic compatible references - remove unnecessary unwrap() statements TODO: - auto generate env vars - auto generate argparse - test loading a model --- common/config_models.py | 248 ++++++++++++++++++++++++++++++++++ common/downloader.py | 4 +- common/gen_logging.py | 32 +---- common/model.py | 2 +- common/networking.py | 4 +- common/tabby_config.py | 29 ++-- endpoints/OAI/router.py | 8 +- endpoints/core/router.py | 24 ++-- endpoints/core/types/model.py | 4 +- endpoints/server.py | 2 +- main.py | 34 ++--- 11 files changed, 297 insertions(+), 94 deletions(-) create mode 100644 common/config_models.py diff --git a/common/config_models.py b/common/config_models.py new file mode 100644 index 00000000..9bbf5f1c --- /dev/null +++ b/common/config_models.py @@ -0,0 +1,248 @@ +from pydantic import BaseModel, ConfigDict, Field, model_validator +from typing import List, Optional, Union, get_type_hints + +from common.utils import unwrap + + +class config_config_model(BaseModel): + config: Optional[str] = Field( + None, description="Path to an overriding config.yml file" + ) + + +class network_config_model(BaseModel): + host: Optional[str] = Field("127.0.0.1", description="The IP to host on") + port: Optional[int] = Field(5000, description="The port to host on") + disable_auth: Optional[bool] = Field( + False, description="Disable HTTP token authentication with requests" + ) + send_tracebacks: Optional[bool] = Field( + False, description="Decide whether to send error tracebacks over the API" + ) + api_servers: Optional[List[str]] = Field( + [ + "OAI", + ], + description="API servers to enable. Options: (OAI, Kobold)", + ) + + +class logging_config_model(BaseModel): + log_prompt: Optional[bool] = Field(False, description="Enable prompt logging") + log_generation_params: Optional[bool] = Field( + False, description="Enable generation parameter logging" + ) + log_requests: Optional[bool] = Field(False, description="Enable request logging") + + +class model_config_model(BaseModel): + model_dir: str = Field( + "models", + description="Overrides the directory to look for models (default: models). Windows users, do NOT put this path in quotes.", + ) + use_dummy_models: Optional[bool] = Field( + False, + description="Sends dummy model names when the models endpoint is queried. Enable this if looking for specific OAI models.", + ) + model_name: Optional[str] = Field( + None, + description="An initial model to load. Make sure the model is located in the model directory! REQUIRED: This must be filled out to load a model on startup.", + ) + use_as_default: List[str] = Field( + default_factory=list, + description="Names of args to use as a default fallback for API load requests (default: []). Example: ['max_seq_len', 'cache_mode']", + ) + max_seq_len: Optional[int] = Field( + None, + description="Max sequence length. Fetched from the model's base sequence length in config.json by default.", + ) + override_base_seq_len: Optional[int] = Field( + None, + description="Overrides base model context length. WARNING: Only use this if the model's base sequence length is incorrect.", + ) + tensor_parallel: Optional[bool] = Field( + False, + description="Load model with tensor parallelism. 
Fallback to autosplit if GPU split isn't provided.", + ) + gpu_split_auto: Optional[bool] = Field( + True, + description="Automatically allocate resources to GPUs (default: True). Not parsed for single GPU users.", + ) + autosplit_reserve: List[int] = Field( + [96], + description="Reserve VRAM used for autosplit loading (default: 96 MB on GPU 0). Represented as an array of MB per GPU.", + ) + gpu_split: List[float] = Field( + default_factory=list, + description="An integer array of GBs of VRAM to split between GPUs (default: []). Used with tensor parallelism.", + ) + rope_scale: Optional[float] = Field( + 1.0, + description="Rope scale (default: 1.0). Same as compress_pos_emb. Only use if the model was trained on long context with rope.", + ) + rope_alpha: Optional[Union[float, str]] = Field( + 1.0, + description="Rope alpha (default: 1.0). Same as alpha_value. Set to 'auto' to auto-calculate.", + ) + cache_mode: Optional[str] = Field( + "FP16", + description="Enable different cache modes for VRAM savings (default: FP16). Possible values: FP16, Q8, Q6, Q4.", + ) + cache_size: Optional[int] = Field( + None, + description="Size of the prompt cache to allocate (default: max_seq_len). Must be a multiple of 256.", + ) + chunk_size: Optional[int] = Field( + 2048, + description="Chunk size for prompt ingestion (default: 2048). A lower value reduces VRAM usage but decreases ingestion speed.", + ) + max_batch_size: Optional[int] = Field( + None, + description="Set the maximum number of prompts to process at one time (default: None/Automatic). Automatically calculated if left blank.", + ) + prompt_template: Optional[str] = Field( + None, + description="Set the prompt template for this model. If empty, attempts to look for the model's chat template.", + ) + num_experts_per_token: Optional[int] = Field( + None, + description="Number of experts to use per token. Fetched from the model's config.json. For MoE models only.", + ) + fasttensors: Optional[bool] = Field( + False, + description="Enables fasttensors to possibly increase model loading speeds (default: False).", + ) + + +class draft_model_config_model(BaseModel): + draft_model_dir: Optional[str] = Field( + "models", + description="Overrides the directory to look for draft models (default: models)", + ) + draft_model_name: Optional[str] = Field( + None, + description="An initial draft model to load. Ensure the model is in the model directory.", + ) + draft_rope_scale: Optional[float] = Field( + 1.0, + description="Rope scale for draft models (default: 1.0). Same as compress_pos_emb. Use if the draft model was trained on long context with rope.", + ) + draft_rope_alpha: Optional[float] = Field( + None, + description="Rope alpha for draft models (default: None). Same as alpha_value. Leave blank to auto-calculate the alpha value.", + ) + draft_cache_mode: Optional[str] = Field( + "FP16", + description="Cache mode for draft models to save VRAM (default: FP16). 
Possible values: FP16, Q8, Q6, Q4.", + ) + + +class lora_instance_model(BaseModel): + name: str = Field(..., description="Name of the LoRA model") + scaling: float = Field( + 1.0, description="Scaling factor for the LoRA model (default: 1.0)" + ) + + +class lora_config_model(BaseModel): + lora_dir: Optional[str] = Field( + "loras", description="Directory to look for LoRAs (default: 'loras')" + ) + loras: Optional[List[lora_instance_model]] = Field( + None, + description="List of LoRAs to load and associated scaling factors (default scaling: 1.0)", + ) + + +class sampling_config_model(BaseModel): + override_preset: Optional[str] = Field( + None, description="Select a sampler override preset" + ) + + +class developer_config_model(BaseModel): + unsafe_launch: Optional[bool] = Field( + False, description="Skip Exllamav2 version check" + ) + disable_request_streaming: Optional[bool] = Field( + False, description="Disables API request streaming" + ) + cuda_malloc_backend: Optional[bool] = Field( + False, description="Runs with the pytorch CUDA malloc backend" + ) + uvloop: Optional[bool] = Field( + False, description="Run asyncio using Uvloop or Winloop" + ) + realtime_process_priority: Optional[bool] = Field( + False, + description="Set process to use a higher priority For realtime process priority, run as administrator or sudo Otherwise, the priority will be set to high", + ) + + +class embeddings_config_model(BaseModel): + embedding_model_dir: Optional[str] = Field( + "models", + description="Overrides directory to look for embedding models (default: models)", + ) + embeddings_device: Optional[str] = Field( + "cpu", + description="Device to load embedding models on (default: cpu). Possible values: cpu, auto, cuda. If using an AMD GPU, set this value to 'cuda'.", + ) + embedding_model_name: Optional[str] = Field( + None, description="The embeddings model to load" + ) + + +class tabby_config_model(BaseModel): + config: config_config_model = Field(default_factory=config_config_model) + network: network_config_model = Field(default_factory=network_config_model) + logging: logging_config_model = Field(default_factory=logging_config_model) + model: model_config_model = Field(default_factory=model_config_model) + draft_model: draft_model_config_model = Field( + default_factory=draft_model_config_model + ) + lora: lora_config_model = Field(default_factory=lora_config_model) + sampling: sampling_config_model = Field(default_factory=sampling_config_model) + developer: developer_config_model = Field(default_factory=developer_config_model) + embeddings: embeddings_config_model = Field(default_factory=embeddings_config_model) + + @model_validator(mode="before") + def set_defaults(cls, values): + for field_name, field_value in values.items(): + if field_value is None: + default_instance = cls.__annotations__[field_name]().dict() + values[field_name] = cls.__annotations__[field_name](**default_instance) + return values + + model_config = ConfigDict(validate_assignment=True) + + +def generate_config_file(filename="config_sample.yml", indentation=2): + schema = tabby_config_model.model_json_schema() + + def dump_def(id: str, indent=2): + yaml = "" + indent = " " * indentation * indent + id = id.split("/")[-1] + + section = schema["$defs"][id]["properties"] + for property in section.keys(): # get type + comment = section[property]["description"] + yaml += f"{indent}# {comment}\n" + + value = unwrap(section[property].get("default"), "") + yaml += f"{indent}{property}: {value}\n\n" + + return yaml + "\n" + + 
yaml = "" + for section in schema["properties"].keys(): + yaml += f"{section}:\n" + yaml += dump_def(schema["properties"][section]["$ref"]) + yaml += "\n" + + with open(filename, "w") as f: + f.write(yaml) + + +# generate_config_file("test.yml") diff --git a/common/downloader.py b/common/downloader.py index b0a8d93e..6813e0d8 100644 --- a/common/downloader.py +++ b/common/downloader.py @@ -76,9 +76,9 @@ def _get_download_folder(repo_id: str, repo_type: str, folder_name: Optional[str """Gets the download folder for the repo.""" if repo_type == "lora": - download_path = pathlib.Path(config.lora.get("lora_dir") or "loras") + download_path = pathlib.Path(config.lora.lora_dir) else: - download_path = pathlib.Path(config.model.get("model_dir") or "models") + download_path = pathlib.Path(config.model.model_dir) download_path = download_path / (folder_name or repo_id.split("/")[-1]) return download_path diff --git a/common/gen_logging.py b/common/gen_logging.py index 99958186..3252bb24 100644 --- a/common/gen_logging.py +++ b/common/gen_logging.py @@ -6,37 +6,19 @@ from loguru import logger from typing import Dict, Optional - -class GenLogPreferences(BaseModel): - """Logging preference config.""" - - prompt: bool = False - generation_params: bool = False - +from common.tabby_config import config # Global logging preferences constant -PREFERENCES = GenLogPreferences() - - -def update_from_dict(options_dict: Dict[str, bool]): - """Wrapper to set the logging config for generations""" - global PREFERENCES - - # Force bools on the dict - for value in options_dict.values(): - if value is None: - value = False - - PREFERENCES = GenLogPreferences.model_validate(options_dict) +PREFERENCES = config.logging def broadcast_status(): """Broadcasts the current logging status""" enabled = [] - if PREFERENCES.prompt: + if PREFERENCES.log_prompt: enabled.append("prompts") - if PREFERENCES.generation_params: + if PREFERENCES.log_generation_params: enabled.append("generation params") if len(enabled) > 0: @@ -47,13 +29,13 @@ def broadcast_status(): def log_generation_params(**kwargs): """Logs generation parameters to console.""" - if PREFERENCES.generation_params: + if PREFERENCES.log_generation_params: logger.info(f"Generation options: {kwargs}\n") def log_prompt(prompt: str, request_id: str, negative_prompt: Optional[str]): """Logs the prompt to console.""" - if PREFERENCES.prompt: + if PREFERENCES.log_prompt: formatted_prompt = "\n" + prompt logger.info( f"Prompt (ID: {request_id}): {formatted_prompt if prompt else 'Empty'}\n" @@ -66,7 +48,7 @@ def log_prompt(prompt: str, request_id: str, negative_prompt: Optional[str]): def log_response(request_id: str, response: str): """Logs the response to console.""" - if PREFERENCES.prompt: + if PREFERENCES.log_prompt: formatted_response = "\n" + response logger.info( f"Response (ID: {request_id}): " diff --git a/common/model.py b/common/model.py index a9ddfffe..4da1d904 100644 --- a/common/model.py +++ b/common/model.py @@ -153,7 +153,7 @@ async def unload_embedding_model(): def get_config_default(key: str, model_type: str = "model"): """Fetches a default value from model config if allowed by the user.""" - default_keys = unwrap(config.model.get("use_as_default"), []) + default_keys = unwrap(config.model.use_as_default, []) # Add extra keys to defaults default_keys.append("embeddings_device") diff --git a/common/networking.py b/common/networking.py index be6f1abc..e0812723 100644 --- a/common/networking.py +++ b/common/networking.py @@ -39,7 +39,7 @@ def 
handle_request_error(message: str, exc_info: bool = True): """Log a request error to the console.""" trace = traceback.format_exc() - send_trace = unwrap(config.network.get("send_tracebacks"), False) + send_trace = config.network.send_tracebacks error_message = TabbyRequestErrorMessage( message=message, trace=trace if send_trace else None @@ -134,7 +134,7 @@ def get_global_depends(): depends = [Depends(add_request_id)] - if config.logging.get("requests"): + if config.logging.log_requests: depends.append(Depends(log_request)) return depends diff --git a/common/tabby_config.py b/common/tabby_config.py index c0c9e585..5aac0b84 100644 --- a/common/tabby_config.py +++ b/common/tabby_config.py @@ -4,21 +4,11 @@ from typing import Optional from common.utils import unwrap, merge_dicts +from common.config_models import tabby_config_model +import common.config_models -class TabbyConfig: - network: dict = {} - logging: dict = {} - model: dict = {} - draft_model: dict = {} - lora: dict = {} - sampling: dict = {} - developer: dict = {} - embeddings: dict = {} - - def __init__(self): - pass - +class TabbyConfig(tabby_config_model): def load_config(self, arguments: Optional[dict] = None): """load the global application config""" @@ -30,14 +20,11 @@ def load_config(self, arguments: Optional[dict] = None): merged_config = merge_dicts(*configs) - self.network = unwrap(merged_config.get("network"), {}) - self.logging = unwrap(merged_config.get("logging"), {}) - self.model = unwrap(merged_config.get("model"), {}) - self.draft_model = unwrap(merged_config.get("draft"), {}) - self.lora = unwrap(merged_config.get("draft"), {}) - self.sampling = unwrap(merged_config.get("sampling"), {}) - self.developer = unwrap(merged_config.get("developer"), {}) - self.embeddings = unwrap(merged_config.get("embeddings"), {}) + for field in tabby_config_model.model_fields.keys(): + value = unwrap(merged_config.get(field), {}) + model = getattr(common.config_models, f"{field}_config_model") + + setattr(self, field, model.parse_obj(value)) def _from_file(self, config_path: pathlib.Path): """loads config from a given file path""" diff --git a/endpoints/OAI/router.py b/endpoints/OAI/router.py index b888f194..7cf08d74 100644 --- a/endpoints/OAI/router.py +++ b/endpoints/OAI/router.py @@ -58,9 +58,7 @@ async def completion_request( if isinstance(data.prompt, list): data.prompt = "\n".join(data.prompt) - disable_request_streaming = unwrap( - config.developer.get("disable_request_streaming"), False - ) + disable_request_streaming = config.developer.disable_request_streaming # Set an empty JSON schema if the request wants a JSON response if data.response_format.type == "json": @@ -117,9 +115,7 @@ async def chat_completion_request( if data.response_format.type == "json": data.json_schema = {"type": "object"} - disable_request_streaming = unwrap( - config.developer.get("disable_request_streaming"), False - ) + disable_request_streaming = config.developer.disable_request_streaming if data.stream and not disable_request_streaming: return EventSourceResponse( diff --git a/endpoints/core/router.py b/endpoints/core/router.py index 1f9d1948..29a615c0 100644 --- a/endpoints/core/router.py +++ b/endpoints/core/router.py @@ -62,17 +62,17 @@ async def list_models(request: Request) -> ModelList: Requires an admin key to see all models. 
""" - model_dir = unwrap(config.model.get("model_dir"), "models") + model_dir = config.model.model_dir model_path = pathlib.Path(model_dir) - draft_model_dir = config.draft_model.get("draft_model_dir") + draft_model_dir = config.draft_model.draft_model_dir if get_key_permission(request) == "admin": models = get_model_list(model_path.resolve(), draft_model_dir) else: models = await get_current_model_list() - if unwrap(config.model.get("use_dummy_models"), False): + if config.model.use_dummy_models: models.data.insert(0, ModelCard(id="gpt-3.5-turbo")) return models @@ -98,7 +98,7 @@ async def list_draft_models(request: Request) -> ModelList: """ if get_key_permission(request) == "admin": - draft_model_dir = unwrap(config.draft_model.get("draft_model_dir"), "models") + draft_model_dir = config.draft_model.draft_model_dir draft_model_path = pathlib.Path(draft_model_dir) models = get_model_list(draft_model_path.resolve()) @@ -122,7 +122,7 @@ async def load_model(data: ModelLoadRequest) -> ModelLoadResponse: raise HTTPException(400, error_message) - model_path = pathlib.Path(unwrap(config.model.get("model_dir"), "models")) + model_path = pathlib.Path(config.model.model_dir) model_path = model_path / data.name draft_model_path = None @@ -135,7 +135,7 @@ async def load_model(data: ModelLoadRequest) -> ModelLoadResponse: raise HTTPException(400, error_message) - draft_model_path = unwrap(config.draft_model.get("draft_model_dir"), "models") + draft_model_path = config.draft_model.draft_model_dir if not model_path.exists(): error_message = handle_request_error( @@ -192,7 +192,7 @@ async def list_all_loras(request: Request) -> LoraList: """ if get_key_permission(request) == "admin": - lora_path = pathlib.Path(unwrap(config.lora.get("lora_dir"), "loras")) + lora_path = pathlib.Path(config.lora.lora_dir) loras = get_lora_list(lora_path.resolve()) else: loras = get_active_loras() @@ -227,7 +227,7 @@ async def load_lora(data: LoraLoadRequest) -> LoraLoadResponse: raise HTTPException(400, error_message) - lora_dir = pathlib.Path(unwrap(config.lora.get("lora_dir"), "loras")) + lora_dir = pathlib.Path(config.lora.lora_dir) if not lora_dir.exists(): error_message = handle_request_error( "A parent lora directory does not exist for load. 
Check your config.yml?", @@ -266,9 +266,7 @@ async def list_embedding_models(request: Request) -> ModelList: """ if get_key_permission(request) == "admin": - embedding_model_dir = unwrap( - config.embeddings.get("embedding_model_dir"), "models" - ) + embedding_model_dir = config.embeddings.embedding_model_dir embedding_model_path = pathlib.Path(embedding_model_dir) models = get_model_list(embedding_model_path.resolve()) @@ -302,9 +300,7 @@ async def load_embedding_model( raise HTTPException(400, error_message) - embedding_model_dir = pathlib.Path( - unwrap(config.embeddings.get("embedding_model_dir"), "models") - ) + embedding_model_dir = pathlib.Path(config.embeddings.embedding_model_dir) embedding_model_path = embedding_model_dir / data.name if not embedding_model_path.exists(): diff --git a/endpoints/core/types/model.py b/endpoints/core/types/model.py index 154a9065..69663598 100644 --- a/endpoints/core/types/model.py +++ b/endpoints/core/types/model.py @@ -4,7 +4,7 @@ from time import time from typing import List, Literal, Optional, Union -from common.gen_logging import GenLogPreferences +from common.config_models import logging_config_model from common.model import get_config_default @@ -33,7 +33,7 @@ class ModelCard(BaseModel): object: str = "model" created: int = Field(default_factory=lambda: int(time())) owned_by: str = "tabbyAPI" - logging: Optional[GenLogPreferences] = None + logging: Optional[logging_config_model] = None parameters: Optional[ModelCardParameters] = None diff --git a/endpoints/server.py b/endpoints/server.py index 0f6a19b0..e1c81b54 100644 --- a/endpoints/server.py +++ b/endpoints/server.py @@ -36,7 +36,7 @@ def setup_app(host: Optional[str] = None, port: Optional[int] = None): allow_headers=["*"], ) - api_servers = unwrap(config.network.get("api_servers"), []) + api_servers = config.network.api_servers # Map for API id to server router router_mapping = {"oai": OAIRouter, "kobold": KoboldRouter} diff --git a/main.py b/main.py index f017ecce..89cc6bf6 100644 --- a/main.py +++ b/main.py @@ -27,8 +27,8 @@ async def entrypoint_async(): """Async entry function for program startup""" - host = unwrap(config.network.get("host"), "127.0.0.1") - port = unwrap(config.network.get("port"), 5000) + host = config.network.host + port = config.network.port # Check if the port is available and attempt to bind a fallback if is_port_in_use(port): @@ -50,16 +50,12 @@ async def entrypoint_async(): port = fallback_port # Initialize auth keys - load_auth_keys(unwrap(config.network.get("disable_auth"), False)) - - # Override the generation log options if given - if config.logging: - gen_logging.update_from_dict(config.logging) + load_auth_keys(config.network.disable_auth) gen_logging.broadcast_status() # Set sampler parameter overrides if provided - sampling_override_preset = config.sampling.get("override_preset") + sampling_override_preset = config.sampling.override_preset if sampling_override_preset: try: sampling.overrides_from_file(sampling_override_preset) @@ -68,25 +64,23 @@ async def entrypoint_async(): # If an initial model name is specified, create a container # and load the model - model_name = config.model.get("model_name") + model_name = config.model.model_name if model_name: - model_path = pathlib.Path(unwrap(config.model.get("model_dir"), "models")) + model_path = pathlib.Path(config.model.model_dir) model_path = model_path / model_name await model.load_model(model_path.resolve(), **config.model) # Load loras after loading the model - if config.lora.get("loras"): - lora_dir 
= pathlib.Path(unwrap(config.lora.get("lora_dir"), "loras")) + if config.lora.loras: + lora_dir = pathlib.Path(config.lora.lora_dir) await model.container.load_loras(lora_dir.resolve(), **config.lora) # If an initial embedding model name is specified, create a separate container # and load the model - embedding_model_name = config.embeddings.get("embedding_model_name") + embedding_model_name = config.embeddings.embedding_model_name if embedding_model_name: - embedding_model_path = pathlib.Path( - unwrap(config.embeddings.get("embedding_model_dir"), "models") - ) + embedding_model_path = pathlib.Path(config.embeddings.embedding_model_dir) embedding_model_path = embedding_model_path / embedding_model_name try: @@ -124,7 +118,7 @@ def entrypoint(arguments: Optional[dict] = None): # Check exllamav2 version and give a descriptive error if it's too old # Skip if launching unsafely print(f"MAIN.PY {config=}") - if unwrap(config.developer.get("unsafe_launch"), False): + if config.developer.unsafe_launch: logger.warning( "UNSAFE: Skipping ExllamaV2 version check.\n" "If you aren't a developer, please keep this off!" @@ -133,12 +127,12 @@ def entrypoint(arguments: Optional[dict] = None): check_exllama_version() # Enable CUDA malloc backend - if unwrap(config.developer.get("cuda_malloc_backend"), False): + if config.developer.cuda_malloc_backend: os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "backend:cudaMallocAsync" logger.warning("EXPERIMENTAL: Enabled the pytorch CUDA malloc backend.") # Use Uvloop/Winloop - if unwrap(config.developer.get("uvloop"), False): + if config.developer.uvloop: if platform.system() == "Windows": from winloop import install else: @@ -150,7 +144,7 @@ def entrypoint(arguments: Optional[dict] = None): logger.warning("EXPERIMENTAL: Running program with Uvloop/Winloop.") # Set the process priority - if unwrap(config.developer.get("realtime_process_priority"), False): + if config.developer.realtime_process_priority: import psutil current_process = psutil.Process(os.getpid()) From 36e991c16e5ec4dc00c4dbd779ada4c37ace6f00 Mon Sep 17 00:00:00 2001 From: Jake <84923604+SecretiveShell@users.noreply.github.com> Date: Fri, 6 Sep 2024 00:27:53 +0100 Subject: [PATCH 02/51] automate arg parse - generate arg parser dynamically - remove legavy parser code --- common/args.py | 250 +++++++------------------------------------------ 1 file changed, 35 insertions(+), 215 deletions(-) diff --git a/common/args.py b/common/args.py index 4755c39c..ffecaaa2 100644 --- a/common/args.py +++ b/common/args.py @@ -1,7 +1,9 @@ """Argparser for overriding config values""" import argparse - +from typing import get_origin, get_args, Optional, Union, List +from pydantic import BaseModel +from common.tabby_config import config def str_to_bool(value): """Converts a string into a boolean value""" @@ -32,24 +34,40 @@ def argument_with_auto(value): def init_argparser(): - """Creates an argument parser that any function can use""" - - parser = argparse.ArgumentParser( - epilog="NOTE: These args serve to override parts of the config. " - + "It's highly recommended to edit config.yml for all options and " - + "better descriptions!" 
- ) - add_network_args(parser) - add_model_args(parser) - add_embeddings_args(parser) - add_logging_args(parser) - add_developer_args(parser) - add_sampling_args(parser) - add_config_args(parser) + parser = argparse.ArgumentParser(description="TabbyAPI server") + + # Loop through the fields in the top-level model (ModelX in this case) + for field_name, field_type in config.__annotations__.items(): + # Get the sub-model type (e.g., ModelA, ModelB) + sub_model = field_type.__base__ + + # Create argument group for the sub-model + group = parser.add_argument_group(field_name, description=f"Arguments for {field_name}") + + # Loop through each field in the sub-model (e.g., ModelA, ModelB) + for sub_field_name, sub_field_type in field_type.__annotations__.items(): + field = field_type.__fields__[sub_field_name] + help_text = field.description if field.description else "No description available" + + # Handle Optional types or other generic types + origin = get_origin(sub_field_type) + if origin is Union: # Check if the type is Union (which includes Optional) + sub_field_type = next(t for t in get_args(sub_field_type) if t is not type(None)) + elif origin is List : sub_field_type = get_args(sub_field_type)[0] + + + # Map Pydantic types to argparse types + print(sub_field_type, type(sub_field_type)) + if isinstance(sub_field_type, type) and issubclass(sub_field_type, (int, float, str, bool)): + arg_type = sub_field_type + else: + arg_type = str # Default to string for unknown types + + # Add the argument for each field in the sub-model + group.add_argument(f"--{sub_field_name}", type=arg_type, help=help_text) return parser - def convert_args_to_dict(args: argparse.Namespace, parser: argparse.ArgumentParser): """Broad conversion of surface level arg groups to dictionaries""" @@ -63,202 +81,4 @@ def convert_args_to_dict(args: argparse.Namespace, parser: argparse.ArgumentPars arg_groups[group.title] = group_dict - return arg_groups - - -def add_config_args(parser: argparse.ArgumentParser): - """Adds config arguments""" - - parser.add_argument( - "--config", type=str, help="Path to an overriding config.yml file" - ) - - -def add_network_args(parser: argparse.ArgumentParser): - """Adds networking arguments""" - - network_group = parser.add_argument_group("network") - network_group.add_argument("--host", type=str, help="The IP to host on") - network_group.add_argument("--port", type=int, help="The port to host on") - network_group.add_argument( - "--disable-auth", - type=str_to_bool, - help="Disable HTTP token authenticaion with requests", - ) - network_group.add_argument( - "--send-tracebacks", - type=str_to_bool, - help="Decide whether to send error tracebacks over the API", - ) - network_group.add_argument( - "--api-servers", - type=str, - nargs="+", - help="API servers to enable. 
Options: (OAI, Kobold)", - ) - - -def add_model_args(parser: argparse.ArgumentParser): - """Adds model arguments""" - - model_group = parser.add_argument_group("model") - model_group.add_argument( - "--model-dir", type=str, help="Overrides the directory to look for models" - ) - model_group.add_argument("--model-name", type=str, help="An initial model to load") - model_group.add_argument( - "--use-dummy-models", - type=str_to_bool, - help="Add dummy OAI model names for API queries", - ) - model_group.add_argument( - "--use-as-default", - type=str, - nargs="+", - help="Names of args to use as a default fallback for API load requests ", - ) - model_group.add_argument( - "--max-seq-len", type=int, help="Override the maximum model sequence length" - ) - model_group.add_argument( - "--override-base-seq-len", - type=str_to_bool, - help="Overrides base model context length", - ) - model_group.add_argument( - "--tensor-parallel", - type=str_to_bool, - help="Use tensor parallelism to load models", - ) - model_group.add_argument( - "--gpu-split-auto", - type=str_to_bool, - help="Automatically allocate resources to GPUs", - ) - model_group.add_argument( - "--autosplit-reserve", - type=int, - nargs="+", - help="Reserve VRAM used for autosplit loading (in MBs) ", - ) - model_group.add_argument( - "--gpu-split", - type=float, - nargs="+", - help="An integer array of GBs of vram to split between GPUs. " - + "Ignored if gpu_split_auto is true", - ) - model_group.add_argument( - "--rope-scale", type=float, help="Sets rope_scale or compress_pos_emb" - ) - model_group.add_argument( - "--rope-alpha", - type=argument_with_auto, - help="Sets rope_alpha for NTK", - ) - model_group.add_argument( - "--cache-mode", - type=str, - help="Set the quantization level of the K/V cache. 
Options: (FP16, Q8, Q6, Q4)", - ) - model_group.add_argument( - "--cache-size", - type=int, - help="The size of the prompt cache (in number of tokens) to allocate", - ) - model_group.add_argument( - "--chunk-size", - type=int, - help="Chunk size for prompt ingestion", - ) - model_group.add_argument( - "--max-batch-size", - type=int, - help="Maximum amount of prompts to process at one time", - ) - model_group.add_argument( - "--prompt-template", - type=str, - help="Set the jinja2 prompt template for chat completions", - ) - model_group.add_argument( - "--num-experts-per-token", - type=int, - help="Number of experts to use per token in MoE models", - ) - model_group.add_argument( - "--fasttensors", - type=str_to_bool, - help="Possibly increases model loading speeds", - ) - - -def add_logging_args(parser: argparse.ArgumentParser): - """Adds logging arguments""" - - logging_group = parser.add_argument_group("logging") - logging_group.add_argument( - "--log-prompt", type=str_to_bool, help="Enable prompt logging" - ) - logging_group.add_argument( - "--log-generation-params", - type=str_to_bool, - help="Enable generation parameter logging", - ) - logging_group.add_argument( - "--log-requests", - type=str_to_bool, - help="Enable request logging", - ) - - -def add_developer_args(parser: argparse.ArgumentParser): - """Adds developer-specific arguments""" - - developer_group = parser.add_argument_group("developer") - developer_group.add_argument( - "--unsafe-launch", type=str_to_bool, help="Skip Exllamav2 version check" - ) - developer_group.add_argument( - "--disable-request-streaming", - type=str_to_bool, - help="Disables API request streaming", - ) - developer_group.add_argument( - "--cuda-malloc-backend", - type=str_to_bool, - help="Runs with the pytorch CUDA malloc backend", - ) - developer_group.add_argument( - "--uvloop", - type=str_to_bool, - help="Run asyncio using Uvloop or Winloop", - ) - - -def add_sampling_args(parser: argparse.ArgumentParser): - """Adds sampling-specific arguments""" - - sampling_group = parser.add_argument_group("sampling") - sampling_group.add_argument( - "--override-preset", type=str, help="Select a sampler override preset" - ) - - -def add_embeddings_args(parser: argparse.ArgumentParser): - """Adds arguments specific to embeddings""" - - embeddings_group = parser.add_argument_group("embeddings") - embeddings_group.add_argument( - "--embedding-model-dir", - type=str, - help="Overrides the directory to look for models", - ) - embeddings_group.add_argument( - "--embedding-model-name", type=str, help="An initial model to load" - ) - embeddings_group.add_argument( - "--embeddings-device", - type=str, - help="Device to use for embeddings. 
Options: (cpu, auto, cuda)", - ) + return arg_groups \ No newline at end of file From 8e9344642e33a4320854df96022d8c9e1fd06a6c Mon Sep 17 00:00:00 2001 From: TerminalMan <84923604+SecretiveShell@users.noreply.github.com> Date: Fri, 6 Sep 2024 14:31:28 +0100 Subject: [PATCH 03/51] patch pydantic config into old config - convert pydantic to dict to avoid errors with current files - fix formatting --- common/args.py | 30 ++++++++++++++++++++---------- main.py | 9 ++++++--- 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/common/args.py b/common/args.py index ffecaaa2..c4fbd463 100644 --- a/common/args.py +++ b/common/args.py @@ -5,6 +5,7 @@ from pydantic import BaseModel from common.tabby_config import config + def str_to_bool(value): """Converts a string into a boolean value""" @@ -40,34 +41,43 @@ def init_argparser(): for field_name, field_type in config.__annotations__.items(): # Get the sub-model type (e.g., ModelA, ModelB) sub_model = field_type.__base__ - + # Create argument group for the sub-model - group = parser.add_argument_group(field_name, description=f"Arguments for {field_name}") - + group = parser.add_argument_group( + field_name, description=f"Arguments for {field_name}" + ) + # Loop through each field in the sub-model (e.g., ModelA, ModelB) for sub_field_name, sub_field_type in field_type.__annotations__.items(): field = field_type.__fields__[sub_field_name] - help_text = field.description if field.description else "No description available" + help_text = ( + field.description if field.description else "No description available" + ) # Handle Optional types or other generic types origin = get_origin(sub_field_type) if origin is Union: # Check if the type is Union (which includes Optional) - sub_field_type = next(t for t in get_args(sub_field_type) if t is not type(None)) - elif origin is List : sub_field_type = get_args(sub_field_type)[0] - + sub_field_type = next( + t for t in get_args(sub_field_type) if t is not type(None) + ) + elif origin is List: + sub_field_type = get_args(sub_field_type)[0] # Map Pydantic types to argparse types print(sub_field_type, type(sub_field_type)) - if isinstance(sub_field_type, type) and issubclass(sub_field_type, (int, float, str, bool)): + if isinstance(sub_field_type, type) and issubclass( + sub_field_type, (int, float, str, bool) + ): arg_type = sub_field_type else: arg_type = str # Default to string for unknown types - + # Add the argument for each field in the sub-model group.add_argument(f"--{sub_field_name}", type=arg_type, help=help_text) return parser + def convert_args_to_dict(args: argparse.Namespace, parser: argparse.ArgumentParser): """Broad conversion of surface level arg groups to dictionaries""" @@ -81,4 +91,4 @@ def convert_args_to_dict(args: argparse.Namespace, parser: argparse.ArgumentPars arg_groups[group.title] = group_dict - return arg_groups \ No newline at end of file + return arg_groups diff --git a/main.py b/main.py index 89cc6bf6..7385a1dd 100644 --- a/main.py +++ b/main.py @@ -69,12 +69,14 @@ async def entrypoint_async(): model_path = pathlib.Path(config.model.model_dir) model_path = model_path / model_name - await model.load_model(model_path.resolve(), **config.model) + # TODO: remove model_dump() + await model.load_model(model_path.resolve(), **config.model.model_dump()) # Load loras after loading the model if config.lora.loras: lora_dir = pathlib.Path(config.lora.lora_dir) - await model.container.load_loras(lora_dir.resolve(), **config.lora) + # TODO: remove model_dump() + await 
model.container.load_loras(lora_dir.resolve(), **config.lora.model_dump()) # If an initial embedding model name is specified, create a separate container # and load the model @@ -84,7 +86,8 @@ async def entrypoint_async(): embedding_model_path = embedding_model_path / embedding_model_name try: - await model.load_embedding_model(embedding_model_path, **config.embeddings) + # TODO: remove model_dump() + await model.load_embedding_model(embedding_model_path, **config.embeddings.model_dump()) except ImportError as ex: logger.error(ex.msg) From 420fd84f6b8f1fa97f499892a2f03ab2ca64d4b4 Mon Sep 17 00:00:00 2001 From: TerminalMan <84923604+SecretiveShell@users.noreply.github.com> Date: Fri, 6 Sep 2024 15:05:48 +0100 Subject: [PATCH 04/51] add env var loading automation - load config from env vars (eg. TABBY_NETWORK_HOST) - remove print statements - improve command line args automation --- common/args.py | 12 ++---------- common/tabby_config.py | 25 ++++++++++++++++++------- main.py | 8 ++++++-- 3 files changed, 26 insertions(+), 19 deletions(-) diff --git a/common/args.py b/common/args.py index c4fbd463..42f795a4 100644 --- a/common/args.py +++ b/common/args.py @@ -37,26 +37,20 @@ def argument_with_auto(value): def init_argparser(): parser = argparse.ArgumentParser(description="TabbyAPI server") - # Loop through the fields in the top-level model (ModelX in this case) for field_name, field_type in config.__annotations__.items(): - # Get the sub-model type (e.g., ModelA, ModelB) - sub_model = field_type.__base__ - - # Create argument group for the sub-model group = parser.add_argument_group( field_name, description=f"Arguments for {field_name}" ) - # Loop through each field in the sub-model (e.g., ModelA, ModelB) + # Loop through each field in the sub-model for sub_field_name, sub_field_type in field_type.__annotations__.items(): field = field_type.__fields__[sub_field_name] help_text = ( field.description if field.description else "No description available" ) - # Handle Optional types or other generic types origin = get_origin(sub_field_type) - if origin is Union: # Check if the type is Union (which includes Optional) + if origin is Union: sub_field_type = next( t for t in get_args(sub_field_type) if t is not type(None) ) @@ -64,7 +58,6 @@ def init_argparser(): sub_field_type = get_args(sub_field_type)[0] # Map Pydantic types to argparse types - print(sub_field_type, type(sub_field_type)) if isinstance(sub_field_type, type) and issubclass( sub_field_type, (int, float, str, bool) ): @@ -72,7 +65,6 @@ def init_argparser(): else: arg_type = str # Default to string for unknown types - # Add the argument for each field in the sub-model group.add_argument(f"--{sub_field_name}", type=arg_type, help=help_text) return parser diff --git a/common/tabby_config.py b/common/tabby_config.py index 5aac0b84..a379ebb7 100644 --- a/common/tabby_config.py +++ b/common/tabby_config.py @@ -1,7 +1,8 @@ import yaml import pathlib from loguru import logger -from typing import Optional +from typing import Optional, Union, get_origin, get_args +from os import getenv from common.utils import unwrap, merge_dicts from common.config_models import tabby_config_model @@ -15,6 +16,7 @@ def load_config(self, arguments: Optional[dict] = None): # config is applied in order of items in the list configs = [ self._from_file(pathlib.Path("config.yml")), + self._from_environment(), self._from_args(unwrap(arguments, {})), ] @@ -54,7 +56,7 @@ def _from_args(self, args: dict): config = self.from_file(pathlib.Path(config_override)) return 
config # Return early if loading from file - for key in ["network", "model", "logging", "developer", "embeddings"]: + for key in tabby_config_model.model_fields.keys(): override = args.get(key) if override: if key == "logging": @@ -67,11 +69,20 @@ def _from_args(self, args: dict): def _from_environment(self): """loads configuration from environment variables""" - # TODO: load config from environment variables - # this means that we can have host default to 0.0.0.0 in docker for example - # this would also mean that docker containers no longer require a non - # default config file to be used - pass + config = {} + + for field_name in tabby_config_model.model_fields.keys(): + section_config = {} + for sub_field_name in getattr( + tabby_config_model(), field_name + ).model_fields.keys(): + setting = getenv(f"TABBY_{field_name}_{sub_field_name}".upper(), None) + if setting is not None: + section_config[sub_field_name] = setting + + config[field_name] = section_config + + return config # Create an empty instance of the shared var to make sure nothing breaks diff --git a/main.py b/main.py index 7385a1dd..6254bf20 100644 --- a/main.py +++ b/main.py @@ -76,7 +76,9 @@ async def entrypoint_async(): if config.lora.loras: lora_dir = pathlib.Path(config.lora.lora_dir) # TODO: remove model_dump() - await model.container.load_loras(lora_dir.resolve(), **config.lora.model_dump()) + await model.container.load_loras( + lora_dir.resolve(), **config.lora.model_dump() + ) # If an initial embedding model name is specified, create a separate container # and load the model @@ -87,7 +89,9 @@ async def entrypoint_async(): try: # TODO: remove model_dump() - await model.load_embedding_model(embedding_model_path, **config.embeddings.model_dump()) + await model.load_embedding_model( + embedding_model_path, **config.embeddings.model_dump() + ) except ImportError as ex: logger.error(ex.msg) From 0d7459191c77a7e12795a0b9e702cfaa66df76b5 Mon Sep 17 00:00:00 2001 From: TerminalMan <84923604+SecretiveShell@users.noreply.github.com> Date: Wed, 11 Sep 2024 16:13:31 +0100 Subject: [PATCH 05/51] fix arg parser for dict types --- common/args.py | 71 ++++++++++++++++++++++++++++-------------- common/tabby_config.py | 1 - 2 files changed, 48 insertions(+), 24 deletions(-) diff --git a/common/args.py b/common/args.py index 42f795a4..f737a0c7 100644 --- a/common/args.py +++ b/common/args.py @@ -34,39 +34,64 @@ def argument_with_auto(value): ) from ex +def map_pydantic_type_to_argparse(pydantic_type): + """ + Maps Pydantic types to argparse compatible types. + Handles special cases like Union and List. + """ + origin = get_origin(pydantic_type) + + # Handle optional types + if origin is Union: + # Filter out NoneType + pydantic_type = next(t for t in get_args(pydantic_type) if t is not type(None)) + + elif origin is List: + pydantic_type = get_args(pydantic_type)[0] # Get the list item type + + # Map basic types (int, float, str, bool) + if isinstance(pydantic_type, type) and issubclass( + pydantic_type, (int, float, str, bool) + ): + return pydantic_type + + return str + + +def add_field_to_group(group, field_name, field_type, field): + """ + Adds a Pydantic field to an argparse argument group. + """ + arg_type = map_pydantic_type_to_argparse(field_type) + help_text = field.description if field.description else "No description available" + + group.add_argument(f"--{field_name}", type=arg_type, help=help_text) + + def init_argparser(): + """ + Initializes an argparse parser based on a Pydantic config schema. 
+ """ parser = argparse.ArgumentParser(description="TabbyAPI server") + # Loop through each top-level field in the config for field_name, field_type in config.__annotations__.items(): group = parser.add_argument_group( field_name, description=f"Arguments for {field_name}" ) - # Loop through each field in the sub-model - for sub_field_name, sub_field_type in field_type.__annotations__.items(): - field = field_type.__fields__[sub_field_name] - help_text = ( - field.description if field.description else "No description available" + # Check if the field_type is a Pydantic model + if hasattr(field_type, "__annotations__"): + for sub_field_name, sub_field_type in field_type.__annotations__.items(): + field = field_type.__fields__[sub_field_name] + add_field_to_group(group, sub_field_name, sub_field_type, field) + else: + # Handle cases where the field_type is not a Pydantic mode + arg_type = map_pydantic_type_to_argparse(field_type) + group.add_argument( + f"--{field_name}", type=arg_type, help=f"Argument for {field_name}" ) - origin = get_origin(sub_field_type) - if origin is Union: - sub_field_type = next( - t for t in get_args(sub_field_type) if t is not type(None) - ) - elif origin is List: - sub_field_type = get_args(sub_field_type)[0] - - # Map Pydantic types to argparse types - if isinstance(sub_field_type, type) and issubclass( - sub_field_type, (int, float, str, bool) - ): - arg_type = sub_field_type - else: - arg_type = str # Default to string for unknown types - - group.add_argument(f"--{sub_field_name}", type=arg_type, help=help_text) - return parser diff --git a/common/tabby_config.py b/common/tabby_config.py index cd7cb14f..d571319e 100644 --- a/common/tabby_config.py +++ b/common/tabby_config.py @@ -10,7 +10,6 @@ class TabbyConfig(tabby_config_model): - # Persistent defaults # TODO: make this pydantic? model_defaults: dict = {} From c6f9806ec6368f1b286dbab9d928a7fcc96a6d19 Mon Sep 17 00:00:00 2001 From: TerminalMan <84923604+SecretiveShell@users.noreply.github.com> Date: Wed, 11 Sep 2024 18:00:29 +0100 Subject: [PATCH 06/51] remove unused imports --- common/args.py | 3 +-- common/config_models.py | 2 +- common/gen_logging.py | 3 +-- common/networking.py | 1 - common/tabby_config.py | 2 +- endpoints/OAI/router.py | 1 - endpoints/server.py | 1 - main.py | 1 - 8 files changed, 4 insertions(+), 10 deletions(-) diff --git a/common/args.py b/common/args.py index f737a0c7..400af021 100644 --- a/common/args.py +++ b/common/args.py @@ -1,8 +1,7 @@ """Argparser for overriding config values""" import argparse -from typing import get_origin, get_args, Optional, Union, List -from pydantic import BaseModel +from typing import get_origin, get_args, Union, List from common.tabby_config import config diff --git a/common/config_models.py b/common/config_models.py index 9bbf5f1c..da983b8b 100644 --- a/common/config_models.py +++ b/common/config_models.py @@ -1,5 +1,5 @@ from pydantic import BaseModel, ConfigDict, Field, model_validator -from typing import List, Optional, Union, get_type_hints +from typing import List, Optional, Union from common.utils import unwrap diff --git a/common/gen_logging.py b/common/gen_logging.py index 3252bb24..6a917dd7 100644 --- a/common/gen_logging.py +++ b/common/gen_logging.py @@ -2,9 +2,8 @@ Functions for logging generation events. 
""" -from pydantic import BaseModel from loguru import logger -from typing import Dict, Optional +from typing import Optional from common.tabby_config import config diff --git a/common/networking.py b/common/networking.py index e0812723..597ed078 100644 --- a/common/networking.py +++ b/common/networking.py @@ -11,7 +11,6 @@ from uuid import uuid4 from common.tabby_config import config -from common.utils import unwrap class TabbyRequestErrorMessage(BaseModel): diff --git a/common/tabby_config.py b/common/tabby_config.py index d571319e..9738c120 100644 --- a/common/tabby_config.py +++ b/common/tabby_config.py @@ -1,7 +1,7 @@ import yaml import pathlib from loguru import logger -from typing import Optional, Union, get_origin, get_args +from typing import Optional from os import getenv from common.utils import unwrap, merge_dicts diff --git a/endpoints/OAI/router.py b/endpoints/OAI/router.py index f120e4d3..b6a44c98 100644 --- a/endpoints/OAI/router.py +++ b/endpoints/OAI/router.py @@ -8,7 +8,6 @@ from common.model import check_embeddings_container, check_model_container from common.networking import handle_request_error, run_with_request_disconnect from common.tabby_config import config -from common.utils import unwrap from endpoints.OAI.types.completion import CompletionRequest, CompletionResponse from endpoints.OAI.types.chat_completion import ( ChatCompletionRequest, diff --git a/endpoints/server.py b/endpoints/server.py index e1c81b54..d6723a19 100644 --- a/endpoints/server.py +++ b/endpoints/server.py @@ -8,7 +8,6 @@ from common.logger import UVICORN_LOG_CONFIG from common.networking import get_global_depends from common.tabby_config import config -from common.utils import unwrap from endpoints.Kobold import router as KoboldRouter from endpoints.OAI import router as OAIRouter from endpoints.core.router import router as CoreRouter diff --git a/main.py b/main.py index 6e7943bf..bd706868 100644 --- a/main.py +++ b/main.py @@ -16,7 +16,6 @@ from common.networking import is_port_in_use from common.signals import signal_handler from common.tabby_config import config -from common.utils import unwrap from endpoints.server import export_openapi, start_api from endpoints.utils import do_export_openapi From 05f1c3e293e373ff1b925b5dc6397fff711b554d Mon Sep 17 00:00:00 2001 From: TerminalMan <84923604+SecretiveShell@users.noreply.github.com> Date: Wed, 11 Sep 2024 21:43:30 +0100 Subject: [PATCH 07/51] fix line lengths --- common/config_models.py | 579 +++++++++++++++++++++++----------------- 1 file changed, 331 insertions(+), 248 deletions(-) diff --git a/common/config_models.py b/common/config_models.py index da983b8b..286057ec 100644 --- a/common/config_models.py +++ b/common/config_models.py @@ -1,248 +1,331 @@ -from pydantic import BaseModel, ConfigDict, Field, model_validator -from typing import List, Optional, Union - -from common.utils import unwrap - - -class config_config_model(BaseModel): - config: Optional[str] = Field( - None, description="Path to an overriding config.yml file" - ) - - -class network_config_model(BaseModel): - host: Optional[str] = Field("127.0.0.1", description="The IP to host on") - port: Optional[int] = Field(5000, description="The port to host on") - disable_auth: Optional[bool] = Field( - False, description="Disable HTTP token authentication with requests" - ) - send_tracebacks: Optional[bool] = Field( - False, description="Decide whether to send error tracebacks over the API" - ) - api_servers: Optional[List[str]] = Field( - [ - "OAI", - ], - description="API 
servers to enable. Options: (OAI, Kobold)", - ) - - -class logging_config_model(BaseModel): - log_prompt: Optional[bool] = Field(False, description="Enable prompt logging") - log_generation_params: Optional[bool] = Field( - False, description="Enable generation parameter logging" - ) - log_requests: Optional[bool] = Field(False, description="Enable request logging") - - -class model_config_model(BaseModel): - model_dir: str = Field( - "models", - description="Overrides the directory to look for models (default: models). Windows users, do NOT put this path in quotes.", - ) - use_dummy_models: Optional[bool] = Field( - False, - description="Sends dummy model names when the models endpoint is queried. Enable this if looking for specific OAI models.", - ) - model_name: Optional[str] = Field( - None, - description="An initial model to load. Make sure the model is located in the model directory! REQUIRED: This must be filled out to load a model on startup.", - ) - use_as_default: List[str] = Field( - default_factory=list, - description="Names of args to use as a default fallback for API load requests (default: []). Example: ['max_seq_len', 'cache_mode']", - ) - max_seq_len: Optional[int] = Field( - None, - description="Max sequence length. Fetched from the model's base sequence length in config.json by default.", - ) - override_base_seq_len: Optional[int] = Field( - None, - description="Overrides base model context length. WARNING: Only use this if the model's base sequence length is incorrect.", - ) - tensor_parallel: Optional[bool] = Field( - False, - description="Load model with tensor parallelism. Fallback to autosplit if GPU split isn't provided.", - ) - gpu_split_auto: Optional[bool] = Field( - True, - description="Automatically allocate resources to GPUs (default: True). Not parsed for single GPU users.", - ) - autosplit_reserve: List[int] = Field( - [96], - description="Reserve VRAM used for autosplit loading (default: 96 MB on GPU 0). Represented as an array of MB per GPU.", - ) - gpu_split: List[float] = Field( - default_factory=list, - description="An integer array of GBs of VRAM to split between GPUs (default: []). Used with tensor parallelism.", - ) - rope_scale: Optional[float] = Field( - 1.0, - description="Rope scale (default: 1.0). Same as compress_pos_emb. Only use if the model was trained on long context with rope.", - ) - rope_alpha: Optional[Union[float, str]] = Field( - 1.0, - description="Rope alpha (default: 1.0). Same as alpha_value. Set to 'auto' to auto-calculate.", - ) - cache_mode: Optional[str] = Field( - "FP16", - description="Enable different cache modes for VRAM savings (default: FP16). Possible values: FP16, Q8, Q6, Q4.", - ) - cache_size: Optional[int] = Field( - None, - description="Size of the prompt cache to allocate (default: max_seq_len). Must be a multiple of 256.", - ) - chunk_size: Optional[int] = Field( - 2048, - description="Chunk size for prompt ingestion (default: 2048). A lower value reduces VRAM usage but decreases ingestion speed.", - ) - max_batch_size: Optional[int] = Field( - None, - description="Set the maximum number of prompts to process at one time (default: None/Automatic). Automatically calculated if left blank.", - ) - prompt_template: Optional[str] = Field( - None, - description="Set the prompt template for this model. If empty, attempts to look for the model's chat template.", - ) - num_experts_per_token: Optional[int] = Field( - None, - description="Number of experts to use per token. Fetched from the model's config.json. 
For MoE models only.", - ) - fasttensors: Optional[bool] = Field( - False, - description="Enables fasttensors to possibly increase model loading speeds (default: False).", - ) - - -class draft_model_config_model(BaseModel): - draft_model_dir: Optional[str] = Field( - "models", - description="Overrides the directory to look for draft models (default: models)", - ) - draft_model_name: Optional[str] = Field( - None, - description="An initial draft model to load. Ensure the model is in the model directory.", - ) - draft_rope_scale: Optional[float] = Field( - 1.0, - description="Rope scale for draft models (default: 1.0). Same as compress_pos_emb. Use if the draft model was trained on long context with rope.", - ) - draft_rope_alpha: Optional[float] = Field( - None, - description="Rope alpha for draft models (default: None). Same as alpha_value. Leave blank to auto-calculate the alpha value.", - ) - draft_cache_mode: Optional[str] = Field( - "FP16", - description="Cache mode for draft models to save VRAM (default: FP16). Possible values: FP16, Q8, Q6, Q4.", - ) - - -class lora_instance_model(BaseModel): - name: str = Field(..., description="Name of the LoRA model") - scaling: float = Field( - 1.0, description="Scaling factor for the LoRA model (default: 1.0)" - ) - - -class lora_config_model(BaseModel): - lora_dir: Optional[str] = Field( - "loras", description="Directory to look for LoRAs (default: 'loras')" - ) - loras: Optional[List[lora_instance_model]] = Field( - None, - description="List of LoRAs to load and associated scaling factors (default scaling: 1.0)", - ) - - -class sampling_config_model(BaseModel): - override_preset: Optional[str] = Field( - None, description="Select a sampler override preset" - ) - - -class developer_config_model(BaseModel): - unsafe_launch: Optional[bool] = Field( - False, description="Skip Exllamav2 version check" - ) - disable_request_streaming: Optional[bool] = Field( - False, description="Disables API request streaming" - ) - cuda_malloc_backend: Optional[bool] = Field( - False, description="Runs with the pytorch CUDA malloc backend" - ) - uvloop: Optional[bool] = Field( - False, description="Run asyncio using Uvloop or Winloop" - ) - realtime_process_priority: Optional[bool] = Field( - False, - description="Set process to use a higher priority For realtime process priority, run as administrator or sudo Otherwise, the priority will be set to high", - ) - - -class embeddings_config_model(BaseModel): - embedding_model_dir: Optional[str] = Field( - "models", - description="Overrides directory to look for embedding models (default: models)", - ) - embeddings_device: Optional[str] = Field( - "cpu", - description="Device to load embedding models on (default: cpu). Possible values: cpu, auto, cuda. 
If using an AMD GPU, set this value to 'cuda'.", - ) - embedding_model_name: Optional[str] = Field( - None, description="The embeddings model to load" - ) - - -class tabby_config_model(BaseModel): - config: config_config_model = Field(default_factory=config_config_model) - network: network_config_model = Field(default_factory=network_config_model) - logging: logging_config_model = Field(default_factory=logging_config_model) - model: model_config_model = Field(default_factory=model_config_model) - draft_model: draft_model_config_model = Field( - default_factory=draft_model_config_model - ) - lora: lora_config_model = Field(default_factory=lora_config_model) - sampling: sampling_config_model = Field(default_factory=sampling_config_model) - developer: developer_config_model = Field(default_factory=developer_config_model) - embeddings: embeddings_config_model = Field(default_factory=embeddings_config_model) - - @model_validator(mode="before") - def set_defaults(cls, values): - for field_name, field_value in values.items(): - if field_value is None: - default_instance = cls.__annotations__[field_name]().dict() - values[field_name] = cls.__annotations__[field_name](**default_instance) - return values - - model_config = ConfigDict(validate_assignment=True) - - -def generate_config_file(filename="config_sample.yml", indentation=2): - schema = tabby_config_model.model_json_schema() - - def dump_def(id: str, indent=2): - yaml = "" - indent = " " * indentation * indent - id = id.split("/")[-1] - - section = schema["$defs"][id]["properties"] - for property in section.keys(): # get type - comment = section[property]["description"] - yaml += f"{indent}# {comment}\n" - - value = unwrap(section[property].get("default"), "") - yaml += f"{indent}{property}: {value}\n\n" - - return yaml + "\n" - - yaml = "" - for section in schema["properties"].keys(): - yaml += f"{section}:\n" - yaml += dump_def(schema["properties"][section]["$ref"]) - yaml += "\n" - - with open(filename, "w") as f: - f.write(yaml) - - -# generate_config_file("test.yml") +from pydantic import BaseModel, ConfigDict, Field, model_validator +from typing import List, Optional, Union + +from common.utils import unwrap + + +class config_config_model(BaseModel): + config: Optional[str] = Field( + None, description=("Path to an overriding config.yml file") + ) + + +class network_config_model(BaseModel): + host: Optional[str] = Field("127.0.0.1", description=("The IP to host on")) + port: Optional[int] = Field(5000, description=("The port to host on")) + disable_auth: Optional[bool] = Field( + False, description=("Disable HTTP token authentication with requests") + ) + send_tracebacks: Optional[bool] = Field( + False, + description=("Decide whether to send error tracebacks over the API"), + ) + api_servers: Optional[List[str]] = Field( + [ + "OAI", + ], + description=("API servers to enable. Options: (OAI, Kobold)"), + ) + + +class logging_config_model(BaseModel): + log_prompt: Optional[bool] = Field(False, description=("Enable prompt logging")) + log_generation_params: Optional[bool] = Field( + False, description=("Enable generation parameter logging") + ) + log_requests: Optional[bool] = Field(False, description=("Enable request logging")) + + +class model_config_model(BaseModel): + model_dir: str = Field( + "models", + description=( + "Overrides the directory to look for models (default: models). Windows" + "users, do NOT put this path in quotes." 
+ ), + ) + use_dummy_models: Optional[bool] = Field( + False, + description=( + "Sends dummy model names when the models endpoint is queried. Enable this" + "if looking for specific OAI models." + ), + ) + model_name: Optional[str] = Field( + None, + description=( + "An initial model to load. Make sure the model is located in the model" + "directory! REQUIRED: This must be filled out to load a model on startup." + ), + ) + use_as_default: List[str] = Field( + default_factory=list, + description=( + "Names of args to use as a default fallback for API load requests" + "(default: []). Example: ['max_seq_len', 'cache_mode']" + ), + ) + max_seq_len: Optional[int] = Field( + None, + description=( + "Max sequence length. Fetched from the model's base sequence length in" + "config.json by default." + ), + ) + override_base_seq_len: Optional[int] = Field( + None, + description=( + "Overrides base model context length. WARNING: Only use this if the" + "model's base sequence length is incorrect." + ), + ) + tensor_parallel: Optional[bool] = Field( + False, + description=( + "Load model with tensor parallelism. Fallback to autosplit if GPU split" + "isn't provided." + ), + ) + gpu_split_auto: Optional[bool] = Field( + True, + description=( + "Automatically allocate resources to GPUs (default: True). Not parsed for" + "single GPU users." + ), + ) + autosplit_reserve: List[int] = Field( + [96], + description=( + "Reserve VRAM used for autosplit loading (default: 96 MB on GPU 0)." + "Represented as an array of MB per GPU." + ), + ) + gpu_split: List[float] = Field( + default_factory=list, + description=( + "An integer array of GBs of VRAM to split between GPUs (default: [])." + "Used with tensor parallelism." + ), + ) + rope_scale: Optional[float] = Field( + 1.0, + description=( + "Rope scale (default: 1.0). Same as compress_pos_emb. Only use if the" + "model was trained on long context with rope." + ), + ) + rope_alpha: Optional[Union[float, str]] = Field( + 1.0, + description=( + "Rope alpha (default: 1.0). Same as alpha_value. Set to 'auto' to auto-" + "calculate." + ), + ) + cache_mode: Optional[str] = Field( + "FP16", + description=( + "Enable different cache modes for VRAM savings (default: FP16). Possible" + "values: FP16, Q8, Q6, Q4." + ), + ) + cache_size: Optional[int] = Field( + None, + description=( + "Size of the prompt cache to allocate (default: max_seq_len). Must be a" + "multiple of 256." + ), + ) + chunk_size: Optional[int] = Field( + 2048, + description=( + "Chunk size for prompt ingestion (default: 2048). A lower value reduces" + "VRAM usage but decreases ingestion speed." + ), + ) + max_batch_size: Optional[int] = Field( + None, + description=( + "Set the maximum number of prompts to process at one time (default:" + "None/Automatic). Automatically calculated if left blank." + ), + ) + prompt_template: Optional[str] = Field( + None, + description=( + "Set the prompt template for this model. If empty, attempts to look for" + "the model's chat template." + ), + ) + num_experts_per_token: Optional[int] = Field( + None, + description=( + "Number of experts to use per token. Fetched from the model's" + "config.json. For MoE models only." + ), + ) + fasttensors: Optional[bool] = Field( + False, + description=( + "Enables fasttensors to possibly increase model loading speeds (default:" + "False)." 
+ ), + ) + + +class draft_model_config_model(BaseModel): + draft_model_dir: Optional[str] = Field( + "models", + description=( + "Overrides the directory to look for draft models (default: models)" + ), + ) + draft_model_name: Optional[str] = Field( + None, + description=( + "An initial draft model to load. Ensure the model is in the model" + "directory." + ), + ) + draft_rope_scale: Optional[float] = Field( + 1.0, + description=( + "Rope scale for draft models (default: 1.0). Same as compress_pos_emb." + "Use if the draft model was trained on long context with rope." + ), + ) + draft_rope_alpha: Optional[float] = Field( + None, + description=( + "Rope alpha for draft models (default: None). Same as alpha_value. Leave" + "blank to auto-calculate the alpha value." + ), + ) + draft_cache_mode: Optional[str] = Field( + "FP16", + description=( + "Cache mode for draft models to save VRAM (default: FP16). Possible" + "values: FP16, Q8, Q6, Q4." + ), + ) + + +class lora_instance_model(BaseModel): + name: str = Field(..., description=("Name of the LoRA model")) + scaling: float = Field( + 1.0, description=("Scaling factor for the LoRA model (default: 1.0)") + ) + + +class lora_config_model(BaseModel): + lora_dir: Optional[str] = Field( + "loras", description=("Directory to look for LoRAs (default: 'loras')") + ) + loras: Optional[List[lora_instance_model]] = Field( + None, + description=( + "List of LoRAs to load and associated scaling factors (default scaling:" + "1.0)" + ), + ) + + +class sampling_config_model(BaseModel): + override_preset: Optional[str] = Field( + None, description=("Select a sampler override preset") + ) + + +class developer_config_model(BaseModel): + unsafe_launch: Optional[bool] = Field( + False, description=("Skip Exllamav2 version check") + ) + disable_request_streaming: Optional[bool] = Field( + False, description=("Disables API request streaming") + ) + cuda_malloc_backend: Optional[bool] = Field( + False, description=("Runs with the pytorch CUDA malloc backend") + ) + uvloop: Optional[bool] = Field( + False, description=("Run asyncio using Uvloop or Winloop") + ) + realtime_process_priority: Optional[bool] = Field( + False, + description=( + "Set process to use a higher priority For realtime process priority, run" + "as administrator or sudo Otherwise, the priority will be set to high" + ), + ) + + +class embeddings_config_model(BaseModel): + embedding_model_dir: Optional[str] = Field( + "models", + description=( + "Overrides directory to look for embedding models (default: models)" + ), + ) + embeddings_device: Optional[str] = Field( + "cpu", + description=( + "Device to load embedding models on (default: cpu). Possible values: cpu," + "auto, cuda. If using an AMD GPU, set this value to 'cuda'." 
+ ), + ) + embedding_model_name: Optional[str] = Field( + None, description=("The embeddings model to load") + ) + + +class tabby_config_model(BaseModel): + config: config_config_model = Field(default_factory=config_config_model) + network: network_config_model = Field(default_factory=network_config_model) + logging: logging_config_model = Field(default_factory=logging_config_model) + model: model_config_model = Field(default_factory=model_config_model) + draft_model: draft_model_config_model = Field( + default_factory=draft_model_config_model + ) + lora: lora_config_model = Field(default_factory=lora_config_model) + sampling: sampling_config_model = Field(default_factory=sampling_config_model) + developer: developer_config_model = Field(default_factory=developer_config_model) + embeddings: embeddings_config_model = Field(default_factory=embeddings_config_model) + + @model_validator(mode="before") + def set_defaults(cls, values): + for field_name, field_value in values.items(): + if field_value is None: + default_instance = cls.__annotations__[field_name]().dict() + values[field_name] = cls.__annotations__[field_name](**default_instance) + return values + + model_config = ConfigDict(validate_assignment=True) + + +def generate_config_file(filename="config_sample.yml", indentation=2): + schema = tabby_config_model.model_json_schema() + + def dump_def(id: str, indent=2): + yaml = "" + indent = " " * indentation * indent + id = id.split("/")[-1] + + section = schema["$defs"][id]["properties"] + for property in section.keys(): # get type + comment = section[property]["description"] + yaml += f"{indent}# {comment}\n" + + value = unwrap(section[property].get("default"), "") + yaml += f"{indent}{property}: {value}\n\n" + + return yaml + "\n" + + yaml = "" + for section in schema["properties"].keys(): + yaml += f"{section}:\n" + yaml += dump_def(schema["properties"][section]["$ref"]) + yaml += "\n" + + with open(filename, "w") as f: + f.write(yaml) + + +# generate_config_file("test.yml") From 8b48f00271a500b06a64f97dfb651962e51f29e2 Mon Sep 17 00:00:00 2001 From: TerminalMan <84923604+SecretiveShell@users.noreply.github.com> Date: Thu, 12 Sep 2024 17:00:07 +0100 Subject: [PATCH 08/51] fix model names --- common/config_models.py | 52 ++++++++++++++++++++++------------------- common/tabby_config.py | 21 ++++++++--------- 2 files changed, 38 insertions(+), 35 deletions(-) diff --git a/common/config_models.py b/common/config_models.py index 286057ec..5e5b5a21 100644 --- a/common/config_models.py +++ b/common/config_models.py @@ -4,13 +4,13 @@ from common.utils import unwrap -class config_config_model(BaseModel): +class ConfigConfig(BaseModel): config: Optional[str] = Field( None, description=("Path to an overriding config.yml file") ) -class network_config_model(BaseModel): +class NetworkConfig(BaseModel): host: Optional[str] = Field("127.0.0.1", description=("The IP to host on")) port: Optional[int] = Field(5000, description=("The port to host on")) disable_auth: Optional[bool] = Field( @@ -28,7 +28,7 @@ class network_config_model(BaseModel): ) -class logging_config_model(BaseModel): +class LoggingConfig(BaseModel): log_prompt: Optional[bool] = Field(False, description=("Enable prompt logging")) log_generation_params: Optional[bool] = Field( False, description=("Enable generation parameter logging") @@ -36,7 +36,7 @@ class logging_config_model(BaseModel): log_requests: Optional[bool] = Field(False, description=("Enable request logging")) -class model_config_model(BaseModel): +class 
ModelConfig(BaseModel): model_dir: str = Field( "models", description=( @@ -171,8 +171,10 @@ class model_config_model(BaseModel): ), ) + model_config = ConfigDict(protected_namespaces=()) -class draft_model_config_model(BaseModel): + +class DraftModelConfig(BaseModel): draft_model_dir: Optional[str] = Field( "models", description=( @@ -209,18 +211,18 @@ class draft_model_config_model(BaseModel): ) -class lora_instance_model(BaseModel): +class LoraInstanceModel(BaseModel): name: str = Field(..., description=("Name of the LoRA model")) scaling: float = Field( 1.0, description=("Scaling factor for the LoRA model (default: 1.0)") ) -class lora_config_model(BaseModel): +class LoraConfig(BaseModel): lora_dir: Optional[str] = Field( "loras", description=("Directory to look for LoRAs (default: 'loras')") ) - loras: Optional[List[lora_instance_model]] = Field( + loras: Optional[List[LoraInstanceModel]] = Field( None, description=( "List of LoRAs to load and associated scaling factors (default scaling:" @@ -229,13 +231,13 @@ class lora_config_model(BaseModel): ) -class sampling_config_model(BaseModel): +class SamplingConfig(BaseModel): override_preset: Optional[str] = Field( None, description=("Select a sampler override preset") ) -class developer_config_model(BaseModel): +class DeveloperConfig(BaseModel): unsafe_launch: Optional[bool] = Field( False, description=("Skip Exllamav2 version check") ) @@ -257,7 +259,7 @@ class developer_config_model(BaseModel): ) -class embeddings_config_model(BaseModel): +class EmbeddingsConfig(BaseModel): embedding_model_dir: Optional[str] = Field( "models", description=( @@ -276,18 +278,20 @@ class embeddings_config_model(BaseModel): ) -class tabby_config_model(BaseModel): - config: config_config_model = Field(default_factory=config_config_model) - network: network_config_model = Field(default_factory=network_config_model) - logging: logging_config_model = Field(default_factory=logging_config_model) - model: model_config_model = Field(default_factory=model_config_model) - draft_model: draft_model_config_model = Field( - default_factory=draft_model_config_model +class TabbyConfigModel(BaseModel): + config: ConfigConfig = Field(default_factory=ConfigConfig.model_construct) + network: NetworkConfig = Field(default_factory=NetworkConfig.model_construct) + logging: LoggingConfig = Field(default_factory=LoggingConfig.model_construct) + model: ModelConfig = Field(default_factory=ModelConfig.model_construct) + draft_model: DraftModelConfig = Field( + default_factory=DraftModelConfig.model_construct + ) + lora: LoraConfig = Field(default_factory=LoraConfig.model_construct) + sampling: SamplingConfig = Field(default_factory=SamplingConfig.model_construct) + developer: DeveloperConfig = Field(default_factory=DeveloperConfig.model_construct) + embeddings: EmbeddingsConfig = Field( + default_factory=EmbeddingsConfig.model_construct ) - lora: lora_config_model = Field(default_factory=lora_config_model) - sampling: sampling_config_model = Field(default_factory=sampling_config_model) - developer: developer_config_model = Field(default_factory=developer_config_model) - embeddings: embeddings_config_model = Field(default_factory=embeddings_config_model) @model_validator(mode="before") def set_defaults(cls, values): @@ -297,11 +301,11 @@ def set_defaults(cls, values): values[field_name] = cls.__annotations__[field_name](**default_instance) return values - model_config = ConfigDict(validate_assignment=True) + model_config = ConfigDict(validate_assignment=True, protected_namespaces=()) 
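For reference, the protected_namespaces=() entries above are needed because Pydantic v2 reserves the "model_" prefix for its own attributes, so fields such as model_dir and model_name would otherwise trigger a namespace-conflict warning. A minimal illustrative sketch (the Example class name is not part of the patch; assumes Pydantic v2):

    from pydantic import BaseModel, ConfigDict

    class Example(BaseModel):
        # Without protected_namespaces=(), Pydantic v2 warns:
        # "Field 'model_dir' has conflict with protected namespace 'model_'"
        model_config = ConfigDict(protected_namespaces=())
        model_dir: str = "models"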
def generate_config_file(filename="config_sample.yml", indentation=2): - schema = tabby_config_model.model_json_schema() + schema = TabbyConfigModel.model_json_schema() def dump_def(id: str, indent=2): yaml = "" diff --git a/common/tabby_config.py b/common/tabby_config.py index 9738c120..fd952a20 100644 --- a/common/tabby_config.py +++ b/common/tabby_config.py @@ -5,11 +5,10 @@ from os import getenv from common.utils import unwrap, merge_dicts -from common.config_models import tabby_config_model -import common.config_models +from common.config_models import TabbyConfigModel -class TabbyConfig(tabby_config_model): +class TabbyConfig(TabbyConfigModel): # Persistent defaults # TODO: make this pydantic? model_defaults: dict = {} @@ -26,11 +25,11 @@ def load(self, arguments: Optional[dict] = None): merged_config = merge_dicts(*configs) - for field in tabby_config_model.model_fields.keys(): - value = unwrap(merged_config.get(field), {}) - model = getattr(common.config_models, f"{field}_config_model") - - setattr(self, field, model.parse_obj(value)) + # validate and update config + merged_config_model = TabbyConfigModel.model_validate(merged_config) + for field in TabbyConfigModel.model_fields.keys(): + value = getattr(merged_config_model, field) + setattr(self, field, value) # Set model defaults dict once to prevent on-demand reconstruction # TODO: clean this up a bit @@ -71,7 +70,7 @@ def _from_args(self, args: dict): config = self._from_file(pathlib.Path(config_override)) return config # Return early if loading from file - for key in tabby_config_model.model_fields.keys(): + for key in TabbyConfigModel.model_fields.keys(): override = args.get(key) if override: if key == "logging": @@ -86,10 +85,10 @@ def _from_environment(self): config = {} - for field_name in tabby_config_model.model_fields.keys(): + for field_name in TabbyConfigModel.model_fields.keys(): section_config = {} for sub_field_name in getattr( - tabby_config_model(), field_name + TabbyConfigModel(), field_name ).model_fields.keys(): setting = getenv(f"TABBY_{field_name}_{sub_field_name}".upper(), None) if setting is not None: From e11d80b28547ab7dd7a9b76d381192c91b95edcb Mon Sep 17 00:00:00 2001 From: TerminalMan <84923604+SecretiveShell@users.noreply.github.com> Date: Thu, 12 Sep 2024 23:32:41 +0100 Subject: [PATCH 09/51] fix missing rename --- endpoints/core/types/model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/endpoints/core/types/model.py b/endpoints/core/types/model.py index f2560b3e..dc5da0da 100644 --- a/endpoints/core/types/model.py +++ b/endpoints/core/types/model.py @@ -4,7 +4,7 @@ from time import time from typing import List, Literal, Optional, Union -from common.config_models import logging_config_model +from common.config_models import LoggingConfig from common.tabby_config import config from common.utils import unwrap @@ -34,7 +34,7 @@ class ModelCard(BaseModel): object: str = "model" created: int = Field(default_factory=lambda: int(time())) owned_by: str = "tabbyAPI" - logging: Optional[logging_config_model] = None + logging: Optional[LoggingConfig] = None parameters: Optional[ModelCardParameters] = None From eb5f42c8450a9b971235997a151aa3836668401c Mon Sep 17 00:00:00 2001 From: TerminalMan <84923604+SecretiveShell@users.noreply.github.com> Date: Thu, 12 Sep 2024 23:48:24 +0100 Subject: [PATCH 10/51] add error message for invalid use_as_default --- common/tabby_config.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/common/tabby_config.py 
b/common/tabby_config.py index fd952a20..2f0481d9 100644 --- a/common/tabby_config.py +++ b/common/tabby_config.py @@ -39,8 +39,9 @@ def load(self, arguments: Optional[dict] = None): elif hasattr(self.draft_model, field): self.model_defaults[field] = getattr(config.draft_model, field) else: - # TODO: show an error - pass + logger.error( + f"invalid item {field} in config option `model.use_as_default`" + ) def _from_file(self, config_path: pathlib.Path): """loads config from a given file path""" From 6e935c565e642bbeabe8e776a1cb277e314a3909 Mon Sep 17 00:00:00 2001 From: TerminalMan <84923604+SecretiveShell@users.noreply.github.com> Date: Fri, 13 Sep 2024 00:37:17 +0100 Subject: [PATCH 11/51] remove private attributes in args --- common/args.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/common/args.py b/common/args.py index 400af021..0a888af2 100644 --- a/common/args.py +++ b/common/args.py @@ -1,8 +1,12 @@ """Argparser for overriding config values""" import argparse -from typing import get_origin, get_args, Union, List -from common.tabby_config import config +from typing import Any, Type, get_origin, get_args, Union, List +from inspect import get_annotations, isclass + +from pydantic import BaseModel + +from common.config_models import TabbyConfigModel def str_to_bool(value): @@ -33,7 +37,7 @@ def argument_with_auto(value): ) from ex -def map_pydantic_type_to_argparse(pydantic_type): +def map_pydantic_type_to_argparse(pydantic_type: Any): """ Maps Pydantic types to argparse compatible types. Handles special cases like Union and List. @@ -57,7 +61,7 @@ def map_pydantic_type_to_argparse(pydantic_type): return str -def add_field_to_group(group, field_name, field_type, field): +def add_field_to_group(group, field_name, field_type, field) -> None: """ Adds a Pydantic field to an argparse argument group. """ @@ -67,22 +71,24 @@ def add_field_to_group(group, field_name, field_type, field): group.add_argument(f"--{field_name}", type=arg_type, help=help_text) -def init_argparser(): +def init_argparser() -> argparse.ArgumentParser: """ Initializes an argparse parser based on a Pydantic config schema. 
""" parser = argparse.ArgumentParser(description="TabbyAPI server") + field_type: Union[Type[BaseModel], Any] + # Loop through each top-level field in the config - for field_name, field_type in config.__annotations__.items(): + for field_name, field_type in get_annotations(TabbyConfigModel).items(): group = parser.add_argument_group( field_name, description=f"Arguments for {field_name}" ) # Check if the field_type is a Pydantic model - if hasattr(field_type, "__annotations__"): - for sub_field_name, sub_field_type in field_type.__annotations__.items(): - field = field_type.__fields__[sub_field_name] + if isclass(field_type): + for sub_field_name, sub_field_type in get_annotations(field_type).items(): + field = field_type.model_fields[sub_field_name] add_field_to_group(group, sub_field_name, sub_field_type, field) else: # Handle cases where the field_type is not a Pydantic mode @@ -94,7 +100,9 @@ def init_argparser(): return parser -def convert_args_to_dict(args: argparse.Namespace, parser: argparse.ArgumentParser): +def convert_args_to_dict( + args: argparse.Namespace, parser: argparse.ArgumentParser +) -> dict[str, dict[str, Any]]: """Broad conversion of surface level arg groups to dictionaries""" arg_groups = {} From 21747bf9e4d9835aa524073cf5415e862c7a801f Mon Sep 17 00:00:00 2001 From: kingbri Date: Thu, 12 Sep 2024 22:17:51 -0400 Subject: [PATCH 12/51] Args: Switch to use model_field for everything Pydantic provides these helpers. Better to use these instead of the inspect lib. Signed-off-by: kingbri --- common/args.py | 24 ++++++++++++++---------- common/config_models.py | 6 ++++-- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/common/args.py b/common/args.py index 0a888af2..b991103c 100644 --- a/common/args.py +++ b/common/args.py @@ -1,8 +1,7 @@ """Argparser for overriding config values""" import argparse -from typing import Any, Type, get_origin, get_args, Union, List -from inspect import get_annotations, isclass +from typing import Any, get_origin, get_args, Union, List from pydantic import BaseModel @@ -42,6 +41,7 @@ def map_pydantic_type_to_argparse(pydantic_type: Any): Maps Pydantic types to argparse compatible types. Handles special cases like Union and List. """ + origin = get_origin(pydantic_type) # Handle optional types @@ -65,6 +65,7 @@ def add_field_to_group(group, field_name, field_type, field) -> None: """ Adds a Pydantic field to an argparse argument group. """ + arg_type = map_pydantic_type_to_argparse(field_type) help_text = field.description if field.description else "No description available" @@ -75,23 +76,26 @@ def init_argparser() -> argparse.ArgumentParser: """ Initializes an argparse parser based on a Pydantic config schema. 
""" - parser = argparse.ArgumentParser(description="TabbyAPI server") - field_type: Union[Type[BaseModel], Any] + parser = argparse.ArgumentParser(description="TabbyAPI server") # Loop through each top-level field in the config - for field_name, field_type in get_annotations(TabbyConfigModel).items(): + for field_name, field_info in TabbyConfigModel.model_fields.items(): + field_type = field_info.annotation group = parser.add_argument_group( field_name, description=f"Arguments for {field_name}" ) # Check if the field_type is a Pydantic model - if isclass(field_type): - for sub_field_name, sub_field_type in get_annotations(field_type).items(): - field = field_type.model_fields[sub_field_name] - add_field_to_group(group, sub_field_name, sub_field_type, field) + if issubclass(field_type, BaseModel): + for sub_field_name, sub_field_info in field_type.model_fields.items(): + sub_field_name = sub_field_name.replace("_", "-") + sub_field_type = sub_field_info.annotation + add_field_to_group( + group, sub_field_name, sub_field_type, sub_field_info + ) else: - # Handle cases where the field_type is not a Pydantic mode + field_name = field_name.replace("_", "-") arg_type = map_pydantic_type_to_argparse(field_type) group.add_argument( f"--{field_name}", type=arg_type, help=f"Argument for {field_name}" diff --git a/common/config_models.py b/common/config_models.py index 5e5b5a21..ced18f99 100644 --- a/common/config_models.py +++ b/common/config_models.py @@ -4,7 +4,7 @@ from common.utils import unwrap -class ConfigConfig(BaseModel): +class ConfigOverrideConfig(BaseModel): config: Optional[str] = Field( None, description=("Path to an overriding config.yml file") ) @@ -279,7 +279,9 @@ class EmbeddingsConfig(BaseModel): class TabbyConfigModel(BaseModel): - config: ConfigConfig = Field(default_factory=ConfigConfig.model_construct) + config: ConfigOverrideConfig = Field( + default_factory=ConfigOverrideConfig.model_construct + ) network: NetworkConfig = Field(default_factory=NetworkConfig.model_construct) logging: LoggingConfig = Field(default_factory=LoggingConfig.model_construct) model: ModelConfig = Field(default_factory=ModelConfig.model_construct) From d5b3fde3196e45137b18aff95702564e21de5435 Mon Sep 17 00:00:00 2001 From: kingbri Date: Thu, 12 Sep 2024 22:43:30 -0400 Subject: [PATCH 13/51] Config: Fix descriptions Appending lines also requires a space between each one otherwise they'll squish together. Signed-off-by: kingbri --- common/config_models.py | 50 ++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/common/config_models.py b/common/config_models.py index ced18f99..ecc13c9b 100644 --- a/common/config_models.py +++ b/common/config_models.py @@ -40,133 +40,133 @@ class ModelConfig(BaseModel): model_dir: str = Field( "models", description=( - "Overrides the directory to look for models (default: models). Windows" + "Overrides the directory to look for models (default: models). Windows " "users, do NOT put this path in quotes." ), ) use_dummy_models: Optional[bool] = Field( False, description=( - "Sends dummy model names when the models endpoint is queried. Enable this" + "Sends dummy model names when the models endpoint is queried. Enable this " "if looking for specific OAI models." ), ) model_name: Optional[str] = Field( None, description=( - "An initial model to load. Make sure the model is located in the model" + "An initial model to load. Make sure the model is located in the model " "directory! 
REQUIRED: This must be filled out to load a model on startup." ), ) use_as_default: List[str] = Field( default_factory=list, description=( - "Names of args to use as a default fallback for API load requests" + "Names of args to use as a default fallback for API load requests " "(default: []). Example: ['max_seq_len', 'cache_mode']" ), ) max_seq_len: Optional[int] = Field( None, description=( - "Max sequence length. Fetched from the model's base sequence length in" + "Max sequence length. Fetched from the model's base sequence length in " "config.json by default." ), ) override_base_seq_len: Optional[int] = Field( None, description=( - "Overrides base model context length. WARNING: Only use this if the" + "Overrides base model context length. WARNING: Only use this if the " "model's base sequence length is incorrect." ), ) tensor_parallel: Optional[bool] = Field( False, description=( - "Load model with tensor parallelism. Fallback to autosplit if GPU split" + "Load model with tensor parallelism. Fallback to autosplit if GPU split " "isn't provided." ), ) gpu_split_auto: Optional[bool] = Field( True, description=( - "Automatically allocate resources to GPUs (default: True). Not parsed for" + "Automatically allocate resources to GPUs (default: True). Not parsed for " "single GPU users." ), ) autosplit_reserve: List[int] = Field( [96], description=( - "Reserve VRAM used for autosplit loading (default: 96 MB on GPU 0)." + "Reserve VRAM used for autosplit loading (default: 96 MB on GPU 0). " "Represented as an array of MB per GPU." ), ) gpu_split: List[float] = Field( default_factory=list, description=( - "An integer array of GBs of VRAM to split between GPUs (default: [])." + "An integer array of GBs of VRAM to split between GPUs (default: []). " "Used with tensor parallelism." ), ) rope_scale: Optional[float] = Field( 1.0, description=( - "Rope scale (default: 1.0). Same as compress_pos_emb. Only use if the" + "Rope scale (default: 1.0). Same as compress_pos_emb. Only use if the " "model was trained on long context with rope." ), ) rope_alpha: Optional[Union[float, str]] = Field( 1.0, description=( - "Rope alpha (default: 1.0). Same as alpha_value. Set to 'auto' to auto-" + "Rope alpha (default: 1.0). Same as alpha_value. Set to 'auto' to auto- " "calculate." ), ) cache_mode: Optional[str] = Field( "FP16", description=( - "Enable different cache modes for VRAM savings (default: FP16). Possible" + "Enable different cache modes for VRAM savings (default: FP16). Possible " "values: FP16, Q8, Q6, Q4." ), ) cache_size: Optional[int] = Field( None, description=( - "Size of the prompt cache to allocate (default: max_seq_len). Must be a" + "Size of the prompt cache to allocate (default: max_seq_len). Must be a " "multiple of 256." ), ) chunk_size: Optional[int] = Field( 2048, description=( - "Chunk size for prompt ingestion (default: 2048). A lower value reduces" + "Chunk size for prompt ingestion (default: 2048). A lower value reduces " "VRAM usage but decreases ingestion speed." ), ) max_batch_size: Optional[int] = Field( None, description=( - "Set the maximum number of prompts to process at one time (default:" + "Set the maximum number of prompts to process at one time (default: " "None/Automatic). Automatically calculated if left blank." ), ) prompt_template: Optional[str] = Field( None, description=( - "Set the prompt template for this model. If empty, attempts to look for" + "Set the prompt template for this model. If empty, attempts to look for " "the model's chat template." 
), ) num_experts_per_token: Optional[int] = Field( None, description=( - "Number of experts to use per token. Fetched from the model's" + "Number of experts to use per token. Fetched from the model's " "config.json. For MoE models only." ), ) fasttensors: Optional[bool] = Field( False, description=( - "Enables fasttensors to possibly increase model loading speeds (default:" + "Enables fasttensors to possibly increase model loading speeds (default: " "False)." ), ) @@ -191,21 +191,21 @@ class DraftModelConfig(BaseModel): draft_rope_scale: Optional[float] = Field( 1.0, description=( - "Rope scale for draft models (default: 1.0). Same as compress_pos_emb." + "Rope scale for draft models (default: 1.0). Same as compress_pos_emb. " "Use if the draft model was trained on long context with rope." ), ) draft_rope_alpha: Optional[float] = Field( None, description=( - "Rope alpha for draft models (default: None). Same as alpha_value. Leave" + "Rope alpha for draft models (default: None). Same as alpha_value. Leave " "blank to auto-calculate the alpha value." ), ) draft_cache_mode: Optional[str] = Field( "FP16", description=( - "Cache mode for draft models to save VRAM (default: FP16). Possible" + "Cache mode for draft models to save VRAM (default: FP16). Possible " "values: FP16, Q8, Q6, Q4." ), ) @@ -225,7 +225,7 @@ class LoraConfig(BaseModel): loras: Optional[List[LoraInstanceModel]] = Field( None, description=( - "List of LoRAs to load and associated scaling factors (default scaling:" + "List of LoRAs to load and associated scaling factors (default scaling: " "1.0)" ), ) @@ -253,7 +253,7 @@ class DeveloperConfig(BaseModel): realtime_process_priority: Optional[bool] = Field( False, description=( - "Set process to use a higher priority For realtime process priority, run" + "Set process to use a higher priority For realtime process priority, run " "as administrator or sudo Otherwise, the priority will be set to high" ), ) @@ -269,7 +269,7 @@ class EmbeddingsConfig(BaseModel): embeddings_device: Optional[str] = Field( "cpu", description=( - "Device to load embedding models on (default: cpu). Possible values: cpu," + "Device to load embedding models on (default: cpu). Possible values: cpu, " "auto, cuda. If using an AMD GPU, set this value to 'cuda'." ), ) From dc4946b5653b2a46513fc008fdc3707f79fb1e15 Mon Sep 17 00:00:00 2001 From: TerminalMan <84923604+SecretiveShell@users.noreply.github.com> Date: Fri, 13 Sep 2024 10:21:27 +0100 Subject: [PATCH 14/51] make pydantic do all the validation --- common/args.py | 63 ++--------------------------------------- common/config_models.py | 34 ++++++++++++++++------ 2 files changed, 28 insertions(+), 69 deletions(-) diff --git a/common/args.py b/common/args.py index b991103c..22c76814 100644 --- a/common/args.py +++ b/common/args.py @@ -1,75 +1,21 @@ """Argparser for overriding config values""" import argparse -from typing import Any, get_origin, get_args, Union, List +from typing import Any from pydantic import BaseModel from common.config_models import TabbyConfigModel -def str_to_bool(value): - """Converts a string into a boolean value""" - - if value.lower() in {"false", "f", "0", "no", "n"}: - return False - elif value.lower() in {"true", "t", "1", "yes", "y"}: - return True - raise ValueError(f"{value} is not a valid boolean value") - - -def argument_with_auto(value): - """ - Argparse type wrapper for any argument that has an automatic option. - - Ex. 
rope_alpha - """ - - if value == "auto": - return "auto" - - try: - return float(value) - except ValueError as ex: - raise argparse.ArgumentTypeError( - 'This argument only takes a type of float or "auto"' - ) from ex - - -def map_pydantic_type_to_argparse(pydantic_type: Any): - """ - Maps Pydantic types to argparse compatible types. - Handles special cases like Union and List. - """ - - origin = get_origin(pydantic_type) - - # Handle optional types - if origin is Union: - # Filter out NoneType - pydantic_type = next(t for t in get_args(pydantic_type) if t is not type(None)) - - elif origin is List: - pydantic_type = get_args(pydantic_type)[0] # Get the list item type - - # Map basic types (int, float, str, bool) - if isinstance(pydantic_type, type) and issubclass( - pydantic_type, (int, float, str, bool) - ): - return pydantic_type - - return str - - def add_field_to_group(group, field_name, field_type, field) -> None: """ Adds a Pydantic field to an argparse argument group. """ - arg_type = map_pydantic_type_to_argparse(field_type) help_text = field.description if field.description else "No description available" - group.add_argument(f"--{field_name}", type=arg_type, help=help_text) + group.add_argument(f"--{field_name}", help=help_text) def init_argparser() -> argparse.ArgumentParser: @@ -96,10 +42,7 @@ def init_argparser() -> argparse.ArgumentParser: ) else: field_name = field_name.replace("_", "-") - arg_type = map_pydantic_type_to_argparse(field_type) - group.add_argument( - f"--{field_name}", type=arg_type, help=f"Argument for {field_name}" - ) + group.add_argument(f"--{field_name}", help=f"Argument for {field_name}") return parser diff --git a/common/config_models.py b/common/config_models.py index ecc13c9b..83849576 100644 --- a/common/config_models.py +++ b/common/config_models.py @@ -1,10 +1,13 @@ from pydantic import BaseModel, ConfigDict, Field, model_validator -from typing import List, Optional, Union +from typing import List, Literal, Optional, Union from common.utils import unwrap +CACHE_SIZES = Literal["FP16", "Q8", "Q6", "Q4"] + class ConfigOverrideConfig(BaseModel): + # TODO: convert this to a pathlib.path? config: Optional[str] = Field( None, description=("Path to an overriding config.yml file") ) @@ -20,7 +23,7 @@ class NetworkConfig(BaseModel): False, description=("Decide whether to send error tracebacks over the API"), ) - api_servers: Optional[List[str]] = Field( + api_servers: Optional[List[Literal["OAI", "Kobold"]]] = Field( [ "OAI", ], @@ -37,6 +40,7 @@ class LoggingConfig(BaseModel): class ModelConfig(BaseModel): + # TODO: convert this to a pathlib.path? model_dir: str = Field( "models", description=( @@ -71,6 +75,7 @@ class ModelConfig(BaseModel): "Max sequence length. Fetched from the model's base sequence length in " "config.json by default." ), + ge=0, ) override_base_seq_len: Optional[int] = Field( None, @@ -78,6 +83,7 @@ class ModelConfig(BaseModel): "Overrides base model context length. WARNING: Only use this if the " "model's base sequence length is incorrect." ), + ge=0, ) tensor_parallel: Optional[bool] = Field( False, @@ -114,18 +120,18 @@ class ModelConfig(BaseModel): "model was trained on long context with rope." ), ) - rope_alpha: Optional[Union[float, str]] = Field( + rope_alpha: Optional[Union[float, Literal["auto"]]] = Field( 1.0, description=( "Rope alpha (default: 1.0). Same as alpha_value. Set to 'auto' to auto- " "calculate." 
), ) - cache_mode: Optional[str] = Field( + cache_mode: Optional[CACHE_SIZES] = Field( "FP16", description=( "Enable different cache modes for VRAM savings (default: FP16). Possible " - "values: FP16, Q8, Q6, Q4." + f"values: {str(CACHE_SIZES)[15:-1]}" ), ) cache_size: Optional[int] = Field( @@ -134,6 +140,8 @@ class ModelConfig(BaseModel): "Size of the prompt cache to allocate (default: max_seq_len). Must be a " "multiple of 256." ), + multiple_of=256, + gt=0, ) chunk_size: Optional[int] = Field( 2048, @@ -141,6 +149,7 @@ class ModelConfig(BaseModel): "Chunk size for prompt ingestion (default: 2048). A lower value reduces " "VRAM usage but decreases ingestion speed." ), + gt=0, ) max_batch_size: Optional[int] = Field( None, @@ -148,6 +157,7 @@ class ModelConfig(BaseModel): "Set the maximum number of prompts to process at one time (default: " "None/Automatic). Automatically calculated if left blank." ), + ge=1, ) prompt_template: Optional[str] = Field( None, @@ -162,6 +172,7 @@ class ModelConfig(BaseModel): "Number of experts to use per token. Fetched from the model's " "config.json. For MoE models only." ), + ge=1, ) fasttensors: Optional[bool] = Field( False, @@ -175,6 +186,7 @@ class ModelConfig(BaseModel): class DraftModelConfig(BaseModel): + # TODO: convert this to a pathlib.path? draft_model_dir: Optional[str] = Field( "models", description=( @@ -202,11 +214,11 @@ class DraftModelConfig(BaseModel): "blank to auto-calculate the alpha value." ), ) - draft_cache_mode: Optional[str] = Field( + draft_cache_mode: Optional[CACHE_SIZES] = Field( "FP16", description=( "Cache mode for draft models to save VRAM (default: FP16). Possible " - "values: FP16, Q8, Q6, Q4." + f"values: {str(CACHE_SIZES)[15:-1]}" ), ) @@ -214,11 +226,14 @@ class DraftModelConfig(BaseModel): class LoraInstanceModel(BaseModel): name: str = Field(..., description=("Name of the LoRA model")) scaling: float = Field( - 1.0, description=("Scaling factor for the LoRA model (default: 1.0)") + 1.0, + description=("Scaling factor for the LoRA model (default: 1.0)"), + ge=0, ) class LoraConfig(BaseModel): + # TODO: convert this to a pathlib.path? lora_dir: Optional[str] = Field( "loras", description=("Directory to look for LoRAs (default: 'loras')") ) @@ -260,13 +275,14 @@ class DeveloperConfig(BaseModel): class EmbeddingsConfig(BaseModel): + # TODO: convert this to a pathlib.path? embedding_model_dir: Optional[str] = Field( "models", description=( "Overrides directory to look for embedding models (default: models)" ), ) - embeddings_device: Optional[str] = Field( + embeddings_device: Optional[Literal["cpu", "auto", "cuda"]] = Field( "cpu", description=( "Device to load embedding models on (default: cpu). 
Possible values: cpu, " From 533e7c91190b0214f3ee15582a2f1e9ecba35a64 Mon Sep 17 00:00:00 2001 From: TerminalMan <84923604+SecretiveShell@users.noreply.github.com> Date: Sat, 14 Sep 2024 22:49:37 +0100 Subject: [PATCH 15/51] remove unnecessary code --- common/config_models.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/common/config_models.py b/common/config_models.py index 83849576..b400d5c4 100644 --- a/common/config_models.py +++ b/common/config_models.py @@ -1,8 +1,6 @@ -from pydantic import BaseModel, ConfigDict, Field, model_validator +from pydantic import BaseModel, ConfigDict, Field from typing import List, Literal, Optional, Union -from common.utils import unwrap - CACHE_SIZES = Literal["FP16", "Q8", "Q6", "Q4"] @@ -311,14 +309,6 @@ class TabbyConfigModel(BaseModel): default_factory=EmbeddingsConfig.model_construct ) - @model_validator(mode="before") - def set_defaults(cls, values): - for field_name, field_value in values.items(): - if field_value is None: - default_instance = cls.__annotations__[field_name]().dict() - values[field_name] = cls.__annotations__[field_name](**default_instance) - return values - model_config = ConfigDict(validate_assignment=True, protected_namespaces=()) @@ -335,7 +325,7 @@ def dump_def(id: str, indent=2): comment = section[property]["description"] yaml += f"{indent}# {comment}\n" - value = unwrap(section[property].get("default"), "") + value = section[property].get("default", "") yaml += f"{indent}{property}: {value}\n\n" return yaml + "\n" From 0903f852db4c3924e5107ba663f044543da64a63 Mon Sep 17 00:00:00 2001 From: TerminalMan <84923604+SecretiveShell@users.noreply.github.com> Date: Sun, 15 Sep 2024 00:17:36 +0100 Subject: [PATCH 16/51] add export openAPI to config --- .github/workflows/pages.yml | 6 ++---- common/actions.py | 27 +++++++++++++++++++++++++++ common/args.py | 18 ++++++++++++++++-- common/config_models.py | 18 ++++++++++++++++++ main.py | 13 ++++--------- 5 files changed, 67 insertions(+), 15 deletions(-) create mode 100644 common/actions.py diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml index a7b3327a..fe542502 100644 --- a/.github/workflows/pages.yml +++ b/.github/workflows/pages.yml @@ -48,10 +48,8 @@ jobs: npm install @redocly/cli -g - name: Export OpenAPI docs run: | - EXPORT_OPENAPI=1 python main.py - mv openapi.json openapi-oai.json - EXPORT_OPENAPI=1 python main.py --api-servers kobold - mv openapi.json openapi-kobold.json + python main.py --export-openapi true --openapi-export-path "openapi-oai.json" --api-servers OAI + python main.py --export-openapi true --openapi-export-path "openapi-kobold.json" --api-servers kobold - name: Build and store Redocly site run: | mkdir static diff --git a/common/actions.py b/common/actions.py new file mode 100644 index 00000000..079a78d3 --- /dev/null +++ b/common/actions.py @@ -0,0 +1,27 @@ +import json +from loguru import logger +from common.tabby_config import config +from endpoints.server import export_openapi +from common.config_models import generate_config_file + + +def branch_to_actions() -> bool: + if config.actions.export_openapi: + openapi_json = export_openapi() + + with open(config.actions.openapi_export_path, "w") as f: + f.write(json.dumps(openapi_json)) + logger.info( + "Successfully wrote OpenAPI spec to " + + f"{config.actions.openapi_export_path}" + ) + + elif config.actions.export_config: + generate_config_file(config.actions.config_export_path) + + else: + # did not branch + return False + + # branched and ran 
an action + return True diff --git a/common/args.py b/common/args.py index 22c76814..bd9c67c9 100644 --- a/common/args.py +++ b/common/args.py @@ -8,14 +8,28 @@ from common.config_models import TabbyConfigModel +def is_list_type(type_hint): + if hasattr(type_hint, "__origin__") and type_hint.__origin__ is list: + return True + if hasattr(type_hint, "__args__"): + # Recursively check for lists inside type arguments + return any(is_list_type(arg) for arg in type_hint.__args__) + return False + + def add_field_to_group(group, field_name, field_type, field) -> None: """ Adds a Pydantic field to an argparse argument group. """ - help_text = field.description if field.description else "No description available" + kwargs = { + "help": field.description if field.description else "No description available", + } + + if is_list_type(field_type): + kwargs["nargs"] = "+" - group.add_argument(f"--{field_name}", help=help_text) + group.add_argument(f"--{field_name}", **kwargs) def init_argparser() -> argparse.ArgumentParser: diff --git a/common/config_models.py b/common/config_models.py index b400d5c4..1b371d55 100644 --- a/common/config_models.py +++ b/common/config_models.py @@ -1,5 +1,6 @@ from pydantic import BaseModel, ConfigDict, Field from typing import List, Literal, Optional, Union +from pathlib import Path CACHE_SIZES = Literal["FP16", "Q8", "Q6", "Q4"] @@ -11,6 +12,22 @@ class ConfigOverrideConfig(BaseModel): ) +class UtilityActions(BaseModel): + export_config: Optional[str] = Field( + None, description="generate a template config file" + ) + config_export_path: Optional[Path] = Field( + "config_sample.yml", description="path to export configuration file to" + ) + + export_openapi: Optional[bool] = Field( + False, description="export openapi schema files" + ) + openapi_export_path: Optional[Path] = Field( + "openapi.json", description="path to export openapi schema to" + ) + + class NetworkConfig(BaseModel): host: Optional[str] = Field("127.0.0.1", description=("The IP to host on")) port: Optional[int] = Field(5000, description=("The port to host on")) @@ -308,6 +325,7 @@ class TabbyConfigModel(BaseModel): embeddings: EmbeddingsConfig = Field( default_factory=EmbeddingsConfig.model_construct ) + actions: UtilityActions = Field(default_factory=UtilityActions.model_construct) model_config = ConfigDict(validate_assignment=True, protected_namespaces=()) diff --git a/main.py b/main.py index bd706868..429c0e83 100644 --- a/main.py +++ b/main.py @@ -1,7 +1,6 @@ """The main tabbyAPI module. 
Contains the FastAPI server and endpoints.""" import asyncio -import json import os import pathlib import platform @@ -12,11 +11,12 @@ from common import gen_logging, sampling, model from common.args import convert_args_to_dict, init_argparser from common.auth import load_auth_keys +from common.actions import branch_to_actions from common.logger import setup_logger from common.networking import is_port_in_use from common.signals import signal_handler from common.tabby_config import config -from endpoints.server import export_openapi, start_api +from endpoints.server import start_api from endpoints.utils import do_export_openapi if not do_export_openapi: @@ -112,13 +112,8 @@ def entrypoint(arguments: Optional[dict] = None): # load config config.load(arguments) - if do_export_openapi: - openapi_json = export_openapi() - - with open("openapi.json", "w") as f: - f.write(json.dumps(openapi_json)) - logger.info("Successfully wrote OpenAPI spec to openapi.json") - + # branch to default paths if required + if branch_to_actions(): return # Check exllamav2 version and give a descriptive error if it's too old From a09dd802c25d820450e73cc196d4c011d2023881 Mon Sep 17 00:00:00 2001 From: kingbri Date: Sat, 14 Sep 2024 21:48:39 -0400 Subject: [PATCH 17/51] Config: Cleanup and organize functions Remove access of private attributes and use safer functions. Also move generalized functions into utils files. Signed-off-by: kingbri --- common/args.py | 15 +++------------ common/config_models.py | 31 ++++++++++++++++++++++++++++--- common/utils.py | 16 ++++++++++++++++ 3 files changed, 47 insertions(+), 15 deletions(-) diff --git a/common/args.py b/common/args.py index bd9c67c9..7d2427f8 100644 --- a/common/args.py +++ b/common/args.py @@ -1,20 +1,10 @@ """Argparser for overriding config values""" import argparse -from typing import Any - from pydantic import BaseModel from common.config_models import TabbyConfigModel - - -def is_list_type(type_hint): - if hasattr(type_hint, "__origin__") and type_hint.__origin__ is list: - return True - if hasattr(type_hint, "__args__"): - # Recursively check for lists inside type arguments - return any(is_list_type(arg) for arg in type_hint.__args__) - return False +from common.utils import is_list_type def add_field_to_group(group, field_name, field_type, field) -> None: @@ -26,6 +16,7 @@ def add_field_to_group(group, field_name, field_type, field) -> None: "help": field.description if field.description else "No description available", } + # If the inner type contains a list, specify argparse as such if is_list_type(field_type): kwargs["nargs"] = "+" @@ -63,7 +54,7 @@ def init_argparser() -> argparse.ArgumentParser: def convert_args_to_dict( args: argparse.Namespace, parser: argparse.ArgumentParser -) -> dict[str, dict[str, Any]]: +) -> dict: """Broad conversion of surface level arg groups to dictionaries""" arg_groups = {} diff --git a/common/config_models.py b/common/config_models.py index 1b371d55..4c124451 100644 --- a/common/config_models.py +++ b/common/config_models.py @@ -6,6 +6,8 @@ class ConfigOverrideConfig(BaseModel): + """Model for overriding a provided config file.""" + # TODO: convert this to a pathlib.path? 
config: Optional[str] = Field( None, description=("Path to an overriding config.yml file") @@ -13,6 +15,9 @@ class ConfigOverrideConfig(BaseModel): class UtilityActions(BaseModel): + """Model used for arg actions.""" + + # YAML export options export_config: Optional[str] = Field( None, description="generate a template config file" ) @@ -20,6 +25,7 @@ class UtilityActions(BaseModel): "config_sample.yml", description="path to export configuration file to" ) + # OpenAPI JSON export options export_openapi: Optional[bool] = Field( False, description="export openapi schema files" ) @@ -29,6 +35,8 @@ class UtilityActions(BaseModel): class NetworkConfig(BaseModel): + """Model for network configuration.""" + host: Optional[str] = Field("127.0.0.1", description=("The IP to host on")) port: Optional[int] = Field(5000, description=("The port to host on")) disable_auth: Optional[bool] = Field( @@ -47,6 +55,8 @@ class NetworkConfig(BaseModel): class LoggingConfig(BaseModel): + """Model for logging configuration.""" + log_prompt: Optional[bool] = Field(False, description=("Enable prompt logging")) log_generation_params: Optional[bool] = Field( False, description=("Enable generation parameter logging") @@ -55,6 +65,8 @@ class LoggingConfig(BaseModel): class ModelConfig(BaseModel): + """Model for LLM configuration.""" + # TODO: convert this to a pathlib.path? model_dir: str = Field( "models", @@ -201,6 +213,8 @@ class ModelConfig(BaseModel): class DraftModelConfig(BaseModel): + """Model for draft LLM model configuration.""" + # TODO: convert this to a pathlib.path? draft_model_dir: Optional[str] = Field( "models", @@ -239,6 +253,8 @@ class DraftModelConfig(BaseModel): class LoraInstanceModel(BaseModel): + """Model representing an instance of a Lora.""" + name: str = Field(..., description=("Name of the LoRA model")) scaling: float = Field( 1.0, @@ -248,6 +264,8 @@ class LoraInstanceModel(BaseModel): class LoraConfig(BaseModel): + """Model for lora configuration.""" + # TODO: convert this to a pathlib.path? lora_dir: Optional[str] = Field( "loras", description=("Directory to look for LoRAs (default: 'loras')") @@ -262,12 +280,16 @@ class LoraConfig(BaseModel): class SamplingConfig(BaseModel): + """Model for sampling (overrides) config.""" + override_preset: Optional[str] = Field( None, description=("Select a sampler override preset") ) class DeveloperConfig(BaseModel): + """Model for developer settings configuration.""" + unsafe_launch: Optional[bool] = Field( False, description=("Skip Exllamav2 version check") ) @@ -290,6 +312,8 @@ class DeveloperConfig(BaseModel): class EmbeddingsConfig(BaseModel): + """Model for embeddings configuration.""" + # TODO: convert this to a pathlib.path? 
embedding_model_dir: Optional[str] = Field( "models", @@ -310,6 +334,8 @@ class EmbeddingsConfig(BaseModel): class TabbyConfigModel(BaseModel): + """Base model for a TabbyConfig.""" + config: ConfigOverrideConfig = Field( default_factory=ConfigOverrideConfig.model_construct ) @@ -331,6 +357,8 @@ class TabbyConfigModel(BaseModel): def generate_config_file(filename="config_sample.yml", indentation=2): + """Creates a config.yml file from Pydantic models.""" + schema = TabbyConfigModel.model_json_schema() def dump_def(id: str, indent=2): @@ -356,6 +384,3 @@ def dump_def(id: str, indent=2): with open(filename, "w") as f: f.write(yaml) - - -# generate_config_file("test.yml") diff --git a/common/utils.py b/common/utils.py index d5723a0b..d933fb60 100644 --- a/common/utils.py +++ b/common/utils.py @@ -1,5 +1,7 @@ """Common utility functions""" +from typing import get_args, get_origin + def unwrap(wrapped, default=None): """Unwrap function for Optionals.""" @@ -43,3 +45,17 @@ def flat_map(input_list): """Flattens a list of lists into a single list.""" return [item for sublist in input_list for item in sublist] + + +def is_list_type(type_hint): + """Checks if a type contains a list.""" + + if get_origin(type_hint) is list: + return True + + # Recursively check for lists inside type arguments + type_args = get_args(type_hint) + if type_args: + return any(is_list_type(arg) for arg in type_args) + + return False From 6f28cfe9052a6bb91870d1e9b450c2ccb0c3d417 Mon Sep 17 00:00:00 2001 From: kingbri Date: Sat, 14 Sep 2024 21:49:44 -0400 Subject: [PATCH 18/51] Logging: Remove preferences global This is no longer needed because config is a singleton. Signed-off-by: kingbri --- common/gen_logging.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/common/gen_logging.py b/common/gen_logging.py index 6a917dd7..150d63ca 100644 --- a/common/gen_logging.py +++ b/common/gen_logging.py @@ -7,17 +7,14 @@ from common.tabby_config import config -# Global logging preferences constant -PREFERENCES = config.logging - def broadcast_status(): """Broadcasts the current logging status""" enabled = [] - if PREFERENCES.log_prompt: + if config.logging.log_prompt: enabled.append("prompts") - if PREFERENCES.log_generation_params: + if config.logging.log_generation_params: enabled.append("generation params") if len(enabled) > 0: @@ -28,13 +25,13 @@ def broadcast_status(): def log_generation_params(**kwargs): """Logs generation parameters to console.""" - if PREFERENCES.log_generation_params: + if config.logging.log_generation_params: logger.info(f"Generation options: {kwargs}\n") def log_prompt(prompt: str, request_id: str, negative_prompt: Optional[str]): """Logs the prompt to console.""" - if PREFERENCES.log_prompt: + if config.logging.log_prompt: formatted_prompt = "\n" + prompt logger.info( f"Prompt (ID: {request_id}): {formatted_prompt if prompt else 'Empty'}\n" @@ -47,7 +44,7 @@ def log_prompt(prompt: str, request_id: str, negative_prompt: Optional[str]): def log_response(request_id: str, response: str): """Logs the response to console.""" - if PREFERENCES.log_prompt: + if config.logging.log_prompt: formatted_response = "\n" + response logger.info( f"Response (ID: {request_id}): " From d013729b7d23cc2cb43bad0abd4d9649e10795fc Mon Sep 17 00:00:00 2001 From: kingbri Date: Sat, 14 Sep 2024 21:56:16 -0400 Subject: [PATCH 19/51] Config: Add aliases for logging config Config.yml and args take in two different values. 
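With these aliases, either spelling validates into the same field. A quick usage sketch (assumes the aliases added in the diff below):

    from common.config_models import LoggingConfig

    # legacy config.yml key
    assert LoggingConfig.model_validate({"prompt": True}).log_prompt is True
    # new-style key used by args and env vars
    assert LoggingConfig.model_validate({"log_prompt": True}).log_prompt is True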
Signed-off-by: kingbri --- common/config_models.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/common/config_models.py b/common/config_models.py index 4c124451..bf2787af 100644 --- a/common/config_models.py +++ b/common/config_models.py @@ -1,4 +1,4 @@ -from pydantic import BaseModel, ConfigDict, Field +from pydantic import AliasChoices, BaseModel, ConfigDict, Field from typing import List, Literal, Optional, Union from pathlib import Path @@ -47,21 +47,31 @@ class NetworkConfig(BaseModel): description=("Decide whether to send error tracebacks over the API"), ) api_servers: Optional[List[Literal["OAI", "Kobold"]]] = Field( - [ - "OAI", - ], + default_factory=list, description=("API servers to enable. Options: (OAI, Kobold)"), ) +# TODO: Migrate config.yml to have the log_ prefix +# This is a breaking change. class LoggingConfig(BaseModel): """Model for logging configuration.""" - log_prompt: Optional[bool] = Field(False, description=("Enable prompt logging")) + log_prompt: Optional[bool] = Field( + False, + description=("Enable prompt logging"), + validation_alias=AliasChoices("log_prompt", "prompt"), + ) log_generation_params: Optional[bool] = Field( - False, description=("Enable generation parameter logging") + False, + description=("Enable generation parameter logging"), + validation_alias=AliasChoices("log_generation_params", "generation_params"), + ) + log_requests: Optional[bool] = Field( + False, + description=("Enable request logging"), + validation_alias=AliasChoices("log_requests", "requests"), ) - log_requests: Optional[bool] = Field(False, description=("Enable request logging")) class ModelConfig(BaseModel): From 5bfa952671409d49aac263135f944d1edb62b9bb Mon Sep 17 00:00:00 2001 From: kingbri Date: Sat, 14 Sep 2024 22:05:11 -0400 Subject: [PATCH 20/51] Actions: Format Signed-off-by: kingbri --- common/actions.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/common/actions.py b/common/actions.py index 079a78d3..44fb236b 100644 --- a/common/actions.py +++ b/common/actions.py @@ -1,11 +1,14 @@ import json from loguru import logger + +from common.config_models import generate_config_file from common.tabby_config import config from endpoints.server import export_openapi -from common.config_models import generate_config_file def branch_to_actions() -> bool: + """Checks if a optional action needs to be run.""" + if config.actions.export_openapi: openapi_json = export_openapi() From 92af6567052737d166d4d40877c3b4a0de682c5e Mon Sep 17 00:00:00 2001 From: TerminalMan <84923604+SecretiveShell@users.noreply.github.com> Date: Sun, 15 Sep 2024 17:50:37 +0100 Subject: [PATCH 21/51] improve config generation action --- common/actions.py | 2 +- common/config_models.py | 84 +++++++++++++++++++++++++---------------- 2 files changed, 52 insertions(+), 34 deletions(-) diff --git a/common/actions.py b/common/actions.py index 44fb236b..ebf55394 100644 --- a/common/actions.py +++ b/common/actions.py @@ -20,7 +20,7 @@ def branch_to_actions() -> bool: ) elif config.actions.export_config: - generate_config_file(config.actions.config_export_path) + generate_config_file(filename=config.actions.config_export_path) else: # did not branch diff --git a/common/config_models.py b/common/config_models.py index bf2787af..23e7320a 100644 --- a/common/config_models.py +++ b/common/config_models.py @@ -1,11 +1,25 @@ -from pydantic import AliasChoices, BaseModel, ConfigDict, Field +from pydantic import AliasChoices, BaseModel, ConfigDict, Field, 
PrivateAttr from typing import List, Literal, Optional, Union from pathlib import Path +from pydantic_core import PydanticUndefined + CACHE_SIZES = Literal["FP16", "Q8", "Q6", "Q4"] -class ConfigOverrideConfig(BaseModel): +class Metadata(BaseModel): + """metadata model for config options""" + + include_in_config: Optional[bool] = Field(True) + + +class BaseConfigModel(BaseModel): + """Base model for config models with added metadata""" + + _metadata: Metadata = PrivateAttr(Metadata()) + + +class ConfigOverrideConfig(BaseConfigModel): """Model for overriding a provided config file.""" # TODO: convert this to a pathlib.path? @@ -13,8 +27,10 @@ class ConfigOverrideConfig(BaseModel): None, description=("Path to an overriding config.yml file") ) + _metadata: Metadata = PrivateAttr(Metadata(include_in_config=False)) + -class UtilityActions(BaseModel): +class UtilityActions(BaseConfigModel): """Model used for arg actions.""" # YAML export options @@ -33,8 +49,10 @@ class UtilityActions(BaseModel): "openapi.json", description="path to export openapi schema to" ) + _metadata: Metadata = PrivateAttr(Metadata(include_in_config=False)) -class NetworkConfig(BaseModel): + +class NetworkConfig(BaseConfigModel): """Model for network configuration.""" host: Optional[str] = Field("127.0.0.1", description=("The IP to host on")) @@ -54,7 +72,7 @@ class NetworkConfig(BaseModel): # TODO: Migrate config.yml to have the log_ prefix # This is a breaking change. -class LoggingConfig(BaseModel): +class LoggingConfig(BaseConfigModel): """Model for logging configuration.""" log_prompt: Optional[bool] = Field( @@ -74,7 +92,7 @@ class LoggingConfig(BaseModel): ) -class ModelConfig(BaseModel): +class ModelConfig(BaseConfigModel): """Model for LLM configuration.""" # TODO: convert this to a pathlib.path? @@ -219,10 +237,11 @@ class ModelConfig(BaseModel): ), ) + _metadata: Metadata = PrivateAttr(Metadata()) model_config = ConfigDict(protected_namespaces=()) -class DraftModelConfig(BaseModel): +class DraftModelConfig(BaseConfigModel): """Model for draft LLM model configuration.""" # TODO: convert this to a pathlib.path? @@ -262,7 +281,7 @@ class DraftModelConfig(BaseModel): ) -class LoraInstanceModel(BaseModel): +class LoraInstanceModel(BaseConfigModel): """Model representing an instance of a Lora.""" name: str = Field(..., description=("Name of the LoRA model")) @@ -273,7 +292,7 @@ class LoraInstanceModel(BaseModel): ) -class LoraConfig(BaseModel): +class LoraConfig(BaseConfigModel): """Model for lora configuration.""" # TODO: convert this to a pathlib.path? @@ -289,7 +308,7 @@ class LoraConfig(BaseModel): ) -class SamplingConfig(BaseModel): +class SamplingConfig(BaseConfigModel): """Model for sampling (overrides) config.""" override_preset: Optional[str] = Field( @@ -297,7 +316,7 @@ class SamplingConfig(BaseModel): ) -class DeveloperConfig(BaseModel): +class DeveloperConfig(BaseConfigModel): """Model for developer settings configuration.""" unsafe_launch: Optional[bool] = Field( @@ -321,7 +340,7 @@ class DeveloperConfig(BaseModel): ) -class EmbeddingsConfig(BaseModel): +class EmbeddingsConfig(BaseConfigModel): """Model for embeddings configuration.""" # TODO: convert this to a pathlib.path? 
@@ -366,30 +385,29 @@ class TabbyConfigModel(BaseModel): model_config = ConfigDict(validate_assignment=True, protected_namespaces=()) -def generate_config_file(filename="config_sample.yml", indentation=2): +def generate_config_file( + model: BaseConfigModel = None, + filename: str = "config_sample.yml", + indentation: int = 2, +) -> None: """Creates a config.yml file from Pydantic models.""" - schema = TabbyConfigModel.model_json_schema() - - def dump_def(id: str, indent=2): - yaml = "" - indent = " " * indentation * indent - id = id.split("/")[-1] - - section = schema["$defs"][id]["properties"] - for property in section.keys(): # get type - comment = section[property]["description"] - yaml += f"{indent}# {comment}\n" - - value = section[property].get("default", "") - yaml += f"{indent}{property}: {value}\n\n" - - return yaml + "\n" - + schema = model if model else TabbyConfigModel() yaml = "" - for section in schema["properties"].keys(): - yaml += f"{section}:\n" - yaml += dump_def(schema["properties"][section]["$ref"]) + + for field, field_data in schema.model_fields.items(): + subfield_model = field_data.default_factory() + if not subfield_model._metadata.include_in_config: + continue + + yaml += f"# {subfield_model.__doc__}\n" + yaml += f"{field}:\n" + for subfield, subfield_data in subfield_model.model_fields.items(): + value = subfield_data.default + value = value if value is not None else "" + value = value if value is not PydanticUndefined else "" + yaml += f"{' ' * indentation}# {subfield_data.description}\n" + yaml += f"{' ' * indentation}{subfield}: {value}\n" yaml += "\n" with open(filename, "w") as f: From 250d76f5c61409ea75bae8784b36aded09fef105 Mon Sep 17 00:00:00 2001 From: kingbri Date: Sun, 15 Sep 2024 23:06:18 -0400 Subject: [PATCH 22/51] Config: Alter YAML generator function These changes fix the amount and order of newlines to look pleasing for the user. However, the changes used in here are kind of hacky and need a proper fix that can contain the same level of efficiency. Signed-off-by: kingbri --- common/config_models.py | 39 +++++++++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/common/config_models.py b/common/config_models.py index 23e7320a..36275058 100644 --- a/common/config_models.py +++ b/common/config_models.py @@ -1,6 +1,8 @@ +from inspect import getdoc +from pathlib import Path from pydantic import AliasChoices, BaseModel, ConfigDict, Field, PrivateAttr +from textwrap import dedent from typing import List, Literal, Optional, Union -from pathlib import Path from pydantic_core import PydanticUndefined @@ -385,6 +387,7 @@ class TabbyConfigModel(BaseModel): model_config = ConfigDict(validate_assignment=True, protected_namespaces=()) +# TODO: Possibly switch to ruamel.yaml for a more native implementation def generate_config_file( model: BaseConfigModel = None, filename: str = "config_sample.yml", @@ -392,23 +395,51 @@ def generate_config_file( ) -> None: """Creates a config.yml file from Pydantic models.""" + # Add a preamble + yaml = dedent(""" + # Sample YAML file for configuration. + # Comment and uncomment values as needed. + # Every value has a default within the application. + # This file serves to be a drop in for config.yml + + # Unless specified in the comments, DO NOT put these options in quotes! 
+ # You can use https://www.yamllint.com/ if you want to check your YAML formatting.\n + """) + schema = model if model else TabbyConfigModel() - yaml = "" + # TODO: Make the disordered iteration look cleaner + iter_once = False for field, field_data in schema.model_fields.items(): subfield_model = field_data.default_factory() + if not subfield_model._metadata.include_in_config: continue - yaml += f"# {subfield_model.__doc__}\n" + # Since the list is out of order with the length + # Add newlines from the beginning once one iteration finishes + # This is a sanity check for formatting + if iter_once: + yaml += "\n" + else: + iter_once = True + + yaml += f"# {getdoc(subfield_model)}\n" yaml += f"{field}:\n" + + sub_iter_once = False for subfield, subfield_data in subfield_model.model_fields.items(): + # Same logic as iter_once + if sub_iter_once: + yaml += "\n" + else: + sub_iter_once = True + value = subfield_data.default value = value if value is not None else "" value = value if value is not PydanticUndefined else "" yaml += f"{' ' * indentation}# {subfield_data.description}\n" yaml += f"{' ' * indentation}{subfield}: {value}\n" - yaml += "\n" with open(filename, "w") as f: f.write(yaml) From 8ff9f2c6c0eb03d4bc5a494947a84d8dfcc0e0ef Mon Sep 17 00:00:00 2001 From: kingbri Date: Sun, 15 Sep 2024 23:14:39 -0400 Subject: [PATCH 23/51] Config: Rewrite docstrings for models Adheres to the old config.yml's descriptions and allows for newlines in generated YAML. Signed-off-by: kingbri --- common/config_models.py | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/common/config_models.py b/common/config_models.py index 36275058..cf7a4736 100644 --- a/common/config_models.py +++ b/common/config_models.py @@ -55,7 +55,7 @@ class UtilityActions(BaseConfigModel): class NetworkConfig(BaseConfigModel): - """Model for network configuration.""" + """Options for networking""" host: Optional[str] = Field("127.0.0.1", description=("The IP to host on")) port: Optional[int] = Field(5000, description=("The port to host on")) @@ -75,7 +75,7 @@ class NetworkConfig(BaseConfigModel): # TODO: Migrate config.yml to have the log_ prefix # This is a breaking change. class LoggingConfig(BaseConfigModel): - """Model for logging configuration.""" + """Options for logging""" log_prompt: Optional[bool] = Field( False, @@ -95,7 +95,11 @@ class LoggingConfig(BaseConfigModel): class ModelConfig(BaseConfigModel): - """Model for LLM configuration.""" + """ + Options for model overrides and loading + Please read the comments to understand how arguments are handled + between initial and API loads + """ # TODO: convert this to a pathlib.path? model_dir: str = Field( @@ -244,7 +248,10 @@ class ModelConfig(BaseConfigModel): class DraftModelConfig(BaseConfigModel): - """Model for draft LLM model configuration.""" + """ + Options for draft models (speculative decoding) + This will use more VRAM! + """ # TODO: convert this to a pathlib.path? draft_model_dir: Optional[str] = Field( @@ -295,7 +302,7 @@ class LoraInstanceModel(BaseConfigModel): class LoraConfig(BaseConfigModel): - """Model for lora configuration.""" + """Options for Loras""" # TODO: convert this to a pathlib.path? 
lora_dir: Optional[str] = Field( @@ -311,7 +318,7 @@ class LoraConfig(BaseConfigModel): class SamplingConfig(BaseConfigModel): - """Model for sampling (overrides) config.""" + """Options for Sampling""" override_preset: Optional[str] = Field( None, description=("Select a sampler override preset") @@ -319,7 +326,7 @@ class SamplingConfig(BaseConfigModel): class DeveloperConfig(BaseConfigModel): - """Model for developer settings configuration.""" + """Options for development and experimentation""" unsafe_launch: Optional[bool] = Field( False, description=("Skip Exllamav2 version check") @@ -343,7 +350,11 @@ class DeveloperConfig(BaseConfigModel): class EmbeddingsConfig(BaseConfigModel): - """Model for embeddings configuration.""" + """ + Options for embedding models and loading. + NOTE: Embeddings requires the "extras" feature to be installed + Install it via "pip install .[extras]" + """ # TODO: convert this to a pathlib.path? embedding_model_dir: Optional[str] = Field( @@ -424,7 +435,9 @@ def generate_config_file( else: iter_once = True - yaml += f"# {getdoc(subfield_model)}\n" + for line in getdoc(subfield_model).splitlines(): + yaml += f"# {line}\n" + yaml += f"{field}:\n" sub_iter_once = False From 4c8bb42ec1470f081585fdbb632175968edec1f7 Mon Sep 17 00:00:00 2001 From: kingbri Date: Sun, 15 Sep 2024 23:18:19 -0400 Subject: [PATCH 24/51] Config: Reorder models It makes sense for the LLM model groups to be clustered around each other with the least used groups towards the bottom. Signed-off-by: kingbri --- common/config_models.py | 56 ++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/common/config_models.py b/common/config_models.py index cf7a4736..637348cf 100644 --- a/common/config_models.py +++ b/common/config_models.py @@ -317,6 +317,32 @@ class LoraConfig(BaseConfigModel): ) +class EmbeddingsConfig(BaseConfigModel): + """ + Options for embedding models and loading. + NOTE: Embeddings requires the "extras" feature to be installed + Install it via "pip install .[extras]" + """ + + # TODO: convert this to a pathlib.path? + embedding_model_dir: Optional[str] = Field( + "models", + description=( + "Overrides directory to look for embedding models (default: models)" + ), + ) + embeddings_device: Optional[Literal["cpu", "auto", "cuda"]] = Field( + "cpu", + description=( + "Device to load embedding models on (default: cpu). Possible values: cpu, " + "auto, cuda. If using an AMD GPU, set this value to 'cuda'." + ), + ) + embedding_model_name: Optional[str] = Field( + None, description=("The embeddings model to load") + ) + + class SamplingConfig(BaseConfigModel): """Options for Sampling""" @@ -349,32 +375,6 @@ class DeveloperConfig(BaseConfigModel): ) -class EmbeddingsConfig(BaseConfigModel): - """ - Options for embedding models and loading. - NOTE: Embeddings requires the "extras" feature to be installed - Install it via "pip install .[extras]" - """ - - # TODO: convert this to a pathlib.path? - embedding_model_dir: Optional[str] = Field( - "models", - description=( - "Overrides directory to look for embedding models (default: models)" - ), - ) - embeddings_device: Optional[Literal["cpu", "auto", "cuda"]] = Field( - "cpu", - description=( - "Device to load embedding models on (default: cpu). Possible values: cpu, " - "auto, cuda. If using an AMD GPU, set this value to 'cuda'." 
- ), - ) - embedding_model_name: Optional[str] = Field( - None, description=("The embeddings model to load") - ) - - class TabbyConfigModel(BaseModel): """Base model for a TabbyConfig.""" @@ -388,11 +388,11 @@ class TabbyConfigModel(BaseModel): default_factory=DraftModelConfig.model_construct ) lora: LoraConfig = Field(default_factory=LoraConfig.model_construct) - sampling: SamplingConfig = Field(default_factory=SamplingConfig.model_construct) - developer: DeveloperConfig = Field(default_factory=DeveloperConfig.model_construct) embeddings: EmbeddingsConfig = Field( default_factory=EmbeddingsConfig.model_construct ) + sampling: SamplingConfig = Field(default_factory=SamplingConfig.model_construct) + developer: DeveloperConfig = Field(default_factory=DeveloperConfig.model_construct) actions: UtilityActions = Field(default_factory=UtilityActions.model_construct) model_config = ConfigDict(validate_assignment=True, protected_namespaces=()) From 3340c3bf2f9456270390065a35c12b53a1c8739b Mon Sep 17 00:00:00 2001 From: kingbri Date: Mon, 16 Sep 2024 00:01:30 -0400 Subject: [PATCH 25/51] Config: Rewrite descriptions This makes both config.yml and args more descriptive than before. Signed-off-by: kingbri --- common/config_models.py | 218 ++++++++++++++++++++++++++-------------- 1 file changed, 141 insertions(+), 77 deletions(-) diff --git a/common/config_models.py b/common/config_models.py index 637348cf..e81b358d 100644 --- a/common/config_models.py +++ b/common/config_models.py @@ -1,6 +1,6 @@ from inspect import getdoc from pathlib import Path -from pydantic import AliasChoices, BaseModel, ConfigDict, Field, PrivateAttr +from pydantic import BaseModel, ConfigDict, Field, PrivateAttr from textwrap import dedent from typing import List, Literal, Optional, Union @@ -57,18 +57,37 @@ class UtilityActions(BaseConfigModel): class NetworkConfig(BaseConfigModel): """Options for networking""" - host: Optional[str] = Field("127.0.0.1", description=("The IP to host on")) - port: Optional[int] = Field(5000, description=("The port to host on")) + host: Optional[str] = Field( + "127.0.0.1", + description=( + "The IP to host on (default: 127.0.0.1).\n" + "Use 0.0.0.0 to expose on all network adapters." + ), + ) + port: Optional[int] = Field( + 5000, description=("The port to host on (default: 5000).") + ) disable_auth: Optional[bool] = Field( - False, description=("Disable HTTP token authentication with requests") + False, + description=( + "Disable HTTP token authentication with requests.\n" + "WARNING: This will make your instance vulnerable!\n" + "Turn on this option if you are ONLY connecting from localhost." + ), ) send_tracebacks: Optional[bool] = Field( False, - description=("Decide whether to send error tracebacks over the API"), + description=( + "Send tracebacks over the API (default: False).\n" + "NOTE: Only enable this for debug purposes." + ), ) api_servers: Optional[List[Literal["OAI", "Kobold"]]] = Field( default_factory=list, - description=("API servers to enable. Options: (OAI, Kobold)"), + description=( + 'Select API servers to enable (default: ["OAI"]).\n' + "Possible values: OAI, Kobold." 
+ ), ) @@ -79,18 +98,18 @@ class LoggingConfig(BaseConfigModel): log_prompt: Optional[bool] = Field( False, - description=("Enable prompt logging"), - validation_alias=AliasChoices("log_prompt", "prompt"), + description=("Enable prompt logging (default: False)."), ) log_generation_params: Optional[bool] = Field( False, - description=("Enable generation parameter logging"), - validation_alias=AliasChoices("log_generation_params", "generation_params"), + description=("Enable generation parameter logging (default: False)."), ) log_requests: Optional[bool] = Field( False, - description=("Enable request logging"), - validation_alias=AliasChoices("log_requests", "requests"), + description=( + "Enable request logging (default: False).\n" + "NOTE: Only use this for debugging!" + ), ) @@ -105,101 +124,117 @@ class ModelConfig(BaseConfigModel): model_dir: str = Field( "models", description=( - "Overrides the directory to look for models (default: models). Windows " - "users, do NOT put this path in quotes." + "Directory to look for models (default: models).\n" + "Windows users, do NOT put this path in quotes!" + ), + ) + inline_model_loading: Optional[bool] = Field( + True, + description=( + "Allow direct loading of models " + "from a completion or chat completion request (default: False)." ), ) use_dummy_models: Optional[bool] = Field( False, description=( - "Sends dummy model names when the models endpoint is queried. Enable this " - "if looking for specific OAI models." + "Sends dummy model names when the models endpoint is queried.\n" + "Enable this if the client is looking for specific OAI models." ), ) model_name: Optional[str] = Field( None, description=( - "An initial model to load. Make sure the model is located in the model " - "directory! REQUIRED: This must be filled out to load a model on startup." + "An initial model to load.\n" + "Make sure the model is located in the model directory!\n" + "REQUIRED: This must be filled out to load a model on startup." ), ) use_as_default: List[str] = Field( default_factory=list, description=( - "Names of args to use as a default fallback for API load requests " - "(default: []). Example: ['max_seq_len', 'cache_mode']" + "Names of args to use as a fallback for API load requests (default: []).\n" + "For example, if you always want cache_mode to be Q4 " + 'instead of on the inital model load, add "cache_mode" to this array.\n' + "Example: ['max_seq_len', 'cache_mode']." ), ) max_seq_len: Optional[int] = Field( None, description=( - "Max sequence length. Fetched from the model's base sequence length in " - "config.json by default." + "Max sequence length (default: Empty).\n" + "Fetched from the model's base sequence length in config.json by default." ), ge=0, ) override_base_seq_len: Optional[int] = Field( None, description=( - "Overrides base model context length. WARNING: Only use this if the " - "model's base sequence length is incorrect." + "Overrides base model context length (default: Empty).\n" + "WARNING: Don't set this unless you know what you're doing!\n" + "Again, do NOT use this for configuring context length, " + "use max_seq_len above ^" ), ge=0, ) tensor_parallel: Optional[bool] = Field( False, description=( - "Load model with tensor parallelism. Fallback to autosplit if GPU split " - "isn't provided." + "Load model with tensor parallelism.\n" + "Falls back to autosplit if GPU split isn't provided.\n" + "This ignores the gpu_split_auto value." 
), ) gpu_split_auto: Optional[bool] = Field( True, description=( - "Automatically allocate resources to GPUs (default: True). Not parsed for " - "single GPU users." + "Automatically allocate resources to GPUs (default: True).\n" + "Not parsed for single GPU users." ), ) autosplit_reserve: List[int] = Field( [96], description=( - "Reserve VRAM used for autosplit loading (default: 96 MB on GPU 0). " + "Reserve VRAM used for autosplit loading (default: 96 MB on GPU 0).\n" "Represented as an array of MB per GPU." ), ) gpu_split: List[float] = Field( default_factory=list, description=( - "An integer array of GBs of VRAM to split between GPUs (default: []). " + "An integer array of GBs of VRAM to split between GPUs (default: []).\n" "Used with tensor parallelism." ), ) rope_scale: Optional[float] = Field( 1.0, description=( - "Rope scale (default: 1.0). Same as compress_pos_emb. Only use if the " - "model was trained on long context with rope." + "Rope scale (default: 1.0).\n" + "Same as compress_pos_emb.\n" + "Use if the model was trained on long context with rope.\n" + "Leave blank to pull the value from the model." ), ) rope_alpha: Optional[Union[float, Literal["auto"]]] = Field( 1.0, description=( - "Rope alpha (default: 1.0). Same as alpha_value. Set to 'auto' to auto- " - "calculate." + "Rope alpha (default: 1.0).\n" + 'Same as alpha_value. Set to "auto" to auto-calculate.' ), ) cache_mode: Optional[CACHE_SIZES] = Field( "FP16", description=( - "Enable different cache modes for VRAM savings (default: FP16). Possible " - f"values: {str(CACHE_SIZES)[15:-1]}" + "Enable different cache modes for VRAM savings (default: FP16).\n" + f"Possible values: {str(CACHE_SIZES)[15:-1]}." ), ) cache_size: Optional[int] = Field( None, description=( - "Size of the prompt cache to allocate (default: max_seq_len). Must be a " - "multiple of 256." + "Size of the prompt cache to allocate (default: max_seq_len).\n" + "Must be a multiple of 256 and can't be less than max_seq_len.\n" + "For CFG, set this to 2 * max_seq_len." ), multiple_of=256, gt=0, @@ -207,39 +242,48 @@ class ModelConfig(BaseConfigModel): chunk_size: Optional[int] = Field( 2048, description=( - "Chunk size for prompt ingestion (default: 2048). A lower value reduces " - "VRAM usage but decreases ingestion speed." + "Chunk size for prompt ingestion (default: 2048).\n" + "A lower value reduces VRAM usage but decreases ingestion speed.\n" + "NOTE: Effects vary depending on the model.\n" + "An ideal value is between 512 and 4096." ), gt=0, ) max_batch_size: Optional[int] = Field( None, description=( - "Set the maximum number of prompts to process at one time (default: " - "None/Automatic). Automatically calculated if left blank." + "Set the maximum number of prompts to process at one time " + "(default: None/Automatic).\n" + "Automatically calculated if left blank.\n" + "NOTE: Only available for Nvidia ampere (30 series) and above GPUs." ), ge=1, ) prompt_template: Optional[str] = Field( None, description=( - "Set the prompt template for this model. If empty, attempts to look for " - "the model's chat template." + "Set the prompt template for this model. (default: None)\n" + "If empty, attempts to look for the model's chat template.\n" + "If a model contains multiple templates in its tokenizer_config.json,\n" + "set prompt_template to the name of the template you want to use.\n" + "NOTE: Only works with chat completion message lists!" ), ) num_experts_per_token: Optional[int] = Field( None, description=( - "Number of experts to use per token. 
Fetched from the model's " - "config.json. For MoE models only." + "Number of experts to use per token.\n" + "Fetched from the model's config.json if empty.\n" + "NOTE: For MoE models only.\n" + "WARNING: Don't set this unless you know what you're doing!" ), ge=1, ) fasttensors: Optional[bool] = Field( False, description=( - "Enables fasttensors to possibly increase model loading speeds (default: " - "False)." + "Enables fasttensors to possibly increase model loading speeds " + "(default: False)." ), ) @@ -256,36 +300,35 @@ class DraftModelConfig(BaseConfigModel): # TODO: convert this to a pathlib.path? draft_model_dir: Optional[str] = Field( "models", - description=( - "Overrides the directory to look for draft models (default: models)" - ), + description=("Directory to look for draft models (default: models)"), ) draft_model_name: Optional[str] = Field( None, description=( - "An initial draft model to load. Ensure the model is in the model" - "directory." + "An initial draft model to load.\n" + "Ensure the model is in the model directory." ), ) draft_rope_scale: Optional[float] = Field( 1.0, description=( - "Rope scale for draft models (default: 1.0). Same as compress_pos_emb. " + "Rope scale for draft models (default: 1.0).\n" + "Same as compress_pos_emb.\n" "Use if the draft model was trained on long context with rope." ), ) draft_rope_alpha: Optional[float] = Field( None, description=( - "Rope alpha for draft models (default: None). Same as alpha_value. Leave " - "blank to auto-calculate the alpha value." + "Rope alpha for draft models (default: None).\n" + 'Same as alpha_value. Set to "auto" to auto-calculate.' ), ) draft_cache_mode: Optional[CACHE_SIZES] = Field( "FP16", description=( - "Cache mode for draft models to save VRAM (default: FP16). Possible " - f"values: {str(CACHE_SIZES)[15:-1]}" + "Cache mode for draft models to save VRAM (default: FP16).\n" + f"Possible values: {str(CACHE_SIZES)[15:-1]}." ), ) @@ -293,10 +336,10 @@ class DraftModelConfig(BaseConfigModel): class LoraInstanceModel(BaseConfigModel): """Model representing an instance of a Lora.""" - name: str = Field(..., description=("Name of the LoRA model")) + name: str = Field(..., description=("Name of the LoRA model.")) scaling: float = Field( 1.0, - description=("Scaling factor for the LoRA model (default: 1.0)"), + description=("Scaling factor for the LoRA model (default: 1.0)."), ge=0, ) @@ -306,13 +349,13 @@ class LoraConfig(BaseConfigModel): # TODO: convert this to a pathlib.path? lora_dir: Optional[str] = Field( - "loras", description=("Directory to look for LoRAs (default: 'loras')") + "loras", description=("Directory to look for LoRAs (default: loras).") ) loras: Optional[List[LoraInstanceModel]] = Field( None, description=( - "List of LoRAs to load and associated scaling factors (default scaling: " - "1.0)" + "List of LoRAs to load and associated scaling factors " + "(default scale: 1.0)." ), ) @@ -327,19 +370,20 @@ class EmbeddingsConfig(BaseConfigModel): # TODO: convert this to a pathlib.path? embedding_model_dir: Optional[str] = Field( "models", - description=( - "Overrides directory to look for embedding models (default: models)" - ), + description=("Directory to look for embedding models (default: models)."), ) embeddings_device: Optional[Literal["cpu", "auto", "cuda"]] = Field( "cpu", description=( - "Device to load embedding models on (default: cpu). Possible values: cpu, " - "auto, cuda. If using an AMD GPU, set this value to 'cuda'." 
+ "Device to load embedding models on (default: cpu).\n" + "Possible values: cpu, auto, cuda.\n" + "NOTE: It's recommended to load embedding models on the CPU.\n" + "If using an AMD GPU, set this value to 'cuda'." ), ) embedding_model_name: Optional[str] = Field( - None, description=("The embeddings model to load") + None, + description=("An initial embedding model to load on the infinity backend."), ) @@ -347,7 +391,13 @@ class SamplingConfig(BaseConfigModel): """Options for Sampling""" override_preset: Optional[str] = Field( - None, description=("Select a sampler override preset") + None, + description=( + "Select a sampler override preset (default: None).\n" + "Find this in the sampler-overrides folder.\n" + "This overrides default fallbacks for sampler values " + "that are passed to the API." + ), ) @@ -355,22 +405,33 @@ class DeveloperConfig(BaseConfigModel): """Options for development and experimentation""" unsafe_launch: Optional[bool] = Field( - False, description=("Skip Exllamav2 version check") + False, + description=( + "Skip Exllamav2 version check (default: False).\n" + "WARNING: It's highly recommended to update your dependencies rather " + "than enabling this flag." + ), ) disable_request_streaming: Optional[bool] = Field( - False, description=("Disables API request streaming") + False, description=("Disable API request streaming (default: False).") ) cuda_malloc_backend: Optional[bool] = Field( - False, description=("Runs with the pytorch CUDA malloc backend") + False, description=("Enable the torch CUDA malloc backend (default: False).") ) uvloop: Optional[bool] = Field( - False, description=("Run asyncio using Uvloop or Winloop") + False, + description=( + "Run asyncio using Uvloop or Winloop which can improve performance.\n" + "NOTE: It's recommended to enable this, but if something breaks " + "turn this off." + ), ) realtime_process_priority: Optional[bool] = Field( False, description=( - "Set process to use a higher priority For realtime process priority, run " - "as administrator or sudo Otherwise, the priority will be set to high" + "Set process to use a higher priority.\n" + "For realtime process priority, run as administrator or sudo.\n" + "Otherwise, the priority will be set to high." 
), ) @@ -451,7 +512,10 @@ def generate_config_file( value = subfield_data.default value = value if value is not None else "" value = value if value is not PydanticUndefined else "" - yaml += f"{' ' * indentation}# {subfield_data.description}\n" + + for line in subfield_data.description.splitlines(): + yaml += f"{' ' * indentation}# {line}\n" + yaml += f"{' ' * indentation}{subfield}: {value}\n" with open(filename, "w") as f: From b6dd21f737048ee4ed5b8d439bde099e84f585a3 Mon Sep 17 00:00:00 2001 From: kingbri Date: Mon, 16 Sep 2024 00:52:25 -0400 Subject: [PATCH 26/51] Config: Handle default factories in config generation Signed-off-by: kingbri --- common/config_models.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/common/config_models.py b/common/config_models.py index e81b358d..2c888f89 100644 --- a/common/config_models.py +++ b/common/config_models.py @@ -509,7 +509,11 @@ def generate_config_file( else: sub_iter_once = True - value = subfield_data.default + if subfield_data.default_factory: + value = subfield_data.default_factory() + else: + value = subfield_data.default + value = value if value is not None else "" value = value if value is not PydanticUndefined else "" From 564bdcf0a8e71f6c94011a45887dc1fdde505c0e Mon Sep 17 00:00:00 2001 From: TerminalMan <84923604+SecretiveShell@users.noreply.github.com> Date: Mon, 16 Sep 2024 14:12:47 +0100 Subject: [PATCH 27/51] add legacy config converter --- common/args.py | 4 ++-- common/config_models.py | 30 ++++++++++++++++++++---------- common/tabby_config.py | 38 +++++++++++++++++++++++++++++++++----- common/utils.py | 18 ++++++++++++++++-- 4 files changed, 71 insertions(+), 19 deletions(-) diff --git a/common/args.py b/common/args.py index 7d2427f8..7c09646c 100644 --- a/common/args.py +++ b/common/args.py @@ -4,7 +4,7 @@ from pydantic import BaseModel from common.config_models import TabbyConfigModel -from common.utils import is_list_type +from common.utils import is_list_type, unwrap_optional def add_field_to_group(group, field_name, field_type, field) -> None: @@ -32,7 +32,7 @@ def init_argparser() -> argparse.ArgumentParser: # Loop through each top-level field in the config for field_name, field_info in TabbyConfigModel.model_fields.items(): - field_type = field_info.annotation + field_type = unwrap_optional(field_info.annotation) group = parser.add_argument_group( field_name, description=f"Arguments for {field_name}" ) diff --git a/common/config_models.py b/common/config_models.py index 2c888f89..653280b0 100644 --- a/common/config_models.py +++ b/common/config_models.py @@ -439,22 +439,32 @@ class DeveloperConfig(BaseConfigModel): class TabbyConfigModel(BaseModel): """Base model for a TabbyConfig.""" - config: ConfigOverrideConfig = Field( + config: Optional[ConfigOverrideConfig] = Field( default_factory=ConfigOverrideConfig.model_construct ) - network: NetworkConfig = Field(default_factory=NetworkConfig.model_construct) - logging: LoggingConfig = Field(default_factory=LoggingConfig.model_construct) - model: ModelConfig = Field(default_factory=ModelConfig.model_construct) - draft_model: DraftModelConfig = Field( + network: Optional[NetworkConfig] = Field( + default_factory=NetworkConfig.model_construct + ) + logging: Optional[LoggingConfig] = Field( + default_factory=LoggingConfig.model_construct + ) + model: Optional[ModelConfig] = Field(default_factory=ModelConfig.model_construct) + draft_model: Optional[DraftModelConfig] = Field( default_factory=DraftModelConfig.model_construct ) - lora: LoraConfig = 
Field(default_factory=LoraConfig.model_construct) - embeddings: EmbeddingsConfig = Field( + lora: Optional[LoraConfig] = Field(default_factory=LoraConfig.model_construct) + embeddings: Optional[EmbeddingsConfig] = Field( default_factory=EmbeddingsConfig.model_construct ) - sampling: SamplingConfig = Field(default_factory=SamplingConfig.model_construct) - developer: DeveloperConfig = Field(default_factory=DeveloperConfig.model_construct) - actions: UtilityActions = Field(default_factory=UtilityActions.model_construct) + sampling: Optional[SamplingConfig] = Field( + default_factory=SamplingConfig.model_construct + ) + developer: Optional[DeveloperConfig] = Field( + default_factory=DeveloperConfig.model_construct + ) + actions: Optional[UtilityActions] = Field( + default_factory=UtilityActions.model_construct + ) model_config = ConfigDict(validate_assignment=True, protected_namespaces=()) diff --git a/common/tabby_config.py b/common/tabby_config.py index 2f0481d9..1dacac00 100644 --- a/common/tabby_config.py +++ b/common/tabby_config.py @@ -2,10 +2,10 @@ import pathlib from loguru import logger from typing import Optional -from os import getenv +from os import getenv, replace from common.utils import unwrap, merge_dicts -from common.config_models import TabbyConfigModel +from common.config_models import TabbyConfigModel, generate_config_file class TabbyConfig(TabbyConfigModel): @@ -46,10 +46,25 @@ def load(self, arguments: Optional[dict] = None): def _from_file(self, config_path: pathlib.Path): """loads config from a given file path""" + legacy = False + cfg = {} + # try loading from file try: with open(str(config_path.resolve()), "r", encoding="utf8") as config_file: - return unwrap(yaml.safe_load(config_file), {}) + cfg = yaml.safe_load(config_file) + + # FIXME: remove legacy config mapper + # load legacy config files + model = cfg.get("model", {}) + + if model.get("draft"): + legacy = True + cfg["draft"] = model["draft"] + if model.get("lora"): + legacy = True + cfg["lora"] = model["lora"] + except FileNotFoundError: logger.info(f"The '{config_path.name}' file cannot be found") except Exception as exc: @@ -58,8 +73,21 @@ def _from_file(self, config_path: pathlib.Path): f"the following error:\n\n{exc}" ) - # if no config file was loaded - return {} + if legacy: + logger.warning( + "legacy config.yml files are deprecated" + "Please upadte to the new version" + "Attempting auto migrationy" + ) + new_cfg = TabbyConfigModel.model_validate(cfg) + + try: + replace(config_path, f"{config_path}.bak") + generate_config_file(model=new_cfg, filename=config_path) + except Exception as e: + logger.error(f"Auto migration failed: {e}") + + return unwrap(cfg, {}) def _from_args(self, args: dict): """loads config from the provided arguments""" diff --git a/common/utils.py b/common/utils.py index d933fb60..acc0fc9b 100644 --- a/common/utils.py +++ b/common/utils.py @@ -1,6 +1,7 @@ """Common utility functions""" -from typing import get_args, get_origin +from types import NoneType +from typing import Optional, Type, Union, get_args, get_origin def unwrap(wrapped, default=None): @@ -47,7 +48,7 @@ def flat_map(input_list): return [item for sublist in input_list for item in sublist] -def is_list_type(type_hint): +def is_list_type(type_hint) -> bool: """Checks if a type contains a list.""" if get_origin(type_hint) is list: @@ -59,3 +60,16 @@ def is_list_type(type_hint): return any(is_list_type(arg) for arg in type_args) return False + + +def unwrap_optional(type_hint) -> Type: + """unwrap Optional[type] 
annotations""" + + if get_origin(type_hint) is Union: + args = get_args(type_hint) + if NoneType in args: + for arg in args: + if arg is not NoneType: + return arg + + return type_hint From 7f030034374a14bcf0b1a0260f5ef67e5aec2906 Mon Sep 17 00:00:00 2001 From: TerminalMan <84923604+SecretiveShell@users.noreply.github.com> Date: Mon, 16 Sep 2024 14:18:54 +0100 Subject: [PATCH 28/51] rephrase info message --- common/tabby_config.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/common/tabby_config.py b/common/tabby_config.py index 1dacac00..8e4d2c52 100644 --- a/common/tabby_config.py +++ b/common/tabby_config.py @@ -75,9 +75,9 @@ def _from_file(self, config_path: pathlib.Path): if legacy: logger.warning( - "legacy config.yml files are deprecated" - "Please upadte to the new version" - "Attempting auto migrationy" + "legacy config.yml files are deprecated, " + "please upadte to the new version.\n" + "Attempting auto migration" ) new_cfg = TabbyConfigModel.model_validate(cfg) From 81ae461eb84d1f1128580647aad1a73d1f4d68fd Mon Sep 17 00:00:00 2001 From: kingbri Date: Mon, 16 Sep 2024 12:19:58 -0400 Subject: [PATCH 29/51] Config: Allow existing values to get included in generated file Allows for generation from an existing config file. Primarily used for migration purposes. Signed-off-by: kingbri --- common/args.py | 4 ++-- common/config_models.py | 17 +++++++++++++---- common/utils.py | 7 +++++-- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/common/args.py b/common/args.py index 7c09646c..38635801 100644 --- a/common/args.py +++ b/common/args.py @@ -4,7 +4,7 @@ from pydantic import BaseModel from common.config_models import TabbyConfigModel -from common.utils import is_list_type, unwrap_optional +from common.utils import is_list_type, unwrap_optional_type def add_field_to_group(group, field_name, field_type, field) -> None: @@ -32,7 +32,7 @@ def init_argparser() -> argparse.ArgumentParser: # Loop through each top-level field in the config for field_name, field_info in TabbyConfigModel.model_fields.items(): - field_type = unwrap_optional(field_info.annotation) + field_type = unwrap_optional_type(field_info.annotation) group = parser.add_argument_group( field_name, description=f"Arguments for {field_name}" ) diff --git a/common/config_models.py b/common/config_models.py index 653280b0..52f4f433 100644 --- a/common/config_models.py +++ b/common/config_models.py @@ -1,10 +1,11 @@ from inspect import getdoc from pathlib import Path from pydantic import BaseModel, ConfigDict, Field, PrivateAttr +from pydantic_core import PydanticUndefined from textwrap import dedent from typing import List, Literal, Optional, Union -from pydantic_core import PydanticUndefined +from common.utils import unwrap CACHE_SIZES = Literal["FP16", "Q8", "Q6", "Q4"] @@ -488,12 +489,17 @@ def generate_config_file( # You can use https://www.yamllint.com/ if you want to check your YAML formatting.\n """) - schema = model if model else TabbyConfigModel() + schema = unwrap(model, TabbyConfigModel()) # TODO: Make the disordered iteration look cleaner iter_once = False for field, field_data in schema.model_fields.items(): - subfield_model = field_data.default_factory() + # Fetch from the existing model class if it's passed + # Probably can use this on schema too, but play it safe + if model: + subfield_model = getattr(model, field, None) + else: + subfield_model = field_data.default_factory() if not subfield_model._metadata.include_in_config: continue @@ -519,7 +525,10 @@ def 
generate_config_file( else: sub_iter_once = True - if subfield_data.default_factory: + # If a value already exists, use it + if hasattr(subfield_model, subfield): + value = getattr(subfield_model, subfield) + elif subfield_data.default_factory: value = subfield_data.default_factory() else: value = subfield_data.default diff --git a/common/utils.py b/common/utils.py index acc0fc9b..77958ced 100644 --- a/common/utils.py +++ b/common/utils.py @@ -62,8 +62,11 @@ def is_list_type(type_hint) -> bool: return False -def unwrap_optional(type_hint) -> Type: - """unwrap Optional[type] annotations""" +def unwrap_optional_type(type_hint) -> Type: + """ + Unwrap Optional[type] annotations. + This is not the same as unwrap. + """ if get_origin(type_hint) is Union: args = get_args(type_hint) From c715094cdced9b9c810919461db901e16db859e2 Mon Sep 17 00:00:00 2001 From: kingbri Date: Mon, 16 Sep 2024 12:35:00 -0400 Subject: [PATCH 30/51] Config: Add logging config to migration checks These keys were changed as well to include a "log_" prefix like the CLI arguments. Signed-off-by: kingbri --- common/tabby_config.py | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/common/tabby_config.py b/common/tabby_config.py index 8e4d2c52..a1f258b3 100644 --- a/common/tabby_config.py +++ b/common/tabby_config.py @@ -54,17 +54,31 @@ def _from_file(self, config_path: pathlib.Path): with open(str(config_path.resolve()), "r", encoding="utf8") as config_file: cfg = yaml.safe_load(config_file) - # FIXME: remove legacy config mapper + # NOTE: Remove migration wrapper after a period of time # load legacy config files - model = cfg.get("model", {}) - if model.get("draft"): - legacy = True - cfg["draft"] = model["draft"] - if model.get("lora"): + # Model config migration + model_cfg = unwrap(cfg.get("model"), {}) + + if model_cfg.get("draft"): legacy = True - cfg["lora"] = model["lora"] + cfg["draft"] = model_cfg["draft"] + if model_cfg.get("lora"): + legacy = True + cfg["lora"] = model_cfg["lora"] + + # Logging config migration + # This will catch the majority of legacy config files + logging_cfg = unwrap(cfg.get("logging"), {}) + unmigrated_log_keys = [ + key for key in logging_cfg.keys() if not key.startswith("log_") + ] + if unmigrated_log_keys: + legacy = True + for key in unmigrated_log_keys: + cfg["logging"][f"log_{key}"] = cfg["logging"][key] + del cfg["logging"][key] except FileNotFoundError: logger.info(f"The '{config_path.name}' file cannot be found") except Exception as exc: @@ -79,6 +93,8 @@ def _from_file(self, config_path: pathlib.Path): "please upadte to the new version.\n" "Attempting auto migration" ) + + # Create a temporary base config model new_cfg = TabbyConfigModel.model_validate(cfg) try: From e60c4ba5bcc955ccba7e50045f97f8ef88330896 Mon Sep 17 00:00:00 2001 From: kingbri Date: Mon, 16 Sep 2024 17:51:40 -0400 Subject: [PATCH 31/51] Config: Fix existing value check If a sub-field exists in the model provided to the file generator, use it. Otherwise always fallback to the default factory. This prevents any subsequent errors from setting None. 
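The intended lookup order can be sketched roughly as follows (Section and Root are placeholder models, not the real config classes): a value already set on the supplied model wins, then the field's default factory, then the plain default:

    from typing import Optional
    from pydantic import BaseModel, Field

    class Section(BaseModel):
        host: str = "127.0.0.1"

    class Root(BaseModel):
        network: Optional[Section] = Field(default_factory=Section)

    def resolve(model: Optional[Root]) -> dict:
        resolved = {}
        for name, field in Root.model_fields.items():
            if model is not None and hasattr(model, name):
                # A value already present on the passed-in model wins
                resolved[name] = getattr(model, name)
            elif field.default_factory is not None:
                # Otherwise fall back to the field's default factory
                resolved[name] = field.default_factory()
            else:
                resolved[name] = field.default
        return resolved

    print(resolve(Root(network=Section(host="0.0.0.0"))))  # keeps 0.0.0.0
    print(resolve(None))                                    # factory default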
Signed-off-by: kingbri --- common/config_models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/config_models.py b/common/config_models.py index 52f4f433..bbe2d85d 100644 --- a/common/config_models.py +++ b/common/config_models.py @@ -496,8 +496,8 @@ def generate_config_file( for field, field_data in schema.model_fields.items(): # Fetch from the existing model class if it's passed # Probably can use this on schema too, but play it safe - if model: - subfield_model = getattr(model, field, None) + if model and hasattr(model, field): + subfield_model = getattr(model, field) else: subfield_model = field_data.default_factory() From ebe7f3567e5bc633403c9a1bf455db7d7abcae26 Mon Sep 17 00:00:00 2001 From: kingbri Date: Mon, 16 Sep 2024 18:02:18 -0400 Subject: [PATCH 32/51] Config: Alter migration error handling and cleanup Rollback to the old config if automigration fails. Signed-off-by: kingbri --- common/tabby_config.py | 11 ++++++----- common/utils.py | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/common/tabby_config.py b/common/tabby_config.py index a1f258b3..13b87184 100644 --- a/common/tabby_config.py +++ b/common/tabby_config.py @@ -2,7 +2,7 @@ import pathlib from loguru import logger from typing import Optional -from os import getenv, replace +from os import getenv from common.utils import unwrap, merge_dicts from common.config_models import TabbyConfigModel, generate_config_file @@ -98,11 +98,15 @@ def _from_file(self, config_path: pathlib.Path): new_cfg = TabbyConfigModel.model_validate(cfg) try: - replace(config_path, f"{config_path}.bak") + config_path.rename(f"{config_path}.bak") generate_config_file(model=new_cfg, filename=config_path) except Exception as e: logger.error(f"Auto migration failed: {e}") + # Restore the old config + config_path.unlink(missing_ok=True) + pathlib.Path(f"{config_path}.bak").rename(config_path) + return unwrap(cfg, {}) def _from_args(self, args: dict): @@ -118,9 +122,6 @@ def _from_args(self, args: dict): for key in TabbyConfigModel.model_fields.keys(): override = args.get(key) if override: - if key == "logging": - # Strip the "log_" prefix from logging keys if present - override = {k.replace("log_", ""): v for k, v in override.items()} config[key] = override return config diff --git a/common/utils.py b/common/utils.py index 77958ced..f8b46711 100644 --- a/common/utils.py +++ b/common/utils.py @@ -1,7 +1,7 @@ """Common utility functions""" from types import NoneType -from typing import Optional, Type, Union, get_args, get_origin +from typing import Type, Union, get_args, get_origin def unwrap(wrapped, default=None): From d2d07ed92d9eb8bc245023fcde8c19f49e437c77 Mon Sep 17 00:00:00 2001 From: kingbri Date: Mon, 16 Sep 2024 18:15:50 -0400 Subject: [PATCH 33/51] Config: Update auto-migration flow - Let the user know that migration is going to be attempted - Have a more informative error message if auto-migration fails - Revert back to the old config file on failure - Don't load with a partially parsed config Signed-off-by: kingbri --- common/tabby_config.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/common/tabby_config.py b/common/tabby_config.py index 13b87184..8b92f843 100644 --- a/common/tabby_config.py +++ b/common/tabby_config.py @@ -89,9 +89,7 @@ def _from_file(self, config_path: pathlib.Path): if legacy: logger.warning( - "legacy config.yml files are deprecated, " - "please upadte to the new version.\n" - "Attempting auto migration" + "Legacy config.yml 
file detected. Attempting auto-migration." ) # Create a temporary base config model @@ -100,13 +98,27 @@ def _from_file(self, config_path: pathlib.Path): try: config_path.rename(f"{config_path}.bak") generate_config_file(model=new_cfg, filename=config_path) + logger.info( + "Auto-migration successful. " + 'The old configuration is stored in "config.yml.bak".' + ) except Exception as e: - logger.error(f"Auto migration failed: {e}") + logger.error( + f"Auto-migration failed because of: {e}\n\n" + "Reverted all changes.\n" + "Either fix your config.yml and restart or\n" + "Delete your old YAML file and create a new " + 'config by copying "config_sample.yml" to "config.yml".' + ) # Restore the old config config_path.unlink(missing_ok=True) pathlib.Path(f"{config_path}.bak").rename(config_path) + # Don't use the partially loaded config + logger.warning("Starting with no config loaded.") + return {} + return unwrap(cfg, {}) def _from_args(self, args: dict): From 46f9fff2104623cf5d5301c48882e4cb49ee4d30 Mon Sep 17 00:00:00 2001 From: kingbri Date: Mon, 16 Sep 2024 22:22:24 -0400 Subject: [PATCH 34/51] Config: Move config file generation to tabby_config Keep the models as a separate reference file without any extra functions. Signed-off-by: kingbri --- common/actions.py | 5 +-- common/config_models.py | 79 ---------------------------------------- common/tabby_config.py | 80 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 80 insertions(+), 84 deletions(-) diff --git a/common/actions.py b/common/actions.py index ebf55394..c7f0a717 100644 --- a/common/actions.py +++ b/common/actions.py @@ -1,8 +1,7 @@ import json from loguru import logger -from common.config_models import generate_config_file -from common.tabby_config import config +from common.tabby_config import config, generate_config_file from endpoints.server import export_openapi @@ -18,10 +17,8 @@ def branch_to_actions() -> bool: "Successfully wrote OpenAPI spec to " + f"{config.actions.openapi_export_path}" ) - elif config.actions.export_config: generate_config_file(filename=config.actions.config_export_path) - else: # did not branch return False diff --git a/common/config_models.py b/common/config_models.py index bbe2d85d..58ce7542 100644 --- a/common/config_models.py +++ b/common/config_models.py @@ -1,11 +1,7 @@ -from inspect import getdoc from pathlib import Path from pydantic import BaseModel, ConfigDict, Field, PrivateAttr -from pydantic_core import PydanticUndefined -from textwrap import dedent from typing import List, Literal, Optional, Union -from common.utils import unwrap CACHE_SIZES = Literal["FP16", "Q8", "Q6", "Q4"] @@ -468,78 +464,3 @@ class TabbyConfigModel(BaseModel): ) model_config = ConfigDict(validate_assignment=True, protected_namespaces=()) - - -# TODO: Possibly switch to ruamel.yaml for a more native implementation -def generate_config_file( - model: BaseConfigModel = None, - filename: str = "config_sample.yml", - indentation: int = 2, -) -> None: - """Creates a config.yml file from Pydantic models.""" - - # Add a preamble - yaml = dedent(""" - # Sample YAML file for configuration. - # Comment and uncomment values as needed. - # Every value has a default within the application. - # This file serves to be a drop in for config.yml - - # Unless specified in the comments, DO NOT put these options in quotes! 
- # You can use https://www.yamllint.com/ if you want to check your YAML formatting.\n - """) - - schema = unwrap(model, TabbyConfigModel()) - - # TODO: Make the disordered iteration look cleaner - iter_once = False - for field, field_data in schema.model_fields.items(): - # Fetch from the existing model class if it's passed - # Probably can use this on schema too, but play it safe - if model and hasattr(model, field): - subfield_model = getattr(model, field) - else: - subfield_model = field_data.default_factory() - - if not subfield_model._metadata.include_in_config: - continue - - # Since the list is out of order with the length - # Add newlines from the beginning once one iteration finishes - # This is a sanity check for formatting - if iter_once: - yaml += "\n" - else: - iter_once = True - - for line in getdoc(subfield_model).splitlines(): - yaml += f"# {line}\n" - - yaml += f"{field}:\n" - - sub_iter_once = False - for subfield, subfield_data in subfield_model.model_fields.items(): - # Same logic as iter_once - if sub_iter_once: - yaml += "\n" - else: - sub_iter_once = True - - # If a value already exists, use it - if hasattr(subfield_model, subfield): - value = getattr(subfield_model, subfield) - elif subfield_data.default_factory: - value = subfield_data.default_factory() - else: - value = subfield_data.default - - value = value if value is not None else "" - value = value if value is not PydanticUndefined else "" - - for line in subfield_data.description.splitlines(): - yaml += f"{' ' * indentation}# {line}\n" - - yaml += f"{' ' * indentation}{subfield}: {value}\n" - - with open(filename, "w") as f: - f.write(yaml) diff --git a/common/tabby_config.py b/common/tabby_config.py index 8b92f843..8bec9ede 100644 --- a/common/tabby_config.py +++ b/common/tabby_config.py @@ -1,11 +1,14 @@ import yaml import pathlib +from inspect import getdoc +from pydantic_core import PydanticUndefined from loguru import logger +from textwrap import dedent from typing import Optional from os import getenv from common.utils import unwrap, merge_dicts -from common.config_models import TabbyConfigModel, generate_config_file +from common.config_models import BaseConfigModel, TabbyConfigModel class TabbyConfig(TabbyConfigModel): @@ -159,3 +162,78 @@ def _from_environment(self): # Create an empty instance of the config class config: TabbyConfig = TabbyConfig() + + +# TODO: Possibly switch to ruamel.yaml for a more native implementation +def generate_config_file( + model: BaseConfigModel = None, + filename: str = "config_sample.yml", + indentation: int = 2, +) -> None: + """Creates a config.yml file from Pydantic models.""" + + # Add a preamble + yaml = dedent(""" + # Sample YAML file for configuration. + # Comment and uncomment values as needed. + # Every value has a default within the application. + # This file serves to be a drop in for config.yml + + # Unless specified in the comments, DO NOT put these options in quotes! 
+ # You can use https://www.yamllint.com/ if you want to check your YAML formatting.\n + """) + + schema = unwrap(model, TabbyConfigModel()) + + # TODO: Make the disordered iteration look cleaner + iter_once = False + for field, field_data in schema.model_fields.items(): + # Fetch from the existing model class if it's passed + # Probably can use this on schema too, but play it safe + if model and hasattr(model, field): + subfield_model = getattr(model, field) + else: + subfield_model = field_data.default_factory() + + if not subfield_model._metadata.include_in_config: + continue + + # Since the list is out of order with the length + # Add newlines from the beginning once one iteration finishes + # This is a sanity check for formatting + if iter_once: + yaml += "\n" + else: + iter_once = True + + for line in getdoc(subfield_model).splitlines(): + yaml += f"# {line}\n" + + yaml += f"{field}:\n" + + sub_iter_once = False + for subfield, subfield_data in subfield_model.model_fields.items(): + # Same logic as iter_once + if sub_iter_once: + yaml += "\n" + else: + sub_iter_once = True + + # If a value already exists, use it + if hasattr(subfield_model, subfield): + value = getattr(subfield_model, subfield) + elif subfield_data.default_factory: + value = subfield_data.default_factory() + else: + value = subfield_data.default + + value = value if value is not None else "" + value = value if value is not PydanticUndefined else "" + + for line in subfield_data.description.splitlines(): + yaml += f"{' ' * indentation}# {line}\n" + + yaml += f"{' ' * indentation}{subfield}: {value}\n" + + with open(filename, "w") as f: + f.write(yaml) From 06a798d96841c554e5761f89407c2645109f3379 Mon Sep 17 00:00:00 2001 From: kingbri Date: Mon, 16 Sep 2024 22:24:42 -0400 Subject: [PATCH 35/51] Main: Remove debug print statement for config object Signed-off-by: kingbri --- main.py | 1 - 1 file changed, 1 deletion(-) diff --git a/main.py b/main.py index 429c0e83..e340b70b 100644 --- a/main.py +++ b/main.py @@ -118,7 +118,6 @@ def entrypoint(arguments: Optional[dict] = None): # Check exllamav2 version and give a descriptive error if it's too old # Skip if launching unsafely - print(f"MAIN.PY {config=}") if config.developer.unsafe_launch: logger.warning( "UNSAFE: Skipping ExllamaV2 version check.\n" From 26ad0ef744507ca201940e962bc96e20062f3364 Mon Sep 17 00:00:00 2001 From: kingbri Date: Mon, 16 Sep 2024 22:42:59 -0400 Subject: [PATCH 36/51] API: Fix model info reporting A deprecated preferences global var was being referenced. 
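The underlying problem is the usual stale-snapshot issue with module-level copies, which is why the PREFERENCES global was dropped earlier in the series; loosely sketched (names are illustrative only), a copy captured before the config is loaded keeps the defaults, while reading through the shared config object reflects the loaded state:

    from pydantic import BaseModel

    class LoggingPrefs(BaseModel):
        log_prompt: bool = False

    class AppConfig(BaseModel):
        logging: LoggingPrefs = LoggingPrefs()

    config = AppConfig()           # stands in for the shared config object

    PREFERENCES = config.logging   # old pattern: snapshot taken at import time

    # Later, loading user settings replaces the logging section
    config.logging = LoggingPrefs(log_prompt=True)

    print(PREFERENCES.log_prompt)     # False, the snapshot is stale
    print(config.logging.log_prompt)  # True, reads the current state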
Signed-off-by: kingbri --- endpoints/core/utils/model.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/endpoints/core/utils/model.py b/endpoints/core/utils/model.py index 5a77802b..d151fdd1 100644 --- a/endpoints/core/utils/model.py +++ b/endpoints/core/utils/model.py @@ -2,8 +2,9 @@ from asyncio import CancelledError from typing import Optional -from common import gen_logging, model +from common import model from common.networking import get_generator_error, handle_request_disconnect +from common.tabby_config import config from common.utils import unwrap from endpoints.core.types.model import ( ModelCard, @@ -77,7 +78,7 @@ def get_current_model(): model_card = ModelCard( id=unwrap(model_params.pop("name", None), "unknown"), parameters=ModelCardParameters.model_validate(model_params), - logging=gen_logging.PREFERENCES, + logging=config.logging, ) if draft_model_params: From 8e6b8bd8422fb57fbab1639f54044396ee3a55c8 Mon Sep 17 00:00:00 2001 From: kingbri Date: Mon, 16 Sep 2024 22:48:13 -0400 Subject: [PATCH 37/51] Update .gitignore Ignore all "backup" files Signed-off-by: kingbri --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 5e5c9ee5..1d71e65c 100644 --- a/.gitignore +++ b/.gitignore @@ -213,3 +213,6 @@ openapi.json # Infinity-emb cache .infinity_cache/ + +# Backup files +*.bak From f6fb60a6edb0d131f8b40196f4eeacddb799aa2a Mon Sep 17 00:00:00 2001 From: kingbri Date: Mon, 16 Sep 2024 22:54:35 -0400 Subject: [PATCH 38/51] Config: Inline model loading is False This is not a True default. Signed-off-by: kingbri --- common/config_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/config_models.py b/common/config_models.py index 58ce7542..c1d4ad0c 100644 --- a/common/config_models.py +++ b/common/config_models.py @@ -126,7 +126,7 @@ class ModelConfig(BaseConfigModel): ), ) inline_model_loading: Optional[bool] = Field( - True, + False, description=( "Allow direct loading of models " "from a completion or chat completion request (default: False)." From ececce172ec04298b83b5551d65242795a8c74b7 Mon Sep 17 00:00:00 2001 From: kingbri Date: Mon, 16 Sep 2024 23:06:01 -0400 Subject: [PATCH 39/51] Config: Fix addition of preamble Remove the extraneous newlines from the beginning of the preamble. Signed-off-by: kingbri --- common/tabby_config.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/common/tabby_config.py b/common/tabby_config.py index 8bec9ede..01ad200e 100644 --- a/common/tabby_config.py +++ b/common/tabby_config.py @@ -172,8 +172,8 @@ def generate_config_file( ) -> None: """Creates a config.yml file from Pydantic models.""" - # Add a preamble - yaml = dedent(""" + # Add a cleaned up preamble + preamble = """ # Sample YAML file for configuration. # Comment and uncomment values as needed. # Every value has a default within the application. @@ -181,7 +181,10 @@ def generate_config_file( # Unless specified in the comments, DO NOT put these options in quotes! # You can use https://www.yamllint.com/ if you want to check your YAML formatting.\n - """) + """ + + # Trim and cleanup preamble + yaml = dedent(preamble).lstrip() schema = unwrap(model, TabbyConfigModel()) From 852ea8faaaa2d066dff7e7ecd25aadb16506a35b Mon Sep 17 00:00:00 2001 From: kingbri Date: Mon, 16 Sep 2024 23:29:07 -0400 Subject: [PATCH 40/51] Config: Don't load from file if actions present Loading from file adds extra overhead for actions that don't rely on file loading. 
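The precedence between sources is unchanged: config.yml (when it is read at all) is merged first, then environment variables, then CLI arguments, so later sources still win. A minimal illustration of that merge order, using hypothetical config fragments and assuming merge_dicts keeps its last-argument-wins behavior:

    from common.utils import merge_dicts

    # Hypothetical fragments for illustration only
    file_cfg = {"network": {"port": 5000}, "model": {"model_name": "file-model"}}
    env_cfg = {"network": {"port": 6000}}
    arg_cfg = {"model": {"model_name": "cli-model"}}

    # Merged as file -> environment -> args: the CLI value takes priority
    print(merge_dicts(file_cfg, env_cfg, arg_cfg))
    # {'network': {'port': 6000}, 'model': {'model_name': 'cli-model'}}
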
Signed-off-by: kingbri --- common/tabby_config.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/common/tabby_config.py b/common/tabby_config.py index 01ad200e..5491b9c8 100644 --- a/common/tabby_config.py +++ b/common/tabby_config.py @@ -20,11 +20,13 @@ def load(self, arguments: Optional[dict] = None): """Synchronously loads the global application config""" # config is applied in order of items in the list - configs = [ - self._from_file(pathlib.Path("config.yml")), - self._from_environment(), - self._from_args(unwrap(arguments, {})), - ] + arguments_dict = unwrap(arguments, {}) + configs = [self._from_environment(), self._from_args(arguments_dict)] + + # If actions aren't present, also look from the file + # TODO: Change logic if file loading requires actions in the future + if not arguments_dict.get("actions"): + configs.insert(0, self._from_file(pathlib.Path("config.yml"))) merged_config = merge_dicts(*configs) From 63f8c46a9249b6714da9fc53c1622798cc79ec92 Mon Sep 17 00:00:00 2001 From: kingbri Date: Mon, 16 Sep 2024 23:29:39 -0400 Subject: [PATCH 41/51] Config: Make a better description for lora config This is not ideal because users may still have trouble understanding what a lora includes, but adding an example comment will help instead of leaving a blank line. Signed-off-by: kingbri --- common/config_models.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/common/config_models.py b/common/config_models.py index c1d4ad0c..fff248eb 100644 --- a/common/config_models.py +++ b/common/config_models.py @@ -333,7 +333,7 @@ class DraftModelConfig(BaseConfigModel): class LoraInstanceModel(BaseConfigModel): """Model representing an instance of a Lora.""" - name: str = Field(..., description=("Name of the LoRA model.")) + name: Optional[str] = Field(None, description=("Name of the LoRA model.")) scaling: float = Field( 1.0, description=("Scaling factor for the LoRA model (default: 1.0)."), @@ -352,7 +352,10 @@ class LoraConfig(BaseConfigModel): None, description=( "List of LoRAs to load and associated scaling factors " - "(default scale: 1.0)." + "(default scale: 1.0).\n" + "For the YAML file, add each entry as a YAML list:\n" + "- name: lora1\n" + " scaling: 1.0" ), ) From 7fe0dbd62f7a8d66d3ca5eb7f8ade52d98e91c98 Mon Sep 17 00:00:00 2001 From: kingbri Date: Mon, 16 Sep 2024 23:32:54 -0400 Subject: [PATCH 42/51] Tree: Update config_sample Uses the new YAML generator. Signed-off-by: kingbri --- config_sample.yml | 346 ++++++++++++++++++++++------------------------ 1 file changed, 165 insertions(+), 181 deletions(-) diff --git a/config_sample.yml b/config_sample.yml index 3b4f2479..bd790dbf 100644 --- a/config_sample.yml +++ b/config_sample.yml @@ -1,5 +1,6 @@ # Sample YAML file for configuration. -# Comment and uncomment values as needed. Every value has a default within the application. +# Comment and uncomment values as needed. +# Every value has a default within the application. # This file serves to be a drop in for config.yml # Unless specified in the comments, DO NOT put these options in quotes! @@ -8,225 +9,208 @@ # Options for networking network: # The IP to host on (default: 127.0.0.1). - # Use 0.0.0.0 to expose on all network adapters + # Use 0.0.0.0 to expose on all network adapters. host: 127.0.0.1 - # The port to host on (default: 5000) + # The port to host on (default: 5000). port: 5000 - # Disable HTTP token authenticaion with requests + # Disable HTTP token authentication with requests. 
# WARNING: This will make your instance vulnerable! - # Turn on this option if you are ONLY connecting from localhost + # Turn on this option if you are ONLY connecting from localhost. disable_auth: False - # Send tracebacks over the API to clients (default: False) - # NOTE: Only enable this for debug purposes + # Send tracebacks over the API (default: False). + # NOTE: Only enable this for debug purposes. send_tracebacks: False - # Select API servers to enable (default: ["OAI"]) - # Possible values: OAI - api_servers: ["OAI"] + # Select API servers to enable (default: ["OAI"]). + # Possible values: OAI, Kobold. + api_servers: [] # Options for logging logging: - # Enable prompt logging (default: False) - prompt: False + # Enable prompt logging (default: False). + log_prompt: False - # Enable generation parameter logging (default: False) - generation_params: False + # Enable generation parameter logging (default: False). + log_generation_params: False - # Enable request logging (default: False) + # Enable request logging (default: False). # NOTE: Only use this for debugging! - requests: False - -# Options for sampling -sampling: - # Override preset name. Find this in the sampler-overrides folder (default: None) - # This overrides default fallbacks for sampler values that are passed to the API - # Server-side overrides are NOT needed by default - # WARNING: Using this can result in a generation speed penalty - #override_preset: - -# Options for development and experimentation -developer: - # Skips exllamav2 version check (default: False) - # It's highly recommended to update your dependencies rather than enabling this flag - # WARNING: Don't set this unless you know what you're doing! - #unsafe_launch: False - - # Disable all request streaming (default: False) - # A kill switch for turning off SSE in the API server - #disable_request_streaming: False - - # Enable the torch CUDA malloc backend (default: False) - # This can save a few MBs of VRAM, but has a risk of errors. Use at your own risk. - #cuda_malloc_backend: False - - # Enable Uvloop or Winloop (default: False) - # Make the program utilize a faster async event loop which can improve performance - # NOTE: It's recommended to enable this, but if something breaks, turn this off. - #uvloop: False - - # Set process to use a higher priority - # For realtime process priority, run as administrator or sudo - # Otherwise, the priority will be set to high - #realtime_process_priority: False + log_requests: False # Options for model overrides and loading -# Please read the comments to understand how arguments are handled between initial and API loads +# Please read the comments to understand how arguments are handled +# between initial and API loads model: - # Overrides the directory to look for models (default: models) - # Windows users, DO NOT put this path in quotes! This directory will be invalid otherwise. + # Directory to look for models (default: models). + # Windows users, do NOT put this path in quotes! model_dir: models - # Sends dummy model names when the models endpoint is queried - # Enable this if the program is looking for a specific OAI model - #use_dummy_models: False - - # Allow direct loading of models from a completion or chat completion request + # Allow direct loading of models from a completion or chat completion request (default: False). inline_model_loading: False - # An initial model to load. Make sure the model is located in the model directory! - # A model can be loaded later via the API. 
- # REQUIRED: This must be filled out to load a model on startup! - model_name: + # Sends dummy model names when the models endpoint is queried. + # Enable this if the client is looking for specific OAI models. + use_dummy_models: False - # The below parameters only apply for initial loads - # All API based loads do NOT inherit these settings unless specified in use_as_default + # An initial model to load. + # Make sure the model is located in the model directory! + # REQUIRED: This must be filled out to load a model on startup. + model_name: - # Names of args to use as a default fallback for API load requests (default: []) - # For example, if you always want cache_mode to be Q4 instead of on the inital model load, - # Add "cache_mode" to this array - # Ex. ["max_seq_len", "cache_mode"] - #use_as_default: [] + # Names of args to use as a fallback for API load requests (default: []). + # For example, if you always want cache_mode to be Q4 instead of on the inital model load, add "cache_mode" to this array. + # Example: ['max_seq_len', 'cache_mode']. + use_as_default: [] - # The below parameters apply only if model_name is set + # Max sequence length (default: Empty). + # Fetched from the model's base sequence length in config.json by default. + max_seq_len: - # Max sequence length (default: Empty) - # Fetched from the model's base sequence length in config.json by default - #max_seq_len: - - # Overrides base model context length (default: Empty) + # Overrides base model context length (default: Empty). # WARNING: Don't set this unless you know what you're doing! # Again, do NOT use this for configuring context length, use max_seq_len above ^ - # Only use this if the model's base sequence length in config.json is incorrect (ex. Mistral 7B) - #override_base_seq_len: - - # Load model with tensor parallelism - # If a GPU split isn't provided, the TP loader will fallback to autosplit - # Enabling ignores the gpu_split_auto and autosplit_reserve values - #tensor_parallel: False - - # Automatically allocate resources to GPUs (default: True) - # NOTE: Not parsed for single GPU users - #gpu_split_auto: True - - # Reserve VRAM used for autosplit loading (default: 96 MB on GPU 0) - # This is represented as an array of MB per GPU used - #autosplit_reserve: [96] - - # An integer array of GBs of vram to split between GPUs (default: []) - # Used with tensor parallelism - # NOTE: Not parsed for single GPU users - #gpu_split: [20.6, 24] - - # Rope scale (default: 1.0) - # Same thing as compress_pos_emb - # Only use if your model was trained on long context with rope (check config.json) - # Leave blank to pull the value from the model - #rope_scale: 1.0 - - # Rope alpha (default: 1.0) - # Same thing as alpha_value - # Set to "auto" to automatically calculate - # Leave blank to pull the value from the model - #rope_alpha: 1.0 - - # Enable different cache modes for VRAM savings (slight performance hit). - # Possible values FP16, Q8, Q6, Q4. (default: FP16) - #cache_mode: FP16 - - # Size of the prompt cache to allocate (default: max_seq_len) - # This must be a multiple of 256. A larger cache uses more VRAM, but allows for more prompts to be processed at once. - # NOTE: Cache size should not be less than max_seq_len. - # For CFG, set this to 2 * max_seq_len to make room for both positive and negative prompts. - #cache_size: - - # Chunk size for prompt ingestion. A lower value reduces VRAM usage at the cost of ingestion speed (default: 2048) - # NOTE: Effects vary depending on the model. 
An ideal value is between 512 and 4096 - #chunk_size: 2048 - - # Set the maximum amount of prompts to process at one time (default: None/Automatic) - # This will be automatically calculated if left blank. - # A max batch size of 1 processes prompts one at a time. - # NOTE: Only available for Nvidia ampere (30 series) and above GPUs - #max_batch_size: - - # Set the prompt template for this model. If empty, attempts to look for the model's chat template. (default: None) - # If a model contains multiple templates in its tokenizer_config.json, set prompt_template to the name - # of the template you want to use. + override_base_seq_len: + + # Load model with tensor parallelism. + # Falls back to autosplit if GPU split isn't provided. + # This ignores the gpu_split_auto value. + tensor_parallel: False + + # Automatically allocate resources to GPUs (default: True). + # Not parsed for single GPU users. + gpu_split_auto: True + + # Reserve VRAM used for autosplit loading (default: 96 MB on GPU 0). + # Represented as an array of MB per GPU. + autosplit_reserve: [96] + + # An integer array of GBs of VRAM to split between GPUs (default: []). + # Used with tensor parallelism. + gpu_split: [] + + # Rope scale (default: 1.0). + # Same as compress_pos_emb. + # Use if the model was trained on long context with rope. + # Leave blank to pull the value from the model. + rope_scale: 1.0 + + # Rope alpha (default: 1.0). + # Same as alpha_value. Set to "auto" to auto-calculate. + rope_alpha: 1.0 + + # Enable different cache modes for VRAM savings (default: FP16). + # Possible values: 'FP16', 'Q8', 'Q6', 'Q4'. + cache_mode: FP16 + + # Size of the prompt cache to allocate (default: max_seq_len). + # Must be a multiple of 256 and can't be less than max_seq_len. + # For CFG, set this to 2 * max_seq_len. + cache_size: + + # Chunk size for prompt ingestion (default: 2048). + # A lower value reduces VRAM usage but decreases ingestion speed. + # NOTE: Effects vary depending on the model. + # An ideal value is between 512 and 4096. + chunk_size: 2048 + + # Set the maximum number of prompts to process at one time (default: None/Automatic). + # Automatically calculated if left blank. + # NOTE: Only available for Nvidia ampere (30 series) and above GPUs. + max_batch_size: + + # Set the prompt template for this model. (default: None) + # If empty, attempts to look for the model's chat template. + # If a model contains multiple templates in its tokenizer_config.json, + # set prompt_template to the name of the template you want to use. # NOTE: Only works with chat completion message lists! - #prompt_template: + prompt_template: - # Number of experts to use PER TOKEN. Fetched from the model's config.json if not specified (default: Empty) + # Number of experts to use per token. + # Fetched from the model's config.json if empty. + # NOTE: For MoE models only. # WARNING: Don't set this unless you know what you're doing! - # NOTE: For MoE models (ex. Mixtral) only! - #num_experts_per_token: - - # Enables fasttensors to possibly increase model loading speeds (default: False) - #fasttensors: true - - # Options for draft models (speculative decoding). This will use more VRAM! - #draft: - # Overrides the directory to look for draft (default: models) - #draft_model_dir: models - - # An initial draft model to load. Make sure this model is located in the model directory! - # A draft model can be loaded later via the API. 
- #draft_model_name: A model name - - # The below parameters only apply for initial loads - # All API based loads do NOT inherit these settings unless specified in use_as_default - - # Rope scale for draft models (default: 1.0) - # Same thing as compress_pos_emb - # Only use if your draft model was trained on long context with rope (check config.json) - #draft_rope_scale: 1.0 - - # Rope alpha for draft model (default: 1.0) - # Same thing as alpha_value - # Leave blank to automatically calculate alpha value - #draft_rope_alpha: 1.0 - - # Enable different draft model cache modes for VRAM savings (slight performance hit). - # Possible values FP16, Q8, Q6, Q4. (default: FP16) - #draft_cache_mode: FP16 - - # Options for loras - #lora: - # Overrides the directory to look for loras (default: loras) - #lora_dir: loras - - # List of loras to load and associated scaling factors (default: 1.0). Comment out unused entries or add more rows as needed. - #loras: - #- name: lora1 - # scaling: 1.0 + num_experts_per_token: + + # Enables fasttensors to possibly increase model loading speeds (default: False). + fasttensors: False + +# Options for draft models (speculative decoding) +# This will use more VRAM! +draft_model: + # Directory to look for draft models (default: models) + draft_model_dir: models + + # An initial draft model to load. + # Ensure the model is in the model directory. + draft_model_name: + + # Rope scale for draft models (default: 1.0). + # Same as compress_pos_emb. + # Use if the draft model was trained on long context with rope. + draft_rope_scale: 1.0 + + # Rope alpha for draft models (default: None). + # Same as alpha_value. Set to "auto" to auto-calculate. + draft_rope_alpha: + + # Cache mode for draft models to save VRAM (default: FP16). + # Possible values: 'FP16', 'Q8', 'Q6', 'Q4'. + draft_cache_mode: FP16 + +# Options for Loras +lora: + # Directory to look for LoRAs (default: loras). + lora_dir: loras + + # List of LoRAs to load and associated scaling factors (default scale: 1.0). + # For the YAML file, add each entry as a YAML list: + # - name: lora1 + # scaling: 1.0 + loras: # Options for embedding models and loading. # NOTE: Embeddings requires the "extras" feature to be installed # Install it via "pip install .[extras]" embeddings: - # Overrides directory to look for embedding models (default: models) + # Directory to look for embedding models (default: models). embedding_model_dir: models - # Device to load embedding models on (default: cpu) - # Possible values: cpu, auto, cuda + # Device to load embedding models on (default: cpu). + # Possible values: cpu, auto, cuda. # NOTE: It's recommended to load embedding models on the CPU. - # If you'd like to load on an AMD gpu, set this value to "cuda" as well. + # If using an AMD GPU, set this value to 'cuda'. embeddings_device: cpu - # The below parameters only apply for initial loads - # All API based loads do NOT inherit these settings unless specified in use_as_default + # An initial embedding model to load on the infinity backend. + embedding_model_name: + +# Options for Sampling +sampling: + # Select a sampler override preset (default: None). + # Find this in the sampler-overrides folder. + # This overrides default fallbacks for sampler values that are passed to the API. + override_preset: + +# Options for development and experimentation +developer: + # Skip Exllamav2 version check (default: False). + # WARNING: It's highly recommended to update your dependencies rather than enabling this flag. 
+ unsafe_launch: False + + # Disable API request streaming (default: False). + disable_request_streaming: False + + # Enable the torch CUDA malloc backend (default: False). + cuda_malloc_backend: False + + # Run asyncio using Uvloop or Winloop which can improve performance. + # NOTE: It's recommended to enable this, but if something breaks turn this off. + uvloop: False - # An initial embedding model to load on the infinity backend (default: None) - embedding_model_name: + # Set process to use a higher priority. + # For realtime process priority, run as administrator or sudo. + # Otherwise, the priority will be set to high. + realtime_process_priority: False From daa57ceada328422ac06f9889d709866202cdc2c Mon Sep 17 00:00:00 2001 From: kingbri Date: Tue, 17 Sep 2024 00:42:39 -0400 Subject: [PATCH 43/51] API: Upgrade config declarations Some were using the old unwrap methods. Signed-off-by: kingbri --- endpoints/OAI/utils/completion.py | 4 ++-- endpoints/core/types/model.py | 6 +----- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/endpoints/OAI/utils/completion.py b/endpoints/OAI/utils/completion.py index df4bf197..c8b02c81 100644 --- a/endpoints/OAI/utils/completion.py +++ b/endpoints/OAI/utils/completion.py @@ -130,7 +130,7 @@ async def load_inline_model(model_name: str, request: Request): raise HTTPException(401, error_message) - if not unwrap(config.model.get("inline_model_loading"), False): + if not config.model.inline_model_loading: logger.warning( f"Unable to switch model to {model_name} because " '"inline_model_loading" is not True in config.yml.' @@ -138,7 +138,7 @@ async def load_inline_model(model_name: str, request: Request): return - model_path = pathlib.Path(unwrap(config.model.get("model_dir"), "models")) + model_path = pathlib.Path(config.model.model_dir) model_path = model_path / model_name # Model path doesn't exist diff --git a/endpoints/core/types/model.py b/endpoints/core/types/model.py index dc5da0da..eb2d4311 100644 --- a/endpoints/core/types/model.py +++ b/endpoints/core/types/model.py @@ -118,11 +118,7 @@ class EmbeddingModelLoadRequest(BaseModel): name: str # Set default from the config - embeddings_device: Optional[str] = Field( - default_factory=lambda: unwrap( - config.embeddings.get("embeddings_device"), "cpu" - ) - ) + embeddings_device: Optional[str] = Field(config.embeddings.embeddings_device) class ModelLoadResponse(BaseModel): From bb4dd7200ebe2efc03d2655d90eb1a2783688bbc Mon Sep 17 00:00:00 2001 From: TerminalMan <84923604+SecretiveShell@users.noreply.github.com> Date: Tue, 17 Sep 2024 15:41:32 +0100 Subject: [PATCH 44/51] fix defaults for api_servers --- common/config_models.py | 2 +- config_sample.yml | 432 +++++++++++++++++----------------- endpoints/core/types/model.py | 1 - 3 files changed, 217 insertions(+), 218 deletions(-) diff --git a/common/config_models.py b/common/config_models.py index fff248eb..2892b2c2 100644 --- a/common/config_models.py +++ b/common/config_models.py @@ -80,7 +80,7 @@ class NetworkConfig(BaseConfigModel): ), ) api_servers: Optional[List[Literal["OAI", "Kobold"]]] = Field( - default_factory=list, + ["OAI"], description=( 'Select API servers to enable (default: ["OAI"]).\n' "Possible values: OAI, Kobold." diff --git a/config_sample.yml b/config_sample.yml index bd790dbf..10e812fb 100644 --- a/config_sample.yml +++ b/config_sample.yml @@ -1,216 +1,216 @@ -# Sample YAML file for configuration. -# Comment and uncomment values as needed. -# Every value has a default within the application. 
-# This file serves to be a drop in for config.yml - -# Unless specified in the comments, DO NOT put these options in quotes! -# You can use https://www.yamllint.com/ if you want to check your YAML formatting. - -# Options for networking -network: - # The IP to host on (default: 127.0.0.1). - # Use 0.0.0.0 to expose on all network adapters. - host: 127.0.0.1 - - # The port to host on (default: 5000). - port: 5000 - - # Disable HTTP token authentication with requests. - # WARNING: This will make your instance vulnerable! - # Turn on this option if you are ONLY connecting from localhost. - disable_auth: False - - # Send tracebacks over the API (default: False). - # NOTE: Only enable this for debug purposes. - send_tracebacks: False - - # Select API servers to enable (default: ["OAI"]). - # Possible values: OAI, Kobold. - api_servers: [] - -# Options for logging -logging: - # Enable prompt logging (default: False). - log_prompt: False - - # Enable generation parameter logging (default: False). - log_generation_params: False - - # Enable request logging (default: False). - # NOTE: Only use this for debugging! - log_requests: False - -# Options for model overrides and loading -# Please read the comments to understand how arguments are handled -# between initial and API loads -model: - # Directory to look for models (default: models). - # Windows users, do NOT put this path in quotes! - model_dir: models - - # Allow direct loading of models from a completion or chat completion request (default: False). - inline_model_loading: False - - # Sends dummy model names when the models endpoint is queried. - # Enable this if the client is looking for specific OAI models. - use_dummy_models: False - - # An initial model to load. - # Make sure the model is located in the model directory! - # REQUIRED: This must be filled out to load a model on startup. - model_name: - - # Names of args to use as a fallback for API load requests (default: []). - # For example, if you always want cache_mode to be Q4 instead of on the inital model load, add "cache_mode" to this array. - # Example: ['max_seq_len', 'cache_mode']. - use_as_default: [] - - # Max sequence length (default: Empty). - # Fetched from the model's base sequence length in config.json by default. - max_seq_len: - - # Overrides base model context length (default: Empty). - # WARNING: Don't set this unless you know what you're doing! - # Again, do NOT use this for configuring context length, use max_seq_len above ^ - override_base_seq_len: - - # Load model with tensor parallelism. - # Falls back to autosplit if GPU split isn't provided. - # This ignores the gpu_split_auto value. - tensor_parallel: False - - # Automatically allocate resources to GPUs (default: True). - # Not parsed for single GPU users. - gpu_split_auto: True - - # Reserve VRAM used for autosplit loading (default: 96 MB on GPU 0). - # Represented as an array of MB per GPU. - autosplit_reserve: [96] - - # An integer array of GBs of VRAM to split between GPUs (default: []). - # Used with tensor parallelism. - gpu_split: [] - - # Rope scale (default: 1.0). - # Same as compress_pos_emb. - # Use if the model was trained on long context with rope. - # Leave blank to pull the value from the model. - rope_scale: 1.0 - - # Rope alpha (default: 1.0). - # Same as alpha_value. Set to "auto" to auto-calculate. - rope_alpha: 1.0 - - # Enable different cache modes for VRAM savings (default: FP16). - # Possible values: 'FP16', 'Q8', 'Q6', 'Q4'. 
- cache_mode: FP16 - - # Size of the prompt cache to allocate (default: max_seq_len). - # Must be a multiple of 256 and can't be less than max_seq_len. - # For CFG, set this to 2 * max_seq_len. - cache_size: - - # Chunk size for prompt ingestion (default: 2048). - # A lower value reduces VRAM usage but decreases ingestion speed. - # NOTE: Effects vary depending on the model. - # An ideal value is between 512 and 4096. - chunk_size: 2048 - - # Set the maximum number of prompts to process at one time (default: None/Automatic). - # Automatically calculated if left blank. - # NOTE: Only available for Nvidia ampere (30 series) and above GPUs. - max_batch_size: - - # Set the prompt template for this model. (default: None) - # If empty, attempts to look for the model's chat template. - # If a model contains multiple templates in its tokenizer_config.json, - # set prompt_template to the name of the template you want to use. - # NOTE: Only works with chat completion message lists! - prompt_template: - - # Number of experts to use per token. - # Fetched from the model's config.json if empty. - # NOTE: For MoE models only. - # WARNING: Don't set this unless you know what you're doing! - num_experts_per_token: - - # Enables fasttensors to possibly increase model loading speeds (default: False). - fasttensors: False - -# Options for draft models (speculative decoding) -# This will use more VRAM! -draft_model: - # Directory to look for draft models (default: models) - draft_model_dir: models - - # An initial draft model to load. - # Ensure the model is in the model directory. - draft_model_name: - - # Rope scale for draft models (default: 1.0). - # Same as compress_pos_emb. - # Use if the draft model was trained on long context with rope. - draft_rope_scale: 1.0 - - # Rope alpha for draft models (default: None). - # Same as alpha_value. Set to "auto" to auto-calculate. - draft_rope_alpha: - - # Cache mode for draft models to save VRAM (default: FP16). - # Possible values: 'FP16', 'Q8', 'Q6', 'Q4'. - draft_cache_mode: FP16 - -# Options for Loras -lora: - # Directory to look for LoRAs (default: loras). - lora_dir: loras - - # List of LoRAs to load and associated scaling factors (default scale: 1.0). - # For the YAML file, add each entry as a YAML list: - # - name: lora1 - # scaling: 1.0 - loras: - -# Options for embedding models and loading. -# NOTE: Embeddings requires the "extras" feature to be installed -# Install it via "pip install .[extras]" -embeddings: - # Directory to look for embedding models (default: models). - embedding_model_dir: models - - # Device to load embedding models on (default: cpu). - # Possible values: cpu, auto, cuda. - # NOTE: It's recommended to load embedding models on the CPU. - # If using an AMD GPU, set this value to 'cuda'. - embeddings_device: cpu - - # An initial embedding model to load on the infinity backend. - embedding_model_name: - -# Options for Sampling -sampling: - # Select a sampler override preset (default: None). - # Find this in the sampler-overrides folder. - # This overrides default fallbacks for sampler values that are passed to the API. - override_preset: - -# Options for development and experimentation -developer: - # Skip Exllamav2 version check (default: False). - # WARNING: It's highly recommended to update your dependencies rather than enabling this flag. - unsafe_launch: False - - # Disable API request streaming (default: False). - disable_request_streaming: False - - # Enable the torch CUDA malloc backend (default: False). 
- cuda_malloc_backend: False - - # Run asyncio using Uvloop or Winloop which can improve performance. - # NOTE: It's recommended to enable this, but if something breaks turn this off. - uvloop: False - - # Set process to use a higher priority. - # For realtime process priority, run as administrator or sudo. - # Otherwise, the priority will be set to high. - realtime_process_priority: False +# Sample YAML file for configuration. +# Comment and uncomment values as needed. +# Every value has a default within the application. +# This file serves to be a drop in for config.yml + +# Unless specified in the comments, DO NOT put these options in quotes! +# You can use https://www.yamllint.com/ if you want to check your YAML formatting. + +# Options for networking +network: + # The IP to host on (default: 127.0.0.1). + # Use 0.0.0.0 to expose on all network adapters. + host: 127.0.0.1 + + # The port to host on (default: 5000). + port: 5000 + + # Disable HTTP token authentication with requests. + # WARNING: This will make your instance vulnerable! + # Turn on this option if you are ONLY connecting from localhost. + disable_auth: False + + # Send tracebacks over the API (default: False). + # NOTE: Only enable this for debug purposes. + send_tracebacks: False + + # Select API servers to enable (default: ["OAI"]). + # Possible values: OAI, Kobold. + api_servers: ['OAI'] + +# Options for logging +logging: + # Enable prompt logging (default: False). + log_prompt: False + + # Enable generation parameter logging (default: False). + log_generation_params: False + + # Enable request logging (default: False). + # NOTE: Only use this for debugging! + log_requests: False + +# Options for model overrides and loading +# Please read the comments to understand how arguments are handled +# between initial and API loads +model: + # Directory to look for models (default: models). + # Windows users, do NOT put this path in quotes! + model_dir: models + + # Allow direct loading of models from a completion or chat completion request (default: False). + inline_model_loading: False + + # Sends dummy model names when the models endpoint is queried. + # Enable this if the client is looking for specific OAI models. + use_dummy_models: False + + # An initial model to load. + # Make sure the model is located in the model directory! + # REQUIRED: This must be filled out to load a model on startup. + model_name: + + # Names of args to use as a fallback for API load requests (default: []). + # For example, if you always want cache_mode to be Q4 instead of on the inital model load, add "cache_mode" to this array. + # Example: ['max_seq_len', 'cache_mode']. + use_as_default: [] + + # Max sequence length (default: Empty). + # Fetched from the model's base sequence length in config.json by default. + max_seq_len: + + # Overrides base model context length (default: Empty). + # WARNING: Don't set this unless you know what you're doing! + # Again, do NOT use this for configuring context length, use max_seq_len above ^ + override_base_seq_len: + + # Load model with tensor parallelism. + # Falls back to autosplit if GPU split isn't provided. + # This ignores the gpu_split_auto value. + tensor_parallel: False + + # Automatically allocate resources to GPUs (default: True). + # Not parsed for single GPU users. + gpu_split_auto: True + + # Reserve VRAM used for autosplit loading (default: 96 MB on GPU 0). + # Represented as an array of MB per GPU. 
+ autosplit_reserve: [96] + + # An integer array of GBs of VRAM to split between GPUs (default: []). + # Used with tensor parallelism. + gpu_split: [] + + # Rope scale (default: 1.0). + # Same as compress_pos_emb. + # Use if the model was trained on long context with rope. + # Leave blank to pull the value from the model. + rope_scale: 1.0 + + # Rope alpha (default: 1.0). + # Same as alpha_value. Set to "auto" to auto-calculate. + rope_alpha: 1.0 + + # Enable different cache modes for VRAM savings (default: FP16). + # Possible values: 'FP16', 'Q8', 'Q6', 'Q4'. + cache_mode: FP16 + + # Size of the prompt cache to allocate (default: max_seq_len). + # Must be a multiple of 256 and can't be less than max_seq_len. + # For CFG, set this to 2 * max_seq_len. + cache_size: + + # Chunk size for prompt ingestion (default: 2048). + # A lower value reduces VRAM usage but decreases ingestion speed. + # NOTE: Effects vary depending on the model. + # An ideal value is between 512 and 4096. + chunk_size: 2048 + + # Set the maximum number of prompts to process at one time (default: None/Automatic). + # Automatically calculated if left blank. + # NOTE: Only available for Nvidia ampere (30 series) and above GPUs. + max_batch_size: + + # Set the prompt template for this model. (default: None) + # If empty, attempts to look for the model's chat template. + # If a model contains multiple templates in its tokenizer_config.json, + # set prompt_template to the name of the template you want to use. + # NOTE: Only works with chat completion message lists! + prompt_template: + + # Number of experts to use per token. + # Fetched from the model's config.json if empty. + # NOTE: For MoE models only. + # WARNING: Don't set this unless you know what you're doing! + num_experts_per_token: + + # Enables fasttensors to possibly increase model loading speeds (default: False). + fasttensors: False + +# Options for draft models (speculative decoding) +# This will use more VRAM! +draft_model: + # Directory to look for draft models (default: models) + draft_model_dir: models + + # An initial draft model to load. + # Ensure the model is in the model directory. + draft_model_name: + + # Rope scale for draft models (default: 1.0). + # Same as compress_pos_emb. + # Use if the draft model was trained on long context with rope. + draft_rope_scale: 1.0 + + # Rope alpha for draft models (default: None). + # Same as alpha_value. Set to "auto" to auto-calculate. + draft_rope_alpha: + + # Cache mode for draft models to save VRAM (default: FP16). + # Possible values: 'FP16', 'Q8', 'Q6', 'Q4'. + draft_cache_mode: FP16 + +# Options for Loras +lora: + # Directory to look for LoRAs (default: loras). + lora_dir: loras + + # List of LoRAs to load and associated scaling factors (default scale: 1.0). + # For the YAML file, add each entry as a YAML list: + # - name: lora1 + # scaling: 1.0 + loras: + +# Options for embedding models and loading. +# NOTE: Embeddings requires the "extras" feature to be installed +# Install it via "pip install .[extras]" +embeddings: + # Directory to look for embedding models (default: models). + embedding_model_dir: models + + # Device to load embedding models on (default: cpu). + # Possible values: cpu, auto, cuda. + # NOTE: It's recommended to load embedding models on the CPU. + # If using an AMD GPU, set this value to 'cuda'. + embeddings_device: cpu + + # An initial embedding model to load on the infinity backend. 
+ embedding_model_name: + +# Options for Sampling +sampling: + # Select a sampler override preset (default: None). + # Find this in the sampler-overrides folder. + # This overrides default fallbacks for sampler values that are passed to the API. + override_preset: + +# Options for development and experimentation +developer: + # Skip Exllamav2 version check (default: False). + # WARNING: It's highly recommended to update your dependencies rather than enabling this flag. + unsafe_launch: False + + # Disable API request streaming (default: False). + disable_request_streaming: False + + # Enable the torch CUDA malloc backend (default: False). + cuda_malloc_backend: False + + # Run asyncio using Uvloop or Winloop which can improve performance. + # NOTE: It's recommended to enable this, but if something breaks turn this off. + uvloop: False + + # Set process to use a higher priority. + # For realtime process priority, run as administrator or sudo. + # Otherwise, the priority will be set to high. + realtime_process_priority: False diff --git a/endpoints/core/types/model.py b/endpoints/core/types/model.py index eb2d4311..b169162d 100644 --- a/endpoints/core/types/model.py +++ b/endpoints/core/types/model.py @@ -6,7 +6,6 @@ from common.config_models import LoggingConfig from common.tabby_config import config -from common.utils import unwrap class ModelCardParameters(BaseModel): From 948fcb7f5b8eaf829fcab6bb947615e6e78e7b4d Mon Sep 17 00:00:00 2001 From: TerminalMan <84923604+SecretiveShell@users.noreply.github.com> Date: Wed, 18 Sep 2024 01:06:34 +0100 Subject: [PATCH 45/51] migrate to ruamel.yaml --- common/tabby_config.py | 127 ++++++++++++++++++++--------------------- common/utils.py | 12 ++-- pyproject.toml | 2 +- 3 files changed, 70 insertions(+), 71 deletions(-) diff --git a/common/tabby_config.py b/common/tabby_config.py index 5491b9c8..283dd176 100644 --- a/common/tabby_config.py +++ b/common/tabby_config.py @@ -1,14 +1,19 @@ -import yaml import pathlib from inspect import getdoc -from pydantic_core import PydanticUndefined -from loguru import logger -from textwrap import dedent -from typing import Optional from os import getenv +from textwrap import dedent +from typing import Any, Optional + +from loguru import logger +from pydantic import BaseModel +from pydantic_core import PydanticUndefined +from ruamel.yaml import YAML +from ruamel.yaml.comments import CommentedMap, CommentedSeq + +from common.config_models import TabbyConfigModel +from common.utils import merge_dicts, unwrap -from common.utils import unwrap, merge_dicts -from common.config_models import BaseConfigModel, TabbyConfigModel +yaml = YAML() class TabbyConfig(TabbyConfigModel): @@ -57,7 +62,7 @@ def _from_file(self, config_path: pathlib.Path): # try loading from file try: with open(str(config_path.resolve()), "r", encoding="utf8") as config_file: - cfg = yaml.safe_load(config_file) + cfg = yaml.load(config_file) # NOTE: Remove migration wrapper after a period of time # load legacy config files @@ -130,7 +135,7 @@ def _from_args(self, args: dict): """loads config from the provided arguments""" config = {} - config_override = unwrap(args.get("options", {}).get("config")) + config_override = args.get("options", {}).get("config", None) if config_override: logger.info("Config file override detected in args.") config = self._from_file(pathlib.Path(config_override)) @@ -166,15 +171,25 @@ def _from_environment(self): config: TabbyConfig = TabbyConfig() -# TODO: Possibly switch to ruamel.yaml for a more native implementation def 
generate_config_file( - model: BaseConfigModel = None, + model: BaseModel = None, filename: str = "config_sample.yml", indentation: int = 2, ) -> None: """Creates a config.yml file from Pydantic models.""" - # Add a cleaned up preamble + schema = unwrap(model, TabbyConfigModel()) + preamble = get_preamble() + + yaml_content = pydantic_model_to_yaml(schema) + + with open(filename, "w") as f: + f.write(preamble) + yaml.dump(yaml_content, f) + + +def get_preamble() -> str: + """Returns the cleaned up preamble for the config file.""" preamble = """ # Sample YAML file for configuration. # Comment and uncomment values as needed. @@ -184,61 +199,43 @@ def generate_config_file( # Unless specified in the comments, DO NOT put these options in quotes! # You can use https://www.yamllint.com/ if you want to check your YAML formatting.\n """ + return dedent(preamble).lstrip() - # Trim and cleanup preamble - yaml = dedent(preamble).lstrip() - - schema = unwrap(model, TabbyConfigModel()) - # TODO: Make the disordered iteration look cleaner - iter_once = False - for field, field_data in schema.model_fields.items(): - # Fetch from the existing model class if it's passed - # Probably can use this on schema too, but play it safe - if model and hasattr(model, field): - subfield_model = getattr(model, field) - else: - subfield_model = field_data.default_factory() - - if not subfield_model._metadata.include_in_config: - continue - - # Since the list is out of order with the length - # Add newlines from the beginning once one iteration finishes - # This is a sanity check for formatting - if iter_once: - yaml += "\n" +# Function to convert pydantic model to dict with field descriptions as comments +def pydantic_model_to_yaml(model: BaseModel) -> CommentedMap: + """ + Recursively converts a Pydantic model into a CommentedMap, + with descriptions as comments in YAML. 
+ """ + # Create a CommentedMap to hold the output data + yaml_data = CommentedMap() + + # Loop through all fields in the model + for field_name, field_info in model.model_fields.items(): + value = getattr(model, field_name) + + # If the field is another Pydantic model + if isinstance(value, BaseModel): + yaml_data[field_name] = pydantic_model_to_yaml(value) + # If the field is a list of Pydantic models + elif ( + isinstance(value, list) + and len(value) > 0 + and isinstance(value[0], BaseModel) + ): + yaml_list = CommentedSeq() + for item in value: + yaml_list.append(pydantic_model_to_yaml(item)) + yaml_data[field_name] = yaml_list + # Otherwise, just assign the value else: - iter_once = True - - for line in getdoc(subfield_model).splitlines(): - yaml += f"# {line}\n" + yaml_data[field_name] = value - yaml += f"{field}:\n" - - sub_iter_once = False - for subfield, subfield_data in subfield_model.model_fields.items(): - # Same logic as iter_once - if sub_iter_once: - yaml += "\n" - else: - sub_iter_once = True - - # If a value already exists, use it - if hasattr(subfield_model, subfield): - value = getattr(subfield_model, subfield) - elif subfield_data.default_factory: - value = subfield_data.default_factory() - else: - value = subfield_data.default - - value = value if value is not None else "" - value = value if value is not PydanticUndefined else "" - - for line in subfield_data.description.splitlines(): - yaml += f"{' ' * indentation}# {line}\n" - - yaml += f"{' ' * indentation}{subfield}: {value}\n" + # Add field description as a comment if available + if field_info.description: + yaml_data.yaml_set_comment_before_after_key( + field_name, before=field_info.description + ) - with open(filename, "w") as f: - f.write(yaml) + return yaml_data diff --git a/common/utils.py b/common/utils.py index f8b46711..dfa7e924 100644 --- a/common/utils.py +++ b/common/utils.py @@ -1,10 +1,12 @@ """Common utility functions""" from types import NoneType -from typing import Type, Union, get_args, get_origin +from typing import Dict, Optional, Type, Union, get_args, get_origin, TypeVar +T = TypeVar("T") -def unwrap(wrapped, default=None): + +def unwrap(wrapped: Optional[T], default: T = None) -> T: """Unwrap function for Optionals.""" if wrapped is None: return default @@ -17,13 +19,13 @@ def coalesce(*args): return next((arg for arg in args if arg is not None), None) -def prune_dict(input_dict): +def prune_dict(input_dict: Dict) -> Dict: """Trim out instances of None from a dictionary.""" return {k: v for k, v in input_dict.items() if v is not None} -def merge_dict(dict1, dict2): +def merge_dict(dict1: Dict, dict2: Dict) -> Dict: """Merge 2 dictionaries""" for key, value in dict2.items(): if isinstance(value, dict) and key in dict1 and isinstance(dict1[key], dict): @@ -33,7 +35,7 @@ def merge_dict(dict1, dict2): return dict1 -def merge_dicts(*dicts): +def merge_dicts(*dicts: Dict) -> Dict: """Merge an arbitrary amount of dictionaries""" result = {} for dictionary in dicts: diff --git a/pyproject.toml b/pyproject.toml index ad6f9455..3289288e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ requires-python = ">=3.10" dependencies = [ "fastapi-slim >= 0.110.0", "pydantic >= 2.0.0", - "PyYAML", + "ruamel.yaml", "rich", "uvicorn >= 0.28.1", "jinja2 >= 3.0.0", From a34bd9a684b8b7eb925fe439ed1dfab8fc9edae0 Mon Sep 17 00:00:00 2001 From: kingbri Date: Tue, 17 Sep 2024 22:44:09 -0400 Subject: [PATCH 46/51] Config: Alter YAML generation script for formatting adherence Properly add comments and 
newlines where they need to go. Signed-off-by: kingbri --- common/config_models.py | 8 +--- common/tabby_config.py | 91 ++++++++++++++++++++++++++--------------- config_sample.yml | 56 ++++++++++++------------- 3 files changed, 88 insertions(+), 67 deletions(-) diff --git a/common/config_models.py b/common/config_models.py index 2892b2c2..6aac505c 100644 --- a/common/config_models.py +++ b/common/config_models.py @@ -333,12 +333,8 @@ class DraftModelConfig(BaseConfigModel): class LoraInstanceModel(BaseConfigModel): """Model representing an instance of a Lora.""" - name: Optional[str] = Field(None, description=("Name of the LoRA model.")) - scaling: float = Field( - 1.0, - description=("Scaling factor for the LoRA model (default: 1.0)."), - ge=0, - ) + name: Optional[str] = None + scaling: float = Field(1.0, ge=0) class LoraConfig(BaseConfigModel): diff --git a/common/tabby_config.py b/common/tabby_config.py index 283dd176..0bf3563e 100644 --- a/common/tabby_config.py +++ b/common/tabby_config.py @@ -2,15 +2,15 @@ from inspect import getdoc from os import getenv from textwrap import dedent -from typing import Any, Optional +from typing import Optional from loguru import logger from pydantic import BaseModel -from pydantic_core import PydanticUndefined from ruamel.yaml import YAML from ruamel.yaml.comments import CommentedMap, CommentedSeq +from ruamel.yaml.scalarstring import PreservedScalarString -from common.config_models import TabbyConfigModel +from common.config_models import BaseConfigModel, TabbyConfigModel from common.utils import merge_dicts, unwrap yaml = YAML() @@ -174,22 +174,10 @@ def _from_environment(self): def generate_config_file( model: BaseModel = None, filename: str = "config_sample.yml", - indentation: int = 2, ) -> None: """Creates a config.yml file from Pydantic models.""" schema = unwrap(model, TabbyConfigModel()) - preamble = get_preamble() - - yaml_content = pydantic_model_to_yaml(schema) - - with open(filename, "w") as f: - f.write(preamble) - yaml.dump(yaml_content, f) - - -def get_preamble() -> str: - """Returns the cleaned up preamble for the config file.""" preamble = """ # Sample YAML file for configuration. # Comment and uncomment values as needed. @@ -199,43 +187,80 @@ def get_preamble() -> str: # Unless specified in the comments, DO NOT put these options in quotes! # You can use https://www.yamllint.com/ if you want to check your YAML formatting.\n """ - return dedent(preamble).lstrip() + + yaml_content = pydantic_model_to_yaml(schema) + + with open(filename, "w") as f: + f.write(dedent(preamble).lstrip()) + yaml.dump(yaml_content, f) -# Function to convert pydantic model to dict with field descriptions as comments -def pydantic_model_to_yaml(model: BaseModel) -> CommentedMap: +def pydantic_model_to_yaml(model: BaseModel, indentation: int = 0) -> CommentedMap: """ Recursively converts a Pydantic model into a CommentedMap, with descriptions as comments in YAML. 
""" + # Create a CommentedMap to hold the output data yaml_data = CommentedMap() # Loop through all fields in the model + iteration = 1 for field_name, field_info in model.model_fields.items(): + # Get the inner pydantic model value = getattr(model, field_name) - # If the field is another Pydantic model - if isinstance(value, BaseModel): - yaml_data[field_name] = pydantic_model_to_yaml(value) - # If the field is a list of Pydantic models - elif ( - isinstance(value, list) - and len(value) > 0 - and isinstance(value[0], BaseModel) - ): + if isinstance(value, BaseConfigModel): + # If the field is another Pydantic model + + if not value._metadata.include_in_config: + continue + + yaml_data[field_name] = pydantic_model_to_yaml( + value, indentation=indentation + 2 + ) + comment = getdoc(value) + elif isinstance(value, list) and len(value) > 0: + # If the field is a list + yaml_list = CommentedSeq() - for item in value: - yaml_list.append(pydantic_model_to_yaml(item)) + if isinstance(value[0], BaseModel): + # If the field is a list of Pydantic models + # Do not add comments for these items + + for item in value: + yaml_list.append( + pydantic_model_to_yaml(item, indentation=indentation + 2) + ) + else: + # If the field is a normal list, prefer the YAML flow style + + yaml_list.fa.set_flow_style() + yaml_list += [ + PreservedScalarString(element) + if isinstance(element, str) + else element + for element in value + ] + yaml_data[field_name] = yaml_list - # Otherwise, just assign the value + comment = field_info.description else: + # Otherwise, just assign the value + yaml_data[field_name] = value + comment = field_info.description + + if comment: + # Add a newline to every comment but the first one + if iteration != 1: + comment = f"\n{comment}" - # Add field description as a comment if available - if field_info.description: yaml_data.yaml_set_comment_before_after_key( - field_name, before=field_info.description + field_name, before=comment, indent=indentation ) + # Increment the iteration counter + iteration += 1 + return yaml_data diff --git a/config_sample.yml b/config_sample.yml index 10e812fb..0da6a361 100644 --- a/config_sample.yml +++ b/config_sample.yml @@ -18,27 +18,27 @@ network: # Disable HTTP token authentication with requests. # WARNING: This will make your instance vulnerable! # Turn on this option if you are ONLY connecting from localhost. - disable_auth: False + disable_auth: false # Send tracebacks over the API (default: False). # NOTE: Only enable this for debug purposes. - send_tracebacks: False + send_tracebacks: false # Select API servers to enable (default: ["OAI"]). # Possible values: OAI, Kobold. - api_servers: ['OAI'] + api_servers: ["OAI"] # Options for logging logging: # Enable prompt logging (default: False). - log_prompt: False + log_prompt: false # Enable generation parameter logging (default: False). - log_generation_params: False + log_generation_params: false # Enable request logging (default: False). # NOTE: Only use this for debugging! - log_requests: False + log_requests: false # Options for model overrides and loading # Please read the comments to understand how arguments are handled @@ -49,16 +49,16 @@ model: model_dir: models # Allow direct loading of models from a completion or chat completion request (default: False). - inline_model_loading: False + inline_model_loading: false # Sends dummy model names when the models endpoint is queried. # Enable this if the client is looking for specific OAI models. 
- use_dummy_models: False + use_dummy_models: false # An initial model to load. # Make sure the model is located in the model directory! # REQUIRED: This must be filled out to load a model on startup. - model_name: + model_name: # Names of args to use as a fallback for API load requests (default: []). # For example, if you always want cache_mode to be Q4 instead of on the inital model load, add "cache_mode" to this array. @@ -67,21 +67,21 @@ model: # Max sequence length (default: Empty). # Fetched from the model's base sequence length in config.json by default. - max_seq_len: + max_seq_len: # Overrides base model context length (default: Empty). # WARNING: Don't set this unless you know what you're doing! # Again, do NOT use this for configuring context length, use max_seq_len above ^ - override_base_seq_len: + override_base_seq_len: # Load model with tensor parallelism. # Falls back to autosplit if GPU split isn't provided. # This ignores the gpu_split_auto value. - tensor_parallel: False + tensor_parallel: false # Automatically allocate resources to GPUs (default: True). # Not parsed for single GPU users. - gpu_split_auto: True + gpu_split_auto: true # Reserve VRAM used for autosplit loading (default: 96 MB on GPU 0). # Represented as an array of MB per GPU. @@ -108,7 +108,7 @@ model: # Size of the prompt cache to allocate (default: max_seq_len). # Must be a multiple of 256 and can't be less than max_seq_len. # For CFG, set this to 2 * max_seq_len. - cache_size: + cache_size: # Chunk size for prompt ingestion (default: 2048). # A lower value reduces VRAM usage but decreases ingestion speed. @@ -119,23 +119,23 @@ model: # Set the maximum number of prompts to process at one time (default: None/Automatic). # Automatically calculated if left blank. # NOTE: Only available for Nvidia ampere (30 series) and above GPUs. - max_batch_size: + max_batch_size: # Set the prompt template for this model. (default: None) # If empty, attempts to look for the model's chat template. # If a model contains multiple templates in its tokenizer_config.json, # set prompt_template to the name of the template you want to use. # NOTE: Only works with chat completion message lists! - prompt_template: + prompt_template: # Number of experts to use per token. # Fetched from the model's config.json if empty. # NOTE: For MoE models only. # WARNING: Don't set this unless you know what you're doing! - num_experts_per_token: + num_experts_per_token: # Enables fasttensors to possibly increase model loading speeds (default: False). - fasttensors: False + fasttensors: false # Options for draft models (speculative decoding) # This will use more VRAM! @@ -145,7 +145,7 @@ draft_model: # An initial draft model to load. # Ensure the model is in the model directory. - draft_model_name: + draft_model_name: # Rope scale for draft models (default: 1.0). # Same as compress_pos_emb. @@ -154,7 +154,7 @@ draft_model: # Rope alpha for draft models (default: None). # Same as alpha_value. Set to "auto" to auto-calculate. - draft_rope_alpha: + draft_rope_alpha: # Cache mode for draft models to save VRAM (default: FP16). # Possible values: 'FP16', 'Q8', 'Q6', 'Q4'. @@ -169,7 +169,7 @@ lora: # For the YAML file, add each entry as a YAML list: # - name: lora1 # scaling: 1.0 - loras: + loras: # Options for embedding models and loading. # NOTE: Embeddings requires the "extras" feature to be installed @@ -185,32 +185,32 @@ embeddings: embeddings_device: cpu # An initial embedding model to load on the infinity backend. 

From 754fb15f234b5d725f825af8594bb9b344aa0be6 Mon Sep 17 00:00:00 2001
From: kingbri
Date: Tue, 17 Sep 2024 22:48:56 -0400
Subject: [PATCH 47/51] Config: Fix draft model migration and loading

The loader takes in the "draft" parameter, so map the config model to
that when creating kwargs for initial load. Also map the old "draft"
key to the new "draft_model" key.

Signed-off-by: kingbri
---
 common/tabby_config.py | 2 +-
 main.py                | 6 +++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/common/tabby_config.py b/common/tabby_config.py
index 0bf3563e..43f227ee 100644
--- a/common/tabby_config.py
+++ b/common/tabby_config.py
@@ -72,7 +72,7 @@ def _from_file(self, config_path: pathlib.Path):
 
         if model_cfg.get("draft"):
             legacy = True
-            cfg["draft"] = model_cfg["draft"]
+            cfg["draft_model"] = model_cfg["draft"]
 
         if model_cfg.get("lora"):
             legacy = True
diff --git a/main.py b/main.py
index e340b70b..b83fda70 100644
--- a/main.py
+++ b/main.py
@@ -69,7 +69,11 @@ async def entrypoint_async():
             model_path = model_path / model_name
 
         # TODO: remove model_dump()
-        await model.load_model(model_path.resolve(), **config.model.model_dump())
+        await model.load_model(
+            model_path.resolve(),
+            **config.model.model_dump(),
+            draft=config.draft_model.model_dump(),
+        )
 
         # Load loras after loading the model
         if config.lora.loras:
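
The load call above flattens the model section into keyword arguments and tucks the draft settings under a single "draft" keyword. A rough sketch of the resulting kwargs shape, using trimmed-down stand-ins for the real config models:

    # Hypothetical, trimmed-down stand-ins for the real config models; the point
    # is only the shape of the kwargs the loader would receive.
    from typing import Optional

    from pydantic import BaseModel


    class ModelStub(BaseModel):
        max_seq_len: Optional[int] = None
        cache_mode: str = "FP16"


    class DraftStub(BaseModel):
        draft_model_name: Optional[str] = None
        draft_rope_scale: float = 1.0


    model_cfg = ModelStub(max_seq_len=4096)
    draft_cfg = DraftStub(draft_model_name="example-draft")

    kwargs = {**model_cfg.model_dump(), "draft": draft_cfg.model_dump()}
    print(kwargs)
    # {'max_seq_len': 4096, 'cache_mode': 'FP16',
    #  'draft': {'draft_model_name': 'example-draft', 'draft_rope_scale': 1.0}}
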

From 63634beb5e61fdd4c1362559c4d72383a8e0cd2d Mon Sep 17 00:00:00 2001
From: kingbri
Date: Tue, 17 Sep 2024 23:03:28 -0400
Subject: [PATCH 48/51] Config: Clarify Rope alpha options

Leaving blank will use the model's set value or auto-calculate.

Signed-off-by: kingbri
---
 common/config_models.py | 12 ++++++++----
 config_sample.yml       |  6 ++++--
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/common/config_models.py b/common/config_models.py
index 6aac505c..b1266eb1 100644
--- a/common/config_models.py
+++ b/common/config_models.py
@@ -213,10 +213,12 @@ class ModelConfig(BaseConfigModel):
         ),
     )
     rope_alpha: Optional[Union[float, Literal["auto"]]] = Field(
-        1.0,
+        None,
         description=(
-            "Rope alpha (default: 1.0).\n"
-            'Same as alpha_value. Set to "auto" to auto-calculate.'
+            "Rope alpha (default: None).\n"
+            'Same as alpha_value. Set to "auto" to auto-calculate.\n'
+            "Leaving this value blank will either pull from the model "
+            "or auto-calculate."
         ),
     )
     cache_mode: Optional[CACHE_SIZES] = Field(
@@ -318,7 +320,9 @@ class DraftModelConfig(BaseConfigModel):
         None,
         description=(
             "Rope alpha for draft models (default: None).\n"
-            'Same as alpha_value. Set to "auto" to auto-calculate.'
+            'Same as alpha_value. Set to "auto" to auto-calculate.\n'
+            "Leaving this value blank will either pull from the model "
+            "or auto-calculate."
         ),
     )
     draft_cache_mode: Optional[CACHE_SIZES] = Field(
diff --git a/config_sample.yml b/config_sample.yml
index 0da6a361..507d7d54 100644
--- a/config_sample.yml
+++ b/config_sample.yml
@@ -97,9 +97,10 @@ model:
   # Leave blank to pull the value from the model.
   rope_scale: 1.0
 
-  # Rope alpha (default: 1.0).
+  # Rope alpha (default: None).
   # Same as alpha_value. Set to "auto" to auto-calculate.
-  rope_alpha: 1.0
+  # Leaving this value blank will either pull from the model or auto-calculate.
+  rope_alpha:
 
   # Enable different cache modes for VRAM savings (default: FP16).
   # Possible values: 'FP16', 'Q8', 'Q6', 'Q4'.
@@ -154,6 +155,7 @@ draft_model:
 
   # Rope alpha for draft models (default: None).
   # Same as alpha_value. Set to "auto" to auto-calculate.
+  # Leaving this value blank will either pull from the model or auto-calculate.
   draft_rope_alpha:
 
   # Cache mode for draft models to save VRAM (default: FP16).

From 6c7542de9fd203863ffc50851b1502d5604e519b Mon Sep 17 00:00:00 2001
From: TerminalMan <84923604+SecretiveShell@users.noreply.github.com>
Date: Wed, 18 Sep 2024 11:33:15 +0100
Subject: [PATCH 49/51] migrate all yaml loaders to ruamel.yaml

---
 backends/exllamav2/model.py | 4 +++-
 common/auth.py              | 6 ++++--
 common/sampling.py          | 4 +++-
 3 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/backends/exllamav2/model.py b/backends/exllamav2/model.py
index 207232a2..0c67388b 100644
--- a/backends/exllamav2/model.py
+++ b/backends/exllamav2/model.py
@@ -30,7 +30,7 @@
 from loguru import logger
 from typing import List, Optional, Union
 
-import yaml
+from ruamel.yaml import YAML
 
 from backends.exllamav2.grammar import (
     ExLlamaV2Grammar,
@@ -56,6 +56,8 @@
 from common.transformers_utils import GenerationConfig, HuggingFaceConfig
 from common.utils import coalesce, unwrap
 
+yaml = YAML()
+
 
 class ExllamaV2Container:
     """The model container class for ExLlamaV2 models."""
diff --git a/common/auth.py b/common/auth.py
index 6fcfec98..c822b888 100644
--- a/common/auth.py
+++ b/common/auth.py
@@ -5,7 +5,7 @@
 
 import aiofiles
 import secrets
-import yaml
+from ruamel.yaml import YAML
 from fastapi import Header, HTTPException, Request
 from pydantic import BaseModel
 from loguru import logger
@@ -13,6 +13,8 @@
 
 from common.utils import coalesce
 
+yaml = YAML()
+
 
 class AuthKeys(BaseModel):
     """
@@ -60,7 +62,7 @@ async def load_auth_keys(disable_from_config: bool):
     try:
         async with aiofiles.open("api_tokens.yml", "r", encoding="utf8") as auth_file:
             contents = await auth_file.read()
-            auth_keys_dict = yaml.safe_load(contents)
+            auth_keys_dict = yaml.load(contents)
             AUTH_KEYS = AuthKeys.model_validate(auth_keys_dict)
     except FileNotFoundError:
         new_auth_keys = AuthKeys(
diff --git a/common/sampling.py b/common/sampling.py
index a7da3ca7..7005794b 100644
--- a/common/sampling.py
+++ b/common/sampling.py
@@ -3,7 +3,7 @@
 import aiofiles
 import json
 import pathlib
-import yaml
+from ruamel.yaml import YAML
 from copy import deepcopy
 from loguru import logger
 from pydantic import AliasChoices, BaseModel, Field
@@ -11,6 +11,8 @@
 
 from common.utils import unwrap, prune_dict
 
+yaml = YAML()
+
 
 # Common class for sampler params
 class BaseSamplerRequest(BaseModel):
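
The mechanical part of the migration: PyYAML's module-level yaml.safe_load(contents) becomes an instance call on a ruamel.yaml YAML object. Note that YAML() with no arguments is ruamel's round-trip parser rather than a safe one, which is what the next patch tightens up for the read paths. A simplified, synchronous sketch of the swap (the real loaders read the files with aiofiles as shown in the hunks above):

    # Simplified sketch of the PyYAML -> ruamel.yaml swap; file handling is
    # condensed compared to the async code above.
    from ruamel.yaml import YAML

    yaml = YAML()  # round-trip parser by default

    with open("api_tokens.yml", "r", encoding="utf8") as auth_file:
        contents = auth_file.read()

    # Previously (PyYAML): auth_keys_dict = yaml.safe_load(contents)
    auth_keys_dict = yaml.load(contents)
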

From 24ea85b3c56baecf6b1f41c7a7404bbef0bc66a4 Mon Sep 17 00:00:00 2001
From: kingbri
Date: Wed, 18 Sep 2024 12:12:49 -0400
Subject: [PATCH 50/51] Tree: Use safe loader for YAML

Loaders that read use a safe type while loaders that write use both
round-trip and safe options. Also don't create module-level parsers
where they're not needed.

Signed-off-by: kingbri
---
 backends/exllamav2/model.py |  7 ++++---
 common/auth.py              | 14 +++++++++-----
 common/sampling.py          |  7 ++++---
 common/tabby_config.py      |  2 +-
 4 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/backends/exllamav2/model.py b/backends/exllamav2/model.py
index 0c67388b..15c51c19 100644
--- a/backends/exllamav2/model.py
+++ b/backends/exllamav2/model.py
@@ -56,8 +56,6 @@
 from common.transformers_utils import GenerationConfig, HuggingFaceConfig
 from common.utils import coalesce, unwrap
 
-yaml = YAML()
-
 
 class ExllamaV2Container:
     """The model container class for ExLlamaV2 models."""
@@ -381,7 +379,10 @@ async def set_model_overrides(self, **kwargs):
                 override_config_path, "r", encoding="utf8"
             ) as override_config_file:
                 contents = await override_config_file.read()
-                override_args = unwrap(yaml.safe_load(contents), {})
+
+                # Create a temporary YAML parser
+                yaml = YAML(typ="safe")
+                override_args = unwrap(yaml.load(contents), {})
 
         # Merge draft overrides beforehand
         draft_override_args = unwrap(override_args.get("draft"), {})
diff --git a/common/auth.py b/common/auth.py
index c822b888..67e393bf 100644
--- a/common/auth.py
+++ b/common/auth.py
@@ -4,6 +4,7 @@
 """
 
 import aiofiles
+import io
 import secrets
 from ruamel.yaml import YAML
 from fastapi import Header, HTTPException, Request
@@ -13,8 +14,6 @@
 
 from common.utils import coalesce
 
-yaml = YAML()
-
 
 class AuthKeys(BaseModel):
     """
@@ -59,6 +58,9 @@ async def load_auth_keys(disable_from_config: bool):
 
         return
 
+    # Create a temporary YAML parser
+    yaml = YAML(typ=["rt", "safe"])
+
     try:
         async with aiofiles.open("api_tokens.yml", "r", encoding="utf8") as auth_file:
             contents = await auth_file.read()
@@ -71,10 +73,12 @@ async def load_auth_keys(disable_from_config: bool):
         AUTH_KEYS = new_auth_keys
 
     async with aiofiles.open("api_tokens.yml", "w", encoding="utf8") as auth_file:
-        new_auth_yaml = yaml.safe_dump(
-            AUTH_KEYS.model_dump(), default_flow_style=False
+        string_stream = io.StringIO()
+        yaml.dump(
+            AUTH_KEYS.model_dump(), string_stream
         )
-        await auth_file.write(new_auth_yaml)
+
+        await auth_file.write(string_stream.getvalue())
 
         logger.info(
             f"Your API key is: {AUTH_KEYS.api_key}\n"
diff --git a/common/sampling.py b/common/sampling.py
index 7005794b..e49811d2 100644
--- a/common/sampling.py
+++ b/common/sampling.py
@@ -11,8 +11,6 @@
 
 from common.utils import unwrap, prune_dict
 
-yaml = YAML()
-
 
 # Common class for sampler params
 class BaseSamplerRequest(BaseModel):
@@ -418,7 +416,10 @@ async def overrides_from_file(preset_name: str):
         overrides_container.selected_preset = preset_path.stem
         async with aiofiles.open(preset_path, "r", encoding="utf8") as raw_preset:
             contents = await raw_preset.read()
-            preset = yaml.safe_load(contents)
+
+            # Create a temporary YAML parser
+            yaml = YAML(typ="safe")
+            preset = yaml.load(contents)
             overrides_from_dict(preset)
 
             logger.info("Applied sampler overrides from file.")
diff --git a/common/tabby_config.py b/common/tabby_config.py
index 43f227ee..219e180b 100644
--- a/common/tabby_config.py
+++ b/common/tabby_config.py
@@ -13,7 +13,7 @@
 from common.config_models import BaseConfigModel, TabbyConfigModel
 from common.utils import merge_dicts, unwrap
 
-yaml = YAML()
+yaml = YAML(typ=["rt", "safe"])
 
 
 class TabbyConfig(TabbyConfigModel):
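
The split above is the important part: pure readers construct a throwaway YAML(typ="safe") parser inside the function, while writers keep a round-trip-plus-safe instance and dump into an io.StringIO buffer, since ruamel's dump() writes to a stream instead of returning a string. A condensed sketch of both sides, with placeholder data rather than real keys:

    # Read side: safe types only, built where it's needed.
    import io

    from ruamel.yaml import YAML

    reader = YAML(typ="safe")
    overrides = reader.load("temperature: 0.7\n")

    # Write side: round-trip plus safe, dumped through a string buffer
    # because dump() requires a stream.
    writer = YAML(typ=["rt", "safe"])
    buffer = io.StringIO()
    writer.dump({"api_key": "placeholder", "admin_key": "placeholder"}, buffer)
    serialized = buffer.getvalue()
    print(serialized)
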

From 4cf85514f7ed2c9eb5cd477cdcf78cd0308b9008 Mon Sep 17 00:00:00 2001
From: kingbri
Date: Wed, 18 Sep 2024 20:36:17 -0400
Subject: [PATCH 51/51] Tree: Format

Signed-off-by: kingbri
---
 common/auth.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/common/auth.py b/common/auth.py
index 67e393bf..b02cdd02 100644
--- a/common/auth.py
+++ b/common/auth.py
@@ -74,9 +74,7 @@ async def load_auth_keys(disable_from_config: bool):
 
     async with aiofiles.open("api_tokens.yml", "w", encoding="utf8") as auth_file:
         string_stream = io.StringIO()
-        yaml.dump(
-            AUTH_KEYS.model_dump(), string_stream
-        )
+        yaml.dump(AUTH_KEYS.model_dump(), string_stream)
 
         await auth_file.write(string_stream.getvalue())