From 7967607f120a7616f80ded9ea5d8b2bc9388de33 Mon Sep 17 00:00:00 2001
From: DocShotgun <126566557+DocShotgun@users.noreply.github.com>
Date: Fri, 22 Dec 2023 01:53:13 -0800
Subject: [PATCH] Colab: Expose new config arguments

---
 TabbyAPI_Colab_Example.ipynb | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 55 insertions(+), 7 deletions(-)

diff --git a/TabbyAPI_Colab_Example.ipynb b/TabbyAPI_Colab_Example.ipynb
index b98f861c..ee0f0b2c 100644
--- a/TabbyAPI_Colab_Example.ipynb
+++ b/TabbyAPI_Colab_Example.ipynb
@@ -34,8 +34,8 @@
  "# @title # Install and download model { display-mode: \"form\" }\n",
  "# @markdown ---\n",
  "# @markdown Select model:\n",
- "repo_id = \"royallab/Noromaid-13b-v0.1.1-exl2\" # @param {type:\"string\"}\n",
- "revision = \"4bpw\" # @param {type:\"string\"}\n",
+ "repo_id = \"Kooten/FlatOrcamaid-13b-v0.2-5bpw-exl2\" # @param {type:\"string\"}\n",
+ "revision = \"main\" # @param {type:\"string\"}\n",
  "if revision == \"\": revision = \"main\"\n",
  "# @markdown ---\n",
  "# @markdown Select draft model (optional, for speculative decoding):\n",
@@ -92,8 +92,11 @@
  "# @markdown ---\n",
  "# @markdown Model parameters:\n",
  "ContextSize = 4096 # @param {type:\"integer\"}\n",
+ "OverrideBaseSeqLen = 4096 # @param {type:\"integer\"}\n",
  "RopeScale = 1.0 # @param {type:\"number\"}\n",
  "RopeAlpha = 1.0 # @param {type:\"number\"}\n",
+ "NumExpertsPerToken = 2 # @param {type:\"integer\"}\n",
+ "PromptTemplate = \"\" # @param {type:\"string\"}\n",
  "# @markdown ---\n",
  "# @markdown Draft model parameters (optional, for speculative decoding):\n",
  "DraftRopeScale = 1.0 # @param {type:\"number\"}\n",
@@ -102,10 +105,15 @@
  "# @markdown Lora parameters (optional, for loras):\n",
  "LoraScaling = 1.0 # @param {type:\"number\"}\n",
  "# @markdown ---\n",
+ "# @markdown Logging options:\n",
+ "PromptLogging = False # @param {type:\"boolean\"}\n",
+ "GenParamLogging = False # @param {type:\"boolean\"}\n",
+ "# @markdown ---\n",
  "# @markdown Misc options:\n",
  "CacheMode = \"FP16\" # @param [\"FP8\", \"FP16\"] {type:\"string\"}\n",
  "UseDummyModels = False # @param {type:\"boolean\"}\n",
  "NoFlashAttention = False # @param {type:\"boolean\"}\n",
+ "DisableAuth = False # @param {type:\"boolean\"}\n",
  "# @markdown ---\n",
  "# @markdown To connect, make note of the cloudflared URL and your auto-generated API key after launching and provide it to your preferred frontend.\n",
  "\n",
@@ -128,6 +136,19 @@
  "  # The port to host on (default: 5000)\n",
  "  port: 5000\n",
  "\n",
+ "  # Disable HTTP token authentication with requests\n",
+ "  # WARNING: This will make your instance vulnerable!\n",
+ "  # Turn on this option if you are ONLY connecting from localhost\n",
+ "  disable_auth: {DisableAuth}\n",
+ "\n",
+ "# Options for logging\n",
+ "logging:\n",
+ "  # Enable prompt logging (default: False)\n",
+ "  prompt: {PromptLogging}\n",
+ "\n",
+ "  # Enable generation parameter logging (default: False)\n",
+ "  generation_params: {GenParamLogging}\n",
+ "\n",
  "# Options for model overrides and loading\n",
  "model:\n",
  "  # Overrides the directory to look for models (default: models)\n",
@@ -144,17 +165,29 @@
  "\n",
  "  # The below parameters apply only if model_name is set\n",
  "\n",
- "  # Maximum model context length (default: 4096)\n",
+ "  # Max sequence length (default: None)\n",
+ "  # Fetched from the model's base sequence length in config.json by default\n",
  "  max_seq_len: {ContextSize}\n",
  "\n",
+ "  # Overrides base model context length (default: None)\n",
+ "  # WARNING: Don't set this unless you know what you're doing!\n",
+ "  # Only use this if the model's base sequence length in config.json is incorrect (ex. Mistral/Mixtral models)\n",
+ "  override_base_seq_len: {OverrideBaseSeqLen}\n",
+ "\n",
  "  # Automatically allocate resources to GPUs (default: True)\n",
  "  gpu_split_auto: True\n",
  "\n",
  "  # An integer array of GBs of vram to split between GPUs (default: [])\n",
  "  # gpu_split: [20.6, 24]\n",
  "\n",
- "  # Rope scaling parameters (default: 1.0)\n",
+ "  # Rope scale (default: 1.0)\n",
+ "  # Same thing as compress_pos_emb\n",
+ "  # Only use if your model was trained on long context with rope (check config.json)\n",
  "  rope_scale: {RopeScale}\n",
+ "\n",
+ "  # Rope alpha (default: 1.0)\n",
+ "  # Same thing as alpha_value\n",
+ "  # Leave blank to automatically calculate alpha value\n",
  "  rope_alpha: {RopeAlpha}\n",
  "\n",
  "  # Disable Flash-attention 2. Set to True for GPUs lower than Nvidia's 3000 series. (default: False)\n",
@@ -163,6 +196,15 @@
  "  # Enable 8 bit cache mode for VRAM savings (slight performance hit). Possible values FP16, FP8. (default: FP16)\n",
  "  cache_mode: {CacheMode}\n",
  "\n",
+ "  # Set the prompt template for this model. If empty, chat completions will be disabled. (default: None)\n",
+ "  # NOTE: Only works with chat completion message lists!\n",
+ "  prompt_template: {PromptTemplate}\n",
+ "\n",
+ "  # Number of experts to use per token. Loads from the model's config.json if not specified (default: None)\n",
+ "  # WARNING: Don't set this unless you know what you're doing!\n",
+ "  # NOTE: For MoE models (ex. Mixtral) only!\n",
+ "  num_experts_per_token: {NumExpertsPerToken}\n",
+ "\n",
  "  # Options for draft models (speculative decoding). This will use more VRAM!\n",
  "  draft:\n",
  "    # Overrides the directory to look for draft (default: models)\n",
@@ -170,10 +212,16 @@
  "\n",
  "    # An initial draft model to load. Make sure this model is located in the model directory!\n",
  "    # A draft model can be loaded later via the API.\n",
- "    draft_model_name: {draft_model}\n",
+ "    #draft_model_name: {draft_model}\n",
  "\n",
- "    # Rope parameters for draft models (default: 1.0)\n",
+ "    # Rope scale for draft models (default: 1.0)\n",
+ "    # Same thing as compress_pos_emb\n",
+ "    # Only use if your draft model was trained on long context with rope (check config.json)\n",
  "    draft_rope_scale: {DraftRopeScale}\n",
+ "\n",
+ "    # Rope alpha for draft models (default: 1.0)\n",
+ "    # Same thing as alpha_value\n",
+ "    # Leave blank to automatically calculate alpha value\n",
  "    draft_rope_alpha: {DraftRopeAlpha}\n",
  "\n",
  "  # Options for loras\n",
@@ -212,4 +260,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 0
-}
+}
\ No newline at end of file
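
A note on the new fields above: the notebook builds config.yml by interpolating these form values through a Python f-string, so each Python literal lands in the YAML as rendered text. The following is a minimal sanity-check sketch, not part of this patch; the config fragment it renders is an assumed simplification of the real layout, and it assumes PyYAML is available (it ships with stock Colab runtimes). It mimics the interpolation for a few of the new keys and confirms how YAML reads them back.

    import yaml

    # Form values as the Colab cell defines them (copied from the patch).
    DisableAuth = False
    PromptLogging = False
    PromptTemplate = ""  # empty form field -> empty YAML value
    NumExpertsPerToken = 2

    # Render a small, assumed fragment of the config the same way the
    # notebook's f-string does.
    config_text = f"""network:
      disable_auth: {DisableAuth}

    logging:
      prompt: {PromptLogging}

    model:
      prompt_template: {PromptTemplate}
      num_experts_per_token: {NumExpertsPerToken}
    """

    parsed = yaml.safe_load(config_text)

    # Python's False renders as the text "False", which YAML reads as a boolean.
    assert parsed["network"]["disable_auth"] is False
    assert parsed["logging"]["prompt"] is False

    # An empty PromptTemplate leaves the value blank, which YAML parses as
    # None -- matching the "(default: None)" behavior noted in the comments.
    assert parsed["model"]["prompt_template"] is None
    assert parsed["model"]["num_experts_per_token"] == 2
    print(parsed)

In particular, leaving the PromptTemplate form field empty produces a YAML null rather than an empty string, which is what lets the loader fall back to its "(default: None)" behavior and disable chat completions.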