Skip to content

Commit

Permalink
Merge pull request #37 from DocShotgun/main
Browse files Browse the repository at this point in the history
Colab: Expose new config arguments
  • Loading branch information
bdashore3 authored Dec 22, 2023
2 parents 71f6a58 + 7967607 commit f5314fc
Showing 1 changed file with 55 additions and 7 deletions.
62 changes: 55 additions & 7 deletions TabbyAPI_Colab_Example.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@
"# @title # Install and download model { display-mode: \"form\" }\n",
"# @markdown ---\n",
"# @markdown Select model:\n",
"repo_id = \"royallab/Noromaid-13b-v0.1.1-exl2\" # @param {type:\"string\"}\n",
"revision = \"4bpw\" # @param {type:\"string\"}\n",
"repo_id = \"Kooten/FlatOrcamaid-13b-v0.2-5bpw-exl2\" # @param {type:\"string\"}\n",
"revision = \"main\" # @param {type:\"string\"}\n",
"if revision == \"\": revision = \"main\"\n",
"# @markdown ---\n",
"# @markdown Select draft model (optional, for speculative decoding):\n",
Expand Down Expand Up @@ -92,8 +92,11 @@
"# @markdown ---\n",
"# @markdown Model parameters:\n",
"ContextSize = 4096 # @param {type:\"integer\"}\n",
"OverrideBaseSeqLen = 4096 # @param {type:\"integer\"}\n",
"RopeScale = 1.0 # @param {type:\"number\"}\n",
"RopeAlpha = 1.0 # @param {type:\"number\"}\n",
"NumExpertsPerToken = 2 # @param {type:\"integer\"}\n",
"PromptTemplate = \"\" # @param {type:\"string\"}\n",
"# @markdown ---\n",
"# @markdown Draft model parameters (optional, for speculative decoding):\n",
"DraftRopeScale = 1.0 # @param {type:\"number\"}\n",
Expand All @@ -102,10 +105,15 @@
"# @markdown Lora parameters (optional, for loras):\n",
"LoraScaling = 1.0 # @param {type:\"number\"}\n",
"# @markdown ---\n",
"# @markdown Logging options:\n",
"PromptLogging = False # @param {type:\"boolean\"}\n",
"GenParamLogging = False # @param {type:\"boolean\"}\n",
"# @markdown ---\n",
"# @markdown Misc options:\n",
"CacheMode = \"FP16\" # @param [\"FP8\", \"FP16\"] {type:\"string\"}\n",
"UseDummyModels = False # @param {type:\"boolean\"}\n",
"NoFlashAttention = False # @param {type:\"boolean\"}\n",
"DisableAuth = False # @param {type:\"boolean\"}\n",
"# @markdown ---\n",
"# @markdown To connect, make note of the cloudflared URL and your auto-generated API key after launching and provide them to your preferred frontend.\n",
"\n",
Expand All @@ -128,6 +136,19 @@
" # The port to host on (default: 5000)\n",
" port: 5000\n",
"\n",
" # Disable HTTP token authentication with requests\n",
" # WARNING: This will make your instance vulnerable!\n",
" # Turn on this option if you are ONLY connecting from localhost\n",
" disable_auth: {DisableAuth}\n",
"\n",
"# Options for logging\n",
"logging:\n",
" # Enable prompt logging (default: False)\n",
" prompt: {PromptLogging}\n",
"\n",
" # Enable generation parameter logging (default: False)\n",
" generation_params: {GenParamLogging}\n",
"\n",
"# Options for model overrides and loading\n",
"model:\n",
" # Overrides the directory to look for models (default: models)\n",
Expand All @@ -144,17 +165,29 @@
"\n",
" # The below parameters apply only if model_name is set\n",
"\n",
" # Maximum model context length (default: 4096)\n",
" # Max sequence length (default: None)\n",
" # Fetched from the model's base sequence length in config.json by default\n",
" max_seq_len: {ContextSize}\n",
"\n",
" # Overrides base model context length (default: None)\n",
" # WARNING: Don't set this unless you know what you're doing!\n",
" # Only use this if the model's base sequence length in config.json is incorrect (ex. Mistral/Mixtral models)\n",
" override_base_seq_len: {OverrideBaseSeqLen}\n",
"\n",
" # Automatically allocate resources to GPUs (default: True)\n",
" gpu_split_auto: True\n",
"\n",
" # An array of GBs of VRAM to split between GPUs (default: [])\n",
" # gpu_split: [20.6, 24]\n",
"\n",
" # Rope scaling parameters (default: 1.0)\n",
" # Rope scale (default: 1.0)\n",
" # Same thing as compress_pos_emb\n",
" # Only use if your model was trained on long context with rope (check config.json)\n",
" rope_scale: {RopeScale}\n",
"\n",
" # Rope alpha (default: 1.0)\n",
" # Same thing as alpha_value\n",
" # Leave blank to automatically calculate alpha value\n",
" rope_alpha: {RopeAlpha}\n",
"\n",
" # Disable Flash-attention 2. Set to True for GPUs lower than Nvidia's 3000 series. (default: False)\n",
Expand All @@ -163,17 +196,32 @@
" # Enable 8 bit cache mode for VRAM savings (slight performance hit). Possible values FP16, FP8. (default: FP16)\n",
" cache_mode: {CacheMode}\n",
"\n",
" # Set the prompt template for this model. If empty, chat completions will be disabled. (default: None)\n",
" # NOTE: Only works with chat completion message lists!\n",
" prompt_template: {PromptTemplate}\n",
"\n",
" # Number of experts to use per token. Loads from the model's config.json if not specified (default: None)\n",
" # WARNING: Don't set this unless you know what you're doing!\n",
" # NOTE: For MoE models (ex. Mixtral) only!\n",
" num_experts_per_token: {NumExpertsPerToken}\n",
"\n",
" # Options for draft models (speculative decoding). This will use more VRAM!\n",
" draft:\n",
" # Overrides the directory to look for draft models (default: models)\n",
" draft_model_dir: models\n",
"\n",
" # An initial draft model to load. Make sure this model is located in the model directory!\n",
" # A draft model can be loaded later via the API.\n",
" draft_model_name: {draft_model}\n",
" #draft_model_name: {draft_model}\n",
"\n",
" # Rope parameters for draft models (default: 1.0)\n",
" # Rope scale for draft models (default: 1.0)\n",
" # Same thing as compress_pos_emb\n",
" # Only use if your draft model was trained on long context with rope (check config.json)\n",
" draft_rope_scale: {DraftRopeScale}\n",
"\n",
" # Rope alpha for draft model (default: 1.0)\n",
" # Same thing as alpha_value\n",
" # Leave blank to automatically calculate alpha value\n",
" draft_rope_alpha: {DraftRopeAlpha}\n",
"\n",
" # Options for loras\n",
Expand Down Expand Up @@ -212,4 +260,4 @@
},
"nbformat": 4,
"nbformat_minor": 0
}
}

0 comments on commit f5314fc

Please sign in to comment.