Cleanup and prepare PR for the new providers
synw committed Aug 30, 2023
1 parent 17cb1d7 commit 0cd66c8
Showing 8 changed files with 60 additions and 21 deletions.
11 changes: 1 addition & 10 deletions package.json
@@ -89,16 +89,6 @@
 "default": 60000,
 "description": "Request timeout in milliseconds"
 },
-"wingman.defaultCtx": {
-"type": "number",
-"default": 2048,
-"description": "Default context window length for the model (a fallback if the command does not specify)"
-},
-"wingman.defaultTemplate": {
-"type": "string",
-"default": "### System:\nYou are an AI assistant that follows instruction extremely well. Help as much as you can.\n\n### User:\n{prompt}\n\n### Assistant:",
-"description": "Default template (a fallback if the command does not specify)"
-},
 "wingman.anthropic.model": {
 "type": "string",
 "default": "claude-instant-v1",
@@ -360,6 +350,7 @@
 "cheerio": "1.0.0-rc.12",
 "fast-glob": "^3.2.12",
 "fetch": "^1.1.0",
+"llama-tokenizer-js": "^1.1.3",
 "node-fetch": "^3.3.1"
 }
 }
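
The new `llama-tokenizer-js` dependency is what the `llamaMaxTokens` helper added to `src/utils.ts` (further down in this commit) builds on. A minimal sketch of counting prompt tokens with it, using only the `encode()` call this commit relies on; the prompt string is illustrative:

```ts
import llamaTokenizer from "llama-tokenizer-js";

// encode() returns an array of LLaMA token ids, so its length is the prompt's token count.
const prompt = "### System:\nYou are a helpful assistant.\n\n### User:\nHello";
const tokenCount = llamaTokenizer.encode(prompt).length;
console.log(`prompt occupies ${tokenCount} tokens`);
```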
32 changes: 31 additions & 1 deletion pnpm-lock.yaml

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion src/extension.ts
@@ -46,7 +46,7 @@ function createCommandMap(templates: Command[]) {
 const allCommands = templates.map((template) => {
 return {
 ...template,
-provider: template.provider ?? AIProvider.KoboldCpp,
+provider: template.provider ?? AIProvider.OpenAI,
 command: template.command ? `wingman.command.${template.command}` : `wingman.command.${generateCommandName(template)}-${randomString()}`,
 category: template.category ?? BuiltinCategory.Misc,
 };
9 changes: 5 additions & 4 deletions src/providers/goinfer.ts
@@ -2,10 +2,10 @@
 import * as vscode from "vscode";

 import { type PostableViewProvider, type ProviderResponse, type Provider } from ".";
-import { Client, type InferParams, type InferResult, type StreamedMessage } from "./sdks/goinfer";
+import { Client, type InferParams, type InferResult, type StreamedMessage, DEFAULT_CTX, DEFAULT_TEMPLATE } from "./sdks/goinfer";
 import { type Command } from "../templates/render";
 import { handleResponseCallbackType } from "../templates/runner";
-import { displayWarning, getConfig, getSecret, getSelectionInfo, setSecret, unsetConfig } from "../utils";
+import { displayWarning, getConfig, getSecret, getSelectionInfo, llamaMaxTokens, setSecret, unsetConfig } from "../utils";

 let lastMessage: string | undefined;
 let lastTemplate: Command | undefined;
@@ -84,16 +84,17 @@ export class GoinferProvider implements Provider {
 prompt = `${this.conversationTextHistory ?? ""}${message}`;
 }

-const modelTemplate = template?.completionParams?.template ?? (getConfig("defaultTemplate") as string);
+const modelTemplate = template?.completionParams?.template ?? DEFAULT_TEMPLATE;
 const samplingParameters: InferParams = {
 prompt,
 template: modelTemplate.replace("{system}", systemMessage),
 ...template?.completionParams,
 temperature: template?.completionParams?.temperature ?? (getConfig("openai.temperature") as number),
 model: {
 name: template?.completionParams?.model ?? (getConfig("openai.model") as string) ?? "llama2",
-ctx: template?.completionParams?.ctx ?? (getConfig("defaultCtx") as number) ?? 2048,
+ctx: template?.completionParams?.ctx ?? DEFAULT_CTX,
 },
+n_predict: llamaMaxTokens(prompt, DEFAULT_CTX),
 };

 try {
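
For a command with no `completionParams`, the fallback chain above now resolves to the SDK constants instead of extension settings. A rough sketch of the payload assembled in that case; the prompt, temperature, and token estimate are illustrative, not taken from the extension:

```ts
import { DEFAULT_CTX, DEFAULT_TEMPLATE } from "./sdks/goinfer";
import { llamaMaxTokens } from "../utils";

// Suppose the rendered prompt encodes to roughly 300 LLaMA tokens.
const prompt = "Explain what this function does.";

const params = {
  prompt,
  template: DEFAULT_TEMPLATE.replace("{system}", "You are an AI assistant."),
  temperature: 0.2,                               // would come from the openai.temperature setting
  model: { name: "llama2", ctx: DEFAULT_CTX },    // "llama2" is the final model-name fallback
  // n_predict is whatever the prompt leaves of the context window,
  // e.g. a 300-token prompt against ctx 2048 leaves 1748 tokens to generate.
  n_predict: llamaMaxTokens(prompt, DEFAULT_CTX),
};
```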
11 changes: 6 additions & 5 deletions src/providers/koboldcpp.ts
@@ -2,10 +2,10 @@
 import * as vscode from "vscode";

 import { type PostableViewProvider, type ProviderResponse, type Provider } from ".";
-import { Client, type KoboldInferParams } from "./sdks/koboldcpp";
+import { Client, DEFAULT_TEMPLATE, DEFAULT_CTX, type KoboldInferParams } from "./sdks/koboldcpp";
 import { type Command } from "../templates/render";
 import { handleResponseCallbackType } from "../templates/runner";
-import { displayWarning, getConfig, getSelectionInfo } from "../utils";
+import { displayWarning, formatPrompt, getConfig, getSelectionInfo, llamaMaxTokens } from "../utils";

 let lastMessage: string | undefined;
 let lastTemplate: Command | undefined;
@@ -75,13 +75,14 @@ export class KoboldcppProvider implements Provider {
 prompt = `${this.conversationTextHistory ?? ""}${message}`;
 }

-const modelTemplate = template?.completionParams?.template ?? (getConfig("defaultTemplate") as string);
+const modelTemplate = template?.completionParams?.template ?? DEFAULT_TEMPLATE;
 const samplingParameters: KoboldInferParams = {
-prompt: modelTemplate.replace("{system}", systemMessage).replace("{prompt}", prompt),
+prompt: formatPrompt(prompt, modelTemplate, systemMessage),
 ...template?.completionParams,
 temperature: template?.completionParams?.temperature ?? (getConfig("openai.temperature") as number),
-max_length: 512,
+max_length: llamaMaxTokens(prompt, DEFAULT_CTX),
 };
+console.log("Params", samplingParameters);

 try {
 this.viewProvider?.postMessage({ type: "requestMessage", value: message });
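
The inline `.replace()` chain for the prompt is now centralized in the new `formatPrompt` helper from `src/utils.ts`. A small sketch of what it produces with the default `{system}\n\n{prompt}` template; the strings are illustrative:

```ts
import { formatPrompt } from "../utils";

const rendered = formatPrompt(
  "Refactor this function.",     // substituted for {prompt}
  "{system}\n\n{prompt}",        // the DEFAULT_TEMPLATE shape
  "You are a coding assistant.", // substituted for {system}
);
// rendered === "You are a coding assistant.\n\nRefactor this function."
```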
2 changes: 2 additions & 0 deletions src/providers/sdks/goinfer.ts
@@ -56,6 +56,8 @@ export type OnOpen = (response: NodeFetchResponse) => void | Promise<void>;
 export type OnUpdate = (completion: StreamedMessage) => void | Promise<void>;

 const DEFAULT_API_URL = "https://localhost:5143";
+export const DEFAULT_CTX = 2048;
+export const DEFAULT_TEMPLATE = "{system}\n\n{prompt}";

 export class Client {
 private apiUrl: string;
4 changes: 4 additions & 0 deletions src/providers/sdks/koboldcpp.ts
@@ -15,6 +15,8 @@ export type OnOpen = (response: NodeFetchResponse) => void | Promise<void>;
 export type OnUpdate = (completion: string) => void | Promise<void>;

 const DEFAULT_API_URL = "https://localhost:5001";
+export const DEFAULT_TEMPLATE = "{system}\n\n{prompt}";
+export const DEFAULT_CTX = 2048;

 export class Client {
 private apiUrl: string;
@@ -47,6 +49,8 @@ export class Client {
 completeStream(params: KoboldInferParams, { onOpen, onUpdate, signal }: { onOpen?: OnOpen; onUpdate?: OnUpdate; signal?: AbortSignal }): Promise<void> {
 const abortController = new AbortController();

+console.log("Url", this.apiUrl);
+
 return new Promise((resolve, reject) => {
 signal?.addEventListener("abort", (event) => {
 abortController.abort(event);
10 changes: 10 additions & 0 deletions src/utils.ts
@@ -2,6 +2,7 @@ import fs from "node:fs";
 import path from "node:path";

 import Glob from "fast-glob";
+import llamaTokenizer from "llama-tokenizer-js";
 import * as vscode from "vscode";

 import { ExtensionState } from "./extension";
@@ -231,3 +232,12 @@ export const randomString = (): string => {
 }
 return result;
 };
+
+export function llamaMaxTokens(prompt: string, ctx: number) {
+const n = llamaTokenizer.encode(prompt).length;
+return ctx - n;
+}
+
+export function formatPrompt(prompt: string, template: string, systemMessage: string) {
+return template.replace("{system}", systemMessage).replace("{prompt}", prompt);
+}
