Commit 4012d77: Update rag generation to support multiple models

stwiname committed Dec 17, 2024
1 parent: a79548f

Showing 7 changed files with 105 additions and 40 deletions.
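At a glance: the commit removes the hard-wired Ollama client from embedding generation. LanceWriter and the embeddings generator now receive an injected GenerateEmbedding callback, and a new getGenerateFunction helper in src/runners/runner.ts resolves that callback against either an Ollama host or an OpenAI-compatible endpoint, so the embeddings CLI command can target multiple model providers. Both runner factories also now load vector storage eagerly on creation via getContext().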
src/context/context.ts (2 changes: 1 addition & 1 deletion)
@@ -9,7 +9,7 @@ import * as lancedb from "@lancedb/lancedb";

const logger = await getLogger("ToolContext");

-type GetEmbedding = (input: string | string[]) => Promise<number[]>;
+type GetEmbedding = (input: string /* | string[] */) => Promise<number[]>;

export class Context implements IContext {
#getEmbedding: GetEmbedding;
src/embeddings/generator/generator.ts (8 changes: 3 additions & 5 deletions)
@@ -2,9 +2,8 @@ import {
type BaseEmbeddingSource,
MarkdownEmbeddingSource,
} from "./mdSource.ts";
-import ollama from "ollama";
import { glob } from "glob";
-import { LanceWriter } from "../lance/index.ts";
+import { type GenerateEmbedding, LanceWriter } from "../lance/index.ts";
import { getLogger } from "../../logger.ts";
import { getSpinner } from "../../util.ts";

@@ -20,8 +19,8 @@ export async function generate(
path: string,
lanceDbPath: string,
tableName: string,
+generateEmbedding: GenerateEmbedding,
ignoredPaths = DEFAULT_IGNORED_PATHS,
model = "nomic-embed-text",
overwrite = false,
) {
const embeddingSources: BaseEmbeddingSource[] =
@@ -37,8 +36,7 @@
const lanceWriter = await LanceWriter.createNewTable(
lanceDbPath,
tableName,
-ollama,
-model,
+generateEmbedding,
overwrite,
);

src/embeddings/lance/writer.ts (24 changes: 10 additions & 14 deletions)
@@ -1,12 +1,14 @@
import * as lancedb from "@lancedb/lancedb";
import { Field, FixedSizeList, Float64, Schema, Utf8 } from "apache-arrow";
import type { IEmbeddingWriter } from "../embeddings.ts";
-import ollama, { type Ollama } from "ollama";

+export type GenerateEmbedding = (
+input: string | string[],
+) => Promise<number[][]>;
+
export class LanceWriter implements IEmbeddingWriter {
#table: lancedb.Table;
-#model: Ollama;
-#embedModel: string;
+#generateEmbedding: GenerateEmbedding;

static #dim = 768;

@@ -21,19 +23,16 @@ export class LanceWriter implements IEmbeddingWriter {

constructor(
table: lancedb.Table,
-model: Ollama,
-embedModel = "nomic-embed-text",
+generateEmbedding: GenerateEmbedding,
) {
this.#table = table;
-this.#model = model;
-this.#embedModel = embedModel;
+this.#generateEmbedding = generateEmbedding;
}

static async createNewTable(
dbPath: string,
tableName: string,
-model: Ollama = ollama,
-embedModel = "nomic-embed-text",
+generateEmbedding: GenerateEmbedding,
overwrite = false,
): Promise<LanceWriter> {
const db = await lancedb.connect(dbPath);
@@ -44,14 +43,11 @@ export class LanceWriter implements IEmbeddingWriter {
{ mode: overwrite ? "overwrite" : "create" },
);

-return new LanceWriter(table, model, embedModel);
+return new LanceWriter(table, generateEmbedding);
}

async write(input: string | string[]): Promise<void> {
-const { embeddings } = await this.#model.embed({
-model: this.#embedModel,
-input,
-});
+const embeddings = await this.#generateEmbedding(input);

const inputArr = Array.isArray(input) ? input : [input];
const data = inputArr.map((input, idx) => {
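For orientation, a minimal sketch of how the refactored writer might be driven. The import path mirrors the diff's ../lance/index.ts re-export; the db path, table name, and the zero-vector stub are assumptions for illustration only:

import { type GenerateEmbedding, LanceWriter } from "./src/embeddings/lance/index.ts";

// Hypothetical stand-in: any async function matching GenerateEmbedding works,
// so the writer no longer depends on an Ollama client. This stub returns
// zero vectors matching the writer's fixed 768-dimension schema.
const generateEmbedding: GenerateEmbedding = async (input) => {
  const texts = Array.isArray(input) ? input : [input];
  return texts.map(() => new Array<number>(768).fill(0));
};

const writer = await LanceWriter.createNewTable(
  "./.lancedb", // dbPath (assumed)
  "embeddings", // tableName (assumed)
  generateEmbedding,
  true, // overwrite an existing table
);
await writer.write(["first chunk", "second chunk"]);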
src/index.ts (40 changes: 26 additions & 14 deletions)
@@ -21,6 +21,7 @@ import ora from "ora";
import { getPrompt, getVersion, setSpinner } from "./util.ts";
import { initLogger } from "./logger.ts";
import { DEFAULT_LLM_HOST, DEFAULT_PORT } from "./constants.ts";
+import { getGenerateFunction } from "./runners/runner.ts";

const sharedArgs = {
project: {
@@ -46,6 +47,21 @@ const sharedArgs = {
},
} satisfies Record<string, Options>;

+const llmHostArgs = {
+host: {
+alias: "h",
+description:
+"The LLM RPC host. If the project model uses ChatGPT then the default value is not used.",
+default: DEFAULT_LLM_HOST,
+type: "string",
+},
+openAiApiKey: {
+description:
+"If the project models use OpenAI models, then this api key will be parsed on to the OpenAI client",
+type: "string",
+},
+} satisfies Record<string, Options>;

const debugArgs = {
debug: {
description: "Enable debug logging",
@@ -78,13 +94,7 @@ yargs(Deno.args)
{
...sharedArgs,
...debugArgs,
-host: {
-alias: "h",
-description:
-"The LLM RPC host. If the project model uses ChatGPT then the default value is not used.",
-default: DEFAULT_LLM_HOST,
-type: "string",
-},
+...llmHostArgs,
interface: {
alias: "i",
description: "The interface to interact with the app",
@@ -116,11 +126,6 @@
type: "number",
default: 5_000, // 5s
},
-openAiApiKey: {
-description:
-"If the project models use OpenAI models, then this api key will be parsed on to the OpenAI client",
-type: "string",
-},
},
async (argv) => {
try {
@@ -181,6 +186,7 @@
"Creates a Lance db table with embeddings from MDX files",
{
...debugArgs,
+...llmHostArgs,
input: {
alias: "i",
description: "Path to a directory containing MD or MDX files",
@@ -208,7 +214,7 @@
model: {
description:
"The embedding LLM model to use, this should be the same as embeddingsModel in your app manifest",
default: "nomic-embed-text",
required: true,
type: "string",
},
overwrite: {
Expand All @@ -226,12 +232,18 @@ yargs(Deno.args)
const { generate } = await import(
"./embeddings/generator/generator.ts"
);

+const generateFunction = await getGenerateFunction(
+argv.host,
+argv.model,
+argv.openAiApiKey,
+);
return await generate(
resolve(argv.input),
resolve(argv.output),
argv.table,
+generateFunction,
argv.ignoredFiles?.map((f) => resolve(f)),
-argv.model,
argv.overwrite,
);
} catch (e) {
src/runners/ollama.ts (20 changes: 16 additions & 4 deletions)
@@ -49,16 +49,28 @@ export class OllamaRunnerFactory implements IRunnerFactory {
await ollama.show({ model: sandbox.manifest.embeddingsModel });
}

-return new OllamaRunnerFactory(ollama, sandbox, loader);
+const factory = new OllamaRunnerFactory(ollama, sandbox, loader);
+
+// Makes sure vectorStorage is loaded
+await factory.getContext();
+
+return factory;
}

-async runEmbedding(input: string | string[]): Promise<number[]> {
-const { embeddings: [embedding] } = await this.#ollama.embed({
+async runEmbedding(input: string): Promise<number[]> {
+const { embeddings: [embed] } = await this.#ollama.embed({
model: this.#sandbox.manifest.embeddingsModel ?? "nomic-embed-text",
input,
});

-return embedding;
+return embed;
+
+// const { embedding } = await this.#ollama.embeddings({
+// model: this.#sandbox.manifest.embeddingsModel ?? "nomic-embed-text",
+// prompt: input,
+// });
+
+// return embedding;
}

@Memoize()
src/runners/openai.ts (7 changes: 6 additions & 1 deletion)
@@ -52,11 +52,16 @@ export class OpenAIRunnerFactory implements IRunnerFactory {
await openai.models.retrieve(sandbox.manifest.embeddingsModel);
}

-return new OpenAIRunnerFactory(
+const factory = new OpenAIRunnerFactory(
openai,
sandbox,
loader,
);

+// Makes sure vector storage is loaded
+await factory.getContext();
+
+return factory;
}

async runEmbedding(input: string | string[]): Promise<number[]> {
src/runners/runner.ts (44 changes: 43 additions & 1 deletion)
@@ -1,5 +1,7 @@
-import type { ChatResponse, Message } from "ollama";
+import { type ChatResponse, type Message, Ollama } from "ollama";
import type { IChatStorage } from "../chatStorage/index.ts";
+import type { GenerateEmbedding } from "../embeddings/lance/writer.ts";
+import OpenAI from "openai";

export interface IRunner {
prompt(message: string): Promise<string>;
@@ -9,3 +11,43 @@ export interface IRunner {
export interface IRunnerFactory {
getRunner(chatStorage: IChatStorage): Promise<IRunner>;
}

+export async function getGenerateFunction(
+endpoint: string,
+model: string,
+apiKey?: string,
+): Promise<GenerateEmbedding> {
+try {
+const ollama = new Ollama({ host: endpoint });
+
+// If this throws then try OpenAI
+await ollama.show({ model });
+
+return async (input: string | string[]) => {
+const { embeddings } = await ollama.embed({ model, input });
+return embeddings;
+};
+} catch (ollamaError) {
+try {
+const openai = new OpenAI({
+apiKey,
+baseURL: endpoint,
+});
+
+await openai.models.retrieve(model);
+
+return async (input: string | string[]) => {
+const { data } = await openai.embeddings.create({
+model,
+input,
+});
+
+return data.map((d) => d.embedding);
+};
+} catch (openAIError) {
+throw new Error(`Unable to find model: ${model}.
+Ollama error: ${ollamaError}
+Openai error: ${openAIError}`);
+}
+}
+}
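A minimal sketch of calling the new helper; the endpoint and model name below are assumptions for illustration, not values from the commit:

import { getGenerateFunction } from "./src/runners/runner.ts";

// Probes the Ollama host first (via ollama.show); if that rejects, it retries
// the same endpoint as an OpenAI-compatible API before throwing.
const generateEmbedding = await getGenerateFunction(
  "http://localhost:11434", // assumed local Ollama endpoint
  "nomic-embed-text", // assumed embedding model
);

const embeddings = await generateEmbedding(["hello", "world"]);
console.log(embeddings.length, embeddings[0].length); // one vector per input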
