From c0cff6de7717710f4ec235f039e32a4ded95f5d8 Mon Sep 17 00:00:00 2001
From: Roman Kabanov <white.noise.xl@gmail.com>
Date: Sun, 19 May 2024 16:40:29 +0200
Subject: [PATCH 1/6] Add similarity search batch processing up to current
 Cloudflare limits

---
 tools/similarity_search/src/index.ts | 46 ++++++++++++++++++++++------
 1 file changed, 36 insertions(+), 10 deletions(-)
diff --git a/tools/similarity_search/src/index.ts b/tools/similarity_search/src/index.ts
index dab8e4429..ddc38e52a 100644
--- a/tools/similarity_search/src/index.ts
+++ b/tools/similarity_search/src/index.ts
@@ -4,10 +4,11 @@ type Env = {
   API_KEY_TOKEN_CHECK: string
   AI: Ai
   VECTORIZE_INDEX: VectorizeIndex
+  MAX_INPUT: number
 }
 
 type TextEntry = {
-  text: string
+  text: string | string[]
   namespace: string
 }
 
@@ -31,21 +32,46 @@ app.post("/", async (c) => {
   const data = await c.req.json<TextEntry>()
   const { text, namespace } = data
 
-  if (typeof text !== "string" || typeof namespace !== "string") {
-    return c.text("Invalid JSON format", 400)
+  let texts: string[]
+  if (typeof text === "string") {
+    texts = [text]
+  } else if (Array.isArray(data.text) && data.text.every((element) => typeof element === "string")) {
+    texts = text
+  } else {
+    return c.text("Invalid JSON format, property `text` must be a string or array of strings", 400)
+  }
+  const MAX_INPUT = Number(c.env.MAX_INPUT) || 100
+  if (texts.length > MAX_INPUT) {
+    return c.text(`Too big input, property \`text\` can have max ${MAX_INPUT} items`, 400)
+  }
+  if (typeof namespace !== "string") {
+    return c.text("Invalid JSON format, property `namespace` must be a string", 400)
   }
 
   const modelResp = await c.env.AI.run("@cf/baai/bge-base-en-v1.5", {
-    text: [text]
+    text: texts
   })
-  const vector = modelResp.data[0]
-  const searchResponse = await c.env.VECTORIZE_INDEX.query(vector, {
-    namespace,
-    topK: 1
+
+  let index = 0
+  const requests = texts.map((text) => {
+    const vector = modelResp.data[index++]
+    return c.env.VECTORIZE_INDEX.query(vector, {
+      namespace,
+      topK: 1
+    })
   })
-  const similarityScore = searchResponse.matches[0]?.score || 0
+  const responses = await Promise.all(requests)
+  const similarityScores = []
+  for (const searchResponse of responses) {
+    const similarityScore = searchResponse.matches[0]?.score || 0
+    similarityScores.push(similarityScore)
+  }
 
-  return c.json({ similarity_score: similarityScore })
+  if (typeof text === 'string') {
+    return c.json({ similarity_score: similarityScores[0] })
+  } else {
+    return c.json({ similarity_score: similarityScores })
+  }
 })
 
 export default app

From 6805cd88d7c979f9e9ec80bb7f2fa16f24fa6248 Mon Sep 17 00:00:00 2001
From: Roman Kabanov <white.noise.xl@gmail.com>
Date: Sun, 19 May 2024 23:19:30 +0200
Subject: [PATCH 2/6] Similarity search - add tests for single and batch
 message processing

---
 tools/similarity_search/test/index.spec.ts | 61 ++++++++++++++++++++++
 tools/similarity_search/vitest.config.ts   |  3 +-
 2 files changed, 63 insertions(+), 1 deletion(-)

diff --git a/tools/similarity_search/test/index.spec.ts b/tools/similarity_search/test/index.spec.ts
index 083e7d387..c45c05ebc 100644
--- a/tools/similarity_search/test/index.spec.ts
+++ b/tools/similarity_search/test/index.spec.ts
@@ -20,3 +20,64 @@ describe("Authentication", () => {
     expect(await response.text()).toBe("Unauthorized")
   })
 })
+
+describe("Single message processing", () => {
+  it("returns single scalar result when single scalar text is given", async () => {
+    const response = await SELF.fetch("https://example.com/", {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+        "X-API-Key": "test-api-key",
+      },
+      body: JSON.stringify({
+        text: "Sample text",
+        namespace: "test-namespace"
+      })
+    })
+    expect(response.status).toBe(200)
+    expect(await response.text()).toEqual('{"similarity_score":0.5678}')
+  })
+})
+
+describe("Batch message processing", () => {
+  it ("limits max inputs", async () => {
+    const response = await SELF.fetch("https://example.com/", {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+        "X-API-Key": "test-api-key",
+      },
+      body: JSON.stringify({
+        text: [
+          "This is a story about an orange cloud",
+          "This is a story about a llama",
+          "This is a story about a hugging emoji",
+          "This is a story about overwhelming courage",
+        ],
+        namespace: "test-namespace"
+      })
+    })
+    expect(response.status).toBe(400)
+    expect(await response.text()).toEqual("Too big input, property `text` can have max 3 items")
+  })
+
+  it ("returns array results when multiple texts are given", async () => {
+    const response = await SELF.fetch("https://example.com/", {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+        "X-API-Key": "test-api-key",
+      },
+      body: JSON.stringify({
+        text: [
+          "This is a story about an orange cloud",
+          "This is a story about a llama",
+          "This is a story about a hugging emoji"
+        ],
+        namespace: "test-namespace"
+      })
+    })
+    expect(response.status).toBe(200)
+    expect(await response.text()).toEqual('{"similarity_score":[0.5678,0.5678,0.5678]}')
+  })
+})
diff --git a/tools/similarity_search/vitest.config.ts b/tools/similarity_search/vitest.config.ts
index f5fcd9922..177628298 100644
--- a/tools/similarity_search/vitest.config.ts
+++ b/tools/similarity_search/vitest.config.ts
@@ -9,7 +9,8 @@ export default defineWorkersConfig({
         },
         miniflare: {
           bindings: {
-            API_KEY_TOKEN_CHECK: "test-api-key"
+            API_KEY_TOKEN_CHECK: "test-api-key",
+            MAX_INPUT: 3
           },
           wrappedBindings: {
             AI: {

From 691f8473b8eae65e108fa2071ec96f728b61a725 Mon Sep 17 00:00:00 2001
From: Roman Kabanov <white.noise.xl@gmail.com>
Date: Sun, 19 May 2024 23:39:26 +0200
Subject: [PATCH 3/6] Similarity search - add Prettier config, reformat src and
 test

---
 tools/similarity_search/.prettierrc.json   |  6 ++++
 tools/similarity_search/package.json       |  1 +
 tools/similarity_search/src/index.ts       | 26 +++++++++++++-----
 tools/similarity_search/test/index.spec.ts | 32 ++++++++++++----------
 4 files changed, 44 insertions(+), 21 deletions(-)
 create mode 100644 tools/similarity_search/.prettierrc.json

diff --git a/tools/similarity_search/.prettierrc.json b/tools/similarity_search/.prettierrc.json
new file mode 100644
index 000000000..10a9fde47
--- /dev/null
+++ b/tools/similarity_search/.prettierrc.json
@@ -0,0 +1,6 @@
+{
+  "trailingComma": "all",
+  "tabWidth": 2,
+  "semi": false,
+  "singleQuote": false
+}
diff --git a/tools/similarity_search/package.json b/tools/similarity_search/package.json
index c187f2503..16f39988c 100644
--- a/tools/similarity_search/package.json
+++ b/tools/similarity_search/package.json
@@ -11,6 +11,7 @@
   },
   "devDependencies": {
     "@cloudflare/workers-types": "^4.20240403.0",
+    "prettier": "^3.2.5",
     "wrangler": "^3.47.0"
   }
 }
diff --git a/tools/similarity_search/src/index.ts b/tools/similarity_search/src/index.ts
index ddc38e52a..37af0578e 100644
--- a/tools/similarity_search/src/index.ts
+++ b/tools/similarity_search/src/index.ts
@@ -35,21 +35,33 @@ app.post("/", async (c) => {
   let texts: string[]
   if (typeof text === "string") {
     texts = [text]
-  } else if (Array.isArray(data.text) && data.text.every((element) => typeof element === "string")) {
+  } else if (
+    Array.isArray(data.text) &&
+    data.text.every((element) => typeof element === "string")
+  ) {
     texts = text
   } else {
-    return c.text("Invalid JSON format, property `text` must be a string or array of strings", 400)
+    return c.text(
+      "Invalid JSON format, property `text` must be a string or array of strings",
+      400,
+    )
   }
   const MAX_INPUT = Number(c.env.MAX_INPUT) || 100
   if (texts.length > MAX_INPUT) {
-    return c.text(`Too big input, property \`text\` can have max ${MAX_INPUT} items`, 400)
+    return c.text(
+      `Too big input, property \`text\` can have max ${MAX_INPUT} items`,
+      400,
+    )
   }
   if (typeof namespace !== "string") {
-    return c.text("Invalid JSON format, property `namespace` must be a string", 400)
+    return c.text(
+      "Invalid JSON format, property `namespace` must be a string",
+      400,
+    )
   }
 
   const modelResp = await c.env.AI.run("@cf/baai/bge-base-en-v1.5", {
-    text: texts
+    text: texts,
   })
 
   let index = 0
@@ -57,7 +69,7 @@ app.post("/", async (c) => {
     const vector = modelResp.data[index++]
     return c.env.VECTORIZE_INDEX.query(vector, {
       namespace,
-      topK: 1
+      topK: 1,
     })
   })
   const responses = await Promise.all(requests)
@@ -67,7 +79,7 @@ app.post("/", async (c) => {
     similarityScores.push(similarityScore)
   }
 
-  if (typeof text === 'string') {
+  if (typeof text === "string") {
     return c.json({ similarity_score: similarityScores[0] })
   } else {
     return c.json({ similarity_score: similarityScores })
diff --git a/tools/similarity_search/test/index.spec.ts b/tools/similarity_search/test/index.spec.ts
index c45c05ebc..247bcb830 100644
--- a/tools/similarity_search/test/index.spec.ts
+++ b/tools/similarity_search/test/index.spec.ts
@@ -8,12 +8,12 @@ describe("Authentication", () => {
     const response = await SELF.fetch("https://example.com/", {
       method: "POST",
       headers: {
-        "Content-Type": "application/json"
+        "Content-Type": "application/json",
       },
       body: JSON.stringify({
         text: "Sample text",
-        namespace: "test-namespace"
-      })
+        namespace: "test-namespace",
+      }),
     })
 
     expect(response.status).toBe(401)
@@ -31,8 +31,8 @@ describe("Single message processing", () => {
       },
       body: JSON.stringify({
         text: "Sample text",
-        namespace: "test-namespace"
-      })
+        namespace: "test-namespace",
+      }),
     })
     expect(response.status).toBe(200)
     expect(await response.text()).toEqual('{"similarity_score":0.5678}')
@@ -40,7 +40,7 @@ describe("Single message processing", () => {
 })
 
 describe("Batch message processing", () => {
-  it ("limits max inputs", async () => {
+  it("limits max inputs", async () => {
     const response = await SELF.fetch("https://example.com/", {
       method: "POST",
       headers: {
@@ -54,14 +54,16 @@ describe("Batch message processing", () => {
           "This is a story about a hugging emoji",
           "This is a story about overwhelming courage",
         ],
-        namespace: "test-namespace"
-      })
+        namespace: "test-namespace",
+      }),
     })
     expect(response.status).toBe(400)
-    expect(await response.text()).toEqual("Too big input, property `text` can have max 3 items")
+    expect(await response.text()).toEqual(
+      "Too big input, property `text` can have max 3 items",
+    )
   })
 
-  it ("returns array results when multiple texts are given", async () => {
+  it("returns array results when multiple texts are given", async () => {
     const response = await SELF.fetch("https://example.com/", {
       method: "POST",
       headers: {
@@ -72,12 +74,14 @@ describe("Batch message processing", () => {
         text: [
           "This is a story about an orange cloud",
           "This is a story about a llama",
-          "This is a story about a hugging emoji"
+          "This is a story about a hugging emoji",
         ],
-        namespace: "test-namespace"
-      })
+        namespace: "test-namespace",
+      }),
     })
     expect(response.status).toBe(200)
-    expect(await response.text()).toEqual('{"similarity_score":[0.5678,0.5678,0.5678]}')
+    expect(await response.text()).toEqual(
+      '{"similarity_score":[0.5678,0.5678,0.5678]}',
+    )
   })
 })

From de7261fb8233798f09b392e29e75e21110753c53 Mon Sep 17 00:00:00 2001
From: Roman Kabanov <white.noise.xl@gmail.com>
Date: Mon, 20 May 2024 20:44:53 +0200
Subject: [PATCH 4/6] Similarity search - run only unique texts

---
 tools/similarity_search/src/env.d.ts       |  6 ++++
 tools/similarity_search/src/index.ts       | 25 +++++++++------
 tools/similarity_search/test/env.d.ts      |  3 ++
 tools/similarity_search/test/index.spec.ts | 36 ++++++++++++++++++++--
 tools/similarity_search/test/tsconfig.json | 20 ++++++------
 5 files changed, 68 insertions(+), 22 deletions(-)
 create mode 100644 tools/similarity_search/src/env.d.ts
 create mode 100644 tools/similarity_search/test/env.d.ts

diff --git a/tools/similarity_search/src/env.d.ts b/tools/similarity_search/src/env.d.ts
new file mode 100644
index 000000000..642cc11ca
--- /dev/null
+++ b/tools/similarity_search/src/env.d.ts
@@ -0,0 +1,6 @@
+type Env = {
+  API_KEY_TOKEN_CHECK: string
+  AI: Ai
+  VECTORIZE_INDEX: VectorizeIndex
+  MAX_INPUT: number
+}
diff --git a/tools/similarity_search/src/index.ts b/tools/similarity_search/src/index.ts
index 37af0578e..b1825af5e 100644
--- a/tools/similarity_search/src/index.ts
+++ b/tools/similarity_search/src/index.ts
@@ -1,12 +1,5 @@
 import { Hono } from "hono"
 
-type Env = {
-  API_KEY_TOKEN_CHECK: string
-  AI: Ai
-  VECTORIZE_INDEX: VectorizeIndex
-  MAX_INPUT: number
-}
-
 type TextEntry = {
   text: string | string[]
   namespace: string
@@ -60,12 +53,17 @@ app.post("/", async (c) => {
     )
   }
 
+  const uniqueScores = new Map(texts.map((text) => [text, 0.0]))
+  const uniqueTexts = Array.from(uniqueScores.keys())
+
+  // transform texts into unique vectors
   const modelResp = await c.env.AI.run("@cf/baai/bge-base-en-v1.5", {
-    text: texts,
+    text: uniqueTexts,
   })
 
+  // query unique vectors in vector database
   let index = 0
-  const requests = texts.map((text) => {
+  const requests = uniqueTexts.map((text) => {
     const vector = modelResp.data[index++]
     return c.env.VECTORIZE_INDEX.query(vector, {
       namespace,
@@ -73,9 +71,16 @@ app.post("/", async (c) => {
     })
   })
   const responses = await Promise.all(requests)
-  const similarityScores = []
+  index = 0
   for (const searchResponse of responses) {
     const similarityScore = searchResponse.matches[0]?.score || 0
+    uniqueScores.set(uniqueTexts[index++], similarityScore)
+  }
+
+  // assign results to original (possibly duplicate) texts
+  const similarityScores = []
+  for (const text of texts) {
+    const similarityScore = uniqueScores.get(text) || 0
     similarityScores.push(similarityScore)
   }
 
diff --git a/tools/similarity_search/test/env.d.ts b/tools/similarity_search/test/env.d.ts
new file mode 100644
index 000000000..a35f483c0
--- /dev/null
+++ b/tools/similarity_search/test/env.d.ts
@@ -0,0 +1,3 @@
+declare module "cloudflare:test" {
+  interface ProvidedEnv extends Env {}
+}
diff --git a/tools/similarity_search/test/index.spec.ts b/tools/similarity_search/test/index.spec.ts
index 247bcb830..1c00f38f7 100644
--- a/tools/similarity_search/test/index.spec.ts
+++ b/tools/similarity_search/test/index.spec.ts
@@ -1,5 +1,5 @@
-import { SELF } from "cloudflare:test"
-import { describe, it, expect } from "vitest"
+import { SELF, env } from "cloudflare:test"
+import { describe, it, expect, vi } from "vitest"
 
 import "../src/index"
 
@@ -84,4 +84,36 @@ describe("Batch message processing", () => {
       '{"similarity_score":[0.5678,0.5678,0.5678]}',
     )
   })
+
+  it("runs only unique inputs for duplicate-heavy use cases", async () => {
+    const runSpy = vi.spyOn(env.AI, "run")
+    const querySpy = vi.spyOn(env.VECTORIZE_INDEX, "query")
+
+    const response = await SELF.fetch("https://example.com/", {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+        "X-API-Key": "test-api-key",
+      },
+      body: JSON.stringify({
+        text: [
+          "This is a story about an orange cloud",
+          "This is a story about a llama",
+          "This is a story about an orange cloud",
+        ],
+        namespace: "test-namespace",
+      }),
+    })
+    expect(response.status).toBe(200)
+    expect(await response.text()).toEqual(
+      '{"similarity_score":[0.5678,0.5678,0.5678]}',
+    )
+    expect(runSpy).toHaveBeenCalledWith("@cf/baai/bge-base-en-v1.5", {
+      text: [
+        "This is a story about an orange cloud",
+        "This is a story about a llama",
+      ],
+    })
+    expect(querySpy).toHaveBeenCalledTimes(2)
+  })
 })
diff --git a/tools/similarity_search/test/tsconfig.json b/tools/similarity_search/test/tsconfig.json
index cf7721300..21fc1b6ea 100644
--- a/tools/similarity_search/test/tsconfig.json
+++ b/tools/similarity_search/test/tsconfig.json
@@ -1,11 +1,11 @@
 {
-    "extends": "../tsconfig.json",
-    "compilerOptions": {
-      "moduleResolution": "bundler",
-      "types": [
-        "@cloudflare/workers-types/experimental",
-        "@cloudflare/vitest-pool-workers"
-      ]
-    },
-    "include": ["./**/*.ts", "../src/env.d.ts"]
-  }
\ No newline at end of file
+  "extends": "../tsconfig.json",
+  "compilerOptions": {
+    "moduleResolution": "bundler",
+    "types": [
+      "@cloudflare/workers-types/experimental",
+      "@cloudflare/vitest-pool-workers"
+    ]
+  },
+  "include": ["./**/*.ts", "../src/env.d.ts"]
+}

From ea985cceb550565e27be676c43b83ef920cabc95 Mon Sep 17 00:00:00 2001
From: Roman Kabanov <white.noise.xl@gmail.com>
Date: Tue, 28 May 2024 22:48:07 +0200
Subject: [PATCH 5/6] Add AI Gateway caching, CPU limit

---
 tools/similarity_search/src/env.d.ts       |  3 +-
 tools/similarity_search/src/index.ts       | 77 ++++++++++++++--------
 tools/similarity_search/test/index.spec.ts | 32 ---------
 tools/similarity_search/wrangler.toml      |  9 ++-
 4 files changed, 58 insertions(+), 63 deletions(-)

diff --git a/tools/similarity_search/src/env.d.ts b/tools/similarity_search/src/env.d.ts
index 642cc11ca..61f75b2a9 100644
--- a/tools/similarity_search/src/env.d.ts
+++ b/tools/similarity_search/src/env.d.ts
@@ -1,6 +1,7 @@
 type Env = {
+  API_GATEWAY_UNIVERSAL_API: string
   API_KEY_TOKEN_CHECK: string
-  AI: Ai
+  WORKERS_API_KEY: string
   VECTORIZE_INDEX: VectorizeIndex
   MAX_INPUT: number
 }
diff --git a/tools/similarity_search/src/index.ts b/tools/similarity_search/src/index.ts
index b1825af5e..f94372b2f 100644
--- a/tools/similarity_search/src/index.ts
+++ b/tools/similarity_search/src/index.ts
@@ -22,20 +22,37 @@ app.use("*", async (c, next) => {
 })
 
 app.post("/", async (c) => {
-  const data = await c.req.json<TextEntry>()
+  // Format https://gateway.ai.cloudflare.com/v1/{ACCOUNT_ID}/{SLUG}/
+  const apiGatewayUniversalApi = c.env.API_GATEWAY_UNIVERSAL_API
+  if (!apiGatewayUniversalApi) {
+    return c.text("Missing gateway URL", 500)
+  }
+
+  const workersApikey = c.env.WORKERS_API_KEY
+  if (!workersApikey) {
+    return c.text("Missing workers API key", 500)
+  }
+
+  let data: TextEntry
+  try {
+    data = await c.req.json<TextEntry>()
+  } catch (error) {
+    return c.text("Cannot parse JSON input", 400)
+  }
+
   const { text, namespace } = data
 
   let texts: string[]
-  if (typeof text === "string") {
+  if (typeof text === "string" && text.length) {
     texts = [text]
   } else if (
-    Array.isArray(data.text) &&
-    data.text.every((element) => typeof element === "string")
+    Array.isArray(text) &&
+    text.every((element) => typeof element === "string" && element.length)
   ) {
     texts = text
   } else {
     return c.text(
-      "Invalid JSON format, property `text` must be a string or array of strings",
+      "Invalid JSON format, property `text` must be a non-empty string or array of non-empty strings",
       400,
     )
   }
@@ -53,34 +70,36 @@ app.post("/", async (c) => {
     )
   }
 
-  const uniqueScores = new Map(texts.map((text) => [text, 0.0]))
-  const uniqueTexts = Array.from(uniqueScores.keys())
-
-  // transform texts into unique vectors
-  const modelResp = await c.env.AI.run("@cf/baai/bge-base-en-v1.5", {
-    text: uniqueTexts,
-  })
-
-  // query unique vectors in vector database
-  let index = 0
-  const requests = uniqueTexts.map((text) => {
-    const vector = modelResp.data[index++]
-    return c.env.VECTORIZE_INDEX.query(vector, {
-      namespace,
-      topK: 1,
-    })
+  // resolve each text individually to enable cache per request
+  const requests = texts.map((text) => {
+    return fetch(
+      `${apiGatewayUniversalApi}workers-ai/@cf/baai/bge-base-en-v1.5`,
+      {
+        method: "POST",
+        headers: {
+          Authorization: `Bearer ${workersApikey}`,
+          "Content-Type": "application/json",
+        },
+        body: JSON.stringify({
+          text,
+        }),
+      },
+    )
+      .then((response) => response.json())
+      .then((response) => {
+        console.log("Gateway response", response)
+        // TODO - type
+        const vector = response?.result?.data?.[0]
+        return c.env.VECTORIZE_INDEX.query(vector, {
+          namespace,
+          topK: 1,
+        })
+      })
   })
   const responses = await Promise.all(requests)
-  index = 0
+  const similarityScores = []
   for (const searchResponse of responses) {
     const similarityScore = searchResponse.matches[0]?.score || 0
-    uniqueScores.set(uniqueTexts[index++], similarityScore)
-  }
-
-  // assign results to original (possibly duplicate) texts
-  const similarityScores = []
-  for (const text of texts) {
-    const similarityScore = uniqueScores.get(text) || 0
     similarityScores.push(similarityScore)
   }
 
diff --git a/tools/similarity_search/test/index.spec.ts b/tools/similarity_search/test/index.spec.ts
index 1c00f38f7..08f90d4d0 100644
--- a/tools/similarity_search/test/index.spec.ts
+++ b/tools/similarity_search/test/index.spec.ts
@@ -84,36 +84,4 @@ describe("Batch message processing", () => {
       '{"similarity_score":[0.5678,0.5678,0.5678]}',
     )
   })
-
-  it("runs only unique inputs for duplicate-heavy use cases", async () => {
-    const runSpy = vi.spyOn(env.AI, "run")
-    const querySpy = vi.spyOn(env.VECTORIZE_INDEX, "query")
-
-    const response = await SELF.fetch("https://example.com/", {
-      method: "POST",
-      headers: {
-        "Content-Type": "application/json",
-        "X-API-Key": "test-api-key",
-      },
-      body: JSON.stringify({
-        text: [
-          "This is a story about an orange cloud",
-          "This is a story about a llama",
-          "This is a story about an orange cloud",
-        ],
-        namespace: "test-namespace",
-      }),
-    })
-    expect(response.status).toBe(200)
-    expect(await response.text()).toEqual(
-      '{"similarity_score":[0.5678,0.5678,0.5678]}',
-    )
-    expect(runSpy).toHaveBeenCalledWith("@cf/baai/bge-base-en-v1.5", {
-      text: [
-        "This is a story about an orange cloud",
-        "This is a story about a llama",
-      ],
-    })
-    expect(querySpy).toHaveBeenCalledTimes(2)
-  })
 })
diff --git a/tools/similarity_search/wrangler.toml b/tools/similarity_search/wrangler.toml
index a4cd0ee7d..25ab5c24a 100644
--- a/tools/similarity_search/wrangler.toml
+++ b/tools/similarity_search/wrangler.toml
@@ -20,4 +20,11 @@ compatibility_flags = ["nodejs_compat"]
 # database_id = ""
 
 # [ai]
-# binding = "AI"
\ No newline at end of file
+# binding = "AI"
+
+[[vectorize]]
+binding = "VECTORIZE_INDEX"
+index_name = "embeddings-index"
+
+[limits]
+cpu_ms = 50

From b145f87ec4b803ac355c632b1f1e35edad5ced97 Mon Sep 17 00:00:00 2001
From: Roman Kabanov <white.noise.xl@gmail.com>
Date: Wed, 29 May 2024 09:30:09 +0200
Subject: [PATCH 6/6] Convert to async fn, add AI Gateway response types, catch
 errors

---
 tools/similarity_search/src/index.ts | 48 ++++++++++++++++++++--------
 1 file changed, 35 insertions(+), 13 deletions(-)

diff --git a/tools/similarity_search/src/index.ts b/tools/similarity_search/src/index.ts
index f94372b2f..4f024c4c6 100644
--- a/tools/similarity_search/src/index.ts
+++ b/tools/similarity_search/src/index.ts
@@ -1,9 +1,19 @@
 import { Hono } from "hono"
+import { type AiTextEmbeddingsOutput } from "@cloudflare/workers-types/experimental"
 
 type TextEntry = {
   text: string | string[]
   namespace: string
 }
+type GatewayAiResponse = {
+  result: AiTextEmbeddingsOutput
+  success: boolean
+  errors: Array<{
+    message: string
+    code: number
+  }>
+  messages: Array<unknown>
+}
 
 const app = new Hono<{ Bindings: Env }>()
 
@@ -71,8 +81,9 @@ app.post("/", async (c) => {
   }
 
   // resolve each text individually to enable cache per request
-  const requests = texts.map((text) => {
-    return fetch(
+  // take each text's position from map(): a shared counter races across awaits
+  const requests = texts.map(async (text, index) => {
+    const response = await fetch(
       `${apiGatewayUniversalApi}workers-ai/@cf/baai/bge-base-en-v1.5`,
       {
         method: "POST",
@@ -85,18 +96,29 @@ app.post("/", async (c) => {
         }),
       },
     )
-      .then((response) => response.json())
-      .then((response) => {
-        console.log("Gateway response", response)
-        // TODO - type
-        const vector = response?.result?.data?.[0]
-        return c.env.VECTORIZE_INDEX.query(vector, {
-          namespace,
-          topK: 1,
-        })
-      })
+    const json = await response.json() as GatewayAiResponse
+    const vector = json?.result?.data?.[0]
+    if (!json?.success || !vector) {
+      console.error(`Workers AI error at index ${index}`, json)
+      throw new Error(`Workers AI error for text index ${index}`)
+    }
+
+    // one text is sent per gateway request, so its embedding is the first row;
+    // an empty `data` array is rejected above instead of querying with undefined
+    return await c.env.VECTORIZE_INDEX.query(vector, {
+      namespace,
+      topK: 1,
+    })
   })
-  const responses = await Promise.all(requests)
+
+  let responses
+  try {
+    responses = await Promise.all(requests)
+  } catch (error) {
+    console.error(`Batch error - ${error}`)
+    return c.text(`An error occurred - ${error}`, 500)
+  }
+
   const similarityScores = []
   for (const searchResponse of responses) {
     const similarityScore = searchResponse.matches[0]?.score || 0