
release: v0.1.0
hlhr202 committed May 9, 2023
1 parent e82222d commit 755e6ea
Showing 27 changed files with 191 additions and 136 deletions.
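The recurring change across these examples: as of v0.1.0, model loading is treated as asynchronous, so each script wraps its calls in an async run() and awaits llama.load(...) (or rwkv.load(...)) before tokenizing, embedding, or generating, instead of calling load fire-and-forget at module top level. A minimal sketch of the resulting pattern, using the llama-node 0.1.0 API as it appears in these diffs; the config field values and prompt are illustrative, and the full LoadConfig shape is assumed from the repo's examples since the diffs only show its tail:

```ts
import { LLM } from "llama-node";
import { LLamaCpp, type LoadConfig } from "llama-node/dist/llm/llama-cpp.js";
import path from "path";

const model = path.resolve(process.cwd(), "../ggml-vicuna-7b-1.1-q4_1.bin");
const llama = new LLM(LLamaCpp);

// Field names follow the repo's llama-cpp examples; the diffs below only show
// the tail of this object, so treat the exact shape as an assumption.
const config: LoadConfig = {
    modelPath: model,
    enableLogging: true,
    nCtx: 1024,
    seed: 0,
    f16Kv: false,
    logitsAll: false,
    vocabOnly: false,
    useMlock: false,
    embedding: false,
    useMmap: true,
};

const run = async () => {
    // v0.1.0: await load() before any other call instead of invoking it
    // synchronously at module top level.
    await llama.load(config);

    await llama.createCompletion(
        {
            nThreads: 4,
            nTokPredict: 2048,
            topK: 40,
            topP: 0.1,
            temp: 0.2,
            repeatPenalty: 1,
            prompt: "A chat between a user and an assistant.\nUSER: How are you?\nASSISTANT:",
        },
        (response) => {
            process.stdout.write(response.token);
        }
    );
};

run();
```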
2 changes: 1 addition & 1 deletion example/js/langchain/langchain.js
@@ -18,8 +18,8 @@ const config = {
embedding: true,
useMmap: true,
};
llama.load(config);
const run = async () => {
await llama.load(config);
// Load the docs into the vector store
const vectorStore = await MemoryVectorStore.fromTexts(["Hello world", "Bye bye", "hello nice world"], [{ id: 2 }, { id: 1 }, { id: 3 }], new LLamaEmbeddings({ maxConcurrency: 1 }, llama));
// Search for the most similar document
7 changes: 5 additions & 2 deletions example/js/llama-cpp/embedding.js
@@ -16,7 +16,6 @@ const config = {
embedding: true,
useMmap: true,
};
llama.load(config);
const prompt = `Who is the president of the United States?`;
const params = {
nThreads: 4,
@@ -27,4 +26,8 @@ const params = {
repeatPenalty: 1,
prompt,
};
llama.getEmbedding(params).then(console.log);
const run = async () => {
await llama.load(config);
await llama.getEmbedding(params).then(console.log);
};
run();
14 changes: 9 additions & 5 deletions example/js/llama-cpp/inference.js
@@ -16,19 +16,23 @@ const config = {
embedding: false,
useMmap: true,
};
llama.load(config);
const template = `How are you?`;
const prompt = `A chat between a user and an assistant.
USER: ${template}
ASSISTANT:`;
llama.createCompletion({
const params = {
nThreads: 4,
nTokPredict: 2048,
topK: 40,
topP: 0.1,
temp: 0.2,
repeatPenalty: 1,
prompt,
}, (response) => {
process.stdout.write(response.token);
});
};
const run = async () => {
await llama.load(config);
await llama.createCompletion(params, (response) => {
process.stdout.write(response.token);
});
};
run();
7 changes: 5 additions & 2 deletions example/js/llama-cpp/tokenize.js
@@ -16,6 +16,9 @@ const config = {
embedding: false,
useMmap: true,
};
llama.load(config);
const content = "how are you?";
llama.tokenize({ content, nCtx: 2048 }).then(console.log);
const run = async () => {
await llama.load(config);
await llama.tokenize({ content, nCtx: 2048 }).then(console.log);
};
run();
2 changes: 1 addition & 1 deletion example/js/llama-rs/embedding.js
@@ -4,7 +4,6 @@ import path from "path";
import fs from "fs";
const model = path.resolve(process.cwd(), "../ggml-alpaca-7b-q4.bin");
const llama = new LLM(LLamaRS);
llama.load({ path: model });
const getWordEmbeddings = async (prompt, file) => {
const data = await llama.getEmbedding({
prompt,
@@ -20,6 +19,7 @@ const getWordEmbeddings = async (prompt, file) => {
await fs.promises.writeFile(path.resolve(process.cwd(), file), JSON.stringify(data));
};
const run = async () => {
await llama.load({ path: model });
const dog1 = `My favourite animal is the dog`;
await getWordEmbeddings(dog1, "./example/semantic-compare/dog1.json");
const dog2 = `I have just adopted a cute dog`;
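The dog1.json and dog2.json vectors written above feed the semantic-compare example referenced by those paths. As a hedged illustration of what such a comparison involves (this is not the repo's actual compare script, and it assumes getEmbedding resolves to a flat number array, as the JSON written above suggests), a plain cosine-similarity check looks like this:

```ts
import fs from "fs";
import path from "path";

// Hypothetical comparison step; file names follow the embedding example above.
const loadVec = (file: string): number[] =>
    JSON.parse(fs.readFileSync(path.resolve(process.cwd(), file), "utf8"));

// Cosine similarity: dot(a, b) / (|a| * |b|)
const cosine = (a: number[], b: number[]): number => {
    const dot = a.reduce((sum, v, i) => sum + v * b[i], 0);
    const norm = (v: number[]) => Math.sqrt(v.reduce((s, x) => s + x * x, 0));
    return dot / (norm(a) * norm(b));
};

const dog1 = loadVec("./example/semantic-compare/dog1.json");
const dog2 = loadVec("./example/semantic-compare/dog2.json");
console.log("similarity(dog1, dog2):", cosine(dog1, dog2));
```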
14 changes: 9 additions & 5 deletions example/js/llama-rs/inference.js
@@ -3,7 +3,6 @@ import { LLamaRS } from "llama-node/dist/llm/llama-rs.js";
import path from "path";
const model = path.resolve(process.cwd(), "../ggml-alpaca-7b-q4.bin");
const llama = new LLM(LLamaRS);
llama.load({ path: model });
const template = `how are you`;
const prompt = `Below is an instruction that describes a task. Write a response that appropriately completes the request.
@@ -12,7 +11,7 @@ const prompt = `Below is an instruction that describes a task. Write a response
${template}
### Response:`;
llama.createCompletion({
const params = {
prompt,
numPredict: 128,
temp: 0.2,
@@ -22,6 +21,11 @@ llama.createCompletion({
repeatLastN: 64,
seed: 0,
feedPrompt: true,
}, (response) => {
process.stdout.write(response.token);
});
};
const run = async () => {
await llama.load({ path: model });
await llama.createCompletion(params, (response) => {
process.stdout.write(response.token);
});
};
run();
7 changes: 5 additions & 2 deletions example/js/llama-rs/tokenize.js
@@ -3,6 +3,9 @@ import { LLamaRS } from "llama-node/dist/llm/llama-rs.js";
import path from "path";
const model = path.resolve(process.cwd(), "../ggml-alpaca-7b-q4.bin");
const llama = new LLM(LLamaRS);
llama.load({ path: model });
const content = "how are you?";
llama.tokenize(content).then(console.log);
const run = async () => {
await llama.load({ path: model });
await llama.tokenize(content).then(console.log);
};
run();
14 changes: 9 additions & 5 deletions example/js/rwkv-cpp/inference.js
@@ -10,18 +10,22 @@ const config = {
nThreads: 4,
enableLogging: true,
};
rwkv.load(config);
const template = `Who is the president of the United States?`;
const prompt = `Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction: ${template}
### Response:`;
rwkv.createCompletion({
const params = {
maxPredictLength: 2048,
topP: 0.1,
temp: 0.1,
prompt,
}, (response) => {
process.stdout.write(response.token);
});
};
const run = async () => {
await rwkv.load(config);
await rwkv.createCompletion(params, (response) => {
process.stdout.write(response.token);
});
};
run();
7 changes: 5 additions & 2 deletions example/js/rwkv-cpp/tokenize.js
@@ -10,5 +10,8 @@ const config = {
nThreads: 4,
enableLogging: true,
};
rwkv.load(config);
rwkv.tokenize({ content: "hello world" }).then(console.log);
const run = async () => {
await rwkv.load(config);
await rwkv.tokenize({ content: "hello world" }).then(console.log);
};
run();
10 changes: 5 additions & 5 deletions example/package.json
@@ -1,6 +1,6 @@
{
"name": "@llama-node/examples",
"version": "0.0.37",
"version": "0.1.0",
"description": "",
"main": "index.js",
"type": "module",
@@ -18,9 +18,9 @@
"langchain": "^0.0.56"
},
"dependencies": {
"@llama-node/core": "0.0.37",
"@llama-node/llama-cpp": "0.0.37",
"@llama-node/rwkv-cpp": "0.0.37",
"llama-node": "0.0.37"
"@llama-node/core": "0.1.0",
"@llama-node/llama-cpp": "0.1.0",
"@llama-node/rwkv-cpp": "0.1.0",
"llama-node": "0.1.0"
}
}
5 changes: 2 additions & 3 deletions example/ts/langchain/langchain.ts
@@ -1,7 +1,7 @@
import { MemoryVectorStore } from "langchain/vectorstores/memory";
import { LLamaEmbeddings } from "llama-node/dist/extensions/langchain.js";
import { LLM } from "llama-node";
import { LLamaCpp, LoadConfig } from "llama-node/dist/llm/llama-cpp.js";
import { LLamaCpp, type LoadConfig } from "llama-node/dist/llm/llama-cpp.js";
import path from "path";

const model = path.resolve(process.cwd(), "../ggml-vicuna-7b-1.1-q4_1.bin");
@@ -22,9 +22,8 @@ const config: LoadConfig = {
useMmap: true,
};

llama.load(config);

const run = async () => {
await llama.load(config);
// Load the docs into the vector store
const vectorStore = await MemoryVectorStore.fromTexts(
["Hello world", "Bye bye", "hello nice world"],
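Both langchain examples are truncated right after the vector store is built. For orientation, here is a hedged sketch of the follow-up query step using LangChain's MemoryVectorStore.similaritySearch; the query string and result handling are assumptions, not the repo's actual code:

```ts
import { MemoryVectorStore } from "langchain/vectorstores/memory";
import { LLamaEmbeddings } from "llama-node/dist/extensions/langchain.js";

// llama (LLM over LLamaCpp) and config (LoadConfig) are created exactly as in
// the langchain.ts diff above.
const run = async () => {
    await llama.load(config);

    // Load the docs into the vector store, as in the diff above
    const vectorStore = await MemoryVectorStore.fromTexts(
        ["Hello world", "Bye bye", "hello nice world"],
        [{ id: 2 }, { id: 1 }, { id: 3 }],
        new LLamaEmbeddings({ maxConcurrency: 1 }, llama)
    );

    // Query for the single most similar document (query string is assumed)
    const resultOne = await vectorStore.similaritySearch("hello world", 1);
    console.log(resultOne);
};

run();
```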
12 changes: 8 additions & 4 deletions example/ts/llama-cpp/embedding.ts
@@ -1,5 +1,5 @@
import { LLM } from "llama-node";
import { LLamaCpp, LoadConfig } from "llama-node/dist/llm/llama-cpp.js";
import { LLamaCpp, type LoadConfig } from "llama-node/dist/llm/llama-cpp.js";
import path from "path";

const model = path.resolve(process.cwd(), "../ggml-vicuna-7b-1.1-q4_1.bin");
@@ -20,8 +20,6 @@ const config: LoadConfig = {
useMmap: true,
};

llama.load(config);

const prompt = `Who is the president of the United States?`;

const params = {
@@ -34,4 +32,10 @@ const params = {
prompt,
};

llama.getEmbedding(params).then(console.log);
const run = async () => {
await llama.load(config);

await llama.getEmbedding(params).then(console.log);
};

run();
36 changes: 20 additions & 16 deletions example/ts/llama-cpp/inference.ts
@@ -1,5 +1,6 @@
import type { LlamaInvocation } from "@llama-node/llama-cpp";
import { LLM } from "llama-node";
import { LLamaCpp, LoadConfig } from "llama-node/dist/llm/llama-cpp.js";
import { LLamaCpp, type LoadConfig } from "llama-node/dist/llm/llama-cpp.js";
import path from "path";

const model = path.resolve(process.cwd(), "../ggml-vicuna-7b-1.1-q4_1.bin");
@@ -20,25 +21,28 @@ const config = {
useMmap: true,
};

llama.load(config);

const template = `How are you?`;

const prompt = `A chat between a user and an assistant.
USER: ${template}
ASSISTANT:`;

llama.createCompletion(
{
nThreads: 4,
nTokPredict: 2048,
topK: 40,
topP: 0.1,
temp: 0.2,
repeatPenalty: 1,
prompt,
},
(response) => {
const params: LlamaInvocation = {
nThreads: 4,
nTokPredict: 2048,
topK: 40,
topP: 0.1,
temp: 0.2,
repeatPenalty: 1,
prompt,
};

const run = async () => {
await llama.load(config);

await llama.createCompletion(params, (response) => {
process.stdout.write(response.token);
}
);
});
};

run();
12 changes: 8 additions & 4 deletions example/ts/llama-cpp/tokenize.ts
@@ -1,5 +1,5 @@
import { LLM } from "llama-node";
import { LLamaCpp, LoadConfig } from "llama-node/dist/llm/llama-cpp.js";
import { LLamaCpp, type LoadConfig } from "llama-node/dist/llm/llama-cpp.js";
import path from "path";

const model = path.resolve(process.cwd(), "../ggml-vicuna-7b-1.1-q4_1.bin");
@@ -20,8 +20,12 @@ const config: LoadConfig = {
useMmap: true,
};

llama.load(config);

const content = "how are you?";

llama.tokenize({ content, nCtx: 2048 }).then(console.log);
const run = async () => {
await llama.load(config);

await llama.tokenize({ content, nCtx: 2048 }).then(console.log);
};

run();
4 changes: 2 additions & 2 deletions example/ts/llama-rs/embedding.ts
@@ -7,8 +7,6 @@ const model = path.resolve(process.cwd(), "../ggml-alpaca-7b-q4.bin");

const llama = new LLM(LLamaRS);

llama.load({ path: model });

const getWordEmbeddings = async (prompt: string, file: string) => {
const data = await llama.getEmbedding({
prompt,
@@ -30,6 +28,8 @@ const getWordEmbeddings = async (prompt: string, file: string) => {
};

const run = async () => {
await llama.load({ path: model });

const dog1 = `My favourite animal is the dog`;
await getWordEmbeddings(dog1, "./example/semantic-compare/dog1.json");

38 changes: 21 additions & 17 deletions example/ts/llama-rs/inference.ts
@@ -1,3 +1,4 @@
import type { LLamaInferenceArguments } from "@llama-node/core";
import { LLM } from "llama-node";
import { LLamaRS } from "llama-node/dist/llm/llama-rs.js";
import path from "path";
@@ -6,8 +7,6 @@ const model = path.resolve(process.cwd(), "../ggml-alpaca-7b-q4.bin");

const llama = new LLM(LLamaRS);

llama.load({ path: model });

const template = `how are you`;

const prompt = `Below is an instruction that describes a task. Write a response that appropriately completes the request.
@@ -18,19 +17,24 @@ ${template}
### Response:`;

llama.createCompletion(
{
prompt,
numPredict: 128,
temp: 0.2,
topP: 1,
topK: 40,
repeatPenalty: 1,
repeatLastN: 64,
seed: 0,
feedPrompt: true,
},
(response) => {
const params: LLamaInferenceArguments = {
prompt,
numPredict: 128,
temp: 0.2,
topP: 1,
topK: 40,
repeatPenalty: 1,
repeatLastN: 64,
seed: 0,
feedPrompt: true,
};

const run = async () => {
await llama.load({ path: model });

await llama.createCompletion(params, (response) => {
process.stdout.write(response.token);
}
);
});
};

run();