diff --git a/README.md b/README.md
index 8dcc4da..07e4fd1 100644
--- a/README.md
+++ b/README.md
@@ -27,7 +27,7 @@ Make sure you have [Node.js](https://nodejs.org/en/) (**download current**) inst
 ```bash
 npm install -g catai
-catai install llama3-8b-openhermes-dpo-q3_k_s
+catai install meta-llama-3-8b-q4_k_m
 catai up
 ```
@@ -118,14 +118,47 @@ const data = await response.text();
 For more information, please read the [API guide](https://github.com/withcatai/catai/blob/main/docs/api.md)
-## Development API + Node-llama-cpp@beta integration
+## Development API
+
+You can also use the development API to interact with the model.
+
+```ts
+import {createChat, downloadModel, initCatAILlama, LlamaJsonSchemaGrammar} from "catai";
+
+// skip downloading the model if you already have it
+await downloadModel("meta-llama-3-8b-q4_k_m");
+
+const llama = await initCatAILlama();
+const chat = await createChat({
+    model: "meta-llama-3-8b-q4_k_m"
+});
+
+const fullResponse = await chat.prompt("Give me array of random numbers (10 numbers)", {
+    grammar: new LlamaJsonSchemaGrammar(llama, {
+        type: "array",
+        items: {
+            type: "number",
+            minimum: 0,
+            maximum: 100
+        },
+    }),
+    topP: 0.8,
+    temperature: 0.8,
+});
+
+console.log(fullResponse); // [10, 2, 3, 4, 6, 9, 8, 1, 7, 5]
+```
+
+(For the full list of models, run `catai models`)
+
+### Node-llama-cpp@beta low level integration
 You can use the model with [node-llama-cpp@beta](https://github.com/withcatai/node-llama-cpp/pull/105)
 CatAI enables you to easily manage the models and chat with them.
 ```ts
-import {downloadModel, getModelPath} from 'catai';
+import {downloadModel, getModelPath, initCatAILlama, LlamaChatSession} from 'catai';
 // download the model, skip if you already have the model
 await downloadModel(
@@ -136,7 +169,7 @@ await downloadModel(
 // get the model path with catai
 const modelPath = getModelPath("llama3");
-const llama = await getLlama();
+const llama = await initCatAILlama();
 const model = await llama.loadModel({
     modelPath
 });
diff --git a/models.json b/models.json
index 4313e84..3f87c37 100644
--- a/models.json
+++ b/models.json
@@ -528,70 +528,114 @@
         },
         "version": 1
     },
-    "alphallama3-8b-q3_k_s": {
+    "meta-llama-3-8b-q4_k_m": {
         "download": {
             "files": {
-                "model": "Alphallama3-8B.Q3_K_S.gguf"
+                "model": "Meta-Llama-3-8B.Q4_K_M.gguf"
             },
-            "repo": "https://huggingface.co/mradermacher/Alphallama3-8B-GGUF",
-            "commit": "738ab183a3e2ce92b96c9273e5d78960387ad939",
+            "repo": "https://huggingface.co/QuantFactory/Meta-Llama-3-8B-GGUF-v2",
+            "commit": "7b15b4f184a48c035fbc5ac2876e5617b64ea885",
             "branch": "main"
         },
         "hardwareCompatibility": {
-            "ramGB": 5.3,
+            "ramGB": 5.6,
             "cpuCors": 3,
-            "compressions": "q3_k_s"
+            "compressions": "q4_k_m"
         },
         "compatibleCatAIVersionRange": [
-            "3.1.2"
+            "3.2.0"
        ],
         "settings": {
             "bind": "node-llama-cpp-v2"
         },
         "version": 1
     },
-    "llama3-8b-dpo-uncensored-q4_k_s": {
+    "llama-3-8b-lexi-uncensored-q4_k_m": {
         "download": {
             "files": {
-                "model": "Llama3-8B-DPO-uncensored.Q4_K_S.gguf"
+                "model": "Llama-3-8B-Lexi-Uncensored.Q4_K_M.gguf"
             },
-            "repo": "https://huggingface.co/mradermacher/Llama3-8B-DPO-uncensored-GGUF",
-            "commit": "af5654c362a9967e2f704658f8aad7429cfcffb7",
+            "repo": "https://huggingface.co/QuantFactory/Llama-3-8B-Lexi-Uncensored-GGUF",
+            "commit": "5caac86e58458f70d7ff02ad2b7d99a850d61d4b",
             "branch": "main"
         },
         "hardwareCompatibility": {
-            "ramGB": 5.3,
+            "ramGB": 5.6,
             "cpuCors": 3,
-            "compressions": "q4_k_s"
+            "compressions": "q4_k_m"
         },
         "compatibleCatAIVersionRange": [
-            "3.1.2"
+            "3.2.0"
         ],
         "settings": {
"bind": "node-llama-cpp-v2" }, "version": 1 }, - "llama3-8b-openhermes-dpo-q3_k_s": { + "power-wizardlm-2-13b-q5_k_m": { "download": { "files": { - "model": "Llama3-8B-OpenHermes-DPO.Q3_K_S.gguf" + "model": "Power-WizardLM-2-13b.Q5_K_M.gguf" }, - "repo": "https://huggingface.co/mradermacher/Llama3-8B-OpenHermes-DPO-GGUF", - "commit": "c0edd26cf8259267807d02ad8903faac593b099d", + "repo": "https://huggingface.co/mradermacher/Power-WizardLM-2-13b-GGUF", + "commit": "15ecbe0d095df08b49017db3b223433cd89153fc", "branch": "main" }, "hardwareCompatibility": { - "ramGB": 5.3, + "ramGB": 9.8, + "cpuCors": 5, + "compressions": "q5_k_m" + }, + "compatibleCatAIVersionRange": [ + "3.2.0" + ], + "settings": { + "bind": "node-llama-cpp-v2" + }, + "version": 1 + }, + "power-llama-3-13b-q4_k_m": { + "download": { + "files": { + "model": "Power-Llama-3-13b.Q4_K_M.gguf" + }, + "repo": "https://huggingface.co/mradermacher/Power-Llama-3-13b-GGUF", + "commit": "0a61b3cce433745691cb73c5609c249b9b9848e9", + "branch": "main" + }, + "hardwareCompatibility": { + "ramGB": 8.4, + "cpuCors": 4, + "compressions": "q4_k_m" + }, + "compatibleCatAIVersionRange": [ + "3.2.0" + ], + "settings": { + "bind": "node-llama-cpp-v2" + }, + "version": 1 + }, + "arrowpro-7b-robinhood-q4_k_m": { + "download": { + "files": { + "model": "ArrowPro-7B-RobinHood.Q4_K_M.gguf\\" + }, + "repo": "https://huggingface.co/mradermacher/ArrowPro-7B-RobinHood-GGUF", + "commit": "54be3527006ac83c14d74d25b2573f81285077bc", + "branch": "main" + }, + "hardwareCompatibility": { + "ramGB": 4.6, "cpuCors": 3, - "compressions": "q3_k_s" + "compressions": "q4_k_m" }, "compatibleCatAIVersionRange": [ - "3.1.2" + "3.2.0" ], "settings": { "bind": "node-llama-cpp-v2" }, "version": 1 } -} +} \ No newline at end of file diff --git a/server/scripts/new-model.js b/server/scripts/new-model.js index 37ab1a7..52664ff 100644 --- a/server/scripts/new-model.js +++ b/server/scripts/new-model.js @@ -69,6 +69,7 @@ const fileCompressionParametersToSize = { }, 'q4_k_m': { 7: 4.1, + 8: 5.1, 13: 7.9, 30: 19.6, 34: 20.2, diff --git a/server/src/index.ts b/server/src/index.ts index bbec0e1..c00a3ba 100644 --- a/server/src/index.ts +++ b/server/src/index.ts @@ -4,6 +4,9 @@ import createChat, {getModelPath} from './manage-models/bind-class/bind-class.js import CatAIDB from './storage/app-db.js'; import ENV_CONFIG from './storage/config.js'; import {CatAIError} from './errors/CatAIError.js'; +import {initCatAILlama} from './manage-models/bind-class/binds/node-llama-cpp/node-llama-cpp-v2/node-llama-cpp-v2.js'; + +export * from 'node-llama-cpp'; const downloadModel = FetchModels.simpleDownload; @@ -15,5 +18,6 @@ export { CatAIDB, getModelPath, downloadModel, + initCatAILlama, ENV_CONFIG as CATAI_ENV_CONFIG, }; diff --git a/server/src/manage-models/bind-class/bind-class.ts b/server/src/manage-models/bind-class/bind-class.ts index 99d68e4..24238a2 100644 --- a/server/src/manage-models/bind-class/bind-class.ts +++ b/server/src/manage-models/bind-class/bind-class.ts @@ -6,6 +6,8 @@ import {ModelNotInstalledError} from './errors/ModelNotInstalledError.js'; import {NoActiveModelError} from './errors/NoActiveModelError.js'; import {NoModelBindError} from './errors/NoModelBindError.js'; import {BindNotFoundError} from './errors/BindNotFoundError.js'; +import {ChatContext} from './chat-context.js'; +import type {LLamaChatPromptOptions} from 'node-llama-cpp'; export const ALL_BINDS = [NodeLlamaCppV2]; const cachedBinds: { [key: string]: InstanceType } = {}; @@ -37,7 +39,7 @@ export function 
 }
 const lockContext = {};
-export default async function createChat(options?: CreateChatOptions) {
+export default async function createChat(options?: CreateChatOptions): Promise> {
     return await withLock(lockContext, "createChat", async () => {
         const modelDetails = findLocalModel(options?.model);
         const cachedBindClass = getCacheBindClass(modelDetails);
diff --git a/server/src/manage-models/bind-class/binds/base-bind-class.ts b/server/src/manage-models/bind-class/binds/base-bind-class.ts
index 194af62..1eb7d1b 100644
--- a/server/src/manage-models/bind-class/binds/base-bind-class.ts
+++ b/server/src/manage-models/bind-class/binds/base-bind-class.ts
@@ -1,16 +1,16 @@
 import {ModelSettings} from '../../../storage/app-db.js';
 import {ChatContext} from '../chat-context.js';
-import {NodeLlamaCppOptions} from "./node-llama-cpp/node-llama-cpp-v2/node-llama-cpp-v2.js";
+import {NodeLlamaCppOptions} from './node-llama-cpp/node-llama-cpp-v2/node-llama-cpp-v2.js';
 export type CreateChatOptions = NodeLlamaCppOptions & {
     model: string
 }
-export default abstract class BaseBindClass {
+export default abstract class BaseBindClass {
     public static shortName?: string;
     public static description?: string;
-    public constructor(public modelSettings: ModelSettings) {
+    public constructor(public modelSettings: ModelSettings) {
     }
     public abstract initialize(): Promise | void;
diff --git a/server/src/manage-models/bind-class/binds/node-llama-cpp/node-llama-cpp-v2/node-llama-cpp-chat.ts b/server/src/manage-models/bind-class/binds/node-llama-cpp/node-llama-cpp-v2/node-llama-cpp-chat.ts
index fa3c48b..7dbb744 100644
--- a/server/src/manage-models/bind-class/binds/node-llama-cpp/node-llama-cpp-v2/node-llama-cpp-chat.ts
+++ b/server/src/manage-models/bind-class/binds/node-llama-cpp/node-llama-cpp-v2/node-llama-cpp-chat.ts
@@ -1,14 +1,18 @@
 import type {LLamaChatPromptOptions, LlamaChatSession, Token} from 'node-llama-cpp';
 import {ChatContext} from '../../../chat-context.js';
-import objectAssignDeep from "object-assign-deep";
-export default class NodeLlamaCppChat extends ChatContext {
+export default class NodeLlamaCppChat extends ChatContext {
     constructor(protected _promptSettings: Partial, private _session: LlamaChatSession) {
         super();
     }
-    public async prompt(prompt: string, onTokenText?: (token: string) => void, overrideSettings?: Partial): Promise {
+    public async prompt(prompt: string, onTokenText?: ((token: string) => void) | Partial, overrideSettings?: Partial): Promise {
+        if (typeof onTokenText !== 'function') {
+            overrideSettings = onTokenText as Partial;
+            onTokenText = undefined;
+        }
+
         this.emit('abort', 'Aborted by new prompt');
         const abort = new AbortController();
         const closeCallback = () => {
@@ -19,7 +23,7 @@ export default class NodeLlamaCppChat extends ChatContext {
         let response = null;
         try {
-            const allSettings: LLamaChatPromptOptions = objectAssignDeep({}, this._promptSettings, overrideSettings);
+            const allSettings: LLamaChatPromptOptions = Object.assign({}, this._promptSettings, overrideSettings);
             response = await this._session.prompt(prompt, {
                 ...allSettings,
                 signal: abort.signal,
diff --git a/server/src/manage-models/bind-class/binds/node-llama-cpp/node-llama-cpp-v2/node-llama-cpp-v2.ts b/server/src/manage-models/bind-class/binds/node-llama-cpp/node-llama-cpp-v2/node-llama-cpp-v2.ts
index 43747f5..403ed06 100644
--- a/server/src/manage-models/bind-class/binds/node-llama-cpp/node-llama-cpp-v2/node-llama-cpp-v2.ts
+++ b/server/src/manage-models/bind-class/binds/node-llama-cpp/node-llama-cpp-v2/node-llama-cpp-v2.ts
@@ -1,13 +1,6 @@
-import type {
-    LLamaChatPromptOptions,
-    LlamaChatSessionOptions,
-    LlamaContextOptions,
-    LlamaModel,
-    LlamaModelOptions
-} from 'node-llama-cpp';
+import {getLlama, Llama, LLamaChatPromptOptions, LlamaChatSession, LlamaChatSessionOptions, LlamaContextOptions, LlamaModel, LlamaModelOptions, LlamaOptions} from 'node-llama-cpp';
 import NodeLlamaCppChat from './node-llama-cpp-chat.js';
 import BaseBindClass from '../../base-bind-class.js';
-import objectAssignDeep from "object-assign-deep";
 export type NodeLlamaCppOptions = Omit &
@@ -15,22 +8,28 @@ export type NodeLlamaCppOptions = Omit &
     LLamaChatPromptOptions;
+
+let cachedLlama: Llama | null = null;
+
+export async function initCatAILlama(options?: LlamaOptions) {
+    return cachedLlama = await getLlama(options);
+}
+
 export default class NodeLlamaCppV2 extends BaseBindClass {
     public static override shortName = 'node-llama-cpp-v2';
     public static override description = 'node-llama-cpp v2, that support GGUF model, and advanced feature such as output format, max tokens and much more';
     private _model?: LlamaModel;
-    private _package?: typeof import('node-llama-cpp');
     async createChat(overrideSettings?: NodeLlamaCppOptions) {
-        if (!this._model || !this._package)
+        if (!this._model)
             throw new Error('Model not initialized');
-        const settings= objectAssignDeep({}, this.modelSettings.settings, overrideSettings);
+        const settings = Object.assign({}, this.modelSettings.settings, overrideSettings);
         const context = await this._model.createContext({
             ...settings
         });
-        const session = new this._package.LlamaChatSession({
+        const session = new LlamaChatSession({
             contextSequence: context.getSequence(),
             ...settings
         });
@@ -39,10 +38,7 @@ export default class NodeLlamaCppV2 extends BaseBindClass {
     }
     async initialize(): Promise {
-        const {getLlama, ...others} = await import('node-llama-cpp');
-        this._package = others as any;
-
-        const llama = await getLlama();
+        const llama = cachedLlama ?? await initCatAILlama();
         this._model = await llama.loadModel({
             modelPath: this.modelSettings.downloadedFiles.model,
             ...this.modelSettings.settings
diff --git a/server/src/manage-models/bind-class/chat-context.ts b/server/src/manage-models/bind-class/chat-context.ts
index 35cb648..c5b2052 100644
--- a/server/src/manage-models/bind-class/chat-context.ts
+++ b/server/src/manage-models/bind-class/chat-context.ts
@@ -10,14 +10,13 @@ export interface ChatContextEvents {
     emit(event: 'modelResponseEnd'): boolean;
 }
-export abstract class ChatContext extends EventEmitter implements ChatContextEvents {
+export abstract class ChatContext extends EventEmitter implements ChatContextEvents {
     /**
      * Prompt the model and stream the response
-     * @param prompt
-     * @param onToken
     */
-    abstract prompt(prompt: string, onToken?: (token: string) => void): Promise;
+    abstract prompt(prompt: string, overrideSettings?: Partial): Promise;
+    abstract prompt(prompt: string, onToken?: (token: string) => void, overrideSettings?: Partial): Promise;
     /**
      * Abort the model response
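
To illustrate the API surface this diff introduces (`initCatAILlama` and the new `prompt(prompt, overrideSettings)` overload added to `chat-context.ts`), here is a minimal usage sketch. It uses only exports and option names that appear in the diff above; the model name is the one added to `models.json`, and it assumes that model is already installed.

```ts
import {createChat, initCatAILlama} from 'catai';

// Create (and cache) the llama runtime once; later binds reuse it, per this diff.
await initCatAILlama();

const chat = await createChat({
    model: 'meta-llama-3-8b-q4_k_m'
});

// New overload: pass prompt options directly as the second argument.
const short = await chat.prompt('Summarize CatAI in one sentence.', {
    temperature: 0.2
});

// Existing form still works: token callback second, overrides third.
const streamed = await chat.prompt(
    'Tell me a short joke.',
    token => process.stdout.write(token),
    {temperature: 0.8}
);

console.log({short, streamed});
```

Caching the `Llama` instance in `initCatAILlama` lets the high-level chat API and the low-level `node-llama-cpp` usage share one runtime instead of loading it twice, which is why `initialize()` now falls back to `cachedLlama ?? await initCatAILlama()`.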