diff --git a/docs/architecture.mdx b/docs/architecture.mdx index 1f7f84c..f2cd598 100644 --- a/docs/architecture.mdx +++ b/docs/architecture.mdx @@ -5,7 +5,7 @@ slug: "architecture" --- :::warning -🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. +🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. ::: ## Introduction diff --git a/docs/basic-usage/command-line.md b/docs/basic-usage/command-line.md index 6a87f1c..05a10fe 100644 --- a/docs/basic-usage/command-line.md +++ b/docs/basic-usage/command-line.md @@ -5,7 +5,7 @@ slug: "command-line" --- :::warning -🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. +🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. ::: Cortex has a [Docker](https://docs.docker.com/engine/reference/commandline/cli/) and [Ollama](https://ollama.com/)-inspired [CLI syntax](/docs/cli) for running model operations. diff --git a/docs/basic-usage/integration/js-library.md b/docs/basic-usage/integration/js-library.md index cd15ce2..e2d83fc 100644 --- a/docs/basic-usage/integration/js-library.md +++ b/docs/basic-usage/integration/js-library.md @@ -5,10 +5,10 @@ slug: "ts-library" --- :::warning -🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. +🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. ::: -Cortex can be used in a Typescript application with the `cortex.js` library. Cortex provides a Typescript client library as a **fork of OpenAI's [Typescript library](https://github.com/openai/openai-node)** with additional methods for Local AI. +Cortex.cpp can be used in a Typescript application with the `cortex.js` library. Cortex.cpp provides a Typescript client library as a **fork of OpenAI's [Typescript library](https://github.com/openai/openai-node)** with additional methods for Local AI. ## Installation @@ -18,19 +18,19 @@ npm install @janhq/cortexso-node ## Usage -1. Replace the OpenAI import with Cortex in your application: +1. Replace the OpenAI import with Cortex.cpp in your application: ```diff - import OpenAI from 'openai'; + import Cortex from '@janhq/cortexso-node'; ``` -2. Modify the initialization of the client to use Cortex: +2. 
Modify the initialization of the client to use Cortex.cpp:

```diff
- const openai = new OpenAI({
+ const cortex = new Cortex({
- baseURL: "BASE_URL", // The default base URL for Cortex is 'http://localhost:1337'
+ baseURL: "BASE_URL", // The default base URL for Cortex is 'http://localhost:3928'
 apiKey: "OPENAI_API_KEY", // This can be omitted if using the default
});
@@ -43,7 +43,7 @@ import Cortex from "@janhq/cortexso-node";

async function inference() {
  const cortex = new Cortex({
-    baseURL: "http://localhost:1337/v1",
+    baseURL: "http://localhost:3928/v1",
    apiKey: "",
  });
diff --git a/docs/basic-usage/integration/py-library.md b/docs/basic-usage/integration/py-library.md
index 765b53e..3e126d0 100644
--- a/docs/basic-usage/integration/py-library.md
+++ b/docs/basic-usage/integration/py-library.md
@@ -5,9 +5,9 @@ slug: "py-library"
---

:::warning
-🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
+🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
:::

-Cortex can be used in a Python application with the `cortex.py` library. Cortex provides a Python client library as a **fork of OpenAI's [Python library](https://github.com/openai/openai-python)** with additional methods for Local AI.
+Cortex.cpp can be used in a Python application with the `cortex.py` library. Cortex.cpp provides a Python client library as a **fork of OpenAI's [Python library](https://github.com/openai/openai-python)** with additional methods for Local AI.

## Installation
```py
pip install @janhq/cortex-python
```

## Usage

-1. Replace the OpenAI import with Cortex in your application:
+1. Replace the OpenAI import with Cortex.cpp in your application:

```diff
- from openai import OpenAI
+ from @janhq/cortex-python import Cortex
```

-2. Modify the initialization of the client to use Cortex:
+2. Modify the initialization of the client to use Cortex.cpp:

```diff
- client = OpenAI(api_key='your-api-key')
@@ -36,7 +36,7 @@ pip install @janhq/cortex-python
```py
from @janhq/cortex-python import Cortex

-client = OpenAI(base_url="http://localhost:1337", api_key="cortex")
+client = Cortex(base_url="http://localhost:3928", api_key="cortex")

model = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
client.models.start(model=model)
diff --git a/docs/basic-usage/server.mdx b/docs/basic-usage/server.mdx
index 1819342..3a74bab 100644
--- a/docs/basic-usage/server.mdx
+++ b/docs/basic-usage/server.mdx
@@ -8,10 +8,10 @@ import Tabs from "@theme/Tabs";
import TabItem from "@theme/TabItem";

:::warning
-🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
+🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
:::

-Cortex has an [API server](https://cortex.so/api-reference) that runs at `localhost:1337`.
+Cortex has an [API server](https://cortex.so/api-reference) that runs at `localhost:3928`.
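+
+To confirm the server is reachable before calling the endpoints below, you can list the available models. This is a sketch that assumes the OpenAI-compatible `GET /v1/models` listing route from the API reference:
+
+```bash
+# Quick sanity check: list the models served by the local API server (default port 3928)
+curl http://localhost:3928/v1/models
+```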
## Usage @@ -28,10 +28,10 @@ cortex --dataFolder ```bash # Pull a model curl --request POST \ - --url http://localhost:1337/v1/models/mistral/pull + --url http://localhost:3928/v1/models/mistral/pull # Start the model curl --request POST \ - --url http://localhost:1337/v1/models/mistral/start \ + --url http://localhost:3928/v1/models/mistral/start \ --header 'Content-Type: application/json' \ --data '{ "prompt_template": "system\n{system_message}\nuser\n{prompt}\nassistant", @@ -47,19 +47,19 @@ curl --request POST \ "flash_attn": true, "cache_type": "f16", "use_mmap": true, - "engine": "cortex.llamacpp" + "engine": "llamacpp" }' ``` ### Show the Model State ```bash # Check the model status curl --request GET \ - --url http://localhost:1337/v1/system/events/model + --url http://localhost:3928/v1/system/events/model ``` ### Chat with Model ```bash # Invoke the chat completions endpoint -curl http://localhost:1337/v1/chat/completions \ +curl http://localhost:3928/v1/chat/completions \ -H "Content-Type: application/json" \ -d '{ "model": "", @@ -85,11 +85,11 @@ curl http://localhost:1337/v1/chat/completions \ ```bash # Stop a model curl --request POST \ - --url http://localhost:1337/v1/models/mistral/stop + --url http://localhost:3928/v1/models/mistral/stop ``` ### Pull Model ```bash # Pull a model curl --request POST \ - --url http://localhost:1337/v1/models/mistral/pull + --url http://localhost:3928/v1/models/mistral/pull ``` \ No newline at end of file diff --git a/docs/benchmarking-architecture.mdx b/docs/benchmarking-architecture.mdx index 3f04022..fe2d0fe 100644 --- a/docs/benchmarking-architecture.mdx +++ b/docs/benchmarking-architecture.mdx @@ -6,7 +6,7 @@ slug: "benchmarking-architecture" :::warning -🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. +🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. ::: ## Architecture diff --git a/docs/built-in-models.mdx b/docs/built-in-models.mdx index 89570cd..836c2d8 100644 --- a/docs/built-in-models.mdx +++ b/docs/built-in-models.mdx @@ -8,10 +8,10 @@ import TabItem from "@theme/TabItem"; :::warning -🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. +🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. ::: -Cortex maintains a collection of built-in models that cover the most popular open-source models. +Cortex.cpp maintains a collection of built-in models that cover the most popular open-source models. ## Cortex Model Repos Built-in models are [Cortex Model Repositories](/docs/hub/cortex-hub) hosted on HuggingFace and pre-compiled for different engines, allowing one model to have multiple branches in various formats. diff --git a/docs/chat-completions.mdx b/docs/chat-completions.mdx index 25bdacf..952e1fc 100644 --- a/docs/chat-completions.mdx +++ b/docs/chat-completions.mdx @@ -8,7 +8,7 @@ import Tabs from "@theme/Tabs"; import TabItem from "@theme/TabItem"; :::warning -🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. +🚧 Cortex.cpp is currently under development. 
Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. ::: Cortex's Chat API is compatible with OpenAI’s [Chat Completions](https://platform.openai.com/docs/api-reference/chat) endpoint. It is a drop-in replacement for local inference. @@ -32,7 +32,7 @@ cortex chat --model mistral ```bash - curl http://localhost:1337/v1/chat/completions \ + curl http://localhost:3928/v1/chat/completions \ -H "Content-Type: application/json" \ -d '{ "model": "", @@ -58,7 +58,7 @@ cortex chat --model mistral ```bash - curl http://localhost:1337/v1/chat/completions \ + curl http://localhost:3928/v1/chat/completions \ -H "Content-Type: application/json" \ -d '{ "messages": [ @@ -131,9 +131,9 @@ To configure each engine, refer to the [`cortex engines init`](/docs/cli/engines Learn more about our engine architecture: - cortex.cpp -- [cortex.llamacpp](/docs/cortex-llamacpp) -- cortex.tensorrt-llm -- [cortex.onnx](/docs/cortex-onnx) +- [llamacpp](/docs/cortex-llamacpp) +- tensorrt-llm +- [onnx](/docs/cortex-onnx) ### Multiple Remote APIs diff --git a/docs/cli/benchmark.mdx b/docs/cli/benchmark.mdx index 3f86347..98e7ac1 100644 --- a/docs/cli/benchmark.mdx +++ b/docs/cli/benchmark.mdx @@ -65,7 +65,7 @@ For example, it will return the following: }, model: { modelId: 'tinyllama', - engine: 'cortex.llamacpp', + engine: 'llamacpp', status: 'running', duration: '2h 38m 44s', ram: '-', diff --git a/docs/cli/chat.md b/docs/cli/chat.md index 2f77f03..7fa2270 100644 --- a/docs/cli/chat.md +++ b/docs/cli/chat.md @@ -5,7 +5,7 @@ slug: "chat" --- :::warning -🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. +🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. ::: # `cortex chat` @@ -20,7 +20,7 @@ This command starts a chat session with a specified model, allowing you to inter ## Usage ```bash -cortex chat [options] +cortex chat [options] ``` :::info This command uses a `model_id` from the model that you have downloaded or available in your file system. @@ -30,8 +30,8 @@ This command uses a `model_id` from the model that you have downloaded or availa | Option | Description | Required | Default value | Example | | ----------------------------- | ----------------------------------------------------------------------------------------------- | -------- | ------------- | ----------------------------- | -| `model_id` | Model ID to chat with. | No | - | `mistral` | -| `-m`, `--message ` | Message to send to the model | No | - | `-m "Hello, model!"` | +| `model_id` | Model ID to chat with. | Yes | - | `mistral` | +| `-m`, `--message ` | Message to send to the model | Yes | - | `-m "Hello, model!"` | | `-h`, `--help` | Display help information for the command. | No | - | `-h` | diff --git a/docs/cli/configs/get.mdx b/docs/cli/configs/get.mdx index 31209ff..503e183 100644 --- a/docs/cli/configs/get.mdx +++ b/docs/cli/configs/get.mdx @@ -4,7 +4,7 @@ description: Cortex configs subcommands. --- :::warning -🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. +🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. 
:::

# `cortex configs get`
diff --git a/docs/cli/configs/index.mdx b/docs/cli/configs/index.mdx
index 197118d..0ec9265 100644
--- a/docs/cli/configs/index.mdx
+++ b/docs/cli/configs/index.mdx
@@ -3,7 +3,7 @@ title: Cortex Configs
---

:::warning
-🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
+🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
:::

# `cortex configs`
diff --git a/docs/cli/configs/list.mdx b/docs/cli/configs/list.mdx
index e37c8a8..303bb01 100644
--- a/docs/cli/configs/list.mdx
+++ b/docs/cli/configs/list.mdx
@@ -4,7 +4,7 @@ description: Cortex configs subcommands.
---

:::warning
-🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
+🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
:::

# `cortex configs list`
diff --git a/docs/cli/configs/set.mdx b/docs/cli/configs/set.mdx
index 10f63fb..0e659bc 100644
--- a/docs/cli/configs/set.mdx
+++ b/docs/cli/configs/set.mdx
@@ -4,7 +4,7 @@ description: Cortex configs subcommands.
---

:::warning
-🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
+🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
:::

# `cortex configs set`
diff --git a/docs/cli/cortex.md b/docs/cli/cortex.md
index fd83190..7d897bb 100644
--- a/docs/cli/cortex.md
+++ b/docs/cli/cortex.md
@@ -5,15 +5,15 @@ slug: /cli
---

:::warning
-🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
+🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
:::

# Cortex

:::info
-This is the initial command you need to run to start using Cortex.
+This is the initial command you need to run to start using Cortex.cpp.
:::

-This command starts the Cortex process and the API server, which runs on port `1337` by default.
+This command starts the Cortex.cpp process and the API server, which runs on port `3928` by default.

## Usage

@@ -25,11 +25,11 @@ cortex [command] [options]

| Option | Description | Required | Default value | Example |
| ---------------------------- | ----------------------------------------- | -------- | ------------- | ----------------------------- |
-| `-a`, `--address <address>` | Address to use. | No | - | `-a 192.168.1.1` |
-| `-p`, `--port <port>` | Port to serve the application. | No | - | `-p 1337` |
| `-v`, `--version` | Show version. | No | - | `-v` |
| `-h`, `--help` | Display help information for the command. | No | - | `-h` |
| `--verbose` | Show the detailed command logs | No | - | `--verbose` |
+
@@ -46,11 +46,8 @@ For example:
- [cortex models](/docs/cli/models): Manage and configure models.
- [cortex chat](/docs/cli/chat): Send a chat request to a model.
- [cortex ps](/docs/cli/ps): Display active models and their operational status.
-- [cortex presets](/docs/cli/presets): Show all the available model presets within Cortex.
- [cortex embeddings](/docs/cli/embeddings): Create an embedding vector representing the input text.
-- [cortex benchmark](/docs/cli/benchmark): Benchmark and analyze the performance of a specific AI model using your system.
-- [cortex engines](/docs/cli/engines): Manage Cortex engines.
+- [cortex engines](/docs/cli/engines): Manage Cortex.cpp engines.
- [cortex pull|download](/docs/cli/pull): Download a model.
- [cortex run](/docs/cli/run): Shortcut to start a model and chat.
-- [cortex telemetry](/docs/cli/telemetry): Retrieve telemetry logs for monitoring and analysis.
-- [cortex stop](/docs/cli/stop): Stop the API server.
+- [cortex update](/docs/cli/update): Update the Cortex.cpp version.
diff --git a/docs/cli/embeddings.mdx b/docs/cli/embeddings.mdx
index 9d59183..6713326 100644
--- a/docs/cli/embeddings.mdx
+++ b/docs/cli/embeddings.mdx
@@ -5,7 +5,7 @@ slug: "embeddings"
---

:::warning
-🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
+🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
:::

# `cortex embeddings`
diff --git a/docs/cli/engines/get.mdx b/docs/cli/engines/get.mdx
index cb12917..123c967 100644
--- a/docs/cli/engines/get.mdx
+++ b/docs/cli/engines/get.mdx
@@ -4,7 +4,7 @@ description: Cortex engines subcommands.
---

:::warning
-🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
+🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
::: # `cortex engines get` @@ -23,7 +23,7 @@ For example, it returns the following: β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ (index) β”‚ Values β”‚ β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ -β”‚ name β”‚ 'cortex.onnx' β”‚ +β”‚ name β”‚ 'onnx' β”‚ β”‚ description β”‚ 'This extension enables chat completion API calls using the Cortex engine' β”‚ β”‚ version β”‚ '0.0.1' β”‚ β”‚ productName β”‚ 'Cortex Inference Engine' β”‚ @@ -38,6 +38,6 @@ To get an engine name, run the [`engines list`](/docs/cli/engines/list) command | Option | Description | Required | Default value | Example | |-------------------|-------------------------------------------------------|----------|---------------|-----------------| -| `name` | The name of the engine that you want to retrieve. | Yes | - | `cortex.llamacpp`| +| `name` | The name of the engine that you want to retrieve. | Yes | - | `llamacpp`| | `-h`, `--help` | Display help information for the command. | No | - | `-h` | diff --git a/docs/cli/engines/index.mdx b/docs/cli/engines/index.mdx index eb7b317..53202f4 100644 --- a/docs/cli/engines/index.mdx +++ b/docs/cli/engines/index.mdx @@ -3,7 +3,7 @@ title: Cortex Engines --- :::warning -🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. +🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. ::: # `cortex engines` @@ -22,8 +22,8 @@ cortex engines [options] [subcommand] | Option | Description | Required | Default value | Example | |-------------------|-------------------------------------------------------|----------|---------------|-----------------| -| `-vk`, `--vulkan` | Install Vulkan engine. | No | `false` | `-vk` | | `-h`, `--help` | Display help information for the command. | No | - | `-h` | +{/* | `-vk`, `--vulkan` | Install Vulkan engine. 
| No | `false` | `-vk` | */} ## `cortex engines get` :::info @@ -44,7 +44,7 @@ For example, it returns the following: β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ (index) β”‚ Values β”‚ β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ -β”‚ name β”‚ 'cortex.onnx' β”‚ +β”‚ name β”‚ 'onnx' β”‚ β”‚ description β”‚ 'This extension enables chat completion API calls using the Cortex engine' β”‚ β”‚ version β”‚ '0.0.1' β”‚ β”‚ productName β”‚ 'Cortex Inference Engine' β”‚ @@ -59,7 +59,7 @@ To get an engine name, run the [`engines list`](/docs/cli/engines/list) command | Option | Description | Required | Default value | Example | |-------------------|-------------------------------------------------------|----------|---------------|-----------------| -| `engine_name` | The name of the engine that you want to retrieve. | Yes | - | `cortex.llamacpp`| +| `engine_name` | The name of the engine that you want to retrieve. | Yes | - | `llamacpp`| | `-h`, `--help` | Display help information for the command. | No | - | `-h` | ## `cortex engines list` @@ -81,13 +81,13 @@ For example, it returns the following: +---------+---------------------+-------------------------------------------------------------------------------+---------+------------------------------+-----------------+ | (Index) | name | description | version | product name | status | +---------+---------------------+-------------------------------------------------------------------------------+---------+------------------------------+-----------------+ -| 1 | cortex.onnx | This extension enables chat completion API calls using the Onnx engine | 0.0.1 +| 1 | onnx | This extension enables chat completion API calls using the Onnx engine | 0.0.1 | Onnx Inference Engine | not_initialized | +---------+---------------------+-------------------------------------------------------------------------------+---------+------------------------------+-----------------+ -| 2 | cortex.llamacpp | This extension enables chat completion API calls using the LlamaCPP engine | 0.0.1 +| 2 | llamacpp | This extension enables chat completion API calls using the LlamaCPP engine | 0.0.1 | LlamaCPP Inference Engine | ready | +---------+---------------------+-------------------------------------------------------------------------------+---------+------------------------------+-----------------+ -| 3 | cortex.tensorrt-llm | This extension enables chat completion API calls using the TensorrtLLM engine | 0.0.1 +| 3 | tensorrt-llm | This extension enables chat completion API calls using the TensorrtLLM engine | 0.0.1 | TensorrtLLM Inference Engine | not_initialized | +---------+---------------------+-------------------------------------------------------------------------------+---------+------------------------------+-----------------+ ``` @@ -111,18 +111,18 @@ This command downloads the required dependencies and installs the engine within **Usage**: ```bash -cortex engines install [options] +cortex engines install [options] ``` For Example: ```bash ## Llama.cpp engine -cortex engines install cortex.llamacpp +cortex 
engines install llamacpp ## ONNX engine -cortex engines install cortex.onnx +cortex engines install onnx ## Tensorrt-LLM engine -cortex engines install cortex.tensorrt-llm +cortex engines install tensorrt-llm ``` @@ -139,18 +139,18 @@ This command uninstalls the engine within Cortex. **Usage**: ```bash -cortex engines uninstall [options] +cortex engines uninstall [options] ``` For Example: ```bash ## Llama.cpp engine -cortex engines uninstall cortex.llamacpp +cortex engines uninstall llamacpp ## ONNX engine -cortex engines uninstall cortex.onnx +cortex engines uninstall onnx ## Tensorrt-LLM engine -cortex engines uninstall cortex.tensorrt-llm +cortex engines uninstall tensorrt-llm ``` diff --git a/docs/cli/engines/init.mdx b/docs/cli/engines/init.mdx index b51ccdf..17ff817 100644 --- a/docs/cli/engines/init.mdx +++ b/docs/cli/engines/init.mdx @@ -4,7 +4,7 @@ description: Cortex engines subcommands. --- :::warning -🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. +🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. ::: # `cortex engines init` @@ -19,13 +19,13 @@ cortex engines init [options] For Example: ```bash ## Llama.cpp engine -cortex engines init cortex.llamacpp +cortex engines init llamacpp ## ONNX engine -cortex engines init cortex.onnx +cortex engines init onnx ## Tensorrt-LLM engine -cortex engines init cortex.tensorrt-llm +cortex engines init tensorrt-llm ``` diff --git a/docs/cli/engines/list.mdx b/docs/cli/engines/list.mdx index b60be0f..af3c3c0 100644 --- a/docs/cli/engines/list.mdx +++ b/docs/cli/engines/list.mdx @@ -4,7 +4,7 @@ description: Cortex engines subcommands. --- :::warning -🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. +🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. 
::: # `cortex engines list` @@ -20,17 +20,13 @@ cortex engines list [options] ``` For example, it returns the following: ```bash -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ (index) β”‚ name β”‚ description β”‚ version β”‚ productName β”‚ -β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ -β”‚ 0 β”‚ 'cortex.llamacpp' β”‚ 'This extension enables chat completion API calls using the Cortex engine' β”‚ '0.0.1' β”‚ 'Cortex Inference Engine' β”‚ -β”‚ 1 β”‚ 'cortex.onnx' β”‚ 'This extension enables chat completion API calls using the Cortex engine' β”‚ '0.0.1' β”‚ 'Cortex Inference Engine' β”‚ -β”‚ 2 β”‚ 'cortex.tensorrt-llm' β”‚ 'This extension enables chat completion API calls using the Cortex engine' β”‚ '0.0.1' β”‚ 'Cortex Inference Engine' β”‚ -β”‚ 3 β”‚ 'openai' β”‚ 'This extension enables OpenAI chat completion API calls' β”‚ '0.0.1' β”‚ 'OpenAI Inference Engine' β”‚ -β”‚ 4 β”‚ 'groq' β”‚ 'This extension enables fast Groq chat completion API calls' β”‚ '0.0.1' β”‚ 'Groq Inference Engine' β”‚ -β”‚ 5 β”‚ 'mistral' β”‚ 'This extension enables Mistral chat completion API calls' β”‚ '0.0.1' β”‚ 'Mistral Inference Engine' β”‚ -β”‚ 6 β”‚ 'anthropic' β”‚ 'This extension enables Anthropic chat completion API calls' β”‚ '0.0.1' β”‚ 'Anthropic Inference Engine' β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ ++---+---------------+--------------------+---------+--------------+ +| # | Name | Supported Formats | Version | Status | ++---+---------------+--------------------+---------+--------------+ +| 1 | ONNXRuntime | ONNX | 0.0.1 | Incompatible | +| 2 | llama.cpp | GGUF | 0.0.1 | Ready | +| 3 | TensorRT-LLM | TensorRT Engines | 0.0.1 | Incompatible | ++---+---------------+--------------------+---------+--------------+ ``` ## Options diff --git a/docs/cli/models/download.md b/docs/cli/models/download.md index e401f00..e63516a 100644 --- a/docs/cli/models/download.md +++ b/docs/cli/models/download.md @@ -4,7 +4,7 @@ description: Cortex models subcommands. --- :::warning -🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. +🚧 Cortex.cpp is currently under development. 
Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
:::

# `cortex models pull`
diff --git a/docs/cli/models/get.md b/docs/cli/models/get.md
index cedbac5..750430a 100644
--- a/docs/cli/models/get.md
+++ b/docs/cli/models/get.md
@@ -4,7 +4,7 @@ description: Cortex models subcommands.
---

:::warning
-🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
+🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
:::

# `cortex models get`
@@ -35,7 +35,7 @@ For example, it returns the following:
  stream: true,
  ngl: 33,
  ctx_len: 4096,
-  engine: 'cortex.llamacpp',
+  engine: 'llamacpp',
  prompt_template: '<|system|>\n{system_message}<|user|>\n{prompt}<|assistant|>',
  id: 'tinyllama',
  created: 1720659351720,
diff --git a/docs/cli/models/index.md b/docs/cli/models/index.md
index 38fff6b..f1f79e0 100644
--- a/docs/cli/models/index.md
+++ b/docs/cli/models/index.md
@@ -3,7 +3,7 @@ title: Cortex Models
---

:::warning
-🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
+🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
:::

# `cortex models`
@@ -57,7 +57,7 @@ max_tokens: 4096
stream: true
ngl: 33
ctx_len: 4096
-engine: cortex.llamacpp
+engine: llamacpp
prompt_template:
<|system|>
@@ -125,9 +125,9 @@ For example, it returns the following:
+---------+----------------+-----------------+---------+
| (Index) | ID | engine | version |
+---------+----------------+-----------------+---------+
-| 1 | tinyllama-gguf | cortex.llamacpp | 1 |
+| 1 | tinyllama-gguf | llamacpp | 1 |
+---------+----------------+-----------------+---------+
-| 2 | tinyllama | cortex.llamacpp | 1 |
+| 2 | tinyllama | llamacpp | 1 |
+---------+----------------+-----------------+---------+
```
@@ -155,10 +155,10 @@ This command starts a model defined by a `model_id`.
cortex models start <model_id>

# Start a model with a preset
-cortex models start [options] <preset>
+cortex models start [options] <model_id> <preset>

# Start with a specified engine
-cortex models start <model_id>:[engine] [options]
+cortex models start [options] <model_id>:[engine]
```
@@ -170,7 +170,7 @@ This command uses a `model_id` from the model that you have downloaded or availa

| Option | Description | Required | Default value | Example |
|---------------------------|---------------------------------------------------------------------------|----------|----------------------------------------------|------------------------|
-| `model_id` | The identifier of the model you want to start. | No | `Prompt to select from the available models` | `mistral` |
+| `model_id` | The identifier of the model you want to start. | Yes | `Prompt to select from the available models` | `mistral` |
| `-h`, `--help` | Display help information for the command. | No | - | `-h` |
@@ -224,7 +224,7 @@ This command uses a `model_id` from the model that you have downloaded or availa
| `-c`, `--options <options>` | Specify the options to update the model. Syntax: `-c option1=value1 option2=value2`. | Yes | - | `-c max_tokens=100 temperature=0.5` |
| `-h`, `--help` | Display help information for the command. | No | - | `-h` |
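+
+For instance, assuming `cortex models update` takes the same positional `model_id` as the other `models` subcommands (the full usage block is not shown here, so treat this as a sketch), an update call might look like:
+
+```bash
+# Sketch: update two model parameters in a single call
+cortex models update tinyllama -c max_tokens=100 temperature=0.5
+```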
-## `cortex models remove`
+## `cortex models delete`
:::info
This CLI command calls the following API endpoint:
- [Delete Model](/api-reference#tag/models/delete/v1/models/{id})
:::

This command deletes a local model defined by a `model_id`.

**Usage**:
```bash
-cortex models remove <model_id>
+cortex models delete <model_id>
```
:::info
This command uses a `model_id` from the model that you have downloaded or available in your file system.
:::
**Options**:
| Option | Description | Required | Default value | Example |
|---------------------------|-----------------------------------------------------------------------------|----------|----------------------|------------------------|
-| `model_id` | The identifier of the model you want to remove. | Yes | - | `mistral` |
+| `model_id` | The identifier of the model you want to delete. | Yes | - | `mistral` |
| `-h`, `--help` | Display help for command. | No | - | `-h` |
\ No newline at end of file
diff --git a/docs/cli/models/list.md b/docs/cli/models/list.md
index 5a31bb3..423f6cc 100644
--- a/docs/cli/models/list.md
+++ b/docs/cli/models/list.md
@@ -4,7 +4,7 @@ description: Cortex models subcommands.
---

:::warning
-🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
+🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
:::

# `cortex models list`
@@ -25,11 +25,11 @@ For example, it returns the following:
β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
β”‚ 0       β”‚ 'gpt-3.5-turbo'                           β”‚ 'openai'              β”‚ 1         β”‚
β”‚ 1       β”‚ 'gpt-4o'                                  β”‚ 'openai'              β”‚ 1         β”‚
-β”‚ 2       β”‚ 'llama3:onnx'                             β”‚ 'cortex.onnx'         β”‚ 1         β”‚
-β”‚ 3       β”‚ 'llama3'                                  β”‚ 'cortex.llamacpp'     β”‚ undefined β”‚
-β”‚ 4       β”‚ 'openhermes-2.5:tensorrt-llm-windows-ada' β”‚ 'cortex.tensorrt-llm' β”‚ 1         β”‚
-β”‚ 5       β”‚ 'openhermes-2.5:tensorrt-llm'             β”‚ 'cortex.tensorrt-llm' β”‚ 1         β”‚
-β”‚ 6       β”‚ 'tinyllama'                               β”‚ 'cortex.llamacpp'     β”‚ undefined β”‚
+β”‚ 2       β”‚ 'llama3:onnx'                             β”‚ 'onnx'                β”‚ 1         β”‚
+β”‚ 3       β”‚ 'llama3'                                  β”‚ 'llamacpp'            β”‚ undefined β”‚
+β”‚ 4       β”‚ 'openhermes-2.5:tensorrt-llm-windows-ada' β”‚ 'tensorrt-llm'        β”‚ 1         β”‚
+β”‚ 5       β”‚ 'openhermes-2.5:tensorrt-llm'             β”‚ 'tensorrt-llm'        β”‚ 1         β”‚
+β”‚ 6       β”‚ 'tinyllama'                               β”‚ 'llamacpp'            β”‚ undefined β”‚
β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
```
diff --git a/docs/cli/models/remove.md b/docs/cli/models/remove.md
index 4f23317..83fc32a 100644
--- a/docs/cli/models/remove.md
+++ b/docs/cli/models/remove.md
@@ -4,7 +4,7 @@ description: Cortex models subcommands.
---

:::warning
-🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
+🚧 Cortex.cpp is currently under development. 
Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. ::: # `cortex models remove` diff --git a/docs/cli/models/start.md b/docs/cli/models/start.md index f253f0e..892ea01 100644 --- a/docs/cli/models/start.md +++ b/docs/cli/models/start.md @@ -4,7 +4,7 @@ description: Cortex models subcommands. --- :::warning -🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. +🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. ::: # `cortex models start` diff --git a/docs/cli/models/stop.md b/docs/cli/models/stop.md index d1a987e..56e4f15 100644 --- a/docs/cli/models/stop.md +++ b/docs/cli/models/stop.md @@ -4,7 +4,7 @@ description: Cortex models subcommands. --- :::warning -🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. +🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. ::: # `cortex models stop` diff --git a/docs/cli/models/update.md b/docs/cli/models/update.md index b05d718..57faac1 100644 --- a/docs/cli/models/update.md +++ b/docs/cli/models/update.md @@ -4,7 +4,7 @@ description: Cortex models subcommands. --- :::warning -🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. +🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. ::: # `cortex models update` diff --git a/docs/cli/presets.mdx b/docs/cli/presets.mdx index 5a7ffec..f814028 100644 --- a/docs/cli/presets.mdx +++ b/docs/cli/presets.mdx @@ -5,7 +5,7 @@ slug: "presets" --- :::warning -🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. +🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. ::: # `cortex presets` diff --git a/docs/cli/ps.md b/docs/cli/ps.md index 0361313..cdf22e5 100644 --- a/docs/cli/ps.md +++ b/docs/cli/ps.md @@ -5,7 +5,7 @@ slug: "ps" --- :::warning -🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. +🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. 
::: # `cortex ps` @@ -33,7 +33,7 @@ For example, it returns the following table: β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β” β”‚ (index) β”‚ modelId β”‚ engine β”‚ status β”‚ duration β”‚ ram β”‚ vram β”‚ β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€ -β”‚ 0 β”‚ 'janhq/tinyllama/1b' β”‚ 'cortex.llamacpp' β”‚ 'running' β”‚ '7s' β”‚ '-' β”‚ '-' β”‚ +β”‚ 0 β”‚ 'janhq/tinyllama/1b' β”‚ 'llamacpp' β”‚ 'running' β”‚ '7s' β”‚ '-' β”‚ '-' β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”˜ √ API server is offline ## The ps command also provides information on the percentage of system resources being used. diff --git a/docs/cli/pull.md b/docs/cli/pull.md index 779cda4..5dbc478 100644 --- a/docs/cli/pull.md +++ b/docs/cli/pull.md @@ -5,7 +5,7 @@ slug: "pull" --- :::warning -🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. +🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. ::: # `cortex pull` diff --git a/docs/cli/run.md b/docs/cli/run.md index 65b383a..964ee0b 100644 --- a/docs/cli/run.md +++ b/docs/cli/run.md @@ -5,14 +5,14 @@ slug: "run" --- :::warning -🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. +🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. ::: # `cortex run` :::info This CLI command calls the following API endpoint: - [Download Model](/api-reference#tag/models/post/v1/models/{modelId}/pull) (The command only calls this endpoint if the specified model is not downloaded yet.) -- Download Engine (The command only calls this endpoint if the specified engine is not downloaded yet.) +- Install Engine (The command only calls this endpoint if the specified engine is not downloaded yet.) - [Start Model](/api-reference#tag/models/post/v1/models/{modelId}/start) - [Chat Completions](/api-reference#tag/inference/post/v1/chat/completions) (The command makes a call to this endpoint if the `-c` option is used.) ::: @@ -38,7 +38,7 @@ This command downloads and installs the model if not already available in your f | Option | Description | Required | Default value | Example | |-----------------------------|-----------------------------------------------------------------------------|----------|----------------------------------------------|------------------------| -| `model_id` | The identifier of the model you want to chat with. | No | `Prompt to select from the available models` | `mistral` | +| `model_id` | The identifier of the model you want to chat with. 
| Yes | `Prompt to select from the available models` | `mistral` | | `-h`, `--help` | Display help information for the command. | No | - | `-h` | >JS: HTTP response ``` -The diagram above illustrates the interaction between three components: `cortex-js`, `cortex-cpp`, and `cortex.onnx` when using the `onnx` engine to call the `chat completions endpoint` with the non-stream inference option: +The diagram above illustrates the interaction between three components: `cortex-js`, `cortex-cpp`, and `onnx` when using the `onnx` engine to call the `chat completions endpoint` with the non-stream inference option: 1. **HTTP Request from `cortex-js` to `cortex-cpp`**: - `cortex-js` sends an HTTP request to `cortex-cpp` for chat completion. -2. **Request Chat Completion from `cortex-cpp` to `cortex.onnx`**: - - `cortex-cpp` forwards the request to `cortex.onnx` to process the chat completion. +2. **Request Chat Completion from `cortex-cpp` to `onnx`**: + - `cortex-cpp` forwards the request to `onnx` to process the chat completion. -3. **Chat Processing in `cortex.onnx`**: - - `cortex.onnx` performs the following tasks: +3. **Chat Processing in `onnx`**: + - `onnx` performs the following tasks: - **Apply Chat Template**: Applies the chat template. - **Encode**: Encodes the input data. - **Set Search Options**: Configures search options for inference. - **Create Generator**: Creates a generator to process the request. -4. **Output Generation in `cortex.onnx`**: - - `cortex.onnx` executes the following steps to generate the response: +4. **Output Generation in `onnx`**: + - `onnx` executes the following steps to generate the response: - **Generate Output**: Generates the output based on the processed data. - **Decode Output**: Decodes the generated output. -5. **Callback from `cortex.onnx` to `cortex-cpp`**: - - Once the output is generated and ready, `cortex.onnx` sends a callback to `cortex-cpp` to indicate the completion of the chat completion process. +5. **Callback from `onnx` to `cortex-cpp`**: + - Once the output is generated and ready, `onnx` sends a callback to `cortex-cpp` to indicate the completion of the chat completion process. 6. **HTTP Response from `cortex-cpp` to `cortex-js`**: - `cortex-cpp` sends an HTTP response back to `cortex-js`, providing the generated output. @@ -252,7 +252,7 @@ The diagram above illustrates the interaction between three components: `cortex- β”‚ β”œβ”€β”€ chat_completion_request.h # OpenAI compatible request handling β”‚ β”œβ”€β”€ onnx_engine.h # Implementation onnx engine of model loading and inference | β”œβ”€β”€ onnx_engine.cc -β”œβ”€β”€ third-party # Dependencies of the cortex.onnx project +β”œβ”€β”€ third-party # Dependencies of the onnx project └── (list of third-party dependencies) ``` diff --git a/docs/cortex-tensorrt-llm.mdx b/docs/cortex-tensorrt-llm.mdx index 6a31fc4..c6dc185 100644 --- a/docs/cortex-tensorrt-llm.mdx +++ b/docs/cortex-tensorrt-llm.mdx @@ -8,14 +8,14 @@ import Tabs from "@theme/Tabs"; import TabItem from "@theme/TabItem"; :::warning -🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. +🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. ::: ## Introduction -[Cortex.tensorrt-llm](https://github.com/janhq/cortex.tensorrt-llm) is a C++ inference library for NVIDIA GPUs. 
It submodules NVIDIA’s [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) for GPU accelerated inference. +[Cortex.tensorrt-llm](https://github.com/janhq/tensorrt-llm) is a C++ inference library for NVIDIA GPUs. It submodules NVIDIA’s [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) for GPU accelerated inference. -In addition to TensorRT-LLM, `cortex.tensorrt-llm` adds: +In addition to TensorRT-LLM, `tensorrt-llm` adds: - Tokenizers for popular model architectures - Prebuilt model engines compatible with popular GPUs @@ -58,7 +58,7 @@ The command will check, download, and install these dependencies: :::info -To include `cortex.tensorrt-llm` in your own server implementation, follow the steps [here](https://github.com/janhq/cortex.tensorrt-llm/tree/rel). +To include `tensorrt-llm` in your own server implementation, follow the steps [here](https://github.com/janhq/tensorrt-llm/tree/rel). ::: #### Get TensorRT-LLM Models @@ -68,7 +68,7 @@ You can download precompiled models from the [Cortex Hub](https://huggingface.co ## Interface -`cortex.tensorrt-llm` has the following Interfaces: +`tensorrt-llm` has the following Interfaces: - **HandleChatCompletion:** Processes chat completion tasks. ```cpp @@ -109,7 +109,7 @@ import Diagram from "../src/components/Diagram" These are the main components that interact to provide an API for `inference` tasks using the `tensorrt-llm`: -1. **cortex-cpp**: Acts as an intermediary between `cortex-js` and the inference engine (`cortex.tensorrt-llm`). It processes incoming HTTP requests and forwards them to the appropriate components for handling. Once a response is generated, it sends it back to `cortex-js`. +1. **cortex-cpp**: Acts as an intermediary between `cortex-js` and the inference engine (`tensorrt-llm`). It processes incoming HTTP requests and forwards them to the appropriate components for handling. Once a response is generated, it sends it back to `cortex-js`. 2. **enginei**: Serves as an interface for the inference engine. It defines the methods and protocols used for running inference tasks. @@ -140,7 +140,7 @@ sequenceDiagram ``` -The diagram above illustrates the interaction between three components: `cortex-js`, `cortex-cpp`, and `cortex.tensorrt-llm` when using the `tensorrt-llm` engine in Cortex: +The diagram above illustrates the interaction between three components: `cortex-js`, `cortex-cpp`, and `tensorrt-llm` when using the `tensorrt-llm` engine in Cortex: 1. **HTTP Request Load Model (cortex-js to cortex-cpp)**: - `cortex-js` sends an HTTP request to `cortex-cpp` to load the model. @@ -148,23 +148,23 @@ The diagram above illustrates the interaction between three components: `cortex- 2. **Load Engine (cortex-cpp)**: - `cortex-cpp` processes the request and starts by loading the engine. -3. **Load Model (cortex-cpp to cortex.tensorrt-llm)**: - - `cortex-cpp` then sends a request to `cortex.tensorrt-llm` to load the model. +3. **Load Model (cortex-cpp to tensorrt-llm)**: + - `cortex-cpp` then sends a request to `tensorrt-llm` to load the model. -4. **Load Config (cortex.tensorrt-llm)**: - - `cortex.tensorrt-llm` begins by loading the necessary configuration. This includes parameters, settings, and other essential information needed to run the model. +4. **Load Config (tensorrt-llm)**: + - `tensorrt-llm` begins by loading the necessary configuration. This includes parameters, settings, and other essential information needed to run the model. -5. 
**Create Tokenizer (cortex.tensorrt-llm)**: - - After loading the configuration, `cortex.tensorrt-llm` creates a tokenizer. The tokenizer is responsible for converting input text into tokens that the model can understand and process. +5. **Create Tokenizer (tensorrt-llm)**: + - After loading the configuration, `tensorrt-llm` creates a tokenizer. The tokenizer is responsible for converting input text into tokens that the model can understand and process. -6. **Cache Chat Template (cortex.tensorrt-llm)**: - - Following the creation of the tokenizer, `cortex.tensorrt-llm` caches a chat template. +6. **Cache Chat Template (tensorrt-llm)**: + - Following the creation of the tokenizer, `tensorrt-llm` caches a chat template. -7. **Initialize GPT Session (cortex.tensorrt-llm)**: - - Finally, `cortex.tensorrt-llm` initializes the GPT session, setting up the necessary environment and resources required for the session. +7. **Initialize GPT Session (tensorrt-llm)**: + - Finally, `tensorrt-llm` initializes the GPT session, setting up the necessary environment and resources required for the session. -8. **Callback (cortex.tensorrt-llm to cortex-cpp)**: - - After completing the initialization, `cortex.tensorrt-llm` sends a callback to `cortex-cpp` to indicate that the model loading process is complete. +8. **Callback (tensorrt-llm to cortex-cpp)**: + - After completing the initialization, `tensorrt-llm` sends a callback to `cortex-cpp` to indicate that the model loading process is complete. 9. **HTTP Response (cortex-cpp to cortex-js)**: - `cortex-cpp` then sends an HTTP response back to `cortex-js`, indicating that the model has been successfully loaded. @@ -193,34 +193,34 @@ sequenceDiagram ``` -The diagram above illustrates the interaction between three components: `cortex-js`, `cortex-cpp`, and `cortex.tensorrt-llm` when using the `tensorrt-llm` engine to call the `chat completions endpoint` with the inference option: +The diagram above illustrates the interaction between three components: `cortex-js`, `cortex-cpp`, and `tensorrt-llm` when using the `tensorrt-llm` engine to call the `chat completions endpoint` with the inference option: 1. **HTTP Request Chat Completion (cortex-js to cortex-cpp)**: - `cortex-js` sends an HTTP request to `cortex-cpp` to request chat completion. -2. **Request Chat Completion (cortex-cpp to cortex.tensorrt-llm)**: - - `cortex-cpp` processes the request and forwards it to `cortex.tensorrt-llm` to handle the chat completion. +2. **Request Chat Completion (cortex-cpp to tensorrt-llm)**: + - `cortex-cpp` processes the request and forwards it to `tensorrt-llm` to handle the chat completion. -3. **Apply Chat Template (cortex.tensorrt-llm)**: - - `cortex.tensorrt-llm` starts by applying the chat template to the incoming request. +3. **Apply Chat Template (tensorrt-llm)**: + - `tensorrt-llm` starts by applying the chat template to the incoming request. -4. **Encode (cortex.tensorrt-llm)**: +4. **Encode (tensorrt-llm)**: - The next step involves encoding the input data. -5. **Set Sampling Config (cortex.tensorrt-llm)**: +5. **Set Sampling Config (tensorrt-llm)**: - After encoding, the sampling configuration is set. This configuration might include parameters that control the generation process, such as temperature and top-k sampling. -6. **Create Generation Input/Output (cortex.tensorrt-llm)**: - - `cortex.tensorrt-llm` then creates the generation input and output structures. These structures are used to manage the data flowing in and out of the model during generation. +6. 
**Create Generation Input/Output (tensorrt-llm)**: + - `tensorrt-llm` then creates the generation input and output structures. These structures are used to manage the data flowing in and out of the model during generation. -7. **Copy New Token from GPU (cortex.tensorrt-llm)**: +7. **Copy New Token from GPU (tensorrt-llm)**: - During the generation process, new tokens are copied from the GPU as they are generated. -8. **Decode New Token (cortex.tensorrt-llm)**: +8. **Decode New Token (tensorrt-llm)**: - The newly generated tokens are then decoded back. -9. **Callback (cortex.tensorrt-llm to cortex-cpp)**: - - After processing the request, `cortex.tensorrt-llm` sends a callback to `cortex-cpp` indicating that the chat completion process is done. +9. **Callback (tensorrt-llm to cortex-cpp)**: + - After processing the request, `tensorrt-llm` sends a callback to `cortex-cpp` indicating that the chat completion process is done. 10. **HTTP Stream Response (cortex-cpp to cortex-js)**: - `cortex-cpp` streams the response back to `cortex-js`, which waits for the completion of the process. @@ -229,7 +229,7 @@ The diagram above illustrates the interaction between three components: `cortex- ## Code Structure ``` . -cortex.tensorrt-llm # Forks from nvidia tensorrt-llm repository +tensorrt-llm # Forks from nvidia tensorrt-llm repository |__ ... |__cpp | |_ ... @@ -244,7 +244,7 @@ cortex.tensorrt-llm # Forks from nvidia tensorrt-llm repository | | β”‚ β”œβ”€β”€ chat_completion_request.h # OpenAI compatible request handling β”‚ | | β”œβ”€β”€ tensorrt-llm_engine.h # Implementation tensorrt-llm engine of model loading and inference | | | β”œβ”€β”€ tensorrt-llm_engine.cc -| | β”œβ”€β”€ third-party # Dependencies of the cortex.tensorrt-llm project +| | β”œβ”€β”€ third-party # Dependencies of the tensorrt-llm project | | └── (list of third-party dependencies) | |__ ... |__ ... diff --git a/docs/data-structures.mdx b/docs/data-structures.mdx new file mode 100644 index 0000000..86a1aa7 --- /dev/null +++ b/docs/data-structures.mdx @@ -0,0 +1,66 @@ +--- +title: Data Structures +description: Cortex.cpp's data structures. +slug: "data-structures" +--- + +:::warning +🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. +::: + +When you install Cortex.cpp, three types of files will be generated on your device: + +- **Binary Files** +- **Configuration Files** +- **Data Folder** + +## Binary Files +These are the executable files of the Cortex.cpp application. The file format varies depending on the operating system: + +- **Windows**: `.exe` located at `C:\Users\\AppData\Local\cortexcpp\cortex.exe` +- **Linux**: `.deb` or `.fedora` located at `/usr/bin/cortexcpp` +- **macOS**: `.pkg` located at `/usr/local/bin/cortexcpp` + +## Configuration Files +This file stores the path to the user data folder and the port for the Cortex.cpp application, allowing you to modify the data folder path and port settings.. The configuration file is named `.cortexrc` and is stored in the following locations: + +- **Windows**: `C:\Users\\.cortexrc` +- **Linux**: `/home//.cortexrc` +- **macOS**: `/Users//.cortexrc` + +The following is the sample of the configuration file: +``` +dataFolderPath: /Users//cortexcpp +apiServerHost: 127.0.0.1 +apiServerPort: 3928 +``` + +## Cortex.cpp Data Folder +The data folder stores the engines, models, and logs required by Cortex.cpp. 
+## Cortex.cpp Data Folder
+The data folder stores the engines, models, and logs required by Cortex.cpp. This folder is located at:
+
+- **Windows**: `C:\Users\<username>\.cortexcpp`
+- **Linux**: `/home/<username>/.cortexcpp`
+- **macOS**: `/Users/<username>/.cortexcpp`
+
+### Folder Structure
+The Cortex.cpp data folder typically follows this structure:
+
+```yaml
+~/.cortexcpp
+  β”œβ”€β”€ models/
+  β”‚   β”œβ”€β”€ model_id.yaml
+  β”‚   └── model_id/
+  β”‚       └── model_id.yaml
+  β”œβ”€β”€ logs/
+  β”‚   └── logs.txt
+  └── engines/
+      └── llamacpp
+```
+#### `.cortexcpp`
+The main directory that stores all Cortex-related files, located in the user's home directory.
+#### `models/`
+Contains the AI models used by Cortex for processing and generating responses.
+#### `logs/`
+Stores log files, which are useful for troubleshooting and monitoring the application's performance.
+#### `engines/`
+Stores the necessary dependencies and engine files needed to run Cortex on supported engines.
\ No newline at end of file
diff --git a/docs/embeddings.mdx b/docs/embeddings.mdx
index 28a76d7..ab0d9b2 100644
--- a/docs/embeddings.mdx
+++ b/docs/embeddings.mdx
@@ -8,7 +8,7 @@ import Tabs from "@theme/Tabs";
 import TabItem from "@theme/TabItem";

 :::warning
-🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
+🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
 :::

 An embedding is a vector that represents a piece of text. The distance between two vectors indicates their similarity: closer distances mean more similar texts, while farther distances mean less similar texts.
@@ -71,7 +71,7 @@ curl http://127.0.0.1:3928/v1/embeddings \
 ## Capabilities
 ### Batch Embeddings
-Cortex's Embedding feature, powered by the [`cortex.llamacpp`](/docs/cortex-llamacpp) engine, offers an OpenAI-compatible endpoint. It supports processing multiple input data prompts simultaneously for batch embeddings.
+Cortex's Embedding feature, powered by the [`llamacpp`](/docs/cortex-llamacpp) engine, offers an OpenAI-compatible endpoint. It supports processing multiple input prompts simultaneously for batch embeddings.
 ### Pre-configured Models
 We provide a selection of pre-configured models designed to integrate seamlessly with embedding features. These optimized models include:
 - Mistral Instruct 7B Q4
diff --git a/docs/formats/gguf.mdx b/docs/formats/gguf.mdx
index c8afd94..14ad28c 100644
--- a/docs/formats/gguf.mdx
+++ b/docs/formats/gguf.mdx
@@ -4,7 +4,7 @@ description: GGUF Model Format.
 ---

 :::warning
-🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
+🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
 :::

 Cortex uses `llama.cpp` as the default engine, so the `GGUF` format is supported by Cortex out of the box.
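Because `llama.cpp` is the default engine, a `GGUF` model can be pulled and run without any extra configuration. A quick sketch using the CLI syntax from the quickstart (the repository name is an example):

```bash
# Pull and run a GGUF model directly from a Hugging Face repo
cortex run TheBloke/Mistral-7B-Instruct-v0.2-GGUF
```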
@@ -25,7 +25,7 @@ model: openhermes-2.5:7B
 version: 1

 # Engine / Model Settings
-engine: cortex.llamacpp
+engine: llamacpp
 ngl: 33 # Infer from base config.json -> num_attention_heads
 ctx_len: 4096 # Infer from base config.json -> max_position_embeddings
 prompt_template: "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
diff --git a/docs/formats/onnx.mdx b/docs/formats/onnx.mdx
index e9436c8..d4e9994 100644
--- a/docs/formats/onnx.mdx
+++ b/docs/formats/onnx.mdx
@@ -4,7 +4,7 @@ description: ONNX Model Format.
 ---

 :::warning
-🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
+🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
 :::

 Cortex uses `onnxruntime-genai` with DirectML to provide GPU acceleration for AMD, Intel, NVIDIA, and Qualcomm GPUs.
@@ -24,7 +24,7 @@ model: openhermes
 version: 1

 # Engine / Model Settings
-engine: cortex.onnx
+engine: onnx
 prompt_template: "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"

 # Results Preferences
diff --git a/docs/formats/tensorrt-llm.mdx b/docs/formats/tensorrt-llm.mdx
index 8339ea0..0cfe7d4 100644
--- a/docs/formats/tensorrt-llm.mdx
+++ b/docs/formats/tensorrt-llm.mdx
@@ -4,7 +4,7 @@ description: TensorRT-LLM Model Format.
 ---

 :::warning
-🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
+🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
 :::

 Cortex uses the `tensorrt-llm` inference library for NVIDIA GPU acceleration.
@@ -24,7 +24,7 @@ model: openhermes-2.5:7B-tensorrt-llm
 version: 1

 # Engine / Model Settings
-engine: cortex.tensorrt-llm
+engine: tensorrt-llm
 os: linux
 gpu_arch: ada
 quantization_method: awq
diff --git a/docs/hub/cortex-hub.mdx b/docs/hub/cortex-hub.mdx
index 2d9ca4a..10e4752 100644
--- a/docs/hub/cortex-hub.mdx
+++ b/docs/hub/cortex-hub.mdx
@@ -4,12 +4,12 @@ description: Cortex's built-in models are hosted on Huggingface, supporting mult
 ---

 :::warning
-🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
+🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
 :::

 ![cortex repo](/img/docs/repo.png)

-[Cortex Model Repos](https://huggingface.co/cortexso) are Cortex's built-in models hosted on Huggingface, which uses a single Git repository to hold different versions of a model, which can be pulled using Docker or Ollama-like syntax.
+[Cortex Model Repos](https://huggingface.co/cortexso) are Cortex.cpp's built-in models hosted on Huggingface. Each model uses a single Git repository to hold its different versions, which can be pulled using Docker- or Ollama-like syntax.

 :::info
 We also plan to provide alternative hosting locations or servers to replicate the content, ensuring access in regions where HuggingFace is blocked or has slow download speeds.
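Because each version or engine variant lives on its own branch of the model repository, a tag after the model name selects which branch is pulled. A sketch using the variant syntax shown in the quickstart (model names and tags are examples):

```bash
# Pull the default branch of a built-in model
cortex pull mistral

# Run engine-specific variants of the same model
cortex run mistral:onnx
cortex run mistral:tensorrt-llm
```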
diff --git a/docs/hub/hugging-face.mdx b/docs/hub/hugging-face.mdx index e773aeb..8def7fb 100644 --- a/docs/hub/hugging-face.mdx +++ b/docs/hub/hugging-face.mdx @@ -4,11 +4,11 @@ description: Cortex supports all `GGUF` and `ONNX` models available in Huggingfa --- :::warning -🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. +🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. ::: -Cortex supports all `GGUF` and `ONNX` models from the [Hugging Face Hub](https://huggingface.co), along with its built-in models. For `TensorRT-LLM` models, only built-in models in the [Cortex Model Repos](/docs/hub/cortex-hub) are supported. +Cortex.cpp supports all `GGUF` and `ONNX` models from the [Hugging Face Hub](https://huggingface.co), along with its built-in models. For `TensorRT-LLM` models, only built-in models in the [Cortex Model Repos](/docs/hub/cortex-hub) are supported. :::info To pull a supported model from HuggingFace, use the format `ORG_ID/MODEL_ID`. @@ -39,4 +39,4 @@ cortex pull bigscience/mt0-base ``` ## TensorRT-LLM -We are still working to support all available `TensorRT-LLM` models on HuggingFace. For now, Cortex only supports built-in `TensorRT-LLM` models, which can be downloaded from the [Cortex Model Repos](/docs/hub/cortex-hub). +We are still working to support all available `TensorRT-LLM` models on HuggingFace. For now, Cortex.cpp only supports built-in `TensorRT-LLM` models, which can be downloaded from the [Cortex Model Repos](/docs/hub/cortex-hub). diff --git a/docs/hub/index.mdx b/docs/hub/index.mdx index d3d67ec..95739ef 100644 --- a/docs/hub/index.mdx +++ b/docs/hub/index.mdx @@ -6,9 +6,9 @@ import DocCardList from '@theme/DocCardList'; :::warning -🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. +🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. ::: -Cortex allows users to pull models from multiple repositories, offering flexibility and extensive model access. Here are the supported repositories: +Cortex.cpp allows users to pull models from multiple repositories, offering flexibility and extensive model access. Here are the supported repositories: diff --git a/docs/hub/nvidia-ngc.mdx b/docs/hub/nvidia-ngc.mdx index d3f7b31..ee0248d 100644 --- a/docs/hub/nvidia-ngc.mdx +++ b/docs/hub/nvidia-ngc.mdx @@ -4,7 +4,7 @@ description: Coming Soon! --- :::warning -🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. +🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. ::: -Cortex will soon support Nvidia NGC, enabling users to download a wider variety of TensorRT-LLM models. \ No newline at end of file +Cortex.cpp will soon support Nvidia NGC, enabling users to download a wider variety of TensorRT-LLM models. 
\ No newline at end of file
diff --git a/docs/installation/docker.mdx b/docs/installation/docker.mdx
index a0b4c45..3aa0dcb 100644
--- a/docs/installation/docker.mdx
+++ b/docs/installation/docker.mdx
@@ -4,5 +4,5 @@ description: Install Cortex through Docker.
 ---

 :::warning
-🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
+🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
 :::
\ No newline at end of file
diff --git a/docs/installation/gpu-acceleration.mdx b/docs/installation/gpu-acceleration.mdx
index 072d714..ff57a71 100644
--- a/docs/installation/gpu-acceleration.mdx
+++ b/docs/installation/gpu-acceleration.mdx
@@ -4,5 +4,5 @@ description: GPU Acceleration.
 ---

 :::warning
-🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
+🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
 :::
\ No newline at end of file
diff --git a/docs/installation/linux.mdx b/docs/installation/linux.mdx
index 9e5f681..186d0df 100644
--- a/docs/installation/linux.mdx
+++ b/docs/installation/linux.mdx
@@ -9,49 +9,68 @@ import TabItem from '@theme/TabItem';
 import Admonition from '@theme/Admonition';

 :::warning
-🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
+🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
 :::

-## Cortex Installation
+## Cortex.cpp Installation
 Before installation, make sure that you have met the required [dependencies](#dependencies) and [hardware](#hardware) to run Cortex.

+### .deb
+- Stable: https://github.com/janhq/cortex.cpp/releases
+- Beta: https://github.com/janhq/cortex.cpp/releases
+- Nightly: https://github.com/janhq/cortex.cpp/releases

-```sh
-sudo apt install cortex-engine
-```

+### .AppImage
+- Stable: https://github.com/janhq/cortex.cpp/releases
+- Beta: https://github.com/janhq/cortex.cpp/releases
+- Nightly: https://github.com/janhq/cortex.cpp/releases

 :::info
-You can also install Cortex using the Cortex Installer available on [GitHub Releases](https://github.com/janhq/cortex/releases).
+You can also install Cortex.cpp using the Cortex Installer available on [GitHub Releases](https://github.com/janhq/cortex/releases).
 :::

+### Data Folder
+By default, Cortex.cpp is installed in the following directory:
+```
+# Binary Location
+/usr/bin/cortexcpp
+
+# Application Data (Engines, Models and Logs folders)
+/home/<username>/.cortexcpp
+```
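To install one of the `.deb` builds above, download it from the releases page, install it with `apt`, and confirm the CLI works. A sketch (the package file name is illustrative; use the actual asset name you downloaded):

```bash
# Install the downloaded package
sudo apt-get install ./cortexcpp.deb

# Verify the installation
cortex -h
```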
-## Uninstall Cortex
-```sh
-sudo apt remove cortex-engine
-```
-:::info
-Delete the Cortex data folder located in your home folder.
-:::
+## Uninstall Cortex.cpp
+```sh
+# Uninstall Cortex.cpp
+sudo apt-get remove cortexcpp
+
+# Remove all user data
+rm -rf ~/.cortexcpp
+
+# Delete the configuration file
+rm -rf ~/.cortexrc
+```
+:::info
+Delete the Cortex.cpp data folder located in your home folder.
+:::

-## Build from Source
-1. Clone the Cortex repository [here](https://github.com/janhq/cortex/tree/dev).
-2. Navigate to the `cortex-js` folder.
-3. Open the terminal and run the following command to build the Cortex project:
-```sh
-npx nest build
-```
-4. Make the `command.js` executable:
-```sh
-chmod +x '[path-to]/cortex/cortex-js/dist/src/command.js'
-```
-5. Link the package globally:
-```sh
-npm link
-```
-6. Verify that Cortex is installed correctly by getting help information.
+## Build from Source
+1. Clone the Cortex.cpp repository [here](https://github.com/janhq/cortex.cpp).
+2. Navigate to the `engine > vcpkg` folder.
+3. Configure vcpkg:
+```bash
+cd vcpkg
+./bootstrap-vcpkg.sh
+vcpkg install
+```
+4. Build Cortex.cpp inside the `build` folder:
+```bash
+mkdir build
+cd build
+cmake .. -DCMAKE_TOOLCHAIN_FILE=path_to_vcpkg_folder/vcpkg/scripts/buildsystems/vcpkg.cmake
+make -j4
+```
+5. Once the build completes, the binaries are generated inside the `build` folder.
+6. Verify that Cortex.cpp is installed correctly by getting help information.

```sh
# Get the help information
@@ -71,7 +90,7 @@ cortex -h
 - Ubuntu version 22.04 and higher
 #### CPU
 :::info
-- Cortex supports a processor that can handle AVX2. For the full list, please see [here](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#CPUs_with_AVX2).
+- Cortex.cpp supports a processor that can handle AVX2. For the full list, please see [here](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#CPUs_with_AVX2).
 - We support older processors with AVX and AVX-512, though this is not recommended.
 :::
 ##### Intel CPU
diff --git a/docs/installation/mac.mdx b/docs/installation/mac.mdx
index 7da559a..44a22be 100644
--- a/docs/installation/mac.mdx
+++ b/docs/installation/mac.mdx
@@ -8,48 +8,65 @@ import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';

 :::warning
-🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
+🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
 :::

-## Cortex Installation
+## Cortex.cpp Installation
 Before installation, make sure that you have met the required [dependencies](#dependencies) and [hardware](#hardware) to run Cortex.
-
-```sh
-brew install cortex-engine
-```
+- Stable: https://github.com/janhq/cortex.cpp/releases
+- Beta: https://github.com/janhq/cortex.cpp/releases
+- Nightly: https://github.com/janhq/cortex.cpp/releases

 :::info
-You can also install Cortex using the Cortex Installer available on [GitHub Releases](https://github.com/janhq/cortex/releases).
+You can also install Cortex.cpp using the Cortex.cpp Installer available on [GitHub Releases](https://github.com/janhq/cortex/releases).
 :::

+### Data Folder
+By default, Cortex.cpp is installed in the following directory:
+```
+# Binary Location
+/usr/local/bin/cortexcpp

-## Uninstall Cortex
-```sh
-brew uninstall cortex-engine
+# Application Data (Engines, Models and Logs folders)
+/Users/<username>/.cortexcpp
+```
+## Uninstall Cortex.cpp
+1. Open the Finder menu.
+2. Click the Applications option from the sidebar.
+3. Find the Cortex.cpp app or type in the search bar.
+4. Use any of these ways to move the Cortex.cpp app to the Trash:
+   - Drag the app to the Trash.
+   - Select the app and choose the Move to Trash option.
+   - Select the app and press Command-Delete on your keyboard.
+5.
Use the following command to delete Cortex.cpp's user data and app cache:
+```bash
+# Remove all user data
+rm -rf ~/.cortexcpp
+
+# Delete the configuration file
+rm -rf ~/.cortexrc
```

 :::info
-Delete the Cortex data folder located in your home folder.
+Delete the Cortex.cpp data folder located in your home folder.
 :::

 ## Build from Source
+1. Clone the Cortex.cpp repository [here](https://github.com/janhq/cortex.cpp).
+2. Navigate to the `engine > vcpkg` folder.
+3. Configure vcpkg:

-1. Clone the Cortex repository [here](https://github.com/janhq/cortex/tree/dev).
-2. Navigate to the `cortex-js` folder.
-3. Open the terminal and run the following command to build the Cortex project:
-
-```sh
-npx nest build
+```bash
+cd vcpkg
+./bootstrap-vcpkg.sh
+vcpkg install
```
+4. Build Cortex.cpp inside the `build` folder:

-4. Make the `command.js` executable:
-
-```sh
-chmod +x '[path-to]/cortex/cortex-js/dist/src/command.js'
-```
-
-5. Link the package globally:
-
-```sh
-npm link
+```bash
+mkdir build
+cd build
+cmake .. -DCMAKE_TOOLCHAIN_FILE=path_to_vcpkg_folder/vcpkg/scripts/buildsystems/vcpkg.cmake
+make -j4
```
-6. Verify that Cortex is installed correctly by getting help information.
+5. Once the build completes, the binaries are generated inside the `build` folder.
+6. Verify that Cortex.cpp is installed correctly by getting help information.

```sh
# Get the help information
diff --git a/docs/installation/windows.mdx b/docs/installation/windows.mdx
index 4993948..cdb76bb 100644
--- a/docs/installation/windows.mdx
+++ b/docs/installation/windows.mdx
@@ -9,78 +9,87 @@ import TabItem from '@theme/TabItem';
 import Admonition from '@theme/Admonition';

 :::warning
-🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
+🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
 :::

 ## Overview
-For Windows, Cortex can be installed in two ways:
+For Windows, Cortex.cpp can be installed in two ways:
 - [Windows](#windows)
 - [Windows Subsystem for Linux (WSL)](#windows-subsystem-linux)

 ## Windows
-### Install Cortex
+### Install Cortex.cpp
 Before installation, make sure that you have met the required [dependencies](#windows) and [hardware](#hardware) to run Cortex.
-```sh
-winget install cortex-engine
+- Stable: https://github.com/janhq/cortex.cpp/releases
+- Beta: https://github.com/janhq/cortex.cpp/releases
+- Nightly: https://github.com/janhq/cortex.cpp/releases
+#### Data Folder
+By default, Cortex.cpp is installed in the following directory:
```
+# Binary Location
+C:\Users\<username>\AppData\Local\cortexcpp

-### Uninstall Cortex
-```sh
-winget uninstall cortex-engine
+# Application Data (Engines, Models and Logs folders)
+C:\Users\<username>\.cortexcpp
```
+### Uninstall Cortex.cpp
+To uninstall Cortex.cpp, simply run the `uninstaller.exe` located in the binary data folder after installation.
 :::info
-Delete the Cortex data folder located in your home folder.
+Delete the Cortex.cpp data folder located in your home folder.
 :::
 ## Windows Subsystem Linux
 :::info
 Windows Subsystem Linux allows running Linux tools and workflows seamlessly alongside Windows applications. For more information, please see this [article](https://learn.microsoft.com/en-us/windows/wsl/faq).
 :::
-### Install Cortex
+### Install Cortex.cpp
 Before installation, make sure that you have met the required [dependencies](#windows-subsystem-for-linux) and [hardware](#hardware) to run Cortex.
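If WSL is not yet set up on the machine, it can usually be enabled with Microsoft's one-line installer before continuing (shown here for convenience; any recent Ubuntu distribution then works with the Linux instructions):

```bash
# Run from an elevated PowerShell or Command Prompt on Windows
wsl --install
```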
-```sh
-# Install using Brew
-brew install cortex-engine
+- Stable: https://github.com/janhq/cortex.cpp/releases
+- Beta: https://github.com/janhq/cortex.cpp/releases
+- Nightly: https://github.com/janhq/cortex.cpp/releases

-# Install using Sudo
-sudo apt install cortex-engine
+#### Data Folder
+By default, Cortex.cpp is installed in the following directory:
```
+# Binary Location
+C:\Users\<username>\AppData\Local\cortexcpp\cortex.exe

-### Uninstall Cortex
-
-```sh
-# Uninstall using Brew
-brew uninstall cortex-engine
-
-# Uninstall cortex
-sudo apt remove cortex-engine
+# Application Data (Engines, Models and Logs folders)
+C:\Users\<username>\.cortexcpp
```
+### Uninstall Cortex.cpp
+To uninstall Cortex.cpp, simply run the `uninstaller.exe` located in the binary data folder after installation.
 :::info
-Delete the Cortex data folder located in your home folder.
+Delete the Cortex.cpp data folder located in your home folder.
 :::

 ## Build from Source
-1. Clone the Cortex repository [here](https://github.com/janhq/cortex/tree/dev).
-2. Navigate to the `cortex-js` folder.
-3. Open the terminal and run the following command to build the Cortex project:
+1. Clone the Cortex.cpp repository [here](https://github.com/janhq/cortex.cpp).
+2. Navigate to the `engine > vcpkg` folder.
+3. Configure vcpkg:

-```sh
-npx nest build
-```
-
-4. Make the `command.js` executable:
-
-```sh
-node "[path-to]\cortex\cortex-js\dist\src\command.js"
+```bash
+cd vcpkg
+## Windows
+./bootstrap-vcpkg.bat
+## WSL
+./bootstrap-vcpkg.sh
+vcpkg install
```
+4. Build Cortex.cpp inside the `build` folder:

-5. Link the package globally:
-
-```sh
-npm link
+```bash
+mkdir build
+cd build
+## Windows
+cmake .. -DBUILD_SHARED_LIBS=OFF -DCMAKE_TOOLCHAIN_FILE=path_to_vcpkg_folder/vcpkg/scripts/buildsystems/vcpkg.cmake -DVCPKG_TARGET_TRIPLET=x64-windows-static
+## WSL
+cmake .. -DCMAKE_TOOLCHAIN_FILE=path_to_vcpkg_folder/vcpkg/scripts/buildsystems/vcpkg.cmake
+make -j4
```
-6. Verify that Cortex is installed correctly by getting help information.
+5. On Windows, use Visual Studio with the C++ development kit to build the project from the files generated in the `build` folder; on WSL, `make -j4` produces the binaries directly.
+6. Verify that Cortex.cpp is installed correctly by getting help information.

```sh
# Get the help information
@@ -104,7 +113,7 @@ cortex -h
 - Windows 10 or higher.
 #### CPU
 :::info
-- Cortex supports a processor that can handle AVX2. For the full list, please see [here](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#CPUs_with_AVX2).
+- Cortex.cpp supports a processor that can handle AVX2. For the full list, please see [here](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#CPUs_with_AVX2).
 - We support older processors with AVX and AVX-512, though this is not recommended.
 :::
 ##### Intel CPU
diff --git a/docs/integrate-remote-engine.mdx b/docs/integrate-remote-engine.mdx
index 6687f75..b32fcc6 100644
--- a/docs/integrate-remote-engine.mdx
+++ b/docs/integrate-remote-engine.mdx
@@ -7,7 +7,7 @@ import Tabs from "@theme/Tabs";
 import TabItem from "@theme/TabItem";

 :::warning
-🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
+🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
 :::
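A quick way to confirm the AVX2 requirement from the hardware notes above, on Linux or inside WSL, is to inspect the CPU flags (a convenience check, not an official Cortex.cpp tool):

```bash
# Prints 'avx2' once if the processor advertises AVX2 support
grep -o -m1 'avx2' /proc/cpuinfo
```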
diff --git a/docs/model-overview.mdx b/docs/model-overview.mdx
index b7491bd..0eecc9e 100644
--- a/docs/model-overview.mdx
+++ b/docs/model-overview.mdx
@@ -4,17 +4,17 @@ description: The Model section overview
 ---

 :::warning
-🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
+🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
 :::

-When Cortex is started, it automatically starts an API server, this is inspired by Docker CLI. This server manages various model endpoints. These endpoints facilitate the following:
+When Cortex.cpp starts, it automatically launches an API server; this design is inspired by the Docker CLI. This server manages various model endpoints. These endpoints facilitate the following:
 - **Model Operations**: Run and stop models.
 - **Model Management**: Manage your local models.
 :::info
 The model in the API server is automatically loaded/unloaded by using the [`/chat/completions`](/api-reference#tag/inference/post/v1/chat/completions) endpoint.
 :::
 ## Model Formats
-Cortex supports three model formats:
+Cortex.cpp supports three model formats:
 - GGUF
 - ONNX
 - TensorRT-LLM
@@ -24,7 +24,7 @@ For details on each format, see the [Model Formats](/docs/model-yaml#model-forma
 :::

 ## Built-in Models
-Cortex offers a range of built-in models that include popular open-source options. These models, hosted on HuggingFace as [Cortex Model Repositories](/docs/hub/cortex-hub), are pre-compiled for different engines, enabling each model to have multiple branches in various formats.
+Cortex.cpp offers a range of built-in models that include popular open-source options. These models, hosted on HuggingFace as [Cortex Model Repositories](/docs/hub/cortex-hub), are pre-compiled for different engines, enabling each model to have multiple branches in various formats.

 ### Built-in Model Variants
 Built-in models are made available across the following variants:
@@ -38,6 +38,5 @@ You can see our full list of Built-in Models [here](/models).
 :::

 ## Next steps
-- Learn more about using models in Cortex [here](/docs/using-models).
 - Cortex requires a `model.yaml` file to run a model. Find out more [here](/docs/model-yaml).
 - Cortex supports multiple model hubs hosting built-in models. See details [here](/docs/model-sources).
\ No newline at end of file
diff --git a/docs/model-presets.mdx b/docs/model-presets.mdx
index 74971dc..d4196e1 100644
--- a/docs/model-presets.mdx
+++ b/docs/model-presets.mdx
@@ -4,7 +4,7 @@ description: Model Presets
 ---

 :::warning
-🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
+🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
 :::

 ## Model Presets
diff --git a/docs/model-yaml.mdx b/docs/model-yaml.mdx
index 5744eda..b18d2d0 100644
--- a/docs/model-yaml.mdx
+++ b/docs/model-yaml.mdx
@@ -8,10 +8,12 @@ import TabItem from "@theme/TabItem";

 :::warning
-🚧 Cortex is currently under development.
+🚧 Cortex.cpp is currently under development.
Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
 :::

-Cortex uses a `model.yaml` file to specify the configuration for running a model. This file provides Cortex with information about the engine to be used and the parameters for running the model.
+Cortex.cpp uses a `model.yaml` file to specify the configuration for running a model. Models can be downloaded from the Cortex Model Hub or Hugging Face repositories. Once downloaded, the model data is parsed and stored in two locations:
+- `/cortexcpp/models/<model_id>/<model_id>.yml`: Contains the original model data.
+- `/cortexcpp/models/<model_id>.yaml`: Manages model settings for Cortex.cpp.

 ## `model.yaml` High Level Structure
@@ -44,7 +46,7 @@ name: openhermes-2.5
 model: openhermes-2.5:7B
 version: 1
 ```
-Cortex Meta consists of essential metadata that identifies the model within Cortex. The required parameters include:
+Cortex Meta consists of essential metadata that identifies the model within Cortex.cpp. The required parameters include:
 | **Parameter** | **Description** |
 |---------------|-----------------|
 | `name` | The identifier name of the model, used as the `model_id`. |
@@ -54,13 +56,13 @@ Cortex Meta consists of essential metadata that identifies the model within Cort
 ### Engine / Model Settings
 ```yaml
 # Engine / Model Settings
-engine: cortex.llamacpp
+engine: llamacpp
 ngl: 33
 ctx_len: 4096
 prompt_template: "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
 ```

-Engine/Model Settings include the options that control how Cortex runs the model. The required parameters include:
+Engine/Model Settings include the options that control how Cortex.cpp runs the model. The required parameters include:
 | **Parameter** | **Description** |
 |---------------|-----------------|
 | `engine` | Specifies the engine to be used for model execution. |
@@ -94,7 +96,7 @@ model: openhermes-2.5:7B
 version: 1

 # Engine / Model Settings
-engine: cortex.llamacpp
+engine: llamacpp
 ngl: 33 # Infer from base config.json -> num_attention_heads
 ctx_len: 4096 # Infer from base config.json -> max_position_embeddings
 prompt_template: "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
@@ -133,7 +135,7 @@ model: openhermes
 version: 1

 # Engine / Model Settings
-engine: cortex.onnx
+engine: onnx
 prompt_template: "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"

 # Results Preferences
@@ -169,7 +171,7 @@ model: openhermes-2.5:7B-tensorrt-llm
 version: 1

 # Engine / Model Settings
-engine: cortex.tensorrt-llm
+engine: tensorrt-llm
 os: linux
 gpu_arch: ada
 quantization_method: awq
diff --git a/docs/overview.mdx b/docs/overview.mdx
index a807e35..36f36bb 100644
--- a/docs/overview.mdx
+++ b/docs/overview.mdx
@@ -11,14 +11,14 @@ import TabItem from "@theme/TabItem";
 # Cortex

 :::warning
-🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
+🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
 :::

 ![Cortex Cover Image](/img/social-card.jpg)

-Cortex lets you run AI easily on your computer.
+Cortex.cpp lets you run AI easily on your computer.
-Cortex is a C++ command-line interface (CLI) that serves as an alternative to Ollama, capable of running with three different engines: `llama.cpp`, `ONNX`, and `TensorRT-LLM`. +Cortex.cpp is a C++ command-line interface (CLI) designed as an alternative to Ollama. By default, it runs on the `llama.cpp` engine but also supports other engines, including `ONNX` and `TensorRT-LLM`, making it a multi-engine platform. ## Supported Accelerators - Nvidia CUDA @@ -33,14 +33,14 @@ Cortex is a C++ command-line interface (CLI) that serves as an alternative to Ol If GPU hardware is available, Cortex is GPU accelerated by default. :::info -**Real-world Use**: Cortex powers [Jan](https://jan.ai), our on-device ChatGPT-alternative. +**Real-world Use**: Cortex.cpp powers [Jan](https://jan.ai), our on-device ChatGPT-alternative. -Cortex has been battle-tested across 1 million+ downloads and handles a variety of hardware configurations. +Cortex.cpp has been battle-tested across 1 million+ downloads and handles a variety of hardware configurations. ::: ## Supported Models -Cortex supports the following list of [Built-in Models](/models): +Cortex.cpp supports the following list of [Built-in Models](/models): @@ -88,5 +88,5 @@ Cortex supports the following list of [Built-in Models](/models): :::info -Cortex supports pulling `GGUF` and `ONNX` models from the [Hugging Face Hub](https://huggingface.co). Read how to [Pull models from Hugging Face](/docs/hub/hugging-face/) +Cortex.cpp supports pulling `GGUF` and `ONNX` models from the [Hugging Face Hub](https://huggingface.co). Read how to [Pull models from Hugging Face](/docs/hub/hugging-face/) ::: diff --git a/docs/quickstart.mdx b/docs/quickstart.mdx index bf5119f..5c59052 100644 --- a/docs/quickstart.mdx +++ b/docs/quickstart.mdx @@ -9,23 +9,18 @@ import TabItem from "@theme/TabItem"; :::warning -🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. +🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. ::: ## Installation -```sh -# Install using Brew for Mac -brew install cortex-engine - -# Install using Winget for Windows -winget install cortex-engine - -# Install using Sudo for Linux -sudo apt install cortex-engine - -``` -## Start Cortex Processes and API Server -This command starts the Cortex API server at `localhost:1337`. +To install Cortex, download the installer for your operating system from the following options: +- **Stable Version** + - [Windows](https://github.com/janhq/cortex.cpp/releases) + - [Mac](https://github.com/janhq/cortex.cpp/releases) + - [Linux (Debian)](https://github.com/janhq/cortex.cpp/releases) + - [Linux (Fedora)](https://github.com/janhq/cortex.cpp/releases) +## Start Cortex.cpp Processes and API Server +This command starts the Cortex.cpp API server at `localhost:3928`. ```sh cortex ``` @@ -45,7 +40,7 @@ cortex chat mistral ``` ### API ```curl -curl http://localhost:1337/v1/chat/completions \ +curl http://localhost:3928/v1/chat/completions \ -H "Content-Type: application/json" \ -d '{ "model": "", @@ -77,7 +72,7 @@ const resp = await cortex.chat.completions.create({ ], }); ``` -### Cortex-python +### Cortex.py ```py completion = client.chat.completions.create( model=mistral, @@ -90,9 +85,9 @@ completion = client.chat.completions.create( ) ``` ## Stop a Model -This command stops the running model and the Cortex API server. 
+This command stops the running model. ```bash -cortex stop +cortex models stop ``` ## Show the System State This command displays the running model and the hardware system status. @@ -100,9 +95,6 @@ This command displays the running model and the hardware system status. cortex ps ``` ## Run Different Model Variants -:::info -All model files are stored in the `~users/cortex/models` folder. -::: ```bash # Run HuggingFace model with HuggingFace Repo cortex run TheBloke/Mistral-7B-Instruct-v0.2-GGUF @@ -116,7 +108,7 @@ cortex run mistral:tensorrt-llm :::info -Cortex is still in early development, so if you have any questions, please reach out to us: +Cortex.cpp is still in early development, so if you have any questions, please reach out to us: - [GitHub](https://github.com/janhq/cortex) - [Discord](https://discord.gg/YFKKeuVu) diff --git a/docs/requirements.mdx b/docs/requirements.mdx index 24315f9..7c13ab7 100644 --- a/docs/requirements.mdx +++ b/docs/requirements.mdx @@ -8,7 +8,7 @@ import TabItem from '@theme/TabItem'; import Admonition from '@theme/Admonition'; :::warning -🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. +🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. ::: To run LLMs on-device or on-premise, Cortex has the following requirements: diff --git a/docs/telemetry-architecture.mdx b/docs/telemetry-architecture.mdx index b39fbc5..32ac104 100644 --- a/docs/telemetry-architecture.mdx +++ b/docs/telemetry-architecture.mdx @@ -9,7 +9,7 @@ import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; :::warning -🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. +🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. ::: ## Architecture diff --git a/docs/telemetry.mdx b/docs/telemetry.mdx index 5d2e917..6024499 100644 --- a/docs/telemetry.mdx +++ b/docs/telemetry.mdx @@ -8,7 +8,7 @@ import Tabs from "@theme/Tabs"; import TabItem from "@theme/TabItem"; :::warning -🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. +🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. ::: Cortex collects telemetry data to enhance our product. This data provides detailed insights into your usage, including crash reports for your Cortex or Jan applications. By analyzing this information, we can identify and fix bugs, optimize performance, and improve overall stability and user experience. diff --git a/docs/troubleshooting.mdx b/docs/troubleshooting.mdx index 47b8ce3..a437ad4 100644 --- a/docs/troubleshooting.mdx +++ b/docs/troubleshooting.mdx @@ -10,7 +10,7 @@ import Admonition from '@theme/Admonition'; :::warning -🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. +🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. 
::: ## Model No Response diff --git a/docs/using-models.mdx b/docs/using-models.mdx index 41945b1..23ac76a 100644 --- a/docs/using-models.mdx +++ b/docs/using-models.mdx @@ -8,7 +8,7 @@ import Tabs from "@theme/Tabs"; import TabItem from "@theme/TabItem"; :::warning -🚧 Cortex is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. +🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. ::: Cortex's Models API is compatible with OpenAI’s [Models API](https://platform.openai.com/docs/api-reference/models) endpoint. It is a fork of the OpenAI API used for model management. Additionally, Cortex exposes lower-level operations for managing models like downloading models from a model hub and model loading. @@ -23,7 +23,7 @@ The Run Model CLI and endpoint will automatically pull a model if it has not bee ```bash curl --request POST \ - --url http://localhost:1337/v1/models/mistral/start \ + --url http://localhost:3928/v1/models/mistral/start \ --header 'Content-Type: application/json' \ --data '{ "prompt_template": "system\n{system_message}\nuser\n{prompt}\nassistant", @@ -39,7 +39,7 @@ The Run Model CLI and endpoint will automatically pull a model if it has not bee "flash_attn": true, "cache_type": "f16", "use_mmap": true, - "engine": "cortex.llamacpp" + "engine": "llamacpp" }' ``` @@ -58,7 +58,7 @@ The Run Model CLI and endpoint will automatically pull a model if it has not bee ```bash curl --request POST \ - --url http://localhost:1337/models/mistral/stop + --url http://localhost:3928/models/mistral/stop @@ -79,7 +79,7 @@ The Run Model CLI and endpoint will automatically pull a model if it has not bee ```bash curl --request POST \ - --url http://localhost:1337/v1/models/mistral/pull + --url http://localhost:3928/v1/models/mistral/pull @@ -106,7 +106,7 @@ Model Management allows you to manage your local models, which can be found in ` ```bash curl --request GET \ - --url http://localhost:1337/v1/models + --url http://localhost:3928/v1/models @@ -127,7 +127,7 @@ Model Management allows you to manage your local models, which can be found in ` ```bash curl --request GET \ - --url http://localhost:1337/v1/models/mistral + --url http://localhost:3928/v1/models/mistral @@ -148,7 +148,7 @@ Model Management allows you to manage your local models, which can be found in ` ```bash curl --request DELETE \ - --url http://localhost:1337/v1/models/mistral + --url http://localhost:3928/v1/models/mistral @@ -169,7 +169,7 @@ Model Management allows you to manage your local models, which can be found in ` ```bash curl --request PATCH \ - --url http://localhost:1337/v1/models/mistral \ + --url http://localhost:3928/v1/models/mistral \ --header 'Content-Type: application/json' \ --data '{}' diff --git a/sidebars.ts b/sidebars.ts index de0aa8b..04490e6 100644 --- a/sidebars.ts +++ b/sidebars.ts @@ -13,16 +13,16 @@ import type { SidebarsConfig } from "@docusaurus/plugin-content-docs"; const sidebars: SidebarsConfig = { // By default, Docusaurus generates a sidebar from the docs folder structure sidebar: [ - { - type: "html", - value: - '', - }, - { - type: "html", - value: - '', - }, + // { + // type: "html", + // value: + // '', + // }, + // { + // type: "html", + // value: + // '', + // }, { type: "html", value: "GET STARTED", @@ -70,6 +70,7 @@ const sidebars: SidebarsConfig = { }, ], }, + { type: "doc", id: 
"data-structures", label: "Data Structures" }, // { // type: "category", // label: "Basic Usage", @@ -183,6 +184,7 @@ const sidebars: SidebarsConfig = { { type: "doc", id: "cli/engines/index", label: "cortex engines" }, { type: "doc", id: "cli/stop", label: "cortex stop" }, { type: "doc", id: "cli/ps", label: "cortex ps" }, + { type: "doc", id: "cli/update", label: "cortex update" }, // { type: "doc", id: "cli/telemetry", label: "cortex telemetry" }, // { type: "doc", id: "cli/benchmark", label: "cortex benchmark" }, // ARCHITECTURE @@ -226,12 +228,12 @@ const sidebars: SidebarsConfig = { // }, // ], // }, - { - type: "html", - value: "TROUBLESHOOTING", - className: "sidebar-divider", - }, - { type: "doc", id: "troubleshooting", label: "Troubleshooting" }, + // { + // type: "html", + // value: "TROUBLESHOOTING", + // className: "sidebar-divider", + // }, + // { type: "doc", id: "troubleshooting", label: "Troubleshooting" }, ], platform: [ { diff --git a/static/openapi/jan.json b/static/openapi/jan.json index 80b754d..5a90e0a 100644 --- a/static/openapi/jan.json +++ b/static/openapi/jan.json @@ -1774,7 +1774,7 @@ "engine": { "type": "string", "description": "The engine used to run the model.", - "example": "cortex.llamacpp" + "example": "llamacpp" }, "owned_by": { "type": "string", @@ -1887,7 +1887,7 @@ }, "engine": { "type": "string", - "example": "cortex.llamacpp", + "example": "llamacpp", "description": "The engine to use." } } @@ -2024,7 +2024,7 @@ "engine": { "type": "string", "description": "The engine to use.", - "example": "cortex.llamacpp" + "example": "llamacpp" } }, "required": [ @@ -2544,7 +2544,7 @@ "properties": { "name": { "type": "string", - "example": "cortex.llamacpp", + "example": "llamacpp", "description": "The name of the engine that you want to retrieve." }, "productName": {