fix(docs): Fix outdated documentation for llama cpp (#7426)
Jacky3003 authored Dec 30, 2024
1 parent 4adab95 commit 21f3b2d
Showing 15 changed files with 46 additions and 54 deletions.
8 changes: 4 additions & 4 deletions docs/core_docs/docs/integrations/chat/llama_cpp.mdx
@@ -22,11 +22,11 @@ import IntegrationInstallTooltip from "@mdx_components/integration_install_toolt
npm install -S node-llama-cpp@3 @langchain/community @langchain/core
```

You will also need a local Llama 2 model (or a model supported by [node-llama-cpp](https://github.com/withcatai/node-llama-cpp)). You will need to pass the path to this model to the LlamaCpp module as a part of the parameters (see example).
You will also need a local Llama 3 model (or a model supported by [node-llama-cpp](https://github.com/withcatai/node-llama-cpp)). You will need to pass the path to this model to the LlamaCpp module as a part of the parameters (see example).
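
For reference, a minimal sketch of how the model path is passed during initialization, based on the example files updated in this commit (the message text here is illustrative, not part of the commit):

```typescript
import { ChatLlamaCpp } from "@langchain/community/chat_models/llama_cpp";
import { HumanMessage } from "@langchain/core/messages";

// Replace with the path to your local GGUF model file.
const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin";

// The model path is supplied as part of the initialization parameters.
const model = await ChatLlamaCpp.initialize({ modelPath: llamaPath });

const response = await model.invoke([new HumanMessage("Tell me about llamas.")]);
console.log(response.content);
```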

Out-of-the-box `node-llama-cpp` is tuned for running on a MacOS platform with support for the Metal GPU of Apple M-series of processors. If you need to turn this off or need support for the CUDA architecture then refer to the documentation at [node-llama-cpp](https://withcatai.github.io/node-llama-cpp/).

For advice on getting and preparing `llama2` see the documentation for the LLM version of this module.
For advice on getting and preparing `llama3` see the documentation for the LLM version of this module.

A note to LangChain.js contributors: if you want to run the tests associated with this module you will need to put the path to your local model in the environment variable `LLAMA_PATH`.

@@ -51,7 +51,7 @@ import SystemExample from "@examples/models/chat/integration_llama_cpp_system.ts

### Chains

This module can also be used with chains, note that using more complex chains will require suitably powerful version of `llama2` such as the 70B version.
This module can also be used with chains; note that using more complex chains will require a suitably powerful version of `llama3`, such as the 70B version.
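
The embedded chain example is not shown in this diff; a minimal sketch of the idea (the prompt text and temperature are illustrative assumptions, not the exact contents of the example file) might look like:

```typescript
import { ChatLlamaCpp } from "@langchain/community/chat_models/llama_cpp";
import { LLMChain } from "langchain/chains";
import { PromptTemplate } from "@langchain/core/prompts";

const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin";

const model = await ChatLlamaCpp.initialize({ modelPath: llamaPath, temperature: 0.5 });

// A simple single-step prompt -> model chain; more elaborate chains will
// need a more capable model such as the 70B variant.
const prompt = PromptTemplate.fromTemplate(
  "What is a good name for a company that makes {product}?"
);
const chain = new LLMChain({ llm: model, prompt });

const response = await chain.invoke({ product: "colorful socks" });
console.log(response);
```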

import ChainExample from "@examples/models/chat/integration_llama_cpp_chain.ts";

@@ -65,7 +65,7 @@ import StreamExample from "@examples/models/chat/integration_llama_cpp_stream.ts

<CodeBlock language="typescript">{StreamExample}</CodeBlock>

Or you can provide multiple messages, note that this takes the input and then submits a Llama2 formatted prompt to the model.
Or you can provide multiple messages; note that this takes the input and then submits a Llama3-formatted prompt to the model.
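
For reference, a minimal sketch of multi-message streaming based on the example files touched in this commit (the message contents are illustrative):

```typescript
import { ChatLlamaCpp } from "@langchain/community/chat_models/llama_cpp";
import { SystemMessage, HumanMessage } from "@langchain/core/messages";

const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin";

const llamaCpp = await ChatLlamaCpp.initialize({ modelPath: llamaPath });

// Streaming from a system message plus a human message; the module turns
// these into a Llama3-formatted prompt before submitting it to the model.
const stream = await llamaCpp.stream([
  new SystemMessage("You are a helpful and concise assistant."),
  new HumanMessage("Tell me about llamas."),
]);

for await (const chunk of stream) {
  console.log(chunk.content);
}
```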

import StreamMultiExample from "@examples/models/chat/integration_llama_cpp_stream_multi.ts";

60 changes: 26 additions & 34 deletions docs/core_docs/docs/integrations/llms/llama_cpp.mdx
@@ -26,40 +26,28 @@ import IntegrationInstallTooltip from "@mdx_components/integration_install_toolt
npm install @langchain/community @langchain/core
```

You will also need a local Llama 2 model (or a model supported by [node-llama-cpp](https://github.com/withcatai/node-llama-cpp)). You will need to pass the path to this model to the LlamaCpp module as a part of the parameters (see example).
You will also need a local Llama 3 model (or a model supported by [node-llama-cpp](https://github.com/withcatai/node-llama-cpp)). You will need to pass the path to this model to the LlamaCpp module as a part of the parameters (see example).
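
For reference, a minimal sketch of how the path is passed to the LLM variant, based on the example files updated in this commit (the question string is illustrative):

```typescript
import { LlamaCpp } from "@langchain/community/llms/llama_cpp";

// Replace with the path to your local GGUF model file.
const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin";

const model = await LlamaCpp.initialize({ modelPath: llamaPath });

const response = await model.invoke("Where do Llamas come from?");
console.log(response);
```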

Out-of-the-box `node-llama-cpp` is tuned for running on a MacOS platform with support for the Metal GPU of Apple M-series of processors. If you need to turn this off or need support for the CUDA architecture then refer to the documentation at [node-llama-cpp](https://withcatai.github.io/node-llama-cpp/).

A note to LangChain.js contributors: if you want to run the tests associated with this module you will need to put the path to your local model in the environment variable `LLAMA_PATH`.

## Guide to installing Llama2
## Guide to installing Llama3

Getting a local Llama2 model running on your machine is a pre-req so this is a quick guide to getting and building Llama 7B (the smallest) and then quantizing it so that it will run comfortably on a laptop. To do this you will need `python3` on your machine (3.11 is recommended), also `gcc` and `make` so that `llama.cpp` can be built.
Getting a local Llama3 model running on your machine is a prerequisite, so this is a quick guide to getting Llama 3.1-8B (the smallest), then converting and quantizing it so that it will run comfortably on a laptop. To do this you will need `python3` on your machine (3.11 is recommended), as well as `gcc` and `cmake` so that `llama.cpp` can be built.

### Getting the Llama2 models
### Getting the Llama3 models

To get a copy of Llama2 you need to visit [Meta AI](https://ai.meta.com/resources/models-and-libraries/llama-downloads/) and request access to their models. Once Meta AI grant you access, you will receive an email containing a unique URL to access the files, this will be needed in the next steps.
To get a copy of Llama3 you need to visit [Meta AI](https://ai.meta.com/resources/models-and-libraries/llama-downloads/) and request access to their models. Once Meta AI grants you access, you will receive an email containing a unique URL to access the files; this will be needed in the next steps.
Now create a directory to work in, for example:

```
mkdir llama2
cd llama2
mkdir llama3
cd llama3
```

Now we need to get the Meta AI `llama` repo in place so we can download the model.

```
git clone https://github.com/facebookresearch/llama.git
```

Once we have this in place we can change into this directory and run the downloader script to get the model we will be working with. Note: From here on its assumed that the model in use is `llama-2–7b`, if you select a different model don't forget to change the references to the model accordingly.

```
cd llama
/bin/bash ./download.sh
```

This script will ask you for the URL that Meta AI sent to you (see above), you will also select the model to download, in this case we used `llama-2–7b`. Once this step has completed successfully (this can take some time, the `llama-2–7b` model is around 13.5Gb) there should be a new `llama-2–7b` directory containing the model and other files.
Now we need to go to the Meta AI `llama-models` repo, which can be found [here](https://github.com/meta-llama/llama-models). The repo contains instructions for downloading the model of your choice, and you should use the unique URL that you received in your email.
The rest of the tutorial assumes that you have downloaded `Llama3.1-8B`, but any of the models should work from here on out. Once the model is downloaded, make sure to note the download path, as it will be needed later.

### Converting and quantizing the model

@@ -71,38 +59,42 @@ git clone https://github.com/ggerganov/llama.cpp.git
cd llama.cpp
```

Now we need to build the `llama.cpp` tools and set up our `python` environment. In these steps it's assumed that your install of python can be run using `python3` and that the virtual environment can be called `llama2`, adjust accordingly for your own situation.
Now we need to build the `llama.cpp` tools and set up our `python` environment. In these steps it's assumed that your install of python can be run using `python3` and that the virtual environment can be called `llama3`; adjust accordingly for your own situation.

```
make
python3 -m venv llama2
source llama2/bin/activate
cmake -B build
cmake --build build --config Release
python3 -m venv llama3
source llama3/bin/activate
```

After activating your llama2 environment you should see `(llama2)` prefixing your command prompt to let you know this is the active environment. Note: if you need to come back to build another model or re-quantize the model don't forget to activate the environment again also if you update `llama.cpp` you will need to rebuild the tools and possibly install new or updated dependencies! Now that we have an active python environment, we need to install the python dependencies.
After activating your llama3 environment you should see `(llama3)` prefixing your command prompt to let you know this is the active environment. Note: if you need to come back later to build another model or re-quantize the model, don't forget to activate the environment again; also, if you update `llama.cpp` you will need to rebuild the tools and possibly install new or updated dependencies. Now that we have an active python environment, we need to install the python dependencies.

```
python3 -m pip install -r requirements.txt
```

Having done this, we can start converting and quantizing the Llama2 model ready for use locally via `llama.cpp`.
First, we need to convert the model, prior to the conversion let's create a directory to store it in.
Having done this, we can start converting and quantizing the Llama3 model so it is ready for use locally via `llama.cpp`. A conversion to a Hugging Face model is needed first, followed by a conversion to a GGUF model.
First, we need to locate the script `convert_llama_weights_to_hf.py` and copy it into your current working directory. Note that using the script may require you to `pip install` extra dependencies; do so as needed.
Then we need to convert the model. Prior to the conversion, let's create directories to store the Hugging Face conversion and our final model.

```
mkdir models/7B
python3 convert.py --outfile models/7B/gguf-llama2-f16.bin --outtype f16 ../../llama2/llama/llama-2-7b --vocab-dir ../../llama2/llama/llama-2-7b
mkdir models/8B
mkdir models/8B-GGUF
python3 convert_llama_weights_to_hf.py --model_size 8B --input_dir <dir-to-your-model> --output_dir models/8B --llama_version 3
python3 convert_hf_to_gguf.py --outtype f16 --outfile models/8B-GGUF/gguf-llama3-f16.bin models/8B
```

This should create a converted model called `gguf-llama2-f16.bin` in the directory we just created. Note that this is just a converted model so it is also around 13.5Gb in size, in the next step we will quantize it down to around 4Gb.
This should create a converted Hugging Face model and the final GGUF model in the directories we have created. Note that this is just a converted model, so it is still around 16Gb in size; in the next step we will quantize it down to around 4Gb.

```
./quantize ./models/7B/gguf-llama2-f16.bin ./models/7B/gguf-llama2-q4_0.bin q4_0
./build/bin/llama-quantize ./models/8B-GGUF/gguf-llama3-f16.bin ./models/8B-GGUF/gguf-llama3-Q4_0.bin Q4_0
```

Running this should result in a new model being created in the `models\7B` directory, this one called `gguf-llama2-q4_0.bin`, this is the model we can use with langchain. You can validate this model is working by testing it using the `llama.cpp` tools.
Running this should result in a new model called `gguf-llama3-Q4_0.bin` being created in the `models/8B-GGUF` directory; this is the model we can use with LangChain. You can validate that the model is working by testing it with the `llama.cpp` tools.

```
./main -m ./models/7B/gguf-llama2-q4_0.bin -n 1024 --repeat_penalty 1.0 --color -i -r "User:" -f ./prompts/chat-with-bob.txt
./build/bin/llama-cli -m ./models/8B-GGUF/gguf-llama3-Q4_0.bin -cnv -p "You are a helpful assistant"
```

Running this command fires up the model for an interactive chat session. By the way, if you are running out of disk space, this small quantized model is the only one we need, so you can back up and/or delete the original and converted models.
6 changes: 3 additions & 3 deletions docs/core_docs/docs/integrations/text_embedding/llama_cpp.mdx
@@ -26,19 +26,19 @@ import IntegrationInstallTooltip from "@mdx_components/integration_install_toolt
npm install @langchain/community @langchain/core
```

You will also need a local Llama 2 model (or a model supported by [node-llama-cpp](https://github.com/withcatai/node-llama-cpp)). You will need to pass the path to this model to the LlamaCpp module as a part of the parameters (see example).
You will also need a local Llama 3 model (or a model supported by [node-llama-cpp](https://github.com/withcatai/node-llama-cpp)). You will need to pass the path to this model to the LlamaCpp module as a part of the parameters (see example).

Out-of-the-box `node-llama-cpp` is tuned for running on a MacOS platform with support for the Metal GPU of Apple M-series of processors. If you need to turn this off or need support for the CUDA architecture then refer to the documentation at [node-llama-cpp](https://withcatai.github.io/node-llama-cpp/).

For advice on getting and preparing `llama2` see the documentation for the LLM version of this module.
For advice on getting and preparing `llama3` see the documentation for the LLM version of this module.

A note to LangChain.js contributors: if you want to run the tests associated with this module you will need to put the path to your local model in the environment variable `LLAMA_PATH`.

## Usage

### Basic use

We need to provide a path to our local Llama2 model, also the `embeddings` property is always set to `true` in this module.
We need to provide a path to our local Llama3 model; also, the `embeddings` property is always set to `true` in this module.
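
For reference, a minimal sketch based on the embeddings example files updated in this commit (the strings being embedded are illustrative):

```typescript
import { LlamaCppEmbeddings } from "@langchain/community/embeddings/llama_cpp";

const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin";

// Only the model path needs to be supplied; the module enables the
// underlying `embeddings` option itself.
const embeddings = await LlamaCppEmbeddings.initialize({ modelPath: llamaPath });

const queryVector = await embeddings.embedQuery("Hello World!");
const documentVectors = await embeddings.embedDocuments(["Hello World!", "Bye Bye!"]);
console.log(queryVector.length, documentVectors.length);
```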

import CodeBlock from "@theme/CodeBlock";
import BasicExample from "@examples/embeddings/llama_cpp_basic.ts";
2 changes: 1 addition & 1 deletion examples/src/embeddings/llama_cpp_basic.ts
@@ -1,6 +1,6 @@
import { LlamaCppEmbeddings } from "@langchain/community/embeddings/llama_cpp";

const llamaPath = "/Replace/with/path/to/your/model/gguf-llama2-q4_0.bin";
const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin";

const embeddings = await LlamaCppEmbeddings.initialize({
modelPath: llamaPath,
2 changes: 1 addition & 1 deletion examples/src/embeddings/llama_cpp_docs.ts
@@ -1,6 +1,6 @@
import { LlamaCppEmbeddings } from "@langchain/community/embeddings/llama_cpp";

const llamaPath = "/Replace/with/path/to/your/model/gguf-llama2-q4_0.bin";
const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin";

const documents = ["Hello World!", "Bye Bye!"];

2 changes: 1 addition & 1 deletion examples/src/models/chat/integration_llama_cpp.ts
@@ -1,7 +1,7 @@
import { ChatLlamaCpp } from "@langchain/community/chat_models/llama_cpp";
import { HumanMessage } from "@langchain/core/messages";

const llamaPath = "/Replace/with/path/to/your/model/gguf-llama2-q4_0.bin";
const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin";

const model = await ChatLlamaCpp.initialize({ modelPath: llamaPath });

2 changes: 1 addition & 1 deletion examples/src/models/chat/integration_llama_cpp_chain.ts
@@ -2,7 +2,7 @@ import { ChatLlamaCpp } from "@langchain/community/chat_models/llama_cpp";
import { LLMChain } from "langchain/chains";
import { PromptTemplate } from "@langchain/core/prompts";

const llamaPath = "/Replace/with/path/to/your/model/gguf-llama2-q4_0.bin";
const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin";

const model = await ChatLlamaCpp.initialize({
modelPath: llamaPath,
2 changes: 1 addition & 1 deletion examples/src/models/chat/integration_llama_cpp_stream.ts
@@ -1,6 +1,6 @@
import { ChatLlamaCpp } from "@langchain/community/chat_models/llama_cpp";

const llamaPath = "/Replace/with/path/to/your/model/gguf-llama2-q4_0.bin";
const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin";

const model = await ChatLlamaCpp.initialize({
modelPath: llamaPath,
@@ -1,7 +1,7 @@
import { ChatLlamaCpp } from "@langchain/community/chat_models/llama_cpp";
import { SystemMessage, HumanMessage } from "@langchain/core/messages";

const llamaPath = "/Replace/with/path/to/your/model/gguf-llama2-q4_0.bin";
const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin";

const model = await ChatLlamaCpp.initialize({
modelPath: llamaPath,
@@ -1,7 +1,7 @@
import { ChatLlamaCpp } from "@langchain/community/chat_models/llama_cpp";
import { SystemMessage, HumanMessage } from "@langchain/core/messages";

const llamaPath = "/Replace/with/path/to/your/model/gguf-llama2-q4_0.bin";
const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin";

const llamaCpp = await ChatLlamaCpp.initialize({
modelPath: llamaPath,
2 changes: 1 addition & 1 deletion examples/src/models/chat/integration_llama_cpp_system.ts
@@ -1,7 +1,7 @@
import { ChatLlamaCpp } from "@langchain/community/chat_models/llama_cpp";
import { SystemMessage, HumanMessage } from "@langchain/core/messages";

const llamaPath = "/Replace/with/path/to/your/model/gguf-llama2-q4_0.bin";
const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin";

const model = await ChatLlamaCpp.initialize({ modelPath: llamaPath });

2 changes: 1 addition & 1 deletion examples/src/models/llm/llama_cpp.ts
@@ -1,6 +1,6 @@
import { LlamaCpp } from "@langchain/community/llms/llama_cpp";

const llamaPath = "/Replace/with/path/to/your/model/gguf-llama2-q4_0.bin";
const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin";
const question = "Where do Llamas come from?";

const model = await LlamaCpp.initialize({ modelPath: llamaPath });
2 changes: 1 addition & 1 deletion examples/src/models/llm/llama_cpp_stream.ts
@@ -1,6 +1,6 @@
import { LlamaCpp } from "@langchain/community/llms/llama_cpp";

const llamaPath = "/Replace/with/path/to/your/model/gguf-llama2-q4_0.bin";
const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin";

const model = await LlamaCpp.initialize({
modelPath: llamaPath,
4 changes: 2 additions & 2 deletions libs/langchain-community/src/chat_models/llama_cpp.ts
@@ -47,12 +47,12 @@ export interface LlamaCppCallOptions extends BaseLanguageModelCallOptions {
* To use this model you need to have the `node-llama-cpp` module installed.
* This can be installed using `npm install -S node-llama-cpp` and the minimum
* version supported is version 2.0.0.
* This also requires that have a locally built version of Llama2 installed.
* This also requires that you have a locally built version of Llama3 installed.
* @example
* ```typescript
* // Initialize the ChatLlamaCpp model with the path to the model binary file.
* const model = await ChatLlamaCpp.initialize({
* modelPath: "/Replace/with/path/to/your/model/gguf-llama2-q4_0.bin",
* modelPath: "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin",
* temperature: 0.5,
* });
*
2 changes: 1 addition & 1 deletion libs/langchain-community/src/llms/llama_cpp.ts
@@ -42,7 +42,7 @@ export interface LlamaCppCallOptions extends BaseLLMCallOptions {
* To use this model you need to have the `node-llama-cpp` module installed.
* This can be installed using `npm install -S node-llama-cpp` and the minimum
* version supported is version 2.0.0.
* This also requires that have a locally built version of Llama2 installed.
* This also requires that you have a locally built version of Llama3 installed.
*/
export class LlamaCpp extends LLM<LlamaCppCallOptions> {
lc_serializable = true;