From 21f3b2da305cdaa596799028c13d16843ade4cc3 Mon Sep 17 00:00:00 2001
From: Jacky Chen
Date: Sun, 29 Dec 2024 21:42:54 -0500
Subject: [PATCH] fix(docs): Fix outdated documentation for llama cpp (#7426)

---
 .../docs/integrations/chat/llama_cpp.mdx      | 8 +--
 .../docs/integrations/llms/llama_cpp.mdx      | 60 ++++++++-----------
 .../integrations/text_embedding/llama_cpp.mdx | 6 +-
 examples/src/embeddings/llama_cpp_basic.ts    | 2 +-
 examples/src/embeddings/llama_cpp_docs.ts     | 2 +-
 .../src/models/chat/integration_llama_cpp.ts  | 2 +-
 .../chat/integration_llama_cpp_chain.ts       | 2 +-
 .../chat/integration_llama_cpp_stream.ts      | 2 +-
 .../integration_llama_cpp_stream_invoke.ts    | 2 +-
 .../integration_llama_cpp_stream_multi.ts     | 2 +-
 .../chat/integration_llama_cpp_system.ts      | 2 +-
 examples/src/models/llm/llama_cpp.ts          | 2 +-
 examples/src/models/llm/llama_cpp_stream.ts   | 2 +-
 .../src/chat_models/llama_cpp.ts              | 4 +-
 .../langchain-community/src/llms/llama_cpp.ts | 2 +-
 15 files changed, 46 insertions(+), 54 deletions(-)

diff --git a/docs/core_docs/docs/integrations/chat/llama_cpp.mdx b/docs/core_docs/docs/integrations/chat/llama_cpp.mdx
index dbff7089dece..5123b96be70e 100644
--- a/docs/core_docs/docs/integrations/chat/llama_cpp.mdx
+++ b/docs/core_docs/docs/integrations/chat/llama_cpp.mdx
@@ -22,11 +22,11 @@ import IntegrationInstallTooltip from "@mdx_components/integration_install_toolt
npm install -S node-llama-cpp@3 @langchain/community @langchain/core
```
-You will also need a local Llama 2 model (or a model supported by [node-llama-cpp](https://github.com/withcatai/node-llama-cpp)). You will need to pass the path to this model to the LlamaCpp module as a part of the parameters (see example).
+You will also need a local Llama 3 model (or a model supported by [node-llama-cpp](https://github.com/withcatai/node-llama-cpp)). You will need to pass the path to this model to the LlamaCpp module as a part of the parameters (see example).
Out-of-the-box `node-llama-cpp` is tuned for running on a MacOS platform with support for the Metal GPU of Apple M-series of processors. If you need to turn this off or need support for the CUDA architecture then refer to the documentation at [node-llama-cpp](https://withcatai.github.io/node-llama-cpp/).
-For advice on getting and preparing `llama2` see the documentation for the LLM version of this module.
+For advice on getting and preparing `llama3` see the documentation for the LLM version of this module.
A note to LangChain.js contributors: if you want to run the tests associated with this module you will need to put the path to your local model in the environment variable `LLAMA_PATH`.
@@ -51,7 +51,7 @@ import SystemExample from "@examples/models/chat/integration_llama_cpp_system.ts
### Chains
-This module can also be used with chains, note that using more complex chains will require suitably powerful version of `llama2` such as the 70B version.
+This module can also be used with chains; note that using more complex chains will require a suitably powerful version of `llama3`, such as the 70B version.
import ChainExample from "@examples/models/chat/integration_llama_cpp_chain.ts";
@@ -65,7 +65,7 @@ import StreamExample from "@examples/models/chat/integration_llama_cpp_stream.ts
{StreamExample}
-Or you can provide multiple messages, note that this takes the input and then submits a Llama2 formatted prompt to the model.
+Or you can provide multiple messages; note that this takes the input and then submits a Llama 3 formatted prompt to the model.
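For orientation, here is a minimal sketch of such a multi-message streaming call (the maintained example is imported below). It assumes the `ChatLlamaCpp.initialize` factory and the quantized `gguf-llama3-Q4_0.bin` path used throughout this patch; the exact messages are illustrative only.

```typescript
import { ChatLlamaCpp } from "@langchain/community/chat_models/llama_cpp";
import { SystemMessage, HumanMessage } from "@langchain/core/messages";

// Adjust this to wherever your quantized model lives.
const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin";

const model = await ChatLlamaCpp.initialize({ modelPath: llamaPath });

// Stream over a system + human message pair; each chunk carries a partial response.
const stream = await model.stream([
  new SystemMessage("You are a pirate, respond in pirate dialect."),
  new HumanMessage("How are you today?"),
]);

for await (const chunk of stream) {
  console.log(chunk.content);
}
```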
import StreamMultiExample from "@examples/models/chat/integration_llama_cpp_stream_multi.ts";
diff --git a/docs/core_docs/docs/integrations/llms/llama_cpp.mdx b/docs/core_docs/docs/integrations/llms/llama_cpp.mdx
index 508229ac13b6..0601edcbe0c4 100644
--- a/docs/core_docs/docs/integrations/llms/llama_cpp.mdx
+++ b/docs/core_docs/docs/integrations/llms/llama_cpp.mdx
@@ -26,40 +26,28 @@ import IntegrationInstallTooltip from "@mdx_components/integration_install_toolt
npm install @langchain/community @langchain/core
```
-You will also need a local Llama 2 model (or a model supported by [node-llama-cpp](https://github.com/withcatai/node-llama-cpp)). You will need to pass the path to this model to the LlamaCpp module as a part of the parameters (see example).
+You will also need a local Llama 3 model (or a model supported by [node-llama-cpp](https://github.com/withcatai/node-llama-cpp)). You will need to pass the path to this model to the LlamaCpp module as a part of the parameters (see example).
Out-of-the-box `node-llama-cpp` is tuned for running on a MacOS platform with support for the Metal GPU of Apple M-series of processors. If you need to turn this off or need support for the CUDA architecture then refer to the documentation at [node-llama-cpp](https://withcatai.github.io/node-llama-cpp/).
A note to LangChain.js contributors: if you want to run the tests associated with this module you will need to put the path to your local model in the environment variable `LLAMA_PATH`.
-## Guide to installing Llama2
+## Guide to installing Llama3
-Getting a local Llama2 model running on your machine is a pre-req so this is a quick guide to getting and building Llama 7B (the smallest) and then quantizing it so that it will run comfortably on a laptop. To do this you will need `python3` on your machine (3.11 is recommended), also `gcc` and `make` so that `llama.cpp` can be built.
+Getting a local Llama3 model running on your machine is a prerequisite, so this is a quick guide to getting and building Llama 3.1-8B (the smallest) and then quantizing it so that it will run comfortably on a laptop. To do this you will need `python3` on your machine (3.11 is recommended), as well as `gcc` and `make` so that `llama.cpp` can be built.
-### Getting the Llama2 models
+### Getting the Llama3 models
-To get a copy of Llama2 you need to visit [Meta AI](https://ai.meta.com/resources/models-and-libraries/llama-downloads/) and request access to their models. Once Meta AI grant you access, you will receive an email containing a unique URL to access the files, this will be needed in the next steps.
+To get a copy of Llama3 you need to visit [Meta AI](https://ai.meta.com/resources/models-and-libraries/llama-downloads/) and request access to their models. Once Meta AI grants you access, you will receive an email containing a unique URL to access the files; this will be needed in the next steps.
Now create a directory to work in, for example:
```
-mkdir llama2
-cd llama2
+mkdir llama3
+cd llama3
```
-Now we need to get the Meta AI `llama` repo in place so we can download the model.
-
-```
-git clone https://github.com/facebookresearch/llama.git
-```
-
-Once we have this in place we can change into this directory and run the downloader script to get the model we will be working with. Note: From here on its assumed that the model in use is `llama-2–7b`, if you select a different model don't forget to change the references to the model accordingly.
-
-```
-cd llama
-/bin/bash ./download.sh
-```
-
-This script will ask you for the URL that Meta AI sent to you (see above), you will also select the model to download, in this case we used `llama-2–7b`. Once this step has completed successfully (this can take some time, the `llama-2–7b` model is around 13.5Gb) there should be a new `llama-2–7b` directory containing the model and other files.
+Now we need to go to the Meta AI `llama-models` repo, which can be found [here](https://github.com/meta-llama/llama-models). In the repo, there are instructions to download the model of your choice, and you should use the unique URL you received in your email.
+The rest of the tutorial assumes that you have downloaded `Llama3.1-8B`, but any model from here on out should work. Upon downloading the model, make sure to save the model download path, as it will be needed later.
### Converting and quantizing the model
@@ -71,38 +59,42 @@ git clone https://github.com/ggerganov/llama.cpp.git
cd llama.cpp
-Now we need to build the `llama.cpp` tools and set up our `python` environment. In these steps it's assumed that your install of python can be run using `python3` and that the virtual environment can be called `llama2`, adjust accordingly for your own situation.
+Now we need to build the `llama.cpp` tools and set up our `python` environment. In these steps it's assumed that your install of python can be run using `python3` and that the virtual environment can be called `llama3`; adjust accordingly for your own situation.
```
-make
-python3 -m venv llama2
-source llama2/bin/activate
+cmake -B build
+cmake --build build --config Release
+python3 -m venv llama3
+source llama3/bin/activate
```
-After activating your llama2 environment you should see `(llama2)` prefixing your command prompt to let you know this is the active environment. Note: if you need to come back to build another model or re-quantize the model don't forget to activate the environment again also if you update `llama.cpp` you will need to rebuild the tools and possibly install new or updated dependencies! Now that we have an active python environment, we need to install the python dependencies.
+After activating your llama3 environment you should see `(llama3)` prefixing your command prompt to let you know this is the active environment. Note: if you need to come back to build another model or re-quantize the model, don't forget to activate the environment again; also, if you update `llama.cpp` you will need to rebuild the tools and possibly install new or updated dependencies! Now that we have an active python environment, we need to install the python dependencies.
```
python3 -m pip install -r requirements.txt
```
-Having done this, we can start converting and quantizing the Llama2 model ready for use locally via `llama.cpp`.
-First, we need to convert the model, prior to the conversion let's create a directory to store it in.
+Having done this, we can start converting and quantizing the Llama3 model ready for use locally via `llama.cpp`. A conversion to a Hugging Face model is needed, followed by a conversion to a GGUF model.
+First, we need to locate the `convert_llama_weights_to_hf.py` script (it is provided by the Hugging Face `transformers` library) and copy it into your current working directory. Note that using the script may require you to `pip install` extra dependencies; do so as needed.
+Then we need to convert the model; prior to the conversion, let's create directories to store our Hugging Face conversion and our final model.
```
-mkdir models/7B
-python3 convert.py --outfile models/7B/gguf-llama2-f16.bin --outtype f16 ../../llama2/llama/llama-2-7b --vocab-dir ../../llama2/llama/llama-2-7b
+mkdir models/8B
+mkdir models/8B-GGUF
+python3 convert_llama_weights_to_hf.py --model_size 8B --input_dir <path-to-your-Llama3.1-8B-download> --output_dir models/8B --llama_version 3
+python3 convert_hf_to_gguf.py --outtype f16 --outfile models/8B-GGUF/gguf-llama3-f16.bin models/8B
```
-This should create a converted model called `gguf-llama2-f16.bin` in the directory we just created. Note that this is just a converted model so it is also around 13.5Gb in size, in the next step we will quantize it down to around 4Gb.
+This should create a converted Hugging Face model and the final GGUF model in the directories we have created. Note that this is just a converted model, so it is also around 16Gb in size; in the next step we will quantize it down to around 4Gb.
```
-./quantize ./models/7B/gguf-llama2-f16.bin ./models/7B/gguf-llama2-q4_0.bin q4_0
+./build/bin/llama-quantize ./models/8B-GGUF/gguf-llama3-f16.bin ./models/8B-GGUF/gguf-llama3-Q4_0.bin Q4_0
```
-Running this should result in a new model being created in the `models\7B` directory, this one called `gguf-llama2-q4_0.bin`, this is the model we can use with langchain. You can validate this model is working by testing it using the `llama.cpp` tools.
+Running this should result in a new model being created in the `models/8B-GGUF` directory, this one called `gguf-llama3-Q4_0.bin`; this is the model we can use with LangChain. You can validate this model is working by testing it using the `llama.cpp` tools.
```
-./main -m ./models/7B/gguf-llama2-q4_0.bin -n 1024 --repeat_penalty 1.0 --color -i -r "User:" -f ./prompts/chat-with-bob.txt
+./build/bin/llama-cli -m ./models/8B-GGUF/gguf-llama3-Q4_0.bin -cnv -p "You are a helpful assistant"
```
Running this command fires up the model for a chat session. BTW if you are running out of disk space this small model is the only one we need, so you can backup and/or delete the original and converted 13.5Gb models.
diff --git a/docs/core_docs/docs/integrations/text_embedding/llama_cpp.mdx b/docs/core_docs/docs/integrations/text_embedding/llama_cpp.mdx
index 35ec34988a9c..3ed1f10dc272 100644
--- a/docs/core_docs/docs/integrations/text_embedding/llama_cpp.mdx
+++ b/docs/core_docs/docs/integrations/text_embedding/llama_cpp.mdx
@@ -26,11 +26,11 @@ import IntegrationInstallTooltip from "@mdx_components/integration_install_toolt
npm install @langchain/community @langchain/core
```
-You will also need a local Llama 2 model (or a model supported by [node-llama-cpp](https://github.com/withcatai/node-llama-cpp)). You will need to pass the path to this model to the LlamaCpp module as a part of the parameters (see example).
+You will also need a local Llama 3 model (or a model supported by [node-llama-cpp](https://github.com/withcatai/node-llama-cpp)). You will need to pass the path to this model to the LlamaCpp module as a part of the parameters (see example).
Out-of-the-box `node-llama-cpp` is tuned for running on a MacOS platform with support for the Metal GPU of Apple M-series of processors. If you need to turn this off or need support for the CUDA architecture then refer to the documentation at [node-llama-cpp](https://withcatai.github.io/node-llama-cpp/).
-For advice on getting and preparing `llama2` see the documentation for the LLM version of this module.
+For advice on getting and preparing `llama3` see the documentation for the LLM version of this module.
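As a quick illustration of the embeddings integration documented on this page, here is a minimal sketch. It assumes the `LlamaCppEmbeddings.initialize` factory used in the examples referenced below, together with the standard `embedQuery` and `embedDocuments` methods.

```typescript
import { LlamaCppEmbeddings } from "@langchain/community/embeddings/llama_cpp";

// Adjust this to wherever your quantized model lives.
const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin";

const embeddings = await LlamaCppEmbeddings.initialize({ modelPath: llamaPath });

// Embed a single query string.
const queryVector = await embeddings.embedQuery("Hello Llama!");
console.log(queryVector.length);

// Embed a batch of documents in one call.
const documentVectors = await embeddings.embedDocuments(["Hello World!", "Bye Bye!"]);
console.log(documentVectors.length);
```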
A note to LangChain.js contributors: if you want to run the tests associated with this module you will need to put the path to your local model in the environment variable `LLAMA_PATH`. @@ -38,7 +38,7 @@ A note to LangChain.js contributors: if you want to run the tests associated wit ### Basic use -We need to provide a path to our local Llama2 model, also the `embeddings` property is always set to `true` in this module. +We need to provide a path to our local Llama3 model, also the `embeddings` property is always set to `true` in this module. import CodeBlock from "@theme/CodeBlock"; import BasicExample from "@examples/embeddings/llama_cpp_basic.ts"; diff --git a/examples/src/embeddings/llama_cpp_basic.ts b/examples/src/embeddings/llama_cpp_basic.ts index cf89ffd4262f..a26e6877d0d5 100644 --- a/examples/src/embeddings/llama_cpp_basic.ts +++ b/examples/src/embeddings/llama_cpp_basic.ts @@ -1,6 +1,6 @@ import { LlamaCppEmbeddings } from "@langchain/community/embeddings/llama_cpp"; -const llamaPath = "/Replace/with/path/to/your/model/gguf-llama2-q4_0.bin"; +const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin"; const embeddings = await LlamaCppEmbeddings.initialize({ modelPath: llamaPath, diff --git a/examples/src/embeddings/llama_cpp_docs.ts b/examples/src/embeddings/llama_cpp_docs.ts index 19e9ee404abe..b416c4769857 100644 --- a/examples/src/embeddings/llama_cpp_docs.ts +++ b/examples/src/embeddings/llama_cpp_docs.ts @@ -1,6 +1,6 @@ import { LlamaCppEmbeddings } from "@langchain/community/embeddings/llama_cpp"; -const llamaPath = "/Replace/with/path/to/your/model/gguf-llama2-q4_0.bin"; +const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin"; const documents = ["Hello World!", "Bye Bye!"]; diff --git a/examples/src/models/chat/integration_llama_cpp.ts b/examples/src/models/chat/integration_llama_cpp.ts index bdd2f7818c3c..18a112fa17e9 100644 --- a/examples/src/models/chat/integration_llama_cpp.ts +++ b/examples/src/models/chat/integration_llama_cpp.ts @@ -1,7 +1,7 @@ import { ChatLlamaCpp } from "@langchain/community/chat_models/llama_cpp"; import { HumanMessage } from "@langchain/core/messages"; -const llamaPath = "/Replace/with/path/to/your/model/gguf-llama2-q4_0.bin"; +const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin"; const model = await ChatLlamaCpp.initialize({ modelPath: llamaPath }); diff --git a/examples/src/models/chat/integration_llama_cpp_chain.ts b/examples/src/models/chat/integration_llama_cpp_chain.ts index 3499929b7ef7..1a016ded6da6 100644 --- a/examples/src/models/chat/integration_llama_cpp_chain.ts +++ b/examples/src/models/chat/integration_llama_cpp_chain.ts @@ -2,7 +2,7 @@ import { ChatLlamaCpp } from "@langchain/community/chat_models/llama_cpp"; import { LLMChain } from "langchain/chains"; import { PromptTemplate } from "@langchain/core/prompts"; -const llamaPath = "/Replace/with/path/to/your/model/gguf-llama2-q4_0.bin"; +const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin"; const model = await ChatLlamaCpp.initialize({ modelPath: llamaPath, diff --git a/examples/src/models/chat/integration_llama_cpp_stream.ts b/examples/src/models/chat/integration_llama_cpp_stream.ts index 33697fedd876..addd2dbf2cac 100644 --- a/examples/src/models/chat/integration_llama_cpp_stream.ts +++ b/examples/src/models/chat/integration_llama_cpp_stream.ts @@ -1,6 +1,6 @@ import { ChatLlamaCpp } from "@langchain/community/chat_models/llama_cpp"; -const llamaPath = 
"/Replace/with/path/to/your/model/gguf-llama2-q4_0.bin"; +const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin"; const model = await ChatLlamaCpp.initialize({ modelPath: llamaPath, diff --git a/examples/src/models/chat/integration_llama_cpp_stream_invoke.ts b/examples/src/models/chat/integration_llama_cpp_stream_invoke.ts index f452b9764fd8..3a7d7217f7dd 100644 --- a/examples/src/models/chat/integration_llama_cpp_stream_invoke.ts +++ b/examples/src/models/chat/integration_llama_cpp_stream_invoke.ts @@ -1,7 +1,7 @@ import { ChatLlamaCpp } from "@langchain/community/chat_models/llama_cpp"; import { SystemMessage, HumanMessage } from "@langchain/core/messages"; -const llamaPath = "/Replace/with/path/to/your/model/gguf-llama2-q4_0.bin"; +const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin"; const model = await ChatLlamaCpp.initialize({ modelPath: llamaPath, diff --git a/examples/src/models/chat/integration_llama_cpp_stream_multi.ts b/examples/src/models/chat/integration_llama_cpp_stream_multi.ts index 9d2d337d0284..d2fce6da9919 100644 --- a/examples/src/models/chat/integration_llama_cpp_stream_multi.ts +++ b/examples/src/models/chat/integration_llama_cpp_stream_multi.ts @@ -1,7 +1,7 @@ import { ChatLlamaCpp } from "@langchain/community/chat_models/llama_cpp"; import { SystemMessage, HumanMessage } from "@langchain/core/messages"; -const llamaPath = "/Replace/with/path/to/your/model/gguf-llama2-q4_0.bin"; +const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin"; const llamaCpp = await ChatLlamaCpp.initialize({ modelPath: llamaPath, diff --git a/examples/src/models/chat/integration_llama_cpp_system.ts b/examples/src/models/chat/integration_llama_cpp_system.ts index ec53a8aac4b7..a97174941d95 100644 --- a/examples/src/models/chat/integration_llama_cpp_system.ts +++ b/examples/src/models/chat/integration_llama_cpp_system.ts @@ -1,7 +1,7 @@ import { ChatLlamaCpp } from "@langchain/community/chat_models/llama_cpp"; import { SystemMessage, HumanMessage } from "@langchain/core/messages"; -const llamaPath = "/Replace/with/path/to/your/model/gguf-llama2-q4_0.bin"; +const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin"; const model = await ChatLlamaCpp.initialize({ modelPath: llamaPath }); diff --git a/examples/src/models/llm/llama_cpp.ts b/examples/src/models/llm/llama_cpp.ts index da7d8b487930..f7a7f696ff66 100644 --- a/examples/src/models/llm/llama_cpp.ts +++ b/examples/src/models/llm/llama_cpp.ts @@ -1,6 +1,6 @@ import { LlamaCpp } from "@langchain/community/llms/llama_cpp"; -const llamaPath = "/Replace/with/path/to/your/model/gguf-llama2-q4_0.bin"; +const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin"; const question = "Where do Llamas come from?"; const model = await LlamaCpp.initialize({ modelPath: llamaPath }); diff --git a/examples/src/models/llm/llama_cpp_stream.ts b/examples/src/models/llm/llama_cpp_stream.ts index 022da280ff5d..c5465d3fd76d 100644 --- a/examples/src/models/llm/llama_cpp_stream.ts +++ b/examples/src/models/llm/llama_cpp_stream.ts @@ -1,6 +1,6 @@ import { LlamaCpp } from "@langchain/community/llms/llama_cpp"; -const llamaPath = "/Replace/with/path/to/your/model/gguf-llama2-q4_0.bin"; +const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin"; const model = await LlamaCpp.initialize({ modelPath: llamaPath, diff --git a/libs/langchain-community/src/chat_models/llama_cpp.ts b/libs/langchain-community/src/chat_models/llama_cpp.ts index 960228c1bb29..1752652dadff 
100644
--- a/libs/langchain-community/src/chat_models/llama_cpp.ts
+++ b/libs/langchain-community/src/chat_models/llama_cpp.ts
@@ -47,12 +47,12 @@ export interface LlamaCppCallOptions extends BaseLanguageModelCallOptions {
 * To use this model you need to have the `node-llama-cpp` module installed.
 * This can be installed using `npm install -S node-llama-cpp` and the minimum
 * version supported in version 2.0.0.
- * This also requires that have a locally built version of Llama2 installed.
+ * This also requires that you have a locally built version of Llama3 installed.
 * @example
 * ```typescript
 * // Initialize the ChatLlamaCpp model with the path to the model binary file.
 * const model = await ChatLlamaCpp.initialize({
- *   modelPath: "/Replace/with/path/to/your/model/gguf-llama2-q4_0.bin",
+ *   modelPath: "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin",
 *   temperature: 0.5,
 * });
 *
diff --git a/libs/langchain-community/src/llms/llama_cpp.ts b/libs/langchain-community/src/llms/llama_cpp.ts
index 24fcc529a864..0d344c605334 100644
--- a/libs/langchain-community/src/llms/llama_cpp.ts
+++ b/libs/langchain-community/src/llms/llama_cpp.ts
@@ -42,7 +42,7 @@ export interface LlamaCppCallOptions extends BaseLLMCallOptions {
 * To use this model you need to have the `node-llama-cpp` module installed.
 * This can be installed using `npm install -S node-llama-cpp` and the minimum
 * version supported in version 2.0.0.
- * This also requires that have a locally built version of Llama2 installed.
+ * This also requires that you have a locally built version of Llama3 installed.
 */
export class LlamaCpp extends LLM {
  lc_serializable = true;
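For completeness, here is a minimal sketch of calling the `LlamaCpp` LLM wrapper patched above against the quantized Llama 3 model. It assumes the `LlamaCpp.initialize` factory shown in the examples and the standard `invoke` method; the prompt is illustrative only.

```typescript
import { LlamaCpp } from "@langchain/community/llms/llama_cpp";

// Adjust this to wherever your quantized model lives.
const llamaPath = "/Replace/with/path/to/your/model/gguf-llama3-Q4_0.bin";

const question = "Where do Llamas come from?";

const model = await LlamaCpp.initialize({ modelPath: llamaPath });

// Single-turn completion; `invoke` resolves to the generated string.
const response = await model.invoke(question);
console.log(response);
```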