diff --git a/crates/llm-chain-llama-sys/Cargo.toml b/crates/llm-chain-llama-sys/Cargo.toml
index d3665e7d..81fc2fcf 100644
--- a/crates/llm-chain-llama-sys/Cargo.toml
+++ b/crates/llm-chain-llama-sys/Cargo.toml
@@ -18,6 +18,3 @@ readme = "README.md"
 
 [build-dependencies]
 bindgen = "0.66"
-
-[features]
-cuda = []
diff --git a/crates/llm-chain-llama-sys/build.rs b/crates/llm-chain-llama-sys/build.rs
index 3c7b898c..5eb72606 100644
--- a/crates/llm-chain-llama-sys/build.rs
+++ b/crates/llm-chain-llama-sys/build.rs
@@ -23,7 +23,7 @@ fn main() {
     println!("cargo:rerun-if-changed=wrapper.h");
 
     // Check if CUDA is enabled for cuBlAS
-    let cuda_enabled = env::var("CARGO_FEATURE_CUDA").is_ok();
+    let cuda_enabled = env::var("LLM_CHAIN_CUDA").is_ok();
 
     if env::var("LLAMA_DONT_GENERATE_BINDINGS").is_ok() {
         let _: u64 = std::fs::copy(
@@ -99,8 +99,22 @@ fn main() {
         .arg("-DLLAMA_METAL=OFF");
     // .arg("-DLLAMA_STATIC=ON")
     if cuda_enabled {
-        // If CUDA feature is enabled, build with cuBlAS to enable GPU acceleration
+        // If CUDA is enabled, build with cuBLAS to enable GPU acceleration
+        if let Ok(cuda_lib_path) = env::var("LLM_CHAIN_CUDA_LIB_PATH") {
+            println!(
+                "cargo:rustc-link-search=native={}",
+                cuda_lib_path
+            );
+        } else {
+            panic!("LLM_CHAIN_CUDA_LIB_PATH is not set. Please set it to the library path of your CUDA installation.");
+        }
         code.arg("-DLLAMA_CUBLAS=ON");
+        code.arg("-DCMAKE_CUDA_FLAGS=-Xcompiler=-fPIC");
+        println!("cargo:rustc-link-lib=cuda");
+        println!("cargo:rustc-link-lib=cublas");
+        println!("cargo:rustc-link-lib=culibos");
+        println!("cargo:rustc-link-lib=cudart");
+        println!("cargo:rustc-link-lib=cublasLt");
     }
     let code = code.status().expect("Failed to generate build script");
     if code.code() != Some(0) {
diff --git a/crates/llm-chain-llama/README.md b/crates/llm-chain-llama/README.md
index 4137c04b..3e1e62d2 100644
--- a/crates/llm-chain-llama/README.md
+++ b/crates/llm-chain-llama/README.md
@@ -14,3 +14,25 @@
 - Prompts for working with `instruct` models, empowering you to easily build virtual assistants amazing applications 🧙‍♂️
 
 So gear up and dive into the fantastic world of LLM-Chain-LLaMa! Let the power of LLaMa-style models propel your projects to the next level. Happy coding, and enjoy the ride! 🎉🥳
+
+
+## CUDA Support
+CUDA support requires the [CUDA toolkit] to be installed on the system. It can
+then be enabled by setting the following environment variables:
+
+* `LLM_CHAIN_CUDA`
+Set this to `true` to enable CUDA support.
+
+* `LLM_CHAIN_CUDA_LIB_PATH`
+Set this to the path of the CUDA library directory. For example, on
+Fedora, this could be `/usr/local/cuda-12.2/lib64`.
+
+
+Example of building with CUDA support:
+```console
+$ env LLM_CHAIN_CUDA_LIB_PATH=/usr/local/cuda-12.2/lib64 LLM_CHAIN_CUDA=true cargo b -vv
+```
+Using `-vv` shows the output from the llama.cpp build process, which can be
+useful for debugging build issues.
+
+[CUDA toolkit]: https://developer.nvidia.com/cuda-downloads
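
For readers who want the gist of the new build-script behaviour without reading the full patch, here is a minimal, self-contained sketch of the environment-variable-driven CUDA setup that `build.rs` now follows. It is an illustration only, not the actual build script: the real script also configures and invokes CMake for llama.cpp (passing `-DLLAMA_CUBLAS=ON` and the `-fPIC` CUDA compiler flag), which is omitted here.

```rust
// build.rs (sketch) — condensed illustration of the env-var-driven CUDA
// setup introduced by this patch; the CMake invocation is omitted.
use std::env;

fn main() {
    // CUDA support is opt-in: setting LLM_CHAIN_CUDA (e.g. to "true") enables it.
    let cuda_enabled = env::var("LLM_CHAIN_CUDA").is_ok();

    if cuda_enabled {
        // The CUDA library directory must be supplied explicitly,
        // e.g. /usr/local/cuda-12.2/lib64 on Fedora.
        let cuda_lib_path = env::var("LLM_CHAIN_CUDA_LIB_PATH").expect(
            "LLM_CHAIN_CUDA_LIB_PATH is not set. \
             Please set it to the library path of your CUDA installation.",
        );

        // Tell rustc where to find the CUDA libraries and which ones to link.
        println!("cargo:rustc-link-search=native={}", cuda_lib_path);
        for lib in ["cuda", "cublas", "culibos", "cudart", "cublasLt"] {
            println!("cargo:rustc-link-lib={}", lib);
        }
    }
}
```

Because CUDA is now toggled by an environment variable rather than the removed `cuda` Cargo feature, no feature flag is needed at build time; the console example in the README hunk above shows the corresponding build invocation.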