diff --git a/.github/workflows/cpp-rllm.yml b/.github/workflows/cpp-rllm.yml
new file mode 100644
index 00000000..cf34abc3
--- /dev/null
+++ b/.github/workflows/cpp-rllm.yml
@@ -0,0 +1,24 @@
+name: rLLM with llama.cpp
+
+on:
+  push:
+    branches: [ "main" ]
+  pull_request:
+    branches: [ "main" ]
+
+env:
+  CARGO_TERM_COLOR: always
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v3
+    - uses: Swatinem/rust-cache@v2
+      with:
+        cache-on-failure: true
+    - name: Build cpp-rllm
+      run: cargo build --verbose --release --no-default-features
+      working-directory: cpp-rllm
diff --git a/Cargo.lock b/Cargo.lock
index 171888cb..205568a5 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -850,6 +850,7 @@ name = "cpp-rllm"
 version = "0.1.0"
 dependencies = [
  "actix-web",
+ "llama_cpp_low",
  "rllm",
 ]
 
diff --git a/cpp-rllm/Cargo.toml b/cpp-rllm/Cargo.toml
index 4b0f3bd8..7da01129 100644
--- a/cpp-rllm/Cargo.toml
+++ b/cpp-rllm/Cargo.toml
@@ -5,9 +5,13 @@ edition = "2021"
 
 [dependencies]
 actix-web = "4.4.0"
+llama_cpp_low = { path = "../llama-cpp-low" }
 rllm = { path = "../rllm", default-features = false, features = ["llamacpp"] }
 
 [[bin]]
 name = "cpp-rllm"
 path = "src/cpp-rllm.rs"
 
+[features]
+default = ["cuda"]
+cuda = ["llama_cpp_low/cuda"]
diff --git a/cpp-rllm/server.sh b/cpp-rllm/cpp-server.sh
similarity index 92%
rename from cpp-rllm/server.sh
rename to cpp-rllm/cpp-server.sh
index 55fe490b..1af4124a 100755
--- a/cpp-rllm/server.sh
+++ b/cpp-rllm/cpp-server.sh
@@ -20,6 +20,11 @@ if [ "$1" = bench ] ; then
     shift
 fi
 
+if [ "$1" = cpu ] ; then
+    REL="--release --no-default-features"
+    shift
+fi
+
 case "$1" in
   orca )
     ARGS="-m https://huggingface.co/TheBloke/Orca-2-13B-GGUF/blob/main/orca-2-13b.Q8_0.gguf -t orca"
diff --git a/llama-cpp-low/Cargo.toml b/llama-cpp-low/Cargo.toml
index 93bf71c7..0cbf62f0 100644
--- a/llama-cpp-low/Cargo.toml
+++ b/llama-cpp-low/Cargo.toml
@@ -11,3 +11,7 @@ num_cpus = "1.16.0"
 [build-dependencies]
 bindgen = "0.69.2"
 cmake = "0.1.50"
+
+[features]
+default = []
+cuda = []
diff --git a/llama-cpp-low/build.rs b/llama-cpp-low/build.rs
index d856c0dd..56641136 100644
--- a/llama-cpp-low/build.rs
+++ b/llama-cpp-low/build.rs
@@ -5,7 +5,7 @@ const SUBMODULE_DIR: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/llama.cpp");
 
 fn main() {
     let ccache = true;
-    let cuda = true;
+    let cuda = std::env::var("CARGO_FEATURE_CUDA").unwrap_or(String::new());
     let submodule_dir = &PathBuf::from(SUBMODULE_DIR);
     let header_path = submodule_dir.join("llama.h");
 
@@ -29,7 +29,7 @@
             .configure_arg("-DCMAKE_CUDA_COMPILER_LAUNCHER=ccache");
     }
 
-    if cuda {
+    if cuda == "1" {
         cmake.configure_arg("-DLLAMA_CUBLAS=ON");
         println!("cargo:rustc-link-search=/usr/local/cuda/lib64");
         println!("cargo:rustc-link-lib=cuda");
diff --git a/rllm/Cargo.toml b/rllm/Cargo.toml
index ee24d806..2cdbc570 100644
--- a/rllm/Cargo.toml
+++ b/rllm/Cargo.toml
@@ -53,5 +53,5 @@ rayon = "1.7.0"
 #default = ["llamacpp"]
 default = ["tch", "cuda"]
 tch = ["dep:tch", "dep:torch-sys"]
-cuda = ["tch", "dep:tch-cuda", "dep:cudarc"]
+cuda = ["tch", "dep:tch-cuda", "dep:cudarc", "llama_cpp_low?/cuda"]
 llamacpp = ["dep:llama_cpp_low"]
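
Usage note (not part of the patch; a sketch inferred from the workflow and cpp-server.sh changes above): after this change, CUDA support in cpp-rllm is driven entirely by Cargo features. Cargo exports CARGO_FEATURE_CUDA=1 into build.rs whenever the `cuda` feature is enabled, which is what the new `if cuda == "1"` check reads.

    # Default build: cpp-rllm's default "cuda" feature forwards to
    # llama_cpp_low/cuda, so build.rs passes -DLLAMA_CUBLAS=ON to cmake.
    cd cpp-rllm && cargo build --release

    # CPU-only build, as run in CI and by `./cpp-server.sh cpu ...`:
    cd cpp-rllm && cargo build --release --no-default-features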