diff --git a/.github/workflows/cpp-rllm.yml b/.github/workflows/cpp-rllm.yml
new file mode 100644
index 00000000..cf34abc3
--- /dev/null
+++ b/.github/workflows/cpp-rllm.yml
@@ -0,0 +1,24 @@
+name: rLLM with llama.cpp
+
+on:
+  push:
+    branches: [ "main" ]
+  pull_request:
+    branches: [ "main" ]
+
+env:
+  CARGO_TERM_COLOR: always
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v3
+    - uses: Swatinem/rust-cache@v2
+      with:
+        cache-on-failure: true
+    - name: Build cpp-rllm
+      run: cargo build --verbose --release --no-default-features
+      working-directory: cpp-rllm
diff --git a/Cargo.lock b/Cargo.lock
index 171888cb..205568a5 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -850,6 +850,7 @@ name = "cpp-rllm"
 version = "0.1.0"
 dependencies = [
  "actix-web",
+ "llama_cpp_low",
  "rllm",
 ]
 
diff --git a/cpp-rllm/Cargo.toml b/cpp-rllm/Cargo.toml
index 4b0f3bd8..7da01129 100644
--- a/cpp-rllm/Cargo.toml
+++ b/cpp-rllm/Cargo.toml
@@ -5,9 +5,13 @@ edition = "2021"
 
 [dependencies]
 actix-web = "4.4.0"
+llama_cpp_low = { path = "../llama-cpp-low" }
 rllm = { path = "../rllm", default-features = false, features = ["llamacpp"] }
 
 [[bin]]
 name = "cpp-rllm"
 path = "src/cpp-rllm.rs"
 
+[features]
+default = ["cuda"]
+cuda = ["llama_cpp_low/cuda"]
diff --git a/cpp-rllm/server.sh b/cpp-rllm/cpp-server.sh
similarity index 92%
rename from cpp-rllm/server.sh
rename to cpp-rllm/cpp-server.sh
index 55fe490b..1af4124a 100755
--- a/cpp-rllm/server.sh
+++ b/cpp-rllm/cpp-server.sh
@@ -20,6 +20,11 @@ if [ "$1" = bench ] ; then
     shift
 fi
 
+if [ "$1" = cpu ] ; then
+    REL="--release --no-default-features"
+    shift
+fi
+
 case "$1" in
   orca )
     ARGS="-m https://huggingface.co/TheBloke/Orca-2-13B-GGUF/blob/main/orca-2-13b.Q8_0.gguf -t orca"
diff --git a/llama-cpp-low/Cargo.toml b/llama-cpp-low/Cargo.toml
index 93bf71c7..0cbf62f0 100644
--- a/llama-cpp-low/Cargo.toml
+++ b/llama-cpp-low/Cargo.toml
@@ -11,3 +11,7 @@ num_cpus = "1.16.0"
 [build-dependencies]
 bindgen = "0.69.2"
 cmake = "0.1.50"
+
+[features]
+default = []
+cuda = []
diff --git a/llama-cpp-low/build.rs b/llama-cpp-low/build.rs
index d856c0dd..56641136 100644
--- a/llama-cpp-low/build.rs
+++ b/llama-cpp-low/build.rs
@@ -5,7 +5,7 @@ const SUBMODULE_DIR: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/llama.cpp");
 
 fn main() {
     let ccache = true;
-    let cuda = true;
+    let cuda = std::env::var("CARGO_FEATURE_CUDA").unwrap_or(String::new());
     let submodule_dir = &PathBuf::from(SUBMODULE_DIR);
     let header_path = submodule_dir.join("llama.h");
 
@@ -29,7 +29,7 @@
             .configure_arg("-DCMAKE_CUDA_COMPILER_LAUNCHER=ccache");
     }
 
-    if cuda {
+    if cuda == "1" {
         cmake.configure_arg("-DLLAMA_CUBLAS=ON");
         println!("cargo:rustc-link-search=/usr/local/cuda/lib64");
         println!("cargo:rustc-link-lib=cuda");
diff --git a/rllm/Cargo.toml b/rllm/Cargo.toml
index ee24d806..2cdbc570 100644
--- a/rllm/Cargo.toml
+++ b/rllm/Cargo.toml
@@ -53,5 +53,5 @@ rayon = "1.7.0"
 #default = ["llamacpp"]
 default = ["tch", "cuda"]
 tch = ["dep:tch", "dep:torch-sys"]
-cuda = ["tch", "dep:tch-cuda", "dep:cudarc"]
+cuda = ["tch", "dep:tch-cuda", "dep:cudarc", "llama_cpp_low?/cuda"]
 llamacpp = ["dep:llama_cpp_low"]
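
Usage note (not part of the patch; a sketch inferred from the workflow and cpp-server.sh changes above): after this change, CUDA support in cpp-rllm is driven entirely by Cargo features. Cargo exports CARGO_FEATURE_CUDA=1 into build.rs whenever the `cuda` feature is enabled, which is what the new `if cuda == "1"` check reads.

    # Default build: cpp-rllm's default "cuda" feature forwards to
    # llama_cpp_low/cuda, so build.rs passes -DLLAMA_CUBLAS=ON to cmake.
    cd cpp-rllm && cargo build --release

    # CPU-only build, as run in CI and by `./cpp-server.sh cpu ...`:
    cd cpp-rllm && cargo build --release --no-default-features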