diff --git a/cpp-rllm/Cargo.toml b/cpp-rllm/Cargo.toml index 7da01129..a9708a45 100644 --- a/cpp-rllm/Cargo.toml +++ b/cpp-rllm/Cargo.toml @@ -13,5 +13,5 @@ name = "cpp-rllm" path = "src/cpp-rllm.rs" [features] -default = ["cuda"] +default = [] cuda = ["llama_cpp_low/cuda"] diff --git a/cpp-rllm/cpp-server.sh b/cpp-rllm/cpp-server.sh index 1af4124a..6d73343e 100755 --- a/cpp-rllm/cpp-server.sh +++ b/cpp-rllm/cpp-server.sh @@ -15,17 +15,27 @@ if [ "X$P" != "X" ] ; then kill $P fi -if [ "$1" = bench ] ; then +VER="--no-default-features" + +if [ "$1" = gpu ] ; then REL=--release + VER="$VER --features cuda" shift -fi - -if [ "$1" = cpu ] ; then - REL="--release --no-default-features" +elif [ "$1" = cpu ] ; then + REL=--release shift +elif [ "$1" = debug ] ; then + REL= + shift +else + echo "usage: $0 [gpu|cpu|debug] [phi2|orca|build]" + exit 1 fi case "$1" in + phi2 ) + ARGS="-m https://huggingface.co/TheBloke/phi-2-GGUF/blob/main/phi-2.Q8_0.gguf -t phi" + ;; orca ) ARGS="-m https://huggingface.co/TheBloke/Orca-2-13B-GGUF/blob/main/orca-2-13b.Q8_0.gguf -t orca" ;; @@ -34,7 +44,7 @@ case "$1" in REL=--release ;; * ) - echo "try one of models: phi, phi2, 7b, code, code34" + echo "try one of models: phi2, orca" exit 1 ;; esac @@ -44,7 +54,7 @@ ARGS="--verbose --port 8080 --aicirt $BIN/release/aicirt $ARGS $ADD_ARGS" (cd ../aicirt; cargo build --release) -cargo build $REL +cargo build $REL $VER if [ "$BUILD" = "1" ] ; then exit