diff --git a/README.md b/README.md
index 211145ed..db2da668 100644
--- a/README.md
+++ b/README.md
@@ -129,6 +129,20 @@ rustup target add wasm32-wasi
 rustup component add rustfmt
 ```
 
+### Running local server
+
+If you have CUDA, go to `rllm/` and run `./server.sh orca`.
+This runs the inference server with the Orca-2 13B model (which is expected by the test cases).
+
+If you don't have CUDA, go to `cpp-rllm/` and run `./cpp-server.sh phi2`.
+
+Both of these commands first compile aicirt and the inference engine,
+and then start the server.
+You can also try other models; see the README files for [rllm](rllm/README.md) and
+[cpp-rllm](cpp-rllm/README.md), as well as the shell scripts themselves, for details.
+
+The command line arguments accepted by the server (`rllm_args`) can be listed with `./server.sh phi2 --help` or `./cpp-server.sh phi2 --help`.
+
 ### Interacting with server
 
 To get started interacting with a cloud AICI server first export the API key.
@@ -152,14 +166,6 @@ Run `./aici.sh -h` to see usage info.
 If the server is running with Orca-2 13B model,
 you can also run tests with `pytest` for the DeclCtrl, or with `./scripts/test-pyctrl.sh` for PyCtrl.
 
-### Running local server
-
-To run rLLM server, go to `rllm/` and run `./server.sh orca`.
-This will run the inference server with Orca-2 13B model (which is expected by testcases).
-If you don't have CUDA, go to `cpp-rllm/` and run `./cpp-server.sh phi2`.
-You can also try other models, see [rllm/README.md](rllm/README.md) and
-[cpp-rllm/README.md](cpp-rllm/README.md) for details.
-
 ## Security
 
 - `aicirt` runs in a separate process, and can run under a different user than the LLM engine
diff --git a/cpp-rllm/cpp-server.sh b/cpp-rllm/cpp-server.sh
index 68547b14..463e7d87 100755
--- a/cpp-rllm/cpp-server.sh
+++ b/cpp-rllm/cpp-server.sh
@@ -46,6 +46,7 @@ case "$1" in
         ;;
     * )
        echo "usage: $0 [--cuda] [--debug] [phi2|orca|build] [rllm_args...]"
+       echo "Try $0 phi2 --help to see available rllm_args"
        exit 1
        ;;
 esac
diff --git a/rllm/README.md b/rllm/README.md
index bc0b0dde..884933f1 100644
--- a/rllm/README.md
+++ b/rllm/README.md
@@ -1,7 +1,9 @@
 # rLLM
 
 This is a partial port of [vLLM](https://github.com/vllm-project/vllm)
-to Rust and [tch-rs](https://github.com/LaurentMazare/tch-rs).
+to Rust and [tch-rs](https://github.com/LaurentMazare/tch-rs)
+(bindings for [libtorch](https://github.com/pytorch/pytorch/blob/main/docs/libtorch.rst),
+which is the basis of [PyTorch](https://github.com/pytorch/pytorch)).
 It is mostly meant as a proving ground for AICI (AI Controller Interface)
 integration.
 
diff --git a/rllm/server.sh b/rllm/server.sh
index 98a71328..8353412b 100755
--- a/rllm/server.sh
+++ b/rllm/server.sh
@@ -1,7 +1,7 @@
 #!/bin/sh
 set -e
 
-REL=
+REL=--release
 LOOP=
 BUILD=
 ADD_ARGS=
@@ -22,20 +22,13 @@ if [ "X$P" != "X" ] ; then
     kill $P
 fi
 
-if [ "$1" = loop ] ; then
-    REL=--release
+if [ "$1" = "--loop" ] ; then
     LOOP=1
     shift
 fi
 
-if [ "$1" = bench ] ; then
-    REL=--release
-    shift
-fi
-
-if [ "$1" = warm ] ; then
-    REL=--release
-    ADD_ARGS="--warmup-only"
+if [ "$1" = "--debug" ] ; then
+    REL=
     shift
 fi
 
@@ -64,7 +57,8 @@ case "$1" in
        REL=--release
        ;;
     * )
-       echo "try one of models: phi, phi2, 7b, code, code34"
+       echo "usage: $0 [--loop] [--debug] [phi|phi2|7b|code|orca|build] [rllm_args...]"
+       echo "Try $0 phi2 --help to see available rllm_args"
        exit 1
        ;;
 esac
diff --git a/scripts/host.sh b/scripts/host.sh
index 34d74c16..d6143bd4 100755
--- a/scripts/host.sh
+++ b/scripts/host.sh
@@ -40,7 +40,7 @@ function docker_cmd() {
 
 if [ "$INNER" = 1 ] ; then
     echo "Running inner..."
-    docker_cmd "cd rllm && ./server.sh loop $MODEL"
+    docker_cmd "cd rllm && ./server.sh --loop $MODEL"
     exit 0
 fi
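
For quick reference, a minimal sketch of the invocations this patch documents; the model names and flags (`orca`, `phi2`, `--loop`, `--help`) are taken from the usage strings in `rllm/server.sh` and `cpp-rllm/cpp-server.sh`, and the exact set of `rllm_args` depends on your build.

```sh
# With CUDA: compile aicirt + the rLLM engine and serve the Orca-2 13B model
# (the model the test cases expect).
cd rllm && ./server.sh orca

# --loop is the mode scripts/host.sh passes when it starts the server.
cd rllm && ./server.sh --loop orca

# Without CUDA: use the cpp-rllm engine with the Phi-2 model instead.
cd cpp-rllm && ./cpp-server.sh phi2

# List the extra arguments (rllm_args) the server accepts.
cd rllm && ./server.sh phi2 --help
```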