diff --git a/README.md b/README.md
index 211145ed..db2da668 100644
--- a/README.md
+++ b/README.md
@@ -129,6 +129,20 @@ rustup target add wasm32-wasi
 rustup component add rustfmt
 ```
 
+### Running local server
+
+If you have CUDA, go to `rllm/` and run `./server.sh orca`.
+This runs the inference server with the Orca-2 13B model (which is expected by the test cases).
+
+If you don't have CUDA, go to `cpp-rllm/` and run `./cpp-server.sh phi2`.
+
+Both of these commands first compile aicirt and the inference engine,
+and then start the server.
+You can also try other models; see the README files for [rllm](rllm/README.md) and
+[cpp-rllm](cpp-rllm/README.md), as well as the shell scripts themselves, for details.
+
+The command line arguments accepted by the server (`rllm_args`) can be listed with `./server.sh phi2 --help` or `./cpp-server.sh phi2 --help`.
+
 ### Interacting with server
 
 To get started interacting with a cloud AICI server first export the API key.
@@ -152,14 +166,6 @@ Run `./aici.sh -h` to see usage info.
 If the server is running with Orca-2 13B model,
 you can also run tests with `pytest` for the DeclCtrl, or with `./scripts/test-pyctrl.sh` for PyCtrl.
 
-### Running local server
-
-To run rLLM server, go to `rllm/` and run `./server.sh orca`.
-This will run the inference server with Orca-2 13B model (which is expected by testcases).
-If you don't have CUDA, go to `cpp-rllm/` and run `./cpp-server.sh phi2`.
-You can also try other models, see [rllm/README.md](rllm/README.md) and
-[cpp-rllm/README.md](cpp-rllm/README.md) for details.
-
 ## Security
 
 - `aicirt` runs in a separate process, and can run under a different user than the LLM engine
diff --git a/cpp-rllm/cpp-server.sh b/cpp-rllm/cpp-server.sh
index 68547b14..463e7d87 100755
--- a/cpp-rllm/cpp-server.sh
+++ b/cpp-rllm/cpp-server.sh
@@ -46,6 +46,7 @@ case "$1" in
         ;;
     * )
        echo "usage: $0 [--cuda] [--debug] [phi2|orca|build] [rllm_args...]"
+       echo "Try $0 phi2 --help to see available rllm_args"
        exit 1
        ;;
 esac
diff --git a/rllm/README.md b/rllm/README.md
index bc0b0dde..884933f1 100644
--- a/rllm/README.md
+++ b/rllm/README.md
@@ -1,7 +1,9 @@
 # rLLM
 
 This is a partial port of [vLLM](https://github.com/vllm-project/vllm)
-to Rust and [tch-rs](https://github.com/LaurentMazare/tch-rs).
+to Rust and [tch-rs](https://github.com/LaurentMazare/tch-rs)
+(bindings for [libtorch](https://github.com/pytorch/pytorch/blob/main/docs/libtorch.rst),
+which is the basis of [PyTorch](https://github.com/pytorch/pytorch)).
 It is mostly meant as a proving ground for AICI (AI Controller Interface)
 integration.
 
diff --git a/rllm/server.sh b/rllm/server.sh
index 98a71328..8353412b 100755
--- a/rllm/server.sh
+++ b/rllm/server.sh
@@ -1,7 +1,7 @@
 #!/bin/sh
 set -e
 
-REL=
+REL=--release
 LOOP=
 BUILD=
 ADD_ARGS=
@@ -22,20 +22,13 @@ if [ "X$P" != "X" ] ; then
     kill $P
 fi
 
-if [ "$1" = loop ] ; then
-    REL=--release
+if [ "$1" = "--loop" ] ; then
     LOOP=1
     shift
 fi
 
-if [ "$1" = bench ] ; then
-    REL=--release
-    shift
-fi
-
-if [ "$1" = warm ] ; then
-    REL=--release
-    ADD_ARGS="--warmup-only"
+if [ "$1" = "--debug" ] ; then
+    REL=
     shift
 fi
 
@@ -64,7 +57,8 @@ case "$1" in
        REL=--release
        ;;
     * )
-       echo "try one of models: phi, phi2, 7b, code, code34"
+       echo "usage: $0 [--loop] [--debug] [phi|phi2|7b|code|orca|build] [rllm_args...]"
+       echo "Try $0 phi2 --help to see available rllm_args"
        exit 1
        ;;
 esac
diff --git a/scripts/host.sh b/scripts/host.sh
index 34d74c16..d6143bd4 100755
--- a/scripts/host.sh
+++ b/scripts/host.sh
@@ -40,7 +40,7 @@ function docker_cmd() {
 
 if [ "$INNER" = 1 ] ; then
     echo "Running inner..."
-    docker_cmd "cd rllm && ./server.sh loop $MODEL"
+    docker_cmd "cd rllm && ./server.sh --loop $MODEL"
     exit 0
 fi
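
For quick reference, a minimal sketch of the invocations this patch documents; the model names and flags (`orca`, `phi2`, `--loop`, `--help`) are taken from the usage strings in `rllm/server.sh` and `cpp-rllm/cpp-server.sh`, and the exact set of `rllm_args` depends on your build.

```sh
# With CUDA: compile aicirt + the rLLM engine and serve the Orca-2 13B model
# (the model the test cases expect).
cd rllm && ./server.sh orca

# --loop is the mode scripts/host.sh passes when it starts the server.
cd rllm && ./server.sh --loop orca

# Without CUDA: use the cpp-rllm engine with the Phi-2 model instead.
cd cpp-rllm && ./cpp-server.sh phi2

# List the extra arguments (rllm_args) the server accepts.
cd rllm && ./server.sh phi2 --help
```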