diff --git a/.devcontainer/cuda/devcontainer.json b/.devcontainer/cuda/devcontainer.json index be231245..ff0ce3aa 100644 --- a/.devcontainer/cuda/devcontainer.json +++ b/.devcontainer/cuda/devcontainer.json @@ -30,6 +30,6 @@ } }, "forwardPorts": [ - 8080 + 4242 ] } \ No newline at end of file diff --git a/.devcontainer/vllm/devcontainer.json b/.devcontainer/vllm/devcontainer.json index 1bb3bfb3..db03942c 100644 --- a/.devcontainer/vllm/devcontainer.json +++ b/.devcontainer/vllm/devcontainer.json @@ -30,6 +30,6 @@ } }, "forwardPorts": [ - 8080 + 4242 ] } \ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json index f6c4aa38..2fe42f98 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -15,7 +15,6 @@ }, "args": [ "--verbose", - "--port=8080", "--aicirt=${workspaceFolder}/target/release/aicirt", "--model=https://huggingface.co/TheBloke/Orca-2-13B-GGUF/blob/main/orca-2-13b.Q8_0.gguf", "--tokenizer=orca" diff --git a/aici.sh b/aici.sh index 525b7a14..3fb10e9d 100755 --- a/aici.sh +++ b/aici.sh @@ -1,7 +1,7 @@ #!/bin/sh if [ "X$AICI_API_BASE" = "X" ] ; then - export AICI_API_BASE="http://127.0.0.1:8080/v1/" + export AICI_API_BASE="http://127.0.0.1:4242/v1/" fi PYTHONPATH=`dirname $0` \ diff --git a/promptlib/notebooks/aici_server_example.ipynb b/promptlib/notebooks/aici_server_example.ipynb index a70d0c7f..65a70af0 100644 --- a/promptlib/notebooks/aici_server_example.ipynb +++ b/promptlib/notebooks/aici_server_example.ipynb @@ -720,7 +720,7 @@ } ], "source": [ - "ep = AICI(base_url=\"http://127.0.0.1:8080/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n", + "ep = AICI(base_url=\"http://127.0.0.1:4242/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n", "\n", "endpoint = PromptNode().set_endpoint(ep)\n", "\n", @@ -776,7 +776,7 @@ } ], "source": [ - "ep = AICI(base_url=\"http://127.0.0.1:8080/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n", + "ep = AICI(base_url=\"http://127.0.0.1:4242/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n", "\n", "endpoint = PromptNode().set_endpoint(ep)\n", "\n", @@ -814,7 +814,7 @@ } ], "source": [ - "ep = AICI(base_url=\"http://127.0.0.1:8080/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n", + "ep = AICI(base_url=\"http://127.0.0.1:4242/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n", "\n", "endpoint = PromptNode().set_endpoint(ep)\n", "\n", @@ -838,7 +838,7 @@ "metadata": {}, "outputs": [], "source": [ - "ep = AICI(base_url=\"http://127.0.0.1:8080/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n", + "ep = AICI(base_url=\"http://127.0.0.1:4242/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n", "\n", "endpoint = PromptNode().set_endpoint(ep)\n", "\n", @@ -877,7 +877,7 @@ } ], "source": [ - "ep = AICI(base_url=\"http://127.0.0.1:8080/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n", + "ep = AICI(base_url=\"http://127.0.0.1:4242/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n", "\n", "endpoint = PromptNode().set_endpoint(ep)\n", "\n", @@ -925,7 +925,7 @@ } ], "source": [ - "ep = AICI(base_url=\"http://127.0.0.1:8080/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n", + "ep = AICI(base_url=\"http://127.0.0.1:4242/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n", "\n", "endpoint = PromptNode().set_endpoint(ep)\n", "\n", @@ -956,13 +956,13 @@ "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m/workspaces/aici/promptlib/notebooks/aici_server_example.ipynb Cell 16\u001b[0m line \u001b[0;36m1\n\u001b[0;32m----> 1\u001b[0m ep \u001b[39m=\u001b[39m AICI(base_url\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mhttp://127.0.0.1:8080/v1/\u001b[39m\u001b[39m\"\u001b[39m, wasm_runner_path\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m/workspaces/aici/declvm/target/opt.wasm\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 3\u001b[0m endpoint \u001b[39m=\u001b[39m PromptNode()\u001b[39m.\u001b[39mset_endpoint(ep)\n\u001b[1;32m 5\u001b[0m pn \u001b[39m=\u001b[39m endpoint\u001b[39m.\u001b[39mappend(\u001b[39m\"\u001b[39m\u001b[39m[INST] Please repeat the following list in order:\u001b[39m\u001b[39m\"\u001b[39m) \\\n\u001b[1;32m 6\u001b[0m \u001b[39m.\u001b[39mappend(\u001b[39m\"\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39mApple\u001b[39m\u001b[39m\"\u001b[39m) \\\n\u001b[1;32m 7\u001b[0m \u001b[39m.\u001b[39mappend(\u001b[39m\"\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39mCherries\u001b[39m\u001b[39m\"\u001b[39m, attrs\u001b[39m=\u001b[39m[\u001b[39m\"\u001b[39m\u001b[39mselected\u001b[39m\u001b[39m\"\u001b[39m]) \\\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[39m.\u001b[39mappend(\u001b[39m\"\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39mOk now please repeat the list and say DONE when DONE:[/INST]\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m) \\\n\u001b[1;32m 12\u001b[0m \u001b[39m.\u001b[39mgen(max_tokens\u001b[39m=\u001b[39m\u001b[39m30\u001b[39m, stop_at\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mDONE\u001b[39m\u001b[39m\"\u001b[39m, ignore\u001b[39m=\u001b[39m[\u001b[39m\"\u001b[39m\u001b[39mselected\u001b[39m\u001b[39m\"\u001b[39m])\n", + "\u001b[1;32m/workspaces/aici/promptlib/notebooks/aici_server_example.ipynb Cell 16\u001b[0m line \u001b[0;36m1\n\u001b[0;32m----> 1\u001b[0m ep \u001b[39m=\u001b[39m AICI(base_url\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mhttp://127.0.0.1:4242/v1/\u001b[39m\u001b[39m\"\u001b[39m, wasm_runner_path\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m/workspaces/aici/declvm/target/opt.wasm\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 3\u001b[0m endpoint \u001b[39m=\u001b[39m PromptNode()\u001b[39m.\u001b[39mset_endpoint(ep)\n\u001b[1;32m 5\u001b[0m pn \u001b[39m=\u001b[39m endpoint\u001b[39m.\u001b[39mappend(\u001b[39m\"\u001b[39m\u001b[39m[INST] Please repeat the following list in order:\u001b[39m\u001b[39m\"\u001b[39m) \\\n\u001b[1;32m 6\u001b[0m \u001b[39m.\u001b[39mappend(\u001b[39m\"\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39mApple\u001b[39m\u001b[39m\"\u001b[39m) \\\n\u001b[1;32m 7\u001b[0m \u001b[39m.\u001b[39mappend(\u001b[39m\"\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39mCherries\u001b[39m\u001b[39m\"\u001b[39m, attrs\u001b[39m=\u001b[39m[\u001b[39m\"\u001b[39m\u001b[39mselected\u001b[39m\u001b[39m\"\u001b[39m]) \\\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[39m.\u001b[39mappend(\u001b[39m\"\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39mOk now please repeat the list and say DONE when DONE:[/INST]\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m) \\\n\u001b[1;32m 12\u001b[0m \u001b[39m.\u001b[39mgen(max_tokens\u001b[39m=\u001b[39m\u001b[39m30\u001b[39m, stop_at\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mDONE\u001b[39m\u001b[39m\"\u001b[39m, ignore\u001b[39m=\u001b[39m[\u001b[39m\"\u001b[39m\u001b[39mselected\u001b[39m\u001b[39m\"\u001b[39m])\n", "\u001b[0;31mNameError\u001b[0m: name 'AICI' is not defined" ] } ], "source": [ - "ep = AICI(base_url=\"http://127.0.0.1:8080/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n", + "ep = AICI(base_url=\"http://127.0.0.1:4242/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n", "\n", "endpoint = PromptNode().set_endpoint(ep)\n", "\n", @@ -2125,7 +2125,7 @@ } ], "source": [ - "ep = AICI(base_url=\"http://127.0.0.1:8080/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n", + "ep = AICI(base_url=\"http://127.0.0.1:4242/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n", "\n", "\n", "endpoint = PromptNode().set_endpoint(ep)\n", @@ -2153,7 +2153,7 @@ "metadata": {}, "outputs": [], "source": [ - "ep = AICI(base_url=\"http://127.0.0.1:8080/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n", + "ep = AICI(base_url=\"http://127.0.0.1:4242/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n", "\n", "\n", "endpoint = PromptNode().set_endpoint(ep)\n", @@ -2237,7 +2237,7 @@ } ], "source": [ - "ep = AICI(base_url=\"http://127.0.0.1:8080/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n", + "ep = AICI(base_url=\"http://127.0.0.1:4242/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n", "\n", "endpoint = PromptNode().set_endpoint(ep)\n", "\n", diff --git a/promptlib/notebooks/ignore-check.ipynb b/promptlib/notebooks/ignore-check.ipynb index da904e0a..364c3e14 100644 --- a/promptlib/notebooks/ignore-check.ipynb +++ b/promptlib/notebooks/ignore-check.ipynb @@ -48,7 +48,7 @@ } ], "source": [ - "ep = AICI(base_url=\"http://127.0.0.1:8080/v1/\", wasm_runner_path=\"/workspaces/aici/declctrl/target/opt.wasm\")\n", + "ep = AICI(base_url=\"http://127.0.0.1:4242/v1/\", wasm_runner_path=\"/workspaces/aici/declctrl/target/opt.wasm\")\n", "\n", "endpoint = PromptNode().set_endpoint(ep)\n", "\n", diff --git a/promptlib/notebooks/test_token_boundaries.ipynb b/promptlib/notebooks/test_token_boundaries.ipynb index e95f399b..379f2d48 100644 --- a/promptlib/notebooks/test_token_boundaries.ipynb +++ b/promptlib/notebooks/test_token_boundaries.ipynb @@ -43,7 +43,7 @@ "metadata": {}, "outputs": [], "source": [ - "ep = AICI(base_url=\"http://127.0.0.1:8080/v1/\", wasm_runner_path=\"/workspaces/aici/declctrl/target/opt.wasm\")\n", + "ep = AICI(base_url=\"http://127.0.0.1:4242/v1/\", wasm_runner_path=\"/workspaces/aici/declctrl/target/opt.wasm\")\n", "\n", "endpoint = PromptNode().set_endpoint(ep)\n", "\n", diff --git a/pyaici/rest.py b/pyaici/rest.py index 6af3ac50..ba497114 100644 --- a/pyaici/rest.py +++ b/pyaici/rest.py @@ -9,7 +9,7 @@ BASE_URL_ENV = "AICI_API_BASE" -base_url = os.environ.get(BASE_URL_ENV, "http://127.0.0.1:8080/v1/") +base_url = os.environ.get(BASE_URL_ENV, "http://127.0.0.1:4242/v1/") log_level = 1 ast_module = "" diff --git a/rllm-cuda/src/server/mod.rs b/rllm-cuda/src/server/mod.rs index b2b7bf13..366ee724 100644 --- a/rllm-cuda/src/server/mod.rs +++ b/rllm-cuda/src/server/mod.rs @@ -155,7 +155,7 @@ pub struct RllmCliArgs { pub dtype: String, /// Port to serve on (localhost:port) - #[arg(long, default_value_t = 8080, help_heading = "Server")] + #[arg(long, default_value_t = 4242, help_heading = "Server")] pub port: u16, /// Set verbose mode (print all requests) diff --git a/scripts/vllm-server.sh b/scripts/vllm-server.sh index 25fc4535..30441912 100755 --- a/scripts/vllm-server.sh +++ b/scripts/vllm-server.sh @@ -21,7 +21,7 @@ python3 harness/vllm_server.py \ --model $MODEL \ --aici-rtarg="--wasm-max-pre-step-time=10" \ --tokenizer hf-internal-testing/llama-tokenizer \ - --port 8080 --host 127.0.0.1 + --port 4242 --host 127.0.0.1 # --aici-rtarg="--wasm-max-step-time=50" \ # --aici-rtarg="--wasm-max-pre-step-time=2" \