diff --git a/.devcontainer/cuda/devcontainer.json b/.devcontainer/cuda/devcontainer.json
index be231245..ff0ce3aa 100644
--- a/.devcontainer/cuda/devcontainer.json
+++ b/.devcontainer/cuda/devcontainer.json
@@ -30,6 +30,6 @@
}
},
"forwardPorts": [
- 8080
+ 4242
]
}
\ No newline at end of file
diff --git a/.devcontainer/vllm/devcontainer.json b/.devcontainer/vllm/devcontainer.json
index 1bb3bfb3..db03942c 100644
--- a/.devcontainer/vllm/devcontainer.json
+++ b/.devcontainer/vllm/devcontainer.json
@@ -30,6 +30,6 @@
}
},
"forwardPorts": [
- 8080
+ 4242
]
}
\ No newline at end of file
diff --git a/.vscode/launch.json b/.vscode/launch.json
index f6c4aa38..2fe42f98 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -15,7 +15,6 @@
},
"args": [
"--verbose",
- "--port=8080",
"--aicirt=${workspaceFolder}/target/release/aicirt",
"--model=https://huggingface.co/TheBloke/Orca-2-13B-GGUF/blob/main/orca-2-13b.Q8_0.gguf",
"--tokenizer=orca"
diff --git a/aici.sh b/aici.sh
index 525b7a14..3fb10e9d 100755
--- a/aici.sh
+++ b/aici.sh
@@ -1,7 +1,7 @@
#!/bin/sh
if [ "X$AICI_API_BASE" = "X" ] ; then
- export AICI_API_BASE="http://127.0.0.1:8080/v1/"
+ export AICI_API_BASE="http://127.0.0.1:4242/v1/"
fi
PYTHONPATH=`dirname $0` \
diff --git a/promptlib/notebooks/aici_server_example.ipynb b/promptlib/notebooks/aici_server_example.ipynb
index a70d0c7f..65a70af0 100644
--- a/promptlib/notebooks/aici_server_example.ipynb
+++ b/promptlib/notebooks/aici_server_example.ipynb
@@ -720,7 +720,7 @@
}
],
"source": [
- "ep = AICI(base_url=\"http://127.0.0.1:8080/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n",
+ "ep = AICI(base_url=\"http://127.0.0.1:4242/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n",
"\n",
"endpoint = PromptNode().set_endpoint(ep)\n",
"\n",
@@ -776,7 +776,7 @@
}
],
"source": [
- "ep = AICI(base_url=\"http://127.0.0.1:8080/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n",
+ "ep = AICI(base_url=\"http://127.0.0.1:4242/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n",
"\n",
"endpoint = PromptNode().set_endpoint(ep)\n",
"\n",
@@ -814,7 +814,7 @@
}
],
"source": [
- "ep = AICI(base_url=\"http://127.0.0.1:8080/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n",
+ "ep = AICI(base_url=\"http://127.0.0.1:4242/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n",
"\n",
"endpoint = PromptNode().set_endpoint(ep)\n",
"\n",
@@ -838,7 +838,7 @@
"metadata": {},
"outputs": [],
"source": [
- "ep = AICI(base_url=\"http://127.0.0.1:8080/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n",
+ "ep = AICI(base_url=\"http://127.0.0.1:4242/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n",
"\n",
"endpoint = PromptNode().set_endpoint(ep)\n",
"\n",
@@ -877,7 +877,7 @@
}
],
"source": [
- "ep = AICI(base_url=\"http://127.0.0.1:8080/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n",
+ "ep = AICI(base_url=\"http://127.0.0.1:4242/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n",
"\n",
"endpoint = PromptNode().set_endpoint(ep)\n",
"\n",
@@ -925,7 +925,7 @@
}
],
"source": [
- "ep = AICI(base_url=\"http://127.0.0.1:8080/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n",
+ "ep = AICI(base_url=\"http://127.0.0.1:4242/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n",
"\n",
"endpoint = PromptNode().set_endpoint(ep)\n",
"\n",
@@ -956,13 +956,13 @@
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
- "\u001b[1;32m/workspaces/aici/promptlib/notebooks/aici_server_example.ipynb Cell 16\u001b[0m line \u001b[0;36m1\n\u001b[0;32m----> 1\u001b[0m ep \u001b[39m=\u001b[39m AICI(base_url\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mhttp://127.0.0.1:8080/v1/\u001b[39m\u001b[39m\"\u001b[39m, wasm_runner_path\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m/workspaces/aici/declvm/target/opt.wasm\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 3\u001b[0m endpoint \u001b[39m=\u001b[39m PromptNode()\u001b[39m.\u001b[39mset_endpoint(ep)\n\u001b[1;32m 5\u001b[0m pn \u001b[39m=\u001b[39m endpoint\u001b[39m.\u001b[39mappend(\u001b[39m\"\u001b[39m\u001b[39m[INST] Please repeat the following list in order:\u001b[39m\u001b[39m\"\u001b[39m) \\\n\u001b[1;32m 6\u001b[0m \u001b[39m.\u001b[39mappend(\u001b[39m\"\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39mApple\u001b[39m\u001b[39m\"\u001b[39m) \\\n\u001b[1;32m 7\u001b[0m \u001b[39m.\u001b[39mappend(\u001b[39m\"\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39mCherries\u001b[39m\u001b[39m\"\u001b[39m, attrs\u001b[39m=\u001b[39m[\u001b[39m\"\u001b[39m\u001b[39mselected\u001b[39m\u001b[39m\"\u001b[39m]) \\\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[39m.\u001b[39mappend(\u001b[39m\"\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39mOk now please repeat the list and say DONE when DONE:[/INST]\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m) \\\n\u001b[1;32m 12\u001b[0m \u001b[39m.\u001b[39mgen(max_tokens\u001b[39m=\u001b[39m\u001b[39m30\u001b[39m, stop_at\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mDONE\u001b[39m\u001b[39m\"\u001b[39m, ignore\u001b[39m=\u001b[39m[\u001b[39m\"\u001b[39m\u001b[39mselected\u001b[39m\u001b[39m\"\u001b[39m])\n",
+ "\u001b[1;32m/workspaces/aici/promptlib/notebooks/aici_server_example.ipynb Cell 16\u001b[0m line \u001b[0;36m1\n\u001b[0;32m----> 1\u001b[0m ep \u001b[39m=\u001b[39m AICI(base_url\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mhttp://127.0.0.1:4242/v1/\u001b[39m\u001b[39m\"\u001b[39m, wasm_runner_path\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m/workspaces/aici/declvm/target/opt.wasm\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 3\u001b[0m endpoint \u001b[39m=\u001b[39m PromptNode()\u001b[39m.\u001b[39mset_endpoint(ep)\n\u001b[1;32m 5\u001b[0m pn \u001b[39m=\u001b[39m endpoint\u001b[39m.\u001b[39mappend(\u001b[39m\"\u001b[39m\u001b[39m[INST] Please repeat the following list in order:\u001b[39m\u001b[39m\"\u001b[39m) \\\n\u001b[1;32m 6\u001b[0m \u001b[39m.\u001b[39mappend(\u001b[39m\"\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39mApple\u001b[39m\u001b[39m\"\u001b[39m) \\\n\u001b[1;32m 7\u001b[0m \u001b[39m.\u001b[39mappend(\u001b[39m\"\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39mCherries\u001b[39m\u001b[39m\"\u001b[39m, attrs\u001b[39m=\u001b[39m[\u001b[39m\"\u001b[39m\u001b[39mselected\u001b[39m\u001b[39m\"\u001b[39m]) \\\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[39m.\u001b[39mappend(\u001b[39m\"\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39mOk now please repeat the list and say DONE when DONE:[/INST]\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m) \\\n\u001b[1;32m 12\u001b[0m \u001b[39m.\u001b[39mgen(max_tokens\u001b[39m=\u001b[39m\u001b[39m30\u001b[39m, stop_at\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mDONE\u001b[39m\u001b[39m\"\u001b[39m, ignore\u001b[39m=\u001b[39m[\u001b[39m\"\u001b[39m\u001b[39mselected\u001b[39m\u001b[39m\"\u001b[39m])\n",
"\u001b[0;31mNameError\u001b[0m: name 'AICI' is not defined"
]
}
],
"source": [
- "ep = AICI(base_url=\"http://127.0.0.1:8080/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n",
+ "ep = AICI(base_url=\"http://127.0.0.1:4242/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n",
"\n",
"endpoint = PromptNode().set_endpoint(ep)\n",
"\n",
@@ -2125,7 +2125,7 @@
}
],
"source": [
- "ep = AICI(base_url=\"http://127.0.0.1:8080/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n",
+ "ep = AICI(base_url=\"http://127.0.0.1:4242/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n",
"\n",
"\n",
"endpoint = PromptNode().set_endpoint(ep)\n",
@@ -2153,7 +2153,7 @@
"metadata": {},
"outputs": [],
"source": [
- "ep = AICI(base_url=\"http://127.0.0.1:8080/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n",
+ "ep = AICI(base_url=\"http://127.0.0.1:4242/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n",
"\n",
"\n",
"endpoint = PromptNode().set_endpoint(ep)\n",
@@ -2237,7 +2237,7 @@
}
],
"source": [
- "ep = AICI(base_url=\"http://127.0.0.1:8080/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n",
+ "ep = AICI(base_url=\"http://127.0.0.1:4242/v1/\", wasm_runner_path=\"/workspaces/aici/declctrlrlrl/target/opt.wasm\")\n",
"\n",
"endpoint = PromptNode().set_endpoint(ep)\n",
"\n",
diff --git a/promptlib/notebooks/ignore-check.ipynb b/promptlib/notebooks/ignore-check.ipynb
index da904e0a..364c3e14 100644
--- a/promptlib/notebooks/ignore-check.ipynb
+++ b/promptlib/notebooks/ignore-check.ipynb
@@ -48,7 +48,7 @@
}
],
"source": [
- "ep = AICI(base_url=\"http://127.0.0.1:8080/v1/\", wasm_runner_path=\"/workspaces/aici/declctrl/target/opt.wasm\")\n",
+ "ep = AICI(base_url=\"http://127.0.0.1:4242/v1/\", wasm_runner_path=\"/workspaces/aici/declctrl/target/opt.wasm\")\n",
"\n",
"endpoint = PromptNode().set_endpoint(ep)\n",
"\n",
diff --git a/promptlib/notebooks/test_token_boundaries.ipynb b/promptlib/notebooks/test_token_boundaries.ipynb
index e95f399b..379f2d48 100644
--- a/promptlib/notebooks/test_token_boundaries.ipynb
+++ b/promptlib/notebooks/test_token_boundaries.ipynb
@@ -43,7 +43,7 @@
"metadata": {},
"outputs": [],
"source": [
- "ep = AICI(base_url=\"http://127.0.0.1:8080/v1/\", wasm_runner_path=\"/workspaces/aici/declctrl/target/opt.wasm\")\n",
+ "ep = AICI(base_url=\"http://127.0.0.1:4242/v1/\", wasm_runner_path=\"/workspaces/aici/declctrl/target/opt.wasm\")\n",
"\n",
"endpoint = PromptNode().set_endpoint(ep)\n",
"\n",
diff --git a/pyaici/rest.py b/pyaici/rest.py
index 6af3ac50..ba497114 100644
--- a/pyaici/rest.py
+++ b/pyaici/rest.py
@@ -9,7 +9,7 @@
BASE_URL_ENV = "AICI_API_BASE"
-base_url = os.environ.get(BASE_URL_ENV, "http://127.0.0.1:8080/v1/")
+base_url = os.environ.get(BASE_URL_ENV, "http://127.0.0.1:4242/v1/")
log_level = 1
ast_module = ""
diff --git a/rllm-cuda/src/server/mod.rs b/rllm-cuda/src/server/mod.rs
index b2b7bf13..366ee724 100644
--- a/rllm-cuda/src/server/mod.rs
+++ b/rllm-cuda/src/server/mod.rs
@@ -155,7 +155,7 @@ pub struct RllmCliArgs {
pub dtype: String,
/// Port to serve on (localhost:port)
- #[arg(long, default_value_t = 8080, help_heading = "Server")]
+ #[arg(long, default_value_t = 4242, help_heading = "Server")]
pub port: u16,
/// Set verbose mode (print all requests)
diff --git a/scripts/vllm-server.sh b/scripts/vllm-server.sh
index 25fc4535..30441912 100755
--- a/scripts/vllm-server.sh
+++ b/scripts/vllm-server.sh
@@ -21,7 +21,7 @@ python3 harness/vllm_server.py \
--model $MODEL \
--aici-rtarg="--wasm-max-pre-step-time=10" \
--tokenizer hf-internal-testing/llama-tokenizer \
- --port 8080 --host 127.0.0.1
+ --port 4242 --host 127.0.0.1
# --aici-rtarg="--wasm-max-step-time=50" \
# --aici-rtarg="--wasm-max-pre-step-time=2" \