5 files changed: +6 -6 lines changed
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@ export ENABLE_V1_KVCACHE_SCHEDULER=1
27 27 python -m fastdeploy.entrypoints.openai.api_server \
28 28 --model baidu/ERNIE-4.5-VL-424B-A47B-Paddle \
29 29 --port 8180 --engine-worker-queue-port 8181 \
30- --cache-queue-port 8182 --metrics-port 8182 \
30+ --cache-queue-port 8183 --metrics-port 8182 \
31 31 --tensor-parallel-size 8 \
32 32 --quantization wint4 \
33 33 --max-model-len 32768 \
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ export ENABLE_V1_KVCACHE_SCHEDULER=1
25 25 python -m fastdeploy.entrypoints.openai.api_server \
26 26 --model baidu/ERNIE-4.5-300B-A47B-Paddle \
27 27 --port 8180 --engine-worker-queue-port 8181 \
28- --cache-queue-port 8182 --metrics-port 8182 \
28+ --cache-queue-port 8183 --metrics-port 8182 \
29 29 --tensor-parallel-size 8 \
30 30 --quantization wint4 \
31 31 --max-model-len 32768 \
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@ export ENABLE_V1_KVCACHE_SCHEDULER=1
27 27 python -m fastdeploy.entrypoints.openai.api_server \
28 28 --model baidu/ERNIE-4.5-VL-424B-A47B-Paddle \
29 29 --port 8180 --engine-worker-queue-port 8181 \
30- --cache-queue-port 8182 --metrics-port 8182 \
30+ --cache-queue-port 8183 --metrics-port 8182 \
31 31 --tensor-parallel-size 8 \
32 32 --quantization wint4 \
33 33 --max-model-len 32768 \
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ export ENABLE_V1_KVCACHE_SCHEDULER=1
25 25 python -m fastdeploy.entrypoints.openai.api_server \
26 26 --model baidu/ERNIE-4.5-300B-A47B-Paddle \
27 27 --port 8180 --engine-worker-queue-port 8181 \
28- --cache-queue-port 8182 --metrics-port 8182 \
28+ --cache-queue-port 8183 --metrics-port 8182 \
29 29 --tensor-parallel-size 8 \
30 30 --quantization wint4 \
31 31 --max-model-len 32768 \
Original file line number | Diff line number | Diff line change
15 15 python -m fastdeploy.entrypoints.openai.api_server \
16 16 --model baidu/ERNIE-4.5-300B-A47B-Paddle \
17 17 --port 8180 --engine-worker-queue-port 8181 \
18- --cache-queue-port 8182 --metrics-port 8182 \
18+ --cache-queue-port 8183 --metrics-port 8182 \
19 19 --tensor-parallel-size 8 \
20 20 --quantization wint8 \
21 21 --max-model-len 32768 \
@@ -41,7 +41,7 @@ python -m fastdeploy.entrypoints.openai.api_server \
41 41 python -m fastdeploy.entrypoints.openai.api_server \
42 42 --model baidu/ERNIE-4.5-300B-A47B-Paddle \
43 43 --port 8180 --engine-worker-queue-port 8181 \
44- --cache-queue-port 8182 --metrics-port 8182 \
44+ --cache-queue-port 8183 --metrics-port 8182 \
45 45 --tensor-parallel-size 8 \
46 46 --quantization block_wise_fp8 \
47 47 --max-model-len 32768 \
You can’t perform that action at this time.
0 commit comments