Skip to content

Commit d8841b7

Browse files
ltd0924 and ltd0924 authored
[BugFix] fix workers=1 (#4364)
* [Feature] support prefix cache in DP * fix * Update common_engine.py * Update common_engine.py * Update common_engine.py * Update common_engine.py * [BugFix] fix workers more than 1 * fix * Update api_server.py * fix * Update api_server.py * fix --------- Co-authored-by: ltd0924 <[email protected]>
1 parent bcaa98f commit d8841b7

File tree

5 files changed

+47
-18
lines changed

5 files changed

+47
-18
lines changed

fastdeploy/entrypoints/openai/api_server.py

Lines changed: 43 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,13 @@
2222
import traceback
2323
from collections.abc import AsyncGenerator
2424
from contextlib import asynccontextmanager
25-
from multiprocessing import current_process
2625

2726
import uvicorn
2827
import zmq
2928
from fastapi import FastAPI, HTTPException, Request
3029
from fastapi.exceptions import RequestValidationError
3130
from fastapi.responses import JSONResponse, Response, StreamingResponse
31+
from gunicorn.app.base import BaseApplication
3232
from opentelemetry import trace
3333
from prometheus_client import CONTENT_TYPE_LATEST
3434

@@ -87,6 +87,21 @@
8787
llm_engine = None
8888

8989

90+
class StandaloneApplication(BaseApplication):
91+
def __init__(self, app, options=None):
92+
self.application = app
93+
self.options = options or {}
94+
super().__init__()
95+
96+
def load_config(self):
97+
config = {key: value for key, value in self.options.items() if key in self.cfg.settings and value is not None}
98+
for key, value in config.items():
99+
self.cfg.set(key.lower(), value)
100+
101+
def load(self):
102+
return self.application
103+
104+
90105
def load_engine():
91106
"""
92107
load engine
@@ -95,10 +110,10 @@ def load_engine():
95110
if llm_engine is not None:
96111
return llm_engine
97112

98-
api_server_logger.info(f"FastDeploy LLM API server starting... {os.getpid()}")
113+
api_server_logger.info(f"FastDeploy LLM API server starting... {os.getpid()}, port: {args.port}")
99114
engine_args = EngineArgs.from_cli_args(args)
100115
engine = LLMEngine.from_engine_args(engine_args)
101-
if not engine.start(api_server_pid=os.getpid()):
116+
if not engine.start(api_server_pid=args.port):
102117
api_server_logger.error("Failed to initialize FastDeploy LLM engine, service exit now!")
103118
return None
104119

@@ -119,12 +134,12 @@ def load_data_service():
119134
global llm_engine
120135
if llm_engine is not None:
121136
return llm_engine
122-
api_server_logger.info(f"FastDeploy LLM API server starting... {os.getpid()}")
137+
api_server_logger.info(f"FastDeploy LLM API server starting... {os.getpid()}, port: {args.port}")
123138
engine_args = EngineArgs.from_cli_args(args)
124139
config = engine_args.create_engine_config()
125140
api_server_logger.info(f"local_data_parallel_id: {config.parallel_config}")
126141
expert_service = ExpertService(config, config.parallel_config.local_data_parallel_id)
127-
if not expert_service.start(os.getpid(), config.parallel_config.local_data_parallel_id):
142+
if not expert_service.start(args.port, config.parallel_config.local_data_parallel_id):
128143
api_server_logger.error("Failed to initialize FastDeploy LLM expert service, service exit now!")
129144
return None
130145
llm_engine = expert_service
@@ -136,13 +151,22 @@ async def lifespan(app: FastAPI):
136151
"""
137152
async context manager for FastAPI lifespan
138153
"""
154+
import logging
155+
156+
uvicorn_access = logging.getLogger("uvicorn.access")
157+
uvicorn_access.handlers.clear()
158+
159+
# 使用 gunicorn 的格式
160+
formatter = logging.Formatter("[%(asctime)s] [%(process)d] [INFO] %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
161+
162+
handler = logging.StreamHandler()
163+
handler.setFormatter(formatter)
164+
uvicorn_access.addHandler(handler)
165+
uvicorn_access.propagate = False
139166

140167
if args.tokenizer is None:
141168
args.tokenizer = args.model
142-
if current_process().name != "MainProcess":
143-
pid = os.getppid()
144-
else:
145-
pid = os.getpid()
169+
pid = args.port
146170
api_server_logger.info(f"{pid}")
147171

148172
if args.served_model_name is not None:
@@ -449,16 +473,17 @@ def launch_api_server() -> None:
449473
api_server_logger.info(f"args: {args.__dict__}")
450474
fd_start_span("FD_START")
451475

476+
options = {
477+
"bind": f"{args.host}:{args.port}",
478+
"workers": args.workers,
479+
"worker_class": "uvicorn.workers.UvicornWorker",
480+
"loglevel": "info",
481+
"log_config": UVICORN_CONFIG,
482+
"timeout_graceful_shutdown": args.timeout_graceful_shutdown,
483+
}
484+
452485
try:
453-
uvicorn.run(
454-
app="fastdeploy.entrypoints.openai.api_server:app",
455-
host=args.host,
456-
port=args.port,
457-
workers=args.workers,
458-
log_config=UVICORN_CONFIG,
459-
log_level="info",
460-
timeout_graceful_shutdown=args.timeout_graceful_shutdown,
461-
) # set log level to error to avoid log
486+
StandaloneApplication(app, options).run()
462487
except Exception as e:
463488
api_server_logger.error(f"launch sync http server error, {e}, {str(traceback.format_exc())}")
464489

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ use-triton-in-paddle
3030
crcmod
3131
fastsafetensors==0.1.14
3232
msgpack
33+
gunicorn
3334
modelscope
3435
opentelemetry-api>=1.24.0
3536
opentelemetry-sdk>=1.24.0

requirements_dcu.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ use-triton-in-paddle
2828
crcmod
2929
fastsafetensors==0.1.14
3030
msgpack
31+
gunicorn
3132
opentelemetry-api>=1.24.0
3233
opentelemetry-sdk>=1.24.0
3334
opentelemetry-instrumentation-redis

requirements_iluvatar.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ use-triton-in-paddle
2929
crcmod
3030
fastsafetensors==0.1.14
3131
msgpack
32+
gunicorn
3233
opentelemetry-api>=1.24.0
3334
opentelemetry-sdk>=1.24.0
3435
opentelemetry-instrumentation-redis

requirements_metaxgpu.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ use-triton-in-paddle
3030
crcmod
3131
fastsafetensors==0.1.14
3232
msgpack
33+
gunicorn
3334
modelscope
3435
opentelemetry-api>=1.24.0
3536
opentelemetry-sdk>=1.24.0

0 commit comments

Comments (0)