2222import traceback
2323from collections .abc import AsyncGenerator
2424from contextlib import asynccontextmanager
25- from multiprocessing import current_process
2625
2726import uvicorn
2827import zmq
2928from fastapi import FastAPI , HTTPException , Request
3029from fastapi .exceptions import RequestValidationError
3130from fastapi .responses import JSONResponse , Response , StreamingResponse
31+ from gunicorn .app .base import BaseApplication
3232from opentelemetry import trace
3333from prometheus_client import CONTENT_TYPE_LATEST
3434
8787llm_engine = None
8888
8989
class StandaloneApplication(BaseApplication):
    """Gunicorn application wrapper for launching a pre-built ASGI/WSGI app.

    Lets the server be started programmatically (instead of via the gunicorn
    CLI), forwarding only those options that gunicorn actually recognizes.
    """

    def __init__(self, app, options=None):
        # Keep a reference to the app object and the raw options mapping;
        # gunicorn's BaseApplication drives load_config()/load() from here.
        self.application = app
        self.options = options or {}
        super().__init__()

    def load_config(self):
        # Forward only keys gunicorn knows about; unknown keys and None
        # values are silently dropped (gunicorn would reject them otherwise).
        for key, value in self.options.items():
            if key in self.cfg.settings and value is not None:
                self.cfg.set(key.lower(), value)

    def load(self):
        # Called by gunicorn workers to obtain the application callable.
        return self.application
104+
90105def load_engine ():
91106 """
92107 load engine
@@ -95,10 +110,10 @@ def load_engine():
95110 if llm_engine is not None :
96111 return llm_engine
97112
98- api_server_logger .info (f"FastDeploy LLM API server starting... { os .getpid ()} " )
113+ api_server_logger .info (f"FastDeploy LLM API server starting... { os .getpid ()} , port: { args . port } " )
99114 engine_args = EngineArgs .from_cli_args (args )
100115 engine = LLMEngine .from_engine_args (engine_args )
101- if not engine .start (api_server_pid = os . getpid () ):
116+ if not engine .start (api_server_pid = args . port ):
102117 api_server_logger .error ("Failed to initialize FastDeploy LLM engine, service exit now!" )
103118 return None
104119
@@ -119,12 +134,12 @@ def load_data_service():
119134 global llm_engine
120135 if llm_engine is not None :
121136 return llm_engine
122- api_server_logger .info (f"FastDeploy LLM API server starting... { os .getpid ()} " )
137+ api_server_logger .info (f"FastDeploy LLM API server starting... { os .getpid ()} , port: { args . port } " )
123138 engine_args = EngineArgs .from_cli_args (args )
124139 config = engine_args .create_engine_config ()
125140 api_server_logger .info (f"local_data_parallel_id: { config .parallel_config } " )
126141 expert_service = ExpertService (config , config .parallel_config .local_data_parallel_id )
127- if not expert_service .start (os . getpid () , config .parallel_config .local_data_parallel_id ):
142+ if not expert_service .start (args . port , config .parallel_config .local_data_parallel_id ):
128143 api_server_logger .error ("Failed to initialize FastDeploy LLM expert service, service exit now!" )
129144 return None
130145 llm_engine = expert_service
@@ -136,13 +151,22 @@ async def lifespan(app: FastAPI):
136151 """
137152 async context manager for FastAPI lifespan
138153 """
154+ import logging
155+
156+ uvicorn_access = logging .getLogger ("uvicorn.access" )
157+ uvicorn_access .handlers .clear ()
158+
159+         # Use gunicorn's log format
160+ formatter = logging .Formatter ("[%(asctime)s] [%(process)d] [INFO] %(message)s" , datefmt = "%Y-%m-%d %H:%M:%S" )
161+
162+ handler = logging .StreamHandler ()
163+ handler .setFormatter (formatter )
164+ uvicorn_access .addHandler (handler )
165+ uvicorn_access .propagate = False
139166
140167 if args .tokenizer is None :
141168 args .tokenizer = args .model
142- if current_process ().name != "MainProcess" :
143- pid = os .getppid ()
144- else :
145- pid = os .getpid ()
169+ pid = args .port
146170 api_server_logger .info (f"{ pid } " )
147171
148172 if args .served_model_name is not None :
@@ -449,16 +473,17 @@ def launch_api_server() -> None:
449473 api_server_logger .info (f"args: { args .__dict__ } " )
450474 fd_start_span ("FD_START" )
451475
476+ options = {
477+ "bind" : f"{ args .host } :{ args .port } " ,
478+ "workers" : args .workers ,
479+ "worker_class" : "uvicorn.workers.UvicornWorker" ,
480+ "loglevel" : "info" ,
481+ "log_config" : UVICORN_CONFIG ,
482+ "timeout_graceful_shutdown" : args .timeout_graceful_shutdown ,
483+ }
484+
452485 try :
453- uvicorn .run (
454- app = "fastdeploy.entrypoints.openai.api_server:app" ,
455- host = args .host ,
456- port = args .port ,
457- workers = args .workers ,
458- log_config = UVICORN_CONFIG ,
459- log_level = "info" ,
460- timeout_graceful_shutdown = args .timeout_graceful_shutdown ,
461- ) # set log level to error to avoid log
486+ StandaloneApplication (app , options ).run ()
462487 except Exception as e :
463488 api_server_logger .error (f"launch sync http server error, { e } , { str (traceback .format_exc ())} " )
464489
0 commit comments