Skip to content

Commit 1d90711

Browse files
authored
feat(core): Dynamically load dbgpts (#1211)
1 parent 673ddaa commit 1d90711

File tree

15 files changed

+504
-33
lines changed

15 files changed

+504
-33
lines changed

dbgpt/app/component_configs.py

+2
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,15 @@ def initialize_components(
2121
):
2222
# Lazy import to avoid high time cost
2323
from dbgpt.app.initialization.embedding_component import _initialize_embedding_model
24+
from dbgpt.app.initialization.scheduler import DefaultScheduler
2425
from dbgpt.app.initialization.serve_initialization import register_serve_apps
2526
from dbgpt.model.cluster.controller.controller import controller
2627

2728
# Register global default executor factory first
2829
system_app.register(
2930
DefaultExecutorFactory, max_workers=param.default_thread_pool_size
3031
)
32+
system_app.register(DefaultScheduler)
3133
system_app.register_instance(controller)
3234

3335
from dbgpt.serve.agent.hub.controller import module_plugin

dbgpt/app/initialization/scheduler.py

+43
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import logging
2+
import threading
3+
import time
4+
5+
import schedule
6+
7+
from dbgpt.component import BaseComponent, SystemApp
8+
9+
logger = logging.getLogger(__name__)
10+
11+
12+
class DefaultScheduler(BaseComponent):
    """The default scheduler component.

    Periodically drives :func:`schedule.run_pending` from a background
    thread once the system application has started.
    """

    name = "dbgpt_default_scheduler"

    def __init__(
        self,
        system_app: SystemApp,
        scheduler_delay_ms: int = 5000,
        scheduler_interval_ms: int = 1000,
    ):
        """Create the scheduler component.

        Args:
            system_app (SystemApp): The system application instance.
            scheduler_delay_ms (int): Delay before the first scheduler tick,
                in milliseconds. Defaults to 5000.
            scheduler_interval_ms (int): Interval between scheduler ticks,
                in milliseconds. Defaults to 1000.
        """
        super().__init__(system_app)
        self.system_app = system_app
        self._scheduler_interval_ms = scheduler_interval_ms
        self._scheduler_delay_ms = scheduler_delay_ms

    def init_app(self, system_app: SystemApp):
        """Attach this component to the system application."""
        self.system_app = system_app

    def after_start(self):
        """Start the scheduler loop in a background thread.

        The thread runs an endless loop and is never joined, so it MUST be a
        daemon thread; the original non-daemon thread kept the interpreter
        alive and blocked clean process shutdown.
        """
        thread = threading.Thread(
            target=self._scheduler,
            name="dbgpt-default-scheduler",
            daemon=True,
        )
        thread.start()

    def _scheduler(self):
        """Run pending scheduled jobs forever at a fixed interval."""
        # Initial delay gives other components time to finish starting
        # before the first tick.
        time.sleep(self._scheduler_delay_ms / 1000)
        while True:
            try:
                schedule.run_pending()
            except Exception as e:
                # Debug level on purpose: this fires every tick, so a
                # persistent transient error would otherwise flood the logs.
                logger.debug(f"Scheduler error: {e}")
            finally:
                time.sleep(self._scheduler_interval_ms / 1000)

dbgpt/cli/cli_scripts.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ def stop_all():
163163
from dbgpt.util.dbgpts.cli import add_repo
164164
from dbgpt.util.dbgpts.cli import install as app_install
165165
from dbgpt.util.dbgpts.cli import list_all_apps as app_list
166-
from dbgpt.util.dbgpts.cli import list_repos, remove_repo
166+
from dbgpt.util.dbgpts.cli import list_repos, new_dbgpts, remove_repo
167167
from dbgpt.util.dbgpts.cli import uninstall as app_uninstall
168168
from dbgpt.util.dbgpts.cli import update_repo
169169

@@ -174,6 +174,7 @@ def stop_all():
174174
add_command_alias(app_install, name="install", parent_group=app)
175175
add_command_alias(app_uninstall, name="uninstall", parent_group=app)
176176
add_command_alias(app_list, name="list-remote", parent_group=app)
177+
add_command_alias(new_dbgpts, name="app", parent_group=new)
177178

178179
except ImportError as e:
179180
logging.warning(f"Integrating dbgpt dbgpts command line tool failed: {e}")

dbgpt/core/awel/dag/loader.py

+9-5
Original file line numberDiff line numberDiff line change
@@ -63,19 +63,23 @@ def _process_file(filepath) -> List[DAG]:
6363
return results
6464

6565

66-
def _load_modules_from_file(filepath: str):
66+
def _load_modules_from_file(
67+
filepath: str, mod_name: str | None = None, show_log: bool = True
68+
):
6769
import importlib
6870
import importlib.machinery
6971
import importlib.util
7072

71-
logger.info(f"Importing {filepath}")
73+
if show_log:
74+
logger.info(f"Importing {filepath}")
7275

7376
org_mod_name, _ = os.path.splitext(os.path.split(filepath)[-1])
7477
path_hash = hashlib.sha1(filepath.encode("utf-8")).hexdigest()
75-
mod_name = f"unusual_prefix_{path_hash}_{org_mod_name}"
78+
if mod_name is None:
79+
mod_name = f"unusual_prefix_{path_hash}_{org_mod_name}"
7680

77-
if mod_name in sys.modules:
78-
del sys.modules[mod_name]
81+
if mod_name in sys.modules:
82+
del sys.modules[mod_name]
7983

8084
def parse(mod_name, filepath):
8185
try:

dbgpt/core/awel/trigger/http_trigger.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1046,7 +1046,7 @@ class RequestedParsedOperator(MapOperator[CommonLLMHttpRequestBody, str]):
10461046
"key",
10471047
str,
10481048
optional=True,
1049-
default="",
1049+
default="messages",
10501050
description="The key of the dict, link 'user_input'",
10511051
)
10521052
],

dbgpt/model/base.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ def reduce(outs: List["WorkerApplyOutput"]) -> "WorkerApplyOutput":
5959
return WorkerApplyOutput("Not outputs")
6060
combined_success = all(out.success for out in outs)
6161
max_timecost = max(out.timecost for out in outs)
62-
combined_message = ", ".join(out.message for out in outs)
62+
combined_message = "\n;".join(out.message for out in outs)
6363
return WorkerApplyOutput(combined_message, combined_success, max_timecost)
6464

6565

dbgpt/model/cluster/worker/manager.py

+26-5
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import random
77
import sys
88
import time
9+
import traceback
910
from concurrent.futures import ThreadPoolExecutor
1011
from dataclasses import asdict
1112
from typing import Awaitable, Callable, Iterator
@@ -490,6 +491,8 @@ async def _apply_worker(
490491
async def _start_all_worker(
491492
self, apply_req: WorkerApplyRequest
492493
) -> WorkerApplyOutput:
494+
from httpx import TimeoutException, TransportError
495+
493496
# TODO avoid start twice
494497
start_time = time.time()
495498
logger.info(f"Begin start all worker, apply_req: {apply_req}")
@@ -520,9 +523,24 @@ async def _start_worker(worker_run_data: WorkerRunData):
520523
)
521524
)
522525
out.message = f"{info} start successfully"
523-
except Exception as e:
526+
except TimeoutException as e:
527+
out.success = False
528+
out.message = (
529+
f"{info} start failed for network timeout, please make "
530+
f"sure your port is available, if you are using global network "
531+
f"proxy, please close it"
532+
)
533+
except TransportError as e:
524534
out.success = False
525-
out.message = f"{info} start failed, {str(e)}"
535+
out.message = (
536+
f"{info} start failed for network error, please make "
537+
f"sure your port is available, if you are using global network "
538+
"proxy, please close it"
539+
)
540+
except Exception:
541+
err_msg = traceback.format_exc()
542+
out.success = False
543+
out.message = f"{info} start failed, {err_msg}"
526544
finally:
527545
out.timecost = time.time() - _start_time
528546
return out
@@ -837,10 +855,13 @@ async def start_worker_manager():
837855
try:
838856
await worker_manager.start()
839857
except Exception as e:
840-
logger.error(f"Error starting worker manager: {e}")
841-
sys.exit(1)
858+
import signal
859+
860+
logger.error(f"Error starting worker manager: {str(e)}")
861+
os.kill(os.getpid(), signal.SIGINT)
842862

843-
# It cannot be blocked here because the startup of worker_manager depends on the fastapi app (registered to the controller)
863+
# It cannot be blocked here because the startup of worker_manager depends on
864+
# the fastapi app (registered to the controller)
844865
asyncio.create_task(start_worker_manager())
845866

846867
@app.on_event("shutdown")

dbgpt/serve/flow/config.py

+3
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,6 @@ class ServeConfig(BaseServeConfig):
2020
api_keys: Optional[str] = field(
2121
default=None, metadata={"help": "API keys for the endpoint, if None, allow all"}
2222
)
23+
load_dbgpts_interval: int = field(
24+
default=5, metadata={"help": "Interval to load dbgpts from installed packages"}
25+
)

dbgpt/serve/flow/service/service.py

+18-7
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import traceback
44
from typing import Any, List, Optional, cast
55

6+
import schedule
67
from fastapi import HTTPException
78

89
from dbgpt.component import SystemApp
@@ -56,7 +57,10 @@ def init_app(self, system_app: SystemApp) -> None:
5657
self._dao = self._dao or ServeDao(self._serve_config)
5758
self._system_app = system_app
5859
self._dbgpts_loader = system_app.get_component(
59-
DBGPTsLoader.name, DBGPTsLoader, or_register_component=DBGPTsLoader
60+
DBGPTsLoader.name,
61+
DBGPTsLoader,
62+
or_register_component=DBGPTsLoader,
63+
load_dbgpts_interval=self._serve_config.load_dbgpts_interval,
6064
)
6165

6266
def before_start(self):
@@ -68,7 +72,10 @@ def before_start(self):
6872
def after_start(self):
6973
"""Execute after the application starts"""
7074
self.load_dag_from_db()
71-
self.load_dag_from_dbgpts()
75+
self.load_dag_from_dbgpts(is_first_load=True)
76+
schedule.every(self._serve_config.load_dbgpts_interval).seconds.do(
77+
self.load_dag_from_dbgpts
78+
)
7279

7380
@property
7481
def dao(self) -> BaseDao[ServeEntity, ServeRequest, ServerResponse]:
@@ -126,6 +133,7 @@ def create_and_save_dag(
126133
if save_failed_flow:
127134
request.state = State.LOAD_FAILED
128135
request.error_message = str(e)
136+
request.dag_id = ""
129137
return self.dao.create(request)
130138
else:
131139
raise e
@@ -147,6 +155,7 @@ def create_and_save_dag(
147155
if save_failed_flow:
148156
request.state = State.LOAD_FAILED
149157
request.error_message = f"Register DAG error: {str(e)}"
158+
request.dag_id = ""
150159
self.dao.update({"uid": request.uid}, request)
151160
else:
152161
# Rollback
@@ -198,7 +207,7 @@ def _pre_load_dag_from_dbgpts(self):
198207
f"dbgpts error: {str(e)}"
199208
)
200209

201-
def load_dag_from_dbgpts(self):
210+
def load_dag_from_dbgpts(self, is_first_load: bool = False):
202211
"""Load DAG from dbgpts"""
203212
flows = self.dbgpts_loader.get_flows()
204213
for flow in flows:
@@ -208,7 +217,7 @@ def load_dag_from_dbgpts(self):
208217
exist_inst = self.get({"name": flow.name})
209218
if not exist_inst:
210219
self.create_and_save_dag(flow, save_failed_flow=True)
211-
else:
220+
elif is_first_load or exist_inst.state != State.RUNNING:
212221
# TODO check version, must be greater than the exist one
213222
flow.uid = exist_inst.uid
214223
self.update_flow(flow, check_editable=False, save_failed_flow=True)
@@ -242,6 +251,7 @@ def update_flow(
242251
if save_failed_flow:
243252
request.state = State.LOAD_FAILED
244253
request.error_message = str(e)
254+
request.dag_id = ""
245255
return self.dao.update({"uid": request.uid}, request)
246256
else:
247257
raise e
@@ -306,12 +316,13 @@ def delete(self, uid: str) -> Optional[ServerResponse]:
306316
inst = self.get(query_request)
307317
if inst is None:
308318
raise HTTPException(status_code=404, detail=f"Flow {uid} not found")
309-
if not inst.dag_id:
319+
if inst.state == State.RUNNING and not inst.dag_id:
310320
raise HTTPException(
311-
status_code=404, detail=f"Flow {uid}'s dag id not found"
321+
status_code=404, detail=f"Running flow {uid}'s dag id not found"
312322
)
313323
try:
314-
self.dag_manager.unregister_dag(inst.dag_id)
324+
if inst.dag_id:
325+
self.dag_manager.unregister_dag(inst.dag_id)
315326
except Exception as e:
316327
logger.warning(f"Unregister DAG({inst.dag_id}) error: {str(e)}")
317328
self.dao.delete(query_request)

dbgpt/util/dbgpts/base.py

+7
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,13 @@
2323
INSTALL_METADATA_FILE = "install_metadata.toml"
2424
DBGPTS_METADATA_FILE = "dbgpts.toml"
2525

26+
# Maps a dbgpts package type name to the sub-package directory it lives in.
TYPE_TO_PACKAGE = dict(
    agent="agents",
    app="apps",
    operator="operators",
    flow="workflow",
)
32+
2633

2734
def _get_env_sig() -> str:
2835
"""Get a unique signature for the current Python environment."""

0 commit comments

Comments
 (0)