Update model_server (#164)

* Update model server
* Delete model_server/.vscode/settings.json
* Update loader.py
* Fix errors
* Update log mode
katanemo · Oct 10, 2024 · 3b7c586
1 parent b8d2756
commit 3b7c586
Show file tree

Hide file tree

Showing 24 changed files with 492 additions and 1,801 deletions.
diff --git a/model_server/app/__init__.py b/model_server/app/__init__.py
@@ -1,11 +1,11 @@
 import sys
-import subprocess
 import os
-import signal
 import time
 import requests
 import psutil
 import tempfile
+import subprocess
+
 
 # Path to the file where the server process ID will be stored
 PID_FILE = os.path.join(tempfile.gettempdir(), "model_server.pid")
@@ -36,7 +36,7 @@ def start_server():
         sys.exit(1)
 
     print(
-        f"Starting Archgw Model Server - Loading some awesomeness, this may take a little time.)"
+        "Starting Archgw Model Server - Loading some awesomeness, this may take a little time.)"
     )
     process = subprocess.Popen(
         ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "51000"],
@@ -49,10 +49,10 @@ def start_server():
         # Write the process ID to the PID file
         with open(PID_FILE, "w") as f:
             f.write(str(process.pid))
-        print(f"ARCH GW Model Server started with PID {process.pid}")
+        print(f"Archgw Model Server started with PID {process.pid}")
     else:
         # Add model_server boot-up logs
-        print(f"ARCH GW Model Server - Didn't Sart In Time. Shutting Down")
+        print("Archgw Model Server - Didn't Sart In Time. Shutting Down")
         process.terminate()
 
 
@@ -66,7 +66,7 @@ def wait_for_health_check(url, timeout=180):
                 return True
         except requests.ConnectionError:
             time.sleep(1)
-    print("Timed out waiting for ARCH GW Model Server to respond.")
+    print("Timed out waiting for Archgw Model Server to respond.")
     return False
 
 

diff --git a/model_server/app/arch_fc/bolt_handler.py b/model_server/app/arch_fc/bolt_handler.py
diff --git a/model_server/app/arch_fc/common.py b/model_server/app/arch_fc/common.py
diff --git a/model_server/app/arch_fc/logger.yaml b/model_server/app/arch_fc/logger.yaml
diff --git a/model_server/app/arch_fc/test_arch_fc.py b/model_server/app/arch_fc/test_arch_fc.py
diff --git a/model_server/app/arch_fc/__init__.py b/model_server/app/commons/__init__.py
diff --git a/model_server/app/commons/constants.py b/model_server/app/commons/constants.py
@@ -0,0 +1,31 @@
+import app.commons.globals as glb
+import app.commons.utilities as utils
+import app.loader as loader
+
+from app.function_calling.model_handler import ArchFunctionHandler
+from app.prompt_guard.model_handler import ArchGuardHanlder
+
+
+arch_function_hanlder = ArchFunctionHandler()
+arch_function_endpoint = "https://api.fc.archgw.com/v1"
+arch_function_client = utils.get_client(arch_function_endpoint)
+arch_function_generation_params = {
+    "temperature": 0.2,
+    "top_p": 1.0,
+    "top_k": 50,
+    "max_tokens": 512,
+    "stop_token_ids": [151645],
+}
+
+arch_guard_model_type = {"cpu": "katanemo/Arch-Guard-cpu", "gpu": "katanemo/Arch-Guard"}
+
+
+# Model definition
+embedding_model = loader.get_embedding_model()
+zero_shot_model = loader.get_zero_shot_model()
+
+prompt_guard_dict = loader.get_prompt_guard(
+    arch_guard_model_type[glb.HARDWARE], glb.HARDWARE
+)
+
+arch_guard_handler = ArchGuardHanlder(model_dict=prompt_guard_dict)
diff --git a/model_server/app/commons/globals.py b/model_server/app/commons/globals.py
@@ -0,0 +1,6 @@
+import app.commons.utilities as utils
+
+
+DEVICE = utils.get_device()
+MODE = utils.get_serving_mode()
+HARDWARE = utils.get_hardware(MODE)