Skip to content

Commit

Permalink
model server build (#127)
Browse files Browse the repository at this point in the history
* first commit to have model_server not be dependent on Docker

* making changes to fix the docker-compose file for archgw to set DNS_V4 and minor fixes with the build

* additional fixes for model server to be separated out in the build

* additional fixes for model server to be separated out in the build

* fix to get model_server to be built as a separate python process. TODO: fix the embeddings logs after cli completes

* fixing init to pull tempfile using the tempfile python package

---------

Co-authored-by: Salman Paracha <[email protected]>
  • Loading branch information
salmanap and Salman Paracha authored Oct 7, 2024
1 parent 7d21359 commit b60ceb9
Show file tree
Hide file tree
Showing 21 changed files with 3,390 additions and 154 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,8 @@ demos/network_copilot/ollama/models/
arch_log/
arch/tools/*.egg-info
arch/tools/config
arch/tools/build
model_server/model_server.egg-info
model_server/venv_model_server
model_server/build
model_server/dist
19 changes: 0 additions & 19 deletions arch/docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,24 +7,5 @@ services:
volumes:
- ${ARCH_CONFIG_FILE:-./demos/function_calling/arch_confg.yaml}:/config/arch_config.yaml
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
depends_on:
model_server:
condition: service_healthy
env_file:
- stage.env

model_server:
image: model_server:latest
ports:
- "18081:80"
healthcheck:
test: ["CMD", "curl" ,"http://localhost/healthz"]
interval: 5s
retries: 20
volumes:
- ~/.cache/huggingface:/root/.cache/huggingface
environment:
- OLLAMA_ENDPOINT=${OLLAMA_ENDPOINT:-host.docker.internal}
- OLLAMA_MODEL=Arch-Function-Calling-3B-Q4_K_M
- MODE=${MODE:-cloud}
- FC_URL=${FC_URL:-https://arch-fc-free-trial-4mzywewe.uc.gateway.dev/v1}
8 changes: 4 additions & 4 deletions arch/envoy.template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,8 @@ static_resources:
- endpoint:
address:
socket_address:
address: model_server
port_value: 80
address: host.docker.internal
port_value: 51000
hostname: "model_server"
- name: mistral_7b_instruct
connect_timeout: 5s
Expand Down Expand Up @@ -153,8 +153,8 @@ static_resources:
- endpoint:
address:
socket_address:
address: model_server
port_value: 80
address: host.docker.internal
port_value: 51000
hostname: "arch_fc"
{% for _, cluster in arch_clusters.items() %}
- name: {{ cluster.name }}
Expand Down
26 changes: 15 additions & 11 deletions arch/tools/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pkg_resources
import sys
import subprocess
from core import start_arch, stop_arch
from core import start_arch_modelserver, stop_arch_modelserver, start_arch, stop_arch
from utils import get_llm_provider_access_keys, load_env_file_to_dict

logo = r"""
Expand All @@ -26,7 +26,7 @@ def main(ctx):

# Build inputs: the archgw Docker image and the model_server Poetry project
ARCHGW_DOCKERFILE = "./arch/Dockerfile"
MODEL_SERVER_DOCKERFILE = "./model_server/Dockerfile"
MODEL_SERVER_BUILD_FILE = "./model_server/pyproject.toml"

@click.command()
def build():
Expand All @@ -44,21 +44,22 @@ def build():
click.echo("Error: Dockerfile not found in /arch")
sys.exit(1)

# Check if /model_server/Dockerfile exists
if os.path.exists(MODEL_SERVER_DOCKERFILE):
click.echo("Building model_server image...")
click.echo("All images built successfully.")

"""Install the model server dependencies using Poetry."""
# Check if pyproject.toml exists
if os.path.exists(MODEL_SERVER_BUILD_FILE):
click.echo("Installing model server dependencies with Poetry...")
try:
subprocess.run(["docker", "build", "-f", MODEL_SERVER_DOCKERFILE, "-t", "model_server:latest", "./model_server"], check=True)
click.echo("model_server image built successfully.")
subprocess.run(["poetry", "install", "--no-cache"], cwd=os.path.dirname(MODEL_SERVER_BUILD_FILE), check=True)
click.echo("Model server dependencies installed successfully.")
except subprocess.CalledProcessError as e:
click.echo(f"Error building model_server image: {e}")
click.echo(f"Error installing model server dependencies: {e}")
sys.exit(1)
else:
click.echo("Error: Dockerfile not found in /model_server")
click.echo(f"Error: pyproject.toml not found in {MODEL_SERVER_BUILD_FILE}")
sys.exit(1)

click.echo("All images built successfully.")

@click.command()
@click.argument('file', required=False) # Optional file argument
@click.option('-path', default='.', help='Path to the directory containing arch_config.yml')
Expand Down Expand Up @@ -120,11 +121,14 @@ def up(file, path):
env = os.environ.copy()
env.update(env_stage)
env['ARCH_CONFIG_FILE'] = arch_config_file

start_arch_modelserver()
start_arch(arch_config_file, env)

@click.command()
def down():
    """Stops Arch.

    Stops the locally running model server first, then the gateway.
    The gateway shutdown always runs: stop_arch_modelserver() exits the
    process on failure, so without the ``finally`` the gateway would be
    left running whenever the model server could not be stopped.
    """
    try:
        stop_arch_modelserver()
    finally:
        stop_arch()

@click.command()
Expand Down
33 changes: 31 additions & 2 deletions arch/tools/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,13 @@
import select
from utils import run_docker_compose_ps, print_service_status, check_services_state

def start_arch(arch_config_file, env, log_timeout=120, check_interval=1):
def start_arch(arch_config_file, env, log_timeout=120):
"""
Start Docker Compose in detached mode and stream logs until services are healthy.
Args:
path (str): The path where the prompt_confi.yml file is located.
log_timeout (int): Time in seconds to show logs before checking for healthy state.
check_interval (int): Time in seconds between health status checks.
"""

compose_file = pkg_resources.resource_filename(__name__, 'config/docker-compose.yaml')
Expand Down Expand Up @@ -96,3 +95,33 @@ def stop_arch():

except subprocess.CalledProcessError as e:
print(f"Failed to shut down services: {str(e)}")

def start_arch_modelserver():
    """
    Start the model server by running ``archgw_modelserver restart``.

    This assumes that the archgw_modelserver package is installed locally.
    Exits the process with status 1 when the command cannot be launched
    (for example the executable is not on PATH) or when it returns a
    non-zero exit code.
    """
    try:
        subprocess.run(
            ['archgw_modelserver', 'restart'],
            check=True,
        )
    except OSError as e:
        # Raised before the child runs at all, e.g. FileNotFoundError when
        # the archgw_modelserver entry point is not installed.
        print(f"Failed to start model_server. Could not run archgw_modelserver: {e}")
        sys.exit(1)
    except subprocess.CalledProcessError:
        print("Failed to start model_server. Please check archgw_modelserver logs")
        sys.exit(1)
    else:
        print("Successfull run the archgw model_server")

def stop_arch_modelserver():
    """
    Stop the model server by running ``archgw_modelserver stop``.

    This assumes that the archgw_modelserver package is installed locally.
    Exits the process with status 1 when the command cannot be launched
    (for example the executable is not on PATH) or when it returns a
    non-zero exit code.
    """
    try:
        subprocess.run(
            ['archgw_modelserver', 'stop'],
            check=True,
        )
    except OSError as e:
        # Raised before the child runs at all, e.g. FileNotFoundError when
        # the archgw_modelserver entry point is not installed.
        print(f"Failed to stop model_server. Could not run archgw_modelserver: {e}")
        sys.exit(1)
    except subprocess.CalledProcessError:
        # The message previously said "start", which was misleading here.
        print("Failed to stop model_server. Please check archgw_modelserver logs")
        sys.exit(1)
    else:
        print("Successfull stopped the archgw model_server")
4 changes: 2 additions & 2 deletions model_server/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ WORKDIR /src
ENV MODELS="BAAI/bge-large-en-v1.5"

COPY ./app ./app
COPY ./guard_model_config.yaml .
COPY ./openai_params.yaml .
COPY ./app/guard_model_config.yaml .
COPY ./app/openai_params.yaml .

# comment it out for now as we don't want to download the model every time we build the image
# we will mount host cache to docker image to avoid downloading the model every time
Expand Down
6 changes: 0 additions & 6 deletions model_server/Dockerfile.gpu
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,8 @@ RUN if command -v nvcc >/dev/null 2>&1; then \

COPY . /src

#
# output
#


# Specify list of models that will go into the image as a comma separated list
ENV MODELS="BAAI/bge-large-en-v1.5"
ENV NER_MODELS="urchade/gliner_large-v2.1"
ENV DEBIAN_FRONTEND=noninteractive

COPY /app /app
Expand Down
1 change: 1 addition & 0 deletions model_server/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
## Model Server Package ##
Empty file added model_server/__init__.py
Empty file.
99 changes: 99 additions & 0 deletions model_server/app/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import sys
import subprocess
import os
import signal
import time
import requests
import psutil
import tempfile

# Path to the file where the server process ID will be stored
PID_FILE = os.path.join(tempfile.gettempdir(), "model_server.pid")

def run_server():
    """Dispatch to start, stop, or restart based on the first CLI argument.

    With no argument the action defaults to "start". Any other value
    prints an error and exits with status 1.
    """
    action = sys.argv[1] if len(sys.argv) > 1 else "start"

    # Reject unsupported actions up front so the dispatch below only has
    # to deal with the three known ones.
    if action not in ("start", "stop", "restart"):
        print(f"Unknown action: {action}")
        sys.exit(1)

    if action == "start":
        start_server()
    elif action == "stop":
        stop_server()
    else:
        restart_server()


def start_server():
    """Start the Uvicorn server and save the process ID.

    Exits with status 1 when a live server is already recorded in the PID
    file, or when the new server does not pass its health check in time.
    A PID file that is unreadable or points at a process that no longer
    exists is treated as stale and removed, so a crashed server does not
    block every later start.
    """
    if os.path.exists(PID_FILE):
        try:
            with open(PID_FILE, "r") as f:
                recorded_pid = int(f.read().strip())
        except (OSError, ValueError):
            recorded_pid = None

        if recorded_pid is not None and psutil.pid_exists(recorded_pid):
            print("Server is already running. Use 'model_server restart' to restart it.")
            sys.exit(1)

        # The recorded process is gone (or the file is corrupt): clean up
        # and continue with a fresh start.
        os.remove(PID_FILE)

    print("Starting Archgw Model Server")
    process = subprocess.Popen(
        ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "51000"],
    )

    # Probe via loopback: 0.0.0.0 is a bind address, not a portable
    # destination address for client connections.
    if wait_for_health_check("http://127.0.0.1:51000/healthz"):
        # Write the process ID to the PID file
        with open(PID_FILE, "w") as f:
            f.write(str(process.pid))
        print(f"ARCH GW Model Server started with PID {process.pid}")
    else:
        # TODO: surface the model_server boot-up logs here
        print("ARCH GW Model Server - Didn't Sart In Time. Shutting Down")
        process.terminate()
        try:
            process.wait(timeout=10)
        except subprocess.TimeoutExpired:
            process.kill()
        # Signal failure so callers using check=True do not report success.
        sys.exit(1)

def wait_for_health_check(url, timeout=180):
    """Wait for the Uvicorn server to respond to health-check requests.

    Args:
        url: Health-check endpoint to poll with HTTP GET.
        timeout: Overall time budget in seconds.

    Returns:
        True as soon as the endpoint answers with HTTP 200, False if the
        time budget runs out first.
    """
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            # Bound each probe so a hung connection cannot outlive the
            # overall deadline by an arbitrary amount.
            response = requests.get(url, timeout=5)
            if response.status_code == 200:
                return True
        except requests.RequestException:
            # Server not accepting requests yet (refused, timed out, ...);
            # fall through to the sleep and retry.
            pass
        # Sleep on every unsuccessful probe, including non-200 responses,
        # to avoid hammering the server in a tight loop.
        time.sleep(1)
    print("Timed out waiting for ARCH GW Model Server to respond.")
    return False


def stop_server():
    """Stop the running Uvicorn server.

    Reads the server PID from the PID file, asks the process to terminate,
    and force-kills it if it has not exited within 10 seconds. The PID file
    is removed once the recorded process is known to be gone, and also when
    the file itself is empty or corrupt. Does nothing beyond printing a
    status line when no PID file exists.
    """
    if not os.path.exists(PID_FILE):
        print("Status: Archgw Model Server not running")
        return

    # Read the process ID from the PID file
    try:
        with open(PID_FILE, "r") as f:
            pid = int(f.read().strip())
    except ValueError:
        # An unparsable PID file would otherwise crash here and keep
        # blocking every later start.
        print("PID file is empty or corrupt. Cleaning up PID file.")
        os.remove(PID_FILE)
        return

    try:
        # Get process by PID
        process = psutil.Process(pid)

        # Gracefully terminate the process
        process.terminate()  # Sends SIGTERM by default
        process.wait(timeout=10)  # Wait for up to 10 seconds for the process to exit

        print(f"Server with PID {pid} stopped.")
    except psutil.NoSuchProcess:
        print(f"Process with PID {pid} not found. Cleaning up PID file.")
    except psutil.TimeoutExpired:
        print(f"Process with PID {pid} did not terminate in time. Forcing shutdown.")
        try:
            process.kill()  # Forcefully kill the process
        except psutil.NoSuchProcess:
            # The process exited between the timeout and the kill.
            pass

    # Reached only on the handled paths above; any other error propagates
    # and leaves the PID file in place, as before.
    os.remove(PID_FILE)

def restart_server():
    """Restart the Uvicorn server.

    Stops the server if one is recorded as running, then starts a new one.
    """
    print("Check: Is Archgw Model Server running?")
    # stop_server() returns normally when no PID file exists, so this also
    # works as a plain start.
    stop_server()
    start_server()
16 changes: 7 additions & 9 deletions model_server/app/arch_fc/arch_fc.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import json
import random
from fastapi import FastAPI, Response
from app.arch_fc.arch_handler import ArchHandler
from app.arch_fc.bolt_handler import BoltHandler
from app.arch_fc.common import ChatMessage, Message
from .common import ChatMessage, Message
from .arch_handler import ArchHandler
from .bolt_handler import BoltHandler
from app.utils import load_yaml_config
import logging
import yaml
from openai import OpenAI
Expand All @@ -14,17 +15,14 @@
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)

with open("openai_params.yaml") as f:
params = yaml.safe_load(f)

params = load_yaml_config("openai_params.yaml")
ollama_endpoint = os.getenv("OLLAMA_ENDPOINT", "localhost")
ollama_model = os.getenv("OLLAMA_MODEL", "Arch-Function-Calling-1.5B-Q4_K_M")
fc_url = os.getenv("FC_URL", ollama_endpoint)
fc_url = os.getenv("FC_URL", "https://arch-fc-free-trial-4mzywewe.uc.gateway.dev/v1")

mode = os.getenv("MODE", "cloud")
if mode not in ["cloud", "local-gpu", "local-cpu"]:
raise ValueError(f"Invalid mode: {mode}")
arch_api_key = os.getenv("ARCH_API_KEY", "vllm")

handler = None
if ollama_model.startswith("Arch"):
Expand Down
Loading

0 comments on commit b60ceb9

Please sign in to comment.