Skip to content

Commit

Permalink
remove dependency on docker-compose when starting up archgw (#305)
Browse files Browse the repository at this point in the history
  • Loading branch information
adilhafeez authored Nov 26, 2024
1 parent 726f1a3 commit 0ff3d43
Show file tree
Hide file tree
Showing 16 changed files with 3,768 additions and 281 deletions.
24 changes: 0 additions & 24 deletions arch/docker-compose.yaml

This file was deleted.

19 changes: 0 additions & 19 deletions arch/tools/build_cli.sh
Original file line number Diff line number Diff line change
@@ -1,23 +1,4 @@
#!/bin/bash

# Define paths
source_schema="../arch_config_schema.yaml"
source_compose="../docker-compose.yaml"
destination_dir="config"

# Ensure the destination directory exists only if it doesn't already
if [ ! -d "$destination_dir" ]; then
mkdir -p "$destination_dir"
echo "Directory $destination_dir created."
fi

# Copy the files
cp "$source_schema" "$destination_dir/arch_config_schema.yaml"
cp "$source_compose" "$destination_dir/docker-compose.yaml"
touch "$destination_dir/env.list"

# Print success message
echo "Files copied successfully!"

echo "Building the cli"
poetry install
4 changes: 1 addition & 3 deletions arch/tools/cli/config_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ def validate_and_render_schema():
"port": 80, # default port
}

print(inferred_clusters)
endpoints = config_yaml.get("endpoints", {})

# override the inferred clusters with the ones defined in the config
Expand Down Expand Up @@ -88,7 +87,6 @@ def validate_and_render_schema():
}

rendered = template.render(data)
print(rendered)
print(ENVOY_CONFIG_FILE_RENDERED)
with open(ENVOY_CONFIG_FILE_RENDERED, "w") as file:
file.write(rendered)
Expand All @@ -108,7 +106,7 @@ def validate_prompt_config(arch_config_file, arch_config_schema_file):
validate(config_yaml, config_schema_yaml)
except Exception as e:
print(
f"Error validating arch_config file: {arch_config_file}, error: {e.message}"
f"Error validating arch_config file: {arch_config_file}, schema file: {arch_config_schema_file}, error: {e.message}"
)
raise e

Expand Down
2 changes: 2 additions & 0 deletions arch/tools/cli/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,5 @@
SERVICE_ALL = "all"
MODEL_SERVER_LOG_FILE = "~/archgw_logs/modelserver.log"
ACCESS_LOG_FILES = "~/archgw_logs/access*"
ARCHGW_DOCKER_NAME = "archgw"
ARCHGW_DOCKER_IMAGE = "katanemo/archgw:latest"
150 changes: 60 additions & 90 deletions arch/tools/cli/core.py
Original file line number Diff line number Diff line change
@@ -1,40 +1,74 @@
import subprocess
import os
import time
import pkg_resources
import select
import sys
import glob
from cli.utils import run_docker_compose_ps, print_service_status, check_services_state
import docker
from cli.utils import getLogger
from cli.consts import (
ARCHGW_DOCKER_IMAGE,
ARCHGW_DOCKER_NAME,
KATANEMO_LOCAL_MODEL_LIST,
MODEL_SERVER_LOG_FILE,
ACCESS_LOG_FILES,
)
from huggingface_hub import snapshot_download
from dotenv import dotenv_values


log = getLogger(__name__)


def start_archgw_docker(client, arch_config_file, env):
    """Launch the arch gateway container in detached mode and return it.

    Args:
        client: Object exposing ``containers.run`` (presumably a Docker SDK
            client such as the one from ``docker.from_env()`` -- confirm
            against the caller).
        arch_config_file: Host path of the arch config file; it is mounted
            read-only at ``/app/arch_config.yaml`` inside the container.
        env: Mapping of additional environment variables. Its entries are
            applied after the default ``OTEL_TRACING_HTTP_ENDPOINT`` and
            therefore override it when the same key is present.

    Returns:
        Whatever ``client.containers.run`` returns for the started container.
    """
    # Host directory ("~" expanded) that backs /var/log inside the container.
    host_logs_dir = os.path.expanduser("~/archgw_logs")

    # Every container port is published on the identical host port number.
    port_bindings = {
        f"{port}/tcp": port for port in (10000, 10001, 11000, 12000, 19901)
    }

    volume_mounts = {
        f"{arch_config_file}": {"bind": "/app/arch_config.yaml", "mode": "ro"},
        "/etc/ssl/cert.pem": {"bind": "/etc/ssl/cert.pem", "mode": "ro"},
        host_logs_dir: {"bind": "/var/log"},
    }

    # Default tracing endpoint first so caller-supplied values win on clash.
    container_env = {
        "OTEL_TRACING_HTTP_ENDPOINT": "http://host.docker.internal:4318/v1/traces",
        **env,
    }

    # Health check durations are spelled out as multiples of one second.
    one_second = 1_000_000_000
    health_probe = {
        "test": ["CMD", "curl", "-f", "http://localhost:10000/healthz"],
        "interval": 5 * one_second,  # 5 seconds
        "timeout": one_second,  # 1 second
        "retries": 3,
    }

    return client.containers.run(
        name=ARCHGW_DOCKER_NAME,
        image=ARCHGW_DOCKER_IMAGE,
        detach=True,
        ports=port_bindings,
        volumes=volume_mounts,
        environment=container_env,
        extra_hosts={"host.docker.internal": "host-gateway"},
        healthcheck=health_probe,
    )


def stream_gateway_logs(follow):
"""
Stream logs from the arch gateway service.
"""
compose_file = pkg_resources.resource_filename(
__name__, "../config/docker-compose.yaml"
)

log.info("Logs from arch gateway service.")

options = ["docker", "compose", "-p", "arch", "logs"]
options = ["docker", "logs", "archgw"]
if follow:
options.append("-f")
try:
# Run `docker logs` to stream logs from the archgw container
subprocess.run(
options,
cwd=os.path.dirname(compose_file),
check=True,
stdout=sys.stdout,
stderr=sys.stderr,
Expand Down Expand Up @@ -88,42 +122,20 @@ def start_arch(arch_config_file, env, log_timeout=120):
Start the archgw Docker container in detached mode and poll its health status until it is healthy or the timeout elapses.
Args:
path (str): The path where the prompt_confi.yml file is located.
path (str): The path where the prompt_config.yml file is located.
log_timeout (int): Time in seconds to show logs before checking for healthy state.
"""
log.info("Starting arch gateway")
compose_file = pkg_resources.resource_filename(
__name__, "../config/docker-compose.yaml"
)

try:
# Run the Docker Compose command in detached mode (-d)
subprocess.run(
[
"docker",
"compose",
"-p",
"arch",
"up",
"-d",
],
cwd=os.path.dirname(
compose_file
), # Ensure the Docker command runs in the correct path
env=env, # Pass the modified environment
check=True, # Raise an exception if the command fails
stderr=subprocess.PIPE,
stdout=subprocess.PIPE,
)
log.info(f"Arch docker-compose started in detached.")
client = docker.from_env()

container = start_archgw_docker(client, arch_config_file, env)

start_time = time.time()
services_status = {}
services_running = (
False # assume that the services are not running at the moment
)

while True:
container = client.containers.get(container.id)
current_time = time.time()
elapsed_time = current_time - start_time

Expand All @@ -132,53 +144,16 @@ def start_arch(arch_config_file, env, log_timeout=120):
log.info(f"Stopping log monitoring after {log_timeout} seconds.")
break

current_services_status = run_docker_compose_ps(
compose_file=compose_file, env=env
)
if not current_services_status:
log.info(
"Status for the services could not be detected. Something went wrong. Please run docker logs"
)
break

if not services_status:
services_status = current_services_status # set the first time
print_service_status(
services_status
) # print the services status and proceed.
container_status = container.attrs["State"]["Health"]["Status"]

# check if anyone service is failed or exited state, if so print and break out
unhealthy_states = ["unhealthy", "exit", "exited", "dead", "bad"]
running_states = ["running", "up"]

if check_services_state(current_services_status, running_states):
log.info("Arch gateway is up and running!")
break

if check_services_state(current_services_status, unhealthy_states):
log.info(
"One or more Arch services are unhealthy. Please run `docker logs` for more information"
)
print_service_status(
current_services_status
) # print the services status and proceed.
if container_status == "healthy":
log.info("Container is healthy!")
break
else:
log.info(f"Container health status: {container_status}")
time.sleep(1)

# check to see if the status of one of the services has changed from prior. Print and loop over until finish, or error
for service_name in services_status.keys():
if (
services_status[service_name]["State"]
!= current_services_status[service_name]["State"]
):
log.info(
"One or more Arch services have changed state. Printing current state"
)
print_service_status(current_services_status)
break

services_status = current_services_status

except subprocess.CalledProcessError as e:
except docker.errors.APIError as e:
log.info(f"Failed to start Arch: {str(e)}")


Expand All @@ -189,21 +164,16 @@ def stop_arch():
Args:
path (str): The path where the docker-compose.yml file is located.
"""
compose_file = pkg_resources.resource_filename(
__name__, "../config/docker-compose.yaml"
)

log.info("Shutting down arch gateway service.")

try:
# Run `docker stop` and then `docker remove` to shut down and delete the archgw container
subprocess.run(
["docker", "compose", "-p", "arch", "down"],
cwd=os.path.dirname(compose_file),
check=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
["docker", "stop", "archgw"],
)
subprocess.run(
["docker", "remove", "archgw"],
)

log.info("Successfully shut down arch gateway service.")

except subprocess.CalledProcessError as e:
Expand Down
25 changes: 8 additions & 17 deletions arch/tools/cli/main.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
import click
import os
import pkg_resources
import sys
import subprocess
import multiprocessing
import importlib.metadata
from cli import targets
from cli import config_generator
from cli.utils import getLogger, get_llm_provider_access_keys, load_env_file_to_dict
from cli.utils import (
getLogger,
get_llm_provider_access_keys,
load_env_file_to_dict,
validate_schema,
)
from cli.core import (
start_arch_modelserver,
stop_arch_modelserver,
Expand Down Expand Up @@ -160,17 +163,12 @@ def up(file, path, service):
return

log.info(f"Validating {arch_config_file}")
arch_schema_config = pkg_resources.resource_filename(
__name__, "../config/arch_config_schema.yaml"
)

try:
config_generator.validate_prompt_config(
arch_config_file=arch_config_file,
arch_config_schema_file=arch_schema_config,
)
validate_schema(arch_config_file)
except Exception as e:
log.info(f"Exiting archgw up: validation failed")
log.info(f"Error: {str(e)}")
sys.exit(1)

log.info("Starging arch model server and arch gateway")
Expand Down Expand Up @@ -213,14 +211,7 @@ def up(file, path, service):
else:
env_stage[access_key] = env_file_dict[access_key]

with open(
pkg_resources.resource_filename(__name__, "../config/env.list"), "w"
) as file:
for key, value in env_stage.items():
file.write(f"{key}={value}\n")

env.update(env_stage)
env["ARCH_CONFIG_FILE"] = arch_config_file

if service == SERVICE_NAME_ARCHGW:
start_arch(arch_config_file, env)
Expand Down
Loading

0 comments on commit 0ff3d43

Please sign in to comment.