Skip to content

Commit

Permalink
Add support for streaming and fix a few issues (see description) (#202)
Browse files Browse the repository at this point in the history
  • Loading branch information
junr03 authored Oct 29, 2024
1 parent 29ff8da commit 662a840
Show file tree
Hide file tree
Showing 45 changed files with 2,274 additions and 485 deletions.
35 changes: 0 additions & 35 deletions .github/workflows/checks.yml

This file was deleted.

32 changes: 32 additions & 0 deletions .github/workflows/e2e_tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# End-to-end test workflow: installs Python and Poetry, then runs the
# e2e test script with the LLM provider API keys taken from repository secrets.
name: e2e tests

on:
  push:
    branches:
      - main  # Run tests on pushes to the main branch
  pull_request:

jobs:
  test:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          # Quoted so YAML does not read the version as the float 3.1.
          python-version: "3.10"

      - name: Install Poetry
        run: |
          curl -sSL https://install.python-poetry.org | python3 -
          # An `export PATH=...` here would only affect this step's shell.
          # Appending to GITHUB_PATH makes the directory visible to later steps.
          echo "$HOME/.local/bin" >> "$GITHUB_PATH"

      - name: Run e2e tests
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
        run: |
          cd e2e_tests && bash run_e2e_tests.sh
2 changes: 1 addition & 1 deletion .github/workflows/model-server-tests.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Run Model Server tests
name: model server tests

on:
push:
Expand Down
33 changes: 33 additions & 0 deletions .github/workflows/rust_tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# CI for the Rust crates: builds the wasm modules for the wasm32-wasi target,
# then runs the library unit tests and the `integration` test target.
name: rust tests (prompt and llm gateway)

on:
  push:
    branches:
      - main
  pull_request:

jobs:
  test:
    name: Test
    runs-on: ubuntu-latest
    # Every `run` step below executes from the crates directory.
    defaults:
      run:
        working-directory: ./crates

    steps:
      - name: Setup | Checkout
        uses: actions/checkout@v4

      - name: Setup | Rust
        run: rustup toolchain install stable --profile minimal

      # NOTE(review): newer stable Rust releases rename this target to
      # wasm32-wasip1 - confirm the installed toolchain still provides it.
      - name: Setup | Install wasm toolchain
        run: rustup target add wasm32-wasi

      - name: Build wasm module
        run: cargo build --release --target=wasm32-wasi

      - name: Run unit tests
        run: cargo test --lib

      - name: Run integration tests
        run: cargo test --test integration
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,4 @@ arch_logs/
dist/
crates/*/target/
crates/target/
build.log
6 changes: 5 additions & 1 deletion arch/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ FROM envoyproxy/envoy:v1.31-latest as envoy

#Build config generator, so that we have a single build image for both Rust and Python
FROM python:3-slim as arch

RUN apt-get update && apt-get install -y gettext-base && apt-get clean && rm -rf /var/lib/apt/lists/*

COPY --from=builder /arch/target/wasm32-wasi/release/prompt_gateway.wasm /etc/envoy/proxy-wasm-plugins/prompt_gateway.wasm
COPY --from=builder /arch/target/wasm32-wasi/release/llm_gateway.wasm /etc/envoy/proxy-wasm-plugins/llm_gateway.wasm
COPY --from=envoy /usr/local/bin/envoy /usr/local/bin/envoy
Expand All @@ -22,4 +25,5 @@ COPY arch/tools/cli/config_generator.py .
COPY arch/envoy.template.yaml .
COPY arch/arch_config_schema.yaml .

CMD ["sh", "-c", "python config_generator.py && envoy -c /etc/envoy/envoy.yaml --component-log-level wasm:debug"]

ENTRYPOINT ["sh", "-c", "python config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:debug"]
1 change: 0 additions & 1 deletion arch/arch_config_schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -160,4 +160,3 @@ required:
- version
- listener
- llm_providers
- prompt_targets
2 changes: 1 addition & 1 deletion arch/build_filter_image.sh
Original file line number Diff line number Diff line change
@@ -1 +1 @@
docker build -t archgw .. -f Dockerfile
docker build -f Dockerfile .. -t katanemo/archgw
16 changes: 10 additions & 6 deletions arch/docker-compose.dev.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
services:
archgw:
image: archgw:latest
image: katanemo/archgw:latest
ports:
- "10000:10000"
- "11000:11000"
Expand All @@ -10,9 +10,13 @@ services:
- ${ARCH_CONFIG_FILE:-../demos/function_calling/arch_config.yaml}:/config/arch_config.yaml
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
- ./envoy.template.yaml:/config/envoy.template.yaml
- ./target/wasm32-wasi/release/intelligent_prompt_gateway.wasm:/etc/envoy/proxy-wasm-plugins/intelligent_prompt_gateway.wasm
- ./arch_config_schema.yaml:/config/arch_config_schema.yaml
- ./tools/config_generator.py:/config/config_generator.py
- ./arch_logs:/var/log/
env_file:
- stage.env
- ./tools/cli/config_generator.py:/config/config_generator.py
- ../crates/target/wasm32-wasi/release/llm_gateway.wasm:/etc/envoy/proxy-wasm-plugins/llm_gateway.wasm
- ../crates/target/wasm32-wasi/release/prompt_gateway.wasm:/etc/envoy/proxy-wasm-plugins/prompt_gateway.wasm
- ~/archgw_logs:/var/log/
extra_hosts:
- "host.docker.internal:host-gateway"
environment:
- OPENAI_API_KEY=${OPENAI_API_KEY:?error}
- MISTRAL_API_KEY=${MISTRAL_API_KEY:?error}
17 changes: 17 additions & 0 deletions arch/docker-compose.e2e.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Compose definition for running the published archgw image in e2e tests.
services:
  archgw:
    image: katanemo/archgw:latest
    # host:container port mappings; container port 9901 is published on host port 19901.
    ports:
      - "10000:10000"
      - "11000:11000"
      - "12000:12000"
      - "19901:9901"
    volumes:
      # ARCH_CONFIG_FILE overrides the config; defaults to the function_calling demo config.
      - ${ARCH_CONFIG_FILE:-../demos/function_calling/arch_config.yaml}:/config/arch_config.yaml
      - /etc/ssl/cert.pem:/etc/ssl/cert.pem
      - ~/archgw_logs:/var/log/
    extra_hosts:
      - "host.docker.internal:host-gateway"
    # `:?error` makes compose abort when the variable is unset or empty.
    environment:
      OPENAI_API_KEY: "${OPENAI_API_KEY:?error}"
      MISTRAL_API_KEY: "${MISTRAL_API_KEY:?error}"
2 changes: 1 addition & 1 deletion arch/docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ services:
- "12000:12000"
- "19901:9901"
volumes:
- ${ARCH_CONFIG_FILE:-./demos/function_calling/arch_confg.yaml}:/config/arch_config.yaml
- ${ARCH_CONFIG_FILE:-../demos/function_calling/arch_config.yaml}:/config/arch_config.yaml
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
- ~/archgw_logs:/var/log/
env_file:
Expand Down
46 changes: 46 additions & 0 deletions arch/envoy.template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,15 @@ static_resources:
cluster: arch_llm_listener
timeout: 60s
http_filters:
- name: envoy.filters.http.compressor
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
compressor_library:
name: compress
typed_config:
"@type": type.googleapis.com/envoy.extensions.compression.gzip.compressor.v3.Gzip
memory_level: 3
window_bits: 10
- name: envoy.filters.http.wasm
typed_config:
"@type": type.googleapis.com/udpa.type.v1.TypedStruct
Expand All @@ -69,6 +78,17 @@ static_resources:
code:
local:
filename: "/etc/envoy/proxy-wasm-plugins/prompt_gateway.wasm"
- name: envoy.filters.http.decompressor
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
decompressor_library:
name: decompress
typed_config:
"@type": "type.googleapis.com/envoy.extensions.compression.gzip.decompressor.v3.Gzip"
window_bits: 9
chunk_size: 8192
# If this ratio is set too low, then body data will not be decompressed completely.
max_inflate_ratio: 1000
- name: envoy.filters.http.router
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
Expand Down Expand Up @@ -187,6 +207,12 @@ static_resources:
domains:
- "*"
routes:
- match:
prefix: "/healthz"
route:
auto_host_rewrite: true
cluster: openai
timeout: 60s
{% for provider in arch_llm_providers %}
- match:
prefix: "/"
Expand All @@ -206,6 +232,15 @@ static_resources:
body:
inline_string: "x-arch-llm-provider header not set, llm gateway cannot perform routing\n"
http_filters:
- name: envoy.filters.http.compressor
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
compressor_library:
name: compress
typed_config:
"@type": type.googleapis.com/envoy.extensions.compression.gzip.compressor.v3.Gzip
memory_level: 3
window_bits: 10
- name: envoy.filters.http.wasm
typed_config:
"@type": type.googleapis.com/udpa.type.v1.TypedStruct
Expand All @@ -223,6 +258,17 @@ static_resources:
code:
local:
filename: "/etc/envoy/proxy-wasm-plugins/llm_gateway.wasm"
- name: envoy.filters.http.decompressor
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
decompressor_library:
name: decompress
typed_config:
"@type": "type.googleapis.com/envoy.extensions.compression.gzip.decompressor.v3.Gzip"
window_bits: 9
chunk_size: 8192
# If this ratio is set too low, then body data will not be decompressed completely.
max_inflate_ratio: 1000
- name: envoy.filters.http.router
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
Expand Down
15 changes: 8 additions & 7 deletions arch/tools/cli/config_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,14 @@ def validate_and_render_schema():
config_schema_yaml = yaml.safe_load(arch_config_schema)
inferred_clusters = {}

for prompt_target in config_yaml["prompt_targets"]:
name = prompt_target.get("endpoint", {}).get("name", "")
if name not in inferred_clusters:
inferred_clusters[name] = {
"name": name,
"port": 80, # default port
}
if "prompt_targets" in config_yaml:
for prompt_target in config_yaml["prompt_targets"]:
name = prompt_target.get("endpoint", {}).get("name", "")
if name not in inferred_clusters:
inferred_clusters[name] = {
"name": name,
"port": 80, # default port
}

print(inferred_clusters)
endpoints = config_yaml.get("endpoints", {})
Expand Down
4 changes: 4 additions & 0 deletions archgw.code-workspace
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@
"name": "chatbot_ui",
"path": "chatbot_ui"
},
{
"name": "e2e_tests",
"path": "e2e_tests"
},
{
"name": "demos/function_calling",
"path": "./demos/function_calling",
Expand Down
16 changes: 3 additions & 13 deletions chatbot_ui/.vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
"console": "integratedTerminal",
"env": {
"LLM": "1",
"CHAT_COMPLETION_ENDPOINT": "http://localhost:10000/v1"
"CHAT_COMPLETION_ENDPOINT": "http://localhost:10000/v1",
"STREAMING": "True",
"ARCH_CONFIG": "../../demos/function_calling/arch_config.yaml"
}
},
{
Expand All @@ -29,17 +31,5 @@
"CHAT_COMPLETION_ENDPOINT": "http://localhost:12000/v1"
}
},
{
"name": "chatbot-ui streaming",
"cwd": "${workspaceFolder}/app",
"type": "debugpy",
"request": "launch",
"program": "run_stream.py",
"console": "integratedTerminal",
"env": {
"LLM": "1",
"CHAT_COMPLETION_ENDPOINT": "http://localhost:10000/v1"
}
}
]
}
20 changes: 20 additions & 0 deletions chatbot_ui/app/arch_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import json


ARCH_STATE_HEADER = "x-arch-state"


def get_arch_messages(response_json):
    """Extract the message list that the Arch gateway embedded in a response.

    The gateway state is read from ``response_json["metadata"]["x-arch-state"]``,
    which holds a JSON-encoded object. That object's ``"messages"`` entry is
    itself a JSON-encoded string and is decoded a second time.

    Args:
        response_json: Parsed response body (dict-like), or ``None``.

    Returns:
        The decoded messages, or ``[]`` when the response, its metadata, the
        state entry, or the messages entry is missing, null, or empty.

    Raises:
        json.JSONDecodeError: If the state or messages string is not valid JSON.
    """
    if not response_json or "metadata" not in response_json:
        return []

    # The `or` fallbacks cover keys that are present but null, which the
    # `.get(key, default)` form does not handle.
    metadata = response_json.get("metadata") or {}
    arch_state_str = metadata.get(ARCH_STATE_HEADER) or "{}"
    # The state is a JSON document stored as a string.
    arch_state = json.loads(arch_state_str) or {}
    # The messages are JSON-encoded again inside the state.
    arch_messages_str = arch_state.get("messages") or "[]"
    return json.loads(arch_messages_str)
Loading

0 comments on commit 662a840

Please sign in to comment.