Add support for streaming and fix a few issues (see description) (#202)

katanemo · Oct 29, 2024 · 662a840 · 662a840
1 parent 29ff8da
commit 662a840
Show file tree

Hide file tree

Showing 45 changed files with 2,274 additions and 485 deletions.
diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml
diff --git a/.github/workflows/e2e_tests.yml b/.github/workflows/e2e_tests.yml
@@ -0,0 +1,32 @@
+name: e2e tests
+
+on:
+  push:
+    branches:
+      - main  # run on pushes to main only
+  pull_request:  # and on every pull request
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.10"  # quoted so YAML does not read it as the float 3.1
+
+      - name: Install Poetry
+        run: |
+          curl -sSL https://install.python-poetry.org | python3 -
+          echo "$HOME/.local/bin" >> "$GITHUB_PATH"  # a plain export would not outlive this step
+
+      - name: Run e2e tests
+        env:
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
+        run: |
+          cd e2e_tests && bash run_e2e_tests.sh
diff --git a/.github/workflows/model-server-tests.yml b/.github/workflows/model-server-tests.yml
@@ -1,4 +1,4 @@
-name: Run Model Server tests
+name: model server tests
 
 on:
   push:

diff --git a/.github/workflows/rust_tests.yml b/.github/workflows/rust_tests.yml
@@ -0,0 +1,33 @@
+name: rust tests (prompt and llm gateway)
+
+on:
+  push:
+    branches: [main]  # pushes trigger a run only on main
+  pull_request:  # every pull request triggers a run
+
+jobs:
+  test:
+    name: Test
+    defaults:
+      run:
+        working-directory: ./crates  # applies to every `run` step below
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Setup | Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup | Rust
+        run: rustup toolchain install stable --profile minimal
+
+      - name: Setup | Install wasm toolchain
+        run: rustup target add wasm32-wasi
+
+      - name: Build wasm module
+        run: cargo build --release --target=wasm32-wasi
+
+      - name: Run unit tests
+        run: cargo test --lib
+
+      - name: Run integration tests
+        run: cargo test --test integration
diff --git a/.gitignore b/.gitignore
@@ -31,3 +31,4 @@ arch_logs/
 dist/
 crates/*/target/
 crates/target/
+build.log
diff --git a/arch/Dockerfile b/arch/Dockerfile
@@ -12,6 +12,9 @@ FROM envoyproxy/envoy:v1.31-latest as envoy
 
 #Build config generator, so that we have a single build image for both Rust and Python
 FROM python:3-slim as arch
+
+RUN apt-get update && apt-get install -y gettext-base && apt-get clean && rm -rf /var/lib/apt/lists/*
+
 COPY --from=builder /arch/target/wasm32-wasi/release/prompt_gateway.wasm /etc/envoy/proxy-wasm-plugins/prompt_gateway.wasm
 COPY --from=builder /arch/target/wasm32-wasi/release/llm_gateway.wasm /etc/envoy/proxy-wasm-plugins/llm_gateway.wasm
 COPY --from=envoy /usr/local/bin/envoy /usr/local/bin/envoy
@@ -22,4 +25,5 @@ COPY arch/tools/cli/config_generator.py .
 COPY arch/envoy.template.yaml .
 COPY arch/arch_config_schema.yaml .
 
-CMD ["sh", "-c", "python config_generator.py && envoy -c /etc/envoy/envoy.yaml --component-log-level wasm:debug"]
+
+ENTRYPOINT ["sh", "-c", "python config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:debug"]
diff --git a/arch/arch_config_schema.yaml b/arch/arch_config_schema.yaml
@@ -160,4 +160,3 @@ required:
   - version
   - listener
   - llm_providers
-  - prompt_targets
diff --git a/arch/build_filter_image.sh b/arch/build_filter_image.sh
@@ -1 +1 @@
-docker build -t archgw .. -f Dockerfile
+docker build  -f Dockerfile .. -t katanemo/archgw
diff --git a/arch/docker-compose.dev.yaml b/arch/docker-compose.dev.yaml
@@ -1,6 +1,6 @@
 services:
   archgw:
-    image: archgw:latest
+    image: katanemo/archgw:latest
     ports:
       - "10000:10000"
       - "11000:11000"
@@ -10,9 +10,13 @@ services:
       - ${ARCH_CONFIG_FILE:-../demos/function_calling/arch_config.yaml}:/config/arch_config.yaml
       - /etc/ssl/cert.pem:/etc/ssl/cert.pem
       - ./envoy.template.yaml:/config/envoy.template.yaml
-      - ./target/wasm32-wasi/release/intelligent_prompt_gateway.wasm:/etc/envoy/proxy-wasm-plugins/intelligent_prompt_gateway.wasm
       - ./arch_config_schema.yaml:/config/arch_config_schema.yaml
-      - ./tools/config_generator.py:/config/config_generator.py
-      - ./arch_logs:/var/log/
-    env_file:
-      - stage.env
+      - ./tools/cli/config_generator.py:/config/config_generator.py
+      - ../crates/target/wasm32-wasi/release/llm_gateway.wasm:/etc/envoy/proxy-wasm-plugins/llm_gateway.wasm
+      - ../crates/target/wasm32-wasi/release/prompt_gateway.wasm:/etc/envoy/proxy-wasm-plugins/prompt_gateway.wasm
+      - ~/archgw_logs:/var/log/
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    environment:
+      - OPENAI_API_KEY=${OPENAI_API_KEY:?error}
+      - MISTRAL_API_KEY=${MISTRAL_API_KEY:?error}
diff --git a/arch/docker-compose.e2e.yaml b/arch/docker-compose.e2e.yaml
@@ -0,0 +1,17 @@
+services:
+  archgw:
+    image: katanemo/archgw:latest
+    ports:  # host:container
+      - "10000:10000"
+      - "11000:11000"
+      - "12000:12000"
+      - "19901:9901"
+    volumes:  # ARCH_CONFIG_FILE, when set, replaces the default demo config path
+      - "${ARCH_CONFIG_FILE:-../demos/function_calling/arch_config.yaml}:/config/arch_config.yaml"
+      - "/etc/ssl/cert.pem:/etc/ssl/cert.pem"
+      - "~/archgw_logs:/var/log/"
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    environment:  # `:?` makes compose abort when the variable is unset or empty
+      OPENAI_API_KEY: "${OPENAI_API_KEY:?error}"
+      MISTRAL_API_KEY: "${MISTRAL_API_KEY:?error}"
diff --git a/arch/docker-compose.yaml b/arch/docker-compose.yaml
@@ -7,7 +7,7 @@ services:
       - "12000:12000"
       - "19901:9901"
     volumes:
-      - ${ARCH_CONFIG_FILE:-./demos/function_calling/arch_confg.yaml}:/config/arch_config.yaml
+      - ${ARCH_CONFIG_FILE:-../demos/function_calling/arch_config.yaml}:/config/arch_config.yaml
       - /etc/ssl/cert.pem:/etc/ssl/cert.pem
       - ~/archgw_logs:/var/log/
     env_file:

diff --git a/arch/envoy.template.yaml b/arch/envoy.template.yaml
@@ -52,6 +52,15 @@ static_resources:
                             cluster: arch_llm_listener
                             timeout: 60s
                 http_filters:
+                  - name: envoy.filters.http.compressor
+                    typed_config:
+                      "@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
+                      compressor_library:
+                        name: compress
+                        typed_config:
+                          "@type": type.googleapis.com/envoy.extensions.compression.gzip.compressor.v3.Gzip
+                          memory_level: 3
+                          window_bits: 10
                   - name: envoy.filters.http.wasm
                     typed_config:
                       "@type": type.googleapis.com/udpa.type.v1.TypedStruct
@@ -69,6 +78,17 @@ static_resources:
                             code:
                               local:
                                 filename: "/etc/envoy/proxy-wasm-plugins/prompt_gateway.wasm"
+                  - name: envoy.filters.http.decompressor
+                    typed_config:
+                      "@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
+                      decompressor_library:
+                        name: decompress
+                        typed_config:
+                          "@type": "type.googleapis.com/envoy.extensions.compression.gzip.decompressor.v3.Gzip"
+                          window_bits: 9
+                          chunk_size: 8192
+                          # If this ratio is set too low, then body data will not be decompressed completely.
+                          max_inflate_ratio: 1000
                   - name: envoy.filters.http.router
                     typed_config:
                       "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
@@ -187,6 +207,12 @@ static_resources:
                       domains:
                         - "*"
                       routes:
+                        - match:
+                            prefix: "/healthz"
+                          route:
+                            auto_host_rewrite: true
+                            cluster: openai
+                            timeout: 60s
                       {% for provider in arch_llm_providers %}
                         - match:
                             prefix: "/"
@@ -206,6 +232,15 @@ static_resources:
                             body:
                               inline_string: "x-arch-llm-provider header not set, llm gateway cannot perform routing\n"
                 http_filters:
+                  - name: envoy.filters.http.compressor
+                    typed_config:
+                      "@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
+                      compressor_library:
+                        name: compress
+                        typed_config:
+                          "@type": type.googleapis.com/envoy.extensions.compression.gzip.compressor.v3.Gzip
+                          memory_level: 3
+                          window_bits: 10
                   - name: envoy.filters.http.wasm
                     typed_config:
                       "@type": type.googleapis.com/udpa.type.v1.TypedStruct
@@ -223,6 +258,17 @@ static_resources:
                             code:
                               local:
                                 filename: "/etc/envoy/proxy-wasm-plugins/llm_gateway.wasm"
+                  - name: envoy.filters.http.decompressor
+                    typed_config:
+                      "@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
+                      decompressor_library:
+                        name: decompress
+                        typed_config:
+                          "@type": "type.googleapis.com/envoy.extensions.compression.gzip.decompressor.v3.Gzip"
+                          window_bits: 9
+                          chunk_size: 8192
+                          # If this ratio is set too low, then body data will not be decompressed completely.
+                          max_inflate_ratio: 1000
                   - name: envoy.filters.http.router
                     typed_config:
                       "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router

diff --git a/arch/tools/cli/config_generator.py b/arch/tools/cli/config_generator.py
@@ -47,13 +47,14 @@ def validate_and_render_schema():
     config_schema_yaml = yaml.safe_load(arch_config_schema)
     inferred_clusters = {}
 
-    for prompt_target in config_yaml["prompt_targets"]:
-        name = prompt_target.get("endpoint", {}).get("name", "")
-        if name not in inferred_clusters:
-            inferred_clusters[name] = {
-                "name": name,
-                "port": 80,  # default port
-            }
+    if "prompt_targets" in config_yaml:
+        for prompt_target in config_yaml["prompt_targets"]:
+            name = prompt_target.get("endpoint", {}).get("name", "")
+            if name not in inferred_clusters:
+                inferred_clusters[name] = {
+                    "name": name,
+                    "port": 80,  # default port
+                }
 
     print(inferred_clusters)
     endpoints = config_yaml.get("endpoints", {})

diff --git a/archgw.code-workspace b/archgw.code-workspace
@@ -20,6 +20,10 @@
       "name": "chatbot_ui",
       "path": "chatbot_ui"
     },
+    {
+      "name": "e2e_tests",
+      "path": "e2e_tests"
+    },
     {
       "name": "demos/function_calling",
       "path": "./demos/function_calling",

diff --git a/chatbot_ui/.vscode/launch.json b/chatbot_ui/.vscode/launch.json
@@ -14,7 +14,9 @@
       "console": "integratedTerminal",
       "env": {
         "LLM": "1",
-        "CHAT_COMPLETION_ENDPOINT": "http://localhost:10000/v1"
+        "CHAT_COMPLETION_ENDPOINT": "http://localhost:10000/v1",
+        "STREAMING": "True",
+        "ARCH_CONFIG": "../../demos/function_calling/arch_config.yaml"
       }
     },
     {
@@ -29,17 +31,5 @@
         "CHAT_COMPLETION_ENDPOINT": "http://localhost:12000/v1"
       }
     },
-    {
-      "name": "chatbot-ui streaming",
-      "cwd": "${workspaceFolder}/app",
-      "type": "debugpy",
-      "request": "launch",
-      "program": "run_stream.py",
-      "console": "integratedTerminal",
-      "env": {
-        "LLM": "1",
-        "CHAT_COMPLETION_ENDPOINT": "http://localhost:10000/v1"
-      }
-    }
   ]
 }
diff --git a/chatbot_ui/app/arch_util.py b/chatbot_ui/app/arch_util.py
@@ -0,0 +1,20 @@
+import json
+
+
+ARCH_STATE_HEADER = "x-arch-state"
+
+
+def get_arch_messages(response_json):
+    """Extract the message list carried in a response's arch state.
+
+    Reads response_json["metadata"]["x-arch-state"] (a JSON string), then its
+    "messages" entry (also a JSON string). Returns [] when any level is
+    missing or empty, including a "metadata" key whose value is null.
+    """
+    if not response_json:
+        return []
+    metadata = response_json.get("metadata") or {}
+    # arch state travels as a JSON-encoded object
+    arch_state = json.loads(metadata.get(ARCH_STATE_HEADER) or "{}")
+    # and its "messages" field is itself JSON-encoded
+    return json.loads(arch_state.get("messages") or "[]")
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		docker build -t archgw .. -f Dockerfile
		docker build -f Dockerfile .. -t katanemo/archgw