Commit d549a5f

Merge branch 'helmcharts_switch' into zhenzhong/benchmark_helmcharts

Zhenzhong1 authored Oct 21, 2024
2 parents 399768e + 184e9a4

Showing 58 changed files with 920 additions and 797 deletions.
6 changes: 5 additions & 1 deletion .github/workflows/_example-workflow.yml
@@ -12,6 +12,10 @@ on:
       example:
         required: true
         type: string
+      services:
+        default: ""
+        required: false
+        type: string
       tag:
         default: "latest"
         required: false
@@ -77,6 +81,7 @@ jobs:
     with:
       work_dir: ${{ github.workspace }}/${{ inputs.example }}/docker_image_build
       docker_compose_path: ${{ github.workspace }}/${{ inputs.example }}/docker_image_build/build.yaml
+      service_list: ${{ inputs.services }}
       registry: ${OPEA_IMAGE_REPO}opea
       tag: ${{ inputs.tag }}

@@ -105,7 +110,6 @@ jobs:
       example: ${{ inputs.example }}
       hardware: ${{ inputs.node }}
       tag: ${{ inputs.tag }}
-      context: "CD"
     secrets: inherit

 ####################################################################################################
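
Net effect: callers of this reusable workflow can now pass a comma-separated `services` list, which is forwarded to the image-build step as `service_list`, while the `context` input is dropped from the downstream call, matching its removal from `_manifest-e2e.yml` below.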
8 changes: 1 addition & 7 deletions .github/workflows/_manifest-e2e.yml
@@ -20,11 +20,6 @@ on:
         description: "Tag to apply to images, default is latest"
         required: false
         type: string
-      context:
-        default: "CI"
-        description: "CI or CD"
-        required: false
-        type: string

 jobs:
   manifest-test:
@@ -51,7 +46,7 @@ jobs:

       - name: Set variables
         run: |
-          echo "IMAGE_REPO=$OPEA_IMAGE_REPO" >> $GITHUB_ENV
+          echo "IMAGE_REPO=${OPEA_IMAGE_REPO}opea" >> $GITHUB_ENV
           echo "IMAGE_TAG=${{ inputs.tag }}" >> $GITHUB_ENV
           lower_example=$(echo "${{ inputs.example }}" | tr '[:upper:]' '[:lower:]')
           echo "NAMESPACE=$lower_example-$(tr -dc a-z0-9 </dev/urandom | head -c 16)" >> $GITHUB_ENV
@@ -60,7 +55,6 @@

           echo "continue_test=true" >> $GITHUB_ENV
           echo "should_cleanup=false" >> $GITHUB_ENV
           echo "skip_validate=true" >> $GITHUB_ENV
-          echo "CONTEXT=${{ inputs.context }}" >> $GITHUB_ENV
           echo "NAMESPACE=$NAMESPACE"

       - name: Kubectl install
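
With the `context` plumbing removed, the remaining behavioral change here is the image repository: `IMAGE_REPO` now gains an `opea` suffix, so for an illustrative `OPEA_IMAGE_REPO=registry.example.com/`, images resolve under `registry.example.com/opea`, matching the `registry: ${OPEA_IMAGE_REPO}opea` value used in `_example-workflow.yml`.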
59 changes: 59 additions & 0 deletions .github/workflows/manual-image-build.yml
@@ -0,0 +1,59 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

name: Build specific images on manual event
on:
  workflow_dispatch:
    inputs:
      nodes:
        default: "gaudi,xeon"
        description: "Hardware to run test"
        required: true
        type: string
      example:
        default: "ChatQnA"
        description: 'Build images belong to which example?'
        required: true
        type: string
      services:
        default: "chatqna,chatqna-without-rerank"
        description: 'Service list to build'
        required: true
        type: string
      tag:
        default: "latest"
        description: "Tag to apply to images"
        required: true
        type: string
      opea_branch:
        default: "main"
        description: 'OPEA branch for image build'
        required: false
        type: string
jobs:
  get-test-matrix:
    runs-on: ubuntu-latest
    outputs:
      nodes: ${{ steps.get-matrix.outputs.nodes }}
    steps:
      - name: Create Matrix
        id: get-matrix
        run: |
          nodes=($(echo ${{ inputs.nodes }} | tr ',' ' '))
          nodes_json=$(printf '%s\n' "${nodes[@]}" | sort -u | jq -R '.' | jq -sc '.')
          echo "nodes=$nodes_json" >> $GITHUB_OUTPUT
  image-build:
    needs: get-test-matrix
    strategy:
      matrix:
        node: ${{ fromJson(needs.get-test-matrix.outputs.nodes) }}
      fail-fast: false
    uses: ./.github/workflows/_example-workflow.yml
    with:
      node: ${{ matrix.node }}
      example: ${{ inputs.example }}
      services: ${{ inputs.services }}
      tag: ${{ inputs.tag }}
      opea_branch: ${{ inputs.opea_branch }}
    secrets: inherit
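
For reference, the `Create Matrix` step simply turns the comma-separated `nodes` input into a deduplicated, sorted JSON array for `fromJson`. A minimal Python sketch of the same transformation (`build_node_matrix` and the sample input are illustrative, not part of the workflow):

```python
import json


def build_node_matrix(nodes_input: str) -> str:
    # Split on commas, trim, deduplicate, and sort -- mirrors `tr`, `sort -u`, and `jq -sc`.
    nodes = sorted({n.strip() for n in nodes_input.split(",") if n.strip()})
    return json.dumps(nodes)


print(build_node_matrix("gaudi,xeon"))  # ["gaudi", "xeon"]
```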
50 changes: 0 additions & 50 deletions .github/workflows/pr-bum_list_check.yml

This file was deleted.

2 changes: 2 additions & 0 deletions .github/workflows/pr-manifest-e2e.yml
@@ -8,6 +8,8 @@ on:
     branches: ["main", "*rc"]
     types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
     paths:
+      - "**/Dockerfile**"
+      - "**.py"
       - "**/kubernetes/**/manifests/**"
       - "**/tests/test_manifest**"
       - "!**.md"
6 changes: 2 additions & 4 deletions .github/workflows/push-image-build.yml
@@ -23,12 +23,10 @@ jobs:
   image-build:
     needs: job1
     strategy:
-      matrix:
-        example: ${{ fromJSON(needs.job1.outputs.run_matrix).include.*.example }}
-        node: ["gaudi","xeon"]
+      matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
       fail-fast: false
     uses: ./.github/workflows/_example-workflow.yml
     with:
-      node: ${{ matrix.node }}
+      node: ${{ matrix.hardware }}
       example: ${{ matrix.example }}
     secrets: inherit
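
With this change the whole `run_matrix` output drives the job matrix directly, so each generated job gets both keys the workflow reads. A plausible shape, inferred from the expressions above (values are illustrative; the actual entries come from job1):

```python
# Illustrative shape only -- each `include` entry must carry the keys read
# by the workflow: matrix.example and matrix.hardware.
run_matrix = {
    "include": [
        {"example": "ChatQnA", "hardware": "gaudi"},
        {"example": "ChatQnA", "hardware": "xeon"},
    ]
}
```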
67 changes: 67 additions & 0 deletions AgentQnA/README.md
@@ -5,6 +5,73 @@
This example showcases a hierarchical multi-agent system for question-answering applications. The architecture diagram is shown below. The supervisor agent interfaces with the user and dispatches tasks to the worker agent and other tools to gather information and formulate answers. The worker agent uses the retrieval tool to generate answers to the queries posted by the supervisor agent. Other tools used by the supervisor agent may include APIs for interfacing with knowledge graphs, SQL databases, external knowledge bases, etc.
![Architecture Overview](assets/agent_qna_arch.png)

The AgentQnA example is implemented using the component-level microservices defined in [GenAIComps](https://github.com/opea-project/GenAIComps). The flow chart below shows the information flow between different microservices for this example.

```mermaid
---
config:
flowchart:
nodeSpacing: 400
rankSpacing: 100
curve: linear
themeVariables:
fontSize: 50px
---
flowchart LR
%% Colors %%
classDef blue fill:#ADD8E6,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
classDef orange fill:#FBAA60,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
classDef invisible fill:transparent,stroke:transparent;
%% Subgraphs %%
subgraph DocIndexRetriever-MegaService["DocIndexRetriever MegaService "]
direction LR
EM([Embedding MicroService]):::blue
RET([Retrieval MicroService]):::blue
RER([Rerank MicroService]):::blue
end
subgraph UserInput[" User Input "]
direction LR
a([User Input Query]):::orchid
Ingest([Ingest data]):::orchid
end
AG_REACT([Agent MicroService - react]):::blue
AG_RAG([Agent MicroService - rag]):::blue
LLM_gen{{LLM Service <br>}}
DP([Data Preparation MicroService]):::blue
TEI_RER{{Reranking service<br>}}
TEI_EM{{Embedding service <br>}}
VDB{{Vector DB<br><br>}}
R_RET{{Retriever service <br>}}
%% Questions interaction
direction LR
a[User Input Query] --> AG_REACT
AG_REACT --> AG_RAG
AG_RAG --> DocIndexRetriever-MegaService
EM ==> RET
RET ==> RER
Ingest[Ingest data] --> DP
%% Embedding service flow
direction LR
AG_RAG <-.-> LLM_gen
AG_REACT <-.-> LLM_gen
EM <-.-> TEI_EM
RET <-.-> R_RET
RER <-.-> TEI_RER
direction TB
%% Vector DB interaction
R_RET <-.-> VDB
DP <-.-> VDB
```

### Why Agent for question answering?

1. Improve relevancy of retrieved context.
32 changes: 32 additions & 0 deletions AudioQnA/Dockerfile.multilang
@@ -0,0 +1,32 @@


# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

FROM python:3.11-slim

RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
    libgl1-mesa-glx \
    libjemalloc-dev \
    git

RUN useradd -m -s /bin/bash user && \
    mkdir -p /home/user && \
    chown -R user /home/user/

WORKDIR /home/user/
RUN git clone https://github.com/opea-project/GenAIComps.git

WORKDIR /home/user/GenAIComps
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt

COPY ./audioqna_multilang.py /home/user/audioqna_multilang.py

ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps

USER user

WORKDIR /home/user

ENTRYPOINT ["python", "audioqna_multilang.py"]
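
To exercise this image locally, one could build it along the lines of `docker build -t opea/audioqna-multilang:latest -f Dockerfile.multilang .` (the tag is illustrative) and run it with the `MEGA_SERVICE_*`, `WHISPER_SERVER_*`, `GPT_SOVITS_SERVER_*`, and `LLM_SERVER_*` environment variables pointed at running backends, as read by `audioqna_multilang.py` below.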
98 changes: 98 additions & 0 deletions AudioQnA/audioqna_multilang.py
@@ -0,0 +1,98 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import asyncio
import base64
import os

from comps import AudioQnAGateway, MicroService, ServiceOrchestrator, ServiceType

MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))

WHISPER_SERVER_HOST_IP = os.getenv("WHISPER_SERVER_HOST_IP", "0.0.0.0")
WHISPER_SERVER_PORT = int(os.getenv("WHISPER_SERVER_PORT", 7066))
GPT_SOVITS_SERVER_HOST_IP = os.getenv("GPT_SOVITS_SERVER_HOST_IP", "0.0.0.0")
GPT_SOVITS_SERVER_PORT = int(os.getenv("GPT_SOVITS_SERVER_PORT", 9088))
LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 8888))


def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
    """Adapt the previous node's output to the request schema of the next service."""
    print(inputs)
    if self.services[cur_node].service_type == ServiceType.ASR:
        # e.g. {'byte_str': 'UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA'}
        inputs["audio"] = inputs["byte_str"]
        del inputs["byte_str"]
    elif self.services[cur_node].service_type == ServiceType.LLM:
        # convert TGI/vLLM to unified OpenAI /v1/chat/completions format
        next_inputs = {}
        next_inputs["model"] = "tgi"  # specifically clarify the fake model to make the format unified
        next_inputs["messages"] = [{"role": "user", "content": inputs["asr_result"]}]
        next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
        next_inputs["top_p"] = llm_parameters_dict["top_p"]
        next_inputs["stream"] = inputs["streaming"]  # False as default
        next_inputs["frequency_penalty"] = inputs["frequency_penalty"]
        # next_inputs["presence_penalty"] = inputs["presence_penalty"]
        # next_inputs["repetition_penalty"] = inputs["repetition_penalty"]
        next_inputs["temperature"] = inputs["temperature"]
        inputs = next_inputs
    elif self.services[cur_node].service_type == ServiceType.TTS:
        next_inputs = {}
        next_inputs["text"] = inputs["choices"][0]["message"]["content"]
        next_inputs["text_language"] = kwargs["tts_text_language"] if "tts_text_language" in kwargs else "zh"
        inputs = next_inputs
    return inputs


def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_dict, **kwargs):
    """Base64-encode the raw TTS audio so it can travel in a JSON response."""
    if self.services[cur_node].service_type == ServiceType.TTS:
        audio_base64 = base64.b64encode(data).decode("utf-8")
        return {"byte_str": audio_base64}
    return data


class AudioQnAService:
    def __init__(self, host="0.0.0.0", port=8000):
        self.host = host
        self.port = port
        # Install the adapters above as the orchestrator's alignment hooks.
        ServiceOrchestrator.align_inputs = align_inputs
        ServiceOrchestrator.align_outputs = align_outputs
        self.megaservice = ServiceOrchestrator()

    def add_remote_service(self):
        asr = MicroService(
            name="asr",
            host=WHISPER_SERVER_HOST_IP,
            port=WHISPER_SERVER_PORT,
            # endpoint="/v1/audio/transcriptions",
            endpoint="/v1/asr",
            use_remote_service=True,
            service_type=ServiceType.ASR,
        )
        llm = MicroService(
            name="llm",
            host=LLM_SERVER_HOST_IP,
            port=LLM_SERVER_PORT,
            endpoint="/v1/chat/completions",
            use_remote_service=True,
            service_type=ServiceType.LLM,
        )
        tts = MicroService(
            name="tts",
            host=GPT_SOVITS_SERVER_HOST_IP,
            port=GPT_SOVITS_SERVER_PORT,
            # endpoint="/v1/audio/speech",
            endpoint="/",
            use_remote_service=True,
            service_type=ServiceType.TTS,
        )
        # Wire the asr -> llm -> tts pipeline and expose it through the gateway.
        self.megaservice.add(asr).add(llm).add(tts)
        self.megaservice.flow_to(asr, llm)
        self.megaservice.flow_to(llm, tts)
        self.gateway = AudioQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)


if __name__ == "__main__":
    audioqna = AudioQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
    audioqna.add_remote_service()
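
A minimal client sketch for the assembled megaservice, assuming the gateway keeps the standard AudioQnA route `/v1/audioqna` and the base64 request/response schema implied by `align_inputs`/`align_outputs`; the host, port, and file names are placeholders:

```python
import base64

import requests  # third-party; assumed available in the client environment

# Base64-encode an input WAV, as the ASR stage expects (see align_inputs above).
with open("question.wav", "rb") as f:
    audio_b64 = base64.b64encode(f.read()).decode("utf-8")

resp = requests.post(
    "http://localhost:8888/v1/audioqna",  # MEGA_SERVICE_PORT defaults to 8888
    json={"audio": audio_b64, "max_tokens": 64},
    timeout=120,
)
resp.raise_for_status()

# align_outputs wraps the synthesized speech as base64 in `byte_str`.
with open("answer.wav", "wb") as f:
    f.write(base64.b64decode(resp.json()["byte_str"]))
```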