PR: OpenSearch integration components for OPEA #908

Status: Open. Wants to merge 31 commits into base: main.
Commits (31):

8babbe5  Push changes to synced branch (cameronmorin, Nov 15, 2024)
69cf79a  Remove unused Redis dependency (cameronmorin, Nov 15, 2024)
219aac9  Add security options to docker-compose files (cameronmorin, Nov 15, 2024)
0973cc3  Address PR comments (cameronmorin, Nov 18, 2024)
44bd2b5  Add env for pass down model id in ChatQnA gateway (#906) (lvliang-intel, Nov 16, 2024)
67b3efd  Add "--no-verbose" flag to wget download commands in entrypoint (#909) (ctao456, Nov 18, 2024)
1f5c7e3  Add empty list check (#914) (ZePan110, Nov 18, 2024)
ee735b7  Create test scripts for dataprep and retriever opensearch microservices (cameronmorin, Nov 20, 2024)
fe7090b  Pre-commit formatting changes (cameronmorin, Nov 20, 2024)
9cfd7cc  Update docker-compose file (cameronmorin, Nov 21, 2024)
6b9a010  Finalize dataprep script (working!) (cameronmorin, Nov 21, 2024)
c9a6a86  Finalize retriever test script (working!) (cameronmorin, Nov 21, 2024)
897a207  Fix hardware tag retrieval issue (#916) (ZePan110, Nov 19, 2024)
ffb4871  fix retriever and reranker to process chat completion request (#915) (minmin-intel, Nov 19, 2024)
f25d35e  Fix Dataprep Upload Link issue (#913) (letonghan, Nov 19, 2024)
074e93d  Rename image names XXX-hpu to XXX-gaudi (#911) (ZePan110, Nov 19, 2024)
f32b242  Fix CD issues (#917) (ZePan110, Nov 20, 2024)
af4efca  fix the image name (#918) (chensuyue, Nov 20, 2024)
2cf1796  [Bug] Fix VDMS retriever and apply fix to VDMS dataprep (#928) (cwlacewe, Nov 21, 2024)
43ce07e  bump version into 1.1 (#930) (chensuyue, Nov 21, 2024)
1e00689  fix issue template bug (#939) (isaacncz, Nov 21, 2024)
5ea0ca5  Run pre-commit script (cameronmorin, Nov 22, 2024)
44ef049  Update retriever container image (cameronmorin, Nov 27, 2024)
2a04ac2  Run pre-commit hook (cameronmorin, Nov 27, 2024)
b88a85e  Address PR comments (cameronmorin, Nov 27, 2024)
972b3df  Update test scripts (cameronmorin, Nov 29, 2024)
0678b8e  Try updating ip address in test script (cameronmorin, Nov 29, 2024)
b849bb5  Reset ip address (cameronmorin, Nov 29, 2024)
5356173  Add cluster health check to test script (cameronmorin, Nov 29, 2024)
dda5b94  Add sysctl for opensearch collection (cameronmorin, Nov 29, 2024)
b342bd5  Add dockerfile paths to GitHub workflows (cameronmorin, Dec 2, 2024)
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/1_bug_template.yml
@@ -128,7 +128,7 @@ body:
required: false


- type: file
- type: textarea
id: attachments
attributes:
label: Attachments
13 changes: 10 additions & 3 deletions .github/workflows/_comps-workflow.yml
@@ -63,7 +63,7 @@ jobs:
git clone https://github.com/vllm-project/vllm.git vllm-openvino
cd ./vllm-openvino && git checkout v0.6.1 && git rev-parse HEAD && cd ../
fi
if [[ $(grep -c "vllm-hpu:" ${docker_compose_yml}) != 0 ]]; then
if [[ $(grep -c "vllm-gaudi:" ${docker_compose_yml}) != 0 ]]; then
git clone https://github.com/HabanaAI/vllm-fork.git vllm-fork
cd vllm-fork && git checkout 3c39626 && cd ../
fi
@@ -74,10 +74,17 @@
mode: ${{ inputs.mode }}
run: |
build_list=$(bash ${{ github.workspace }}/.github/workflows/scripts/get_cicd_list.sh "${mode}" ${docker_compose_path})
echo "build_list=${build_list}" >> $GITHUB_OUTPUT
echo "${build_list}"
if [ -z "${build_list}" ]; then
echo "empty=true" >> $GITHUB_OUTPUT
echo "${{ inputs.service }} have no ${mode} part."
else
echo "empty=false" >> $GITHUB_OUTPUT
echo "build_list=${build_list}" >> $GITHUB_OUTPUT
fi

- name: Build Image
if: ${{ fromJSON(inputs.build) && steps.get-yaml-path.outputs.file_exists == 'true' }}
if: ${{ fromJSON(inputs.build) && steps.get-yaml-path.outputs.file_exists == 'true' && steps.get-build-list.outputs.empty == 'false' }}
uses: opea-project/validation/actions/image-build@main
with:
work_dir: ${{ github.workspace }}
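The workflow hunk above guards the image-build step: when `get_cicd_list.sh` returns nothing, the job writes `empty=true` to `$GITHUB_OUTPUT` and the "Build Image" step is skipped. A minimal Python sketch of that decision logic (the helper name is hypothetical; the real step is shell):

```python
def get_build_outputs(build_list: str) -> dict:
    """Mirror the workflow step: flag an empty build list, otherwise
    pass the list through alongside empty=false."""
    if not build_list.strip():
        # Downstream "Build Image" runs only when empty == "false",
        # so an empty list skips the build instead of failing it.
        return {"empty": "true"}
    return {"empty": "false", "build_list": build_list}
```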
10 changes: 5 additions & 5 deletions .github/workflows/_run-docker-compose.yml
@@ -54,9 +54,8 @@ jobs:
cd ${{ github.workspace }}/tests
test_cases=$(find . -type f -name "test_${service_l}*.sh")
for script in $test_cases; do
echo $script
if echo "$script" | grep -q "on"; then
hardware=$(echo $script | cut -d'/' -f3 | cut -d'.' -f1 | awk -F'on_' '{print $2}')
if echo "$script" | grep -q "_on"; then
hardware=$(echo $script | cut -d'/' -f3 | cut -d'.' -f1 | awk -F'_on_' '{print $2}')
else
hardware="intel_cpu"
fi
@@ -93,14 +92,15 @@ jobs:
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
PINECONE_KEY: ${{ secrets.PINECONE_KEY }}
PREDICTIONGUARD_API_KEY: ${{ secrets.PREDICTIONGUARD_API_KEY }}
service: ${{ inputs.service }}
test_case: ${{ matrix.test_case }}
hardware: ${{ matrix.hardware }}
run: |
cd ${{ github.workspace }}/tests
service=$(echo "${test_case}" | cut -d'_' -f2- |cut -d'.' -f1)
service=$(echo "${test_case}" | cut -d'/' -f3 | cut -d'_' -f2- |cut -d'.' -f1)
echo "service=${service}" >> $GITHUB_ENV
if [ -f ${test_case} ]; then timeout 30m bash ${test_case}; else echo "Test script {${test_case}} not found, skip test!"; fi
if [ -f ${test_case} ]; then timeout 60m bash ${test_case}; else echo "Test script {${test_case}} not found, skip test!"; fi

- name: Clean up container
if: cancelled() || failure()
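The hunk above tightens the hardware match from `on` to `_on`, so test-script names that merely contain "on" as a substring no longer select the wrong hardware. A simplified Python sketch of the `awk -F'_on_'` extraction (the function name is hypothetical, and path splitting is reduced to the final component rather than the workflow's `cut -d'/' -f3`):

```python
def hardware_from_script(path: str) -> str:
    """Derive the target hardware from a test-script path,
    e.g. test_dataprep_opensearch_on_intel_hpu.sh -> intel_hpu.
    Defaults to intel_cpu when no _on_ suffix is present."""
    name = path.split("/")[-1].rsplit(".", 1)[0]  # strip dirs and extension
    if "_on_" in name:
        return name.split("_on_", 1)[1]
    return "intel_cpu"
```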
4 changes: 4 additions & 0 deletions .github/workflows/docker/compose/dataprep-compose.yaml
@@ -7,6 +7,10 @@ services:
build:
dockerfile: comps/dataprep/redis/langchain/Dockerfile
image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest}
dataprep-opensearch:
build:
dockerfile: comps/dataprep/opensearch/langchain/Dockerfile
image: ${REGISTRY:-opea}/dataprep-opensearch:${TAG:-latest}
dataprep-qdrant:
build:
dockerfile: comps/dataprep/qdrant/langchain/Dockerfile
4 changes: 2 additions & 2 deletions .github/workflows/docker/compose/llms-compose.yaml
@@ -36,12 +36,12 @@ services:
context: vllm-openvino
dockerfile: Dockerfile.openvino
image: ${REGISTRY:-opea}/vllm-openvino:${TAG:-latest}
vllm-hpu:
vllm-gaudi:
build:
context: vllm-fork
dockerfile: Dockerfile.hpu
shm_size: '128g'
image: ${REGISTRY:-opea}/vllm-hpu:${TAG:-latest}
image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest}
vllm-arc:
build:
dockerfile: comps/llms/text-generation/vllm/langchain/dependency/Dockerfile.intel_gpu
4 changes: 2 additions & 2 deletions .github/workflows/docker/compose/lvms-compose.yaml
@@ -23,10 +23,10 @@ services:
build:
dockerfile: comps/lvms/llava/Dockerfile
image: ${REGISTRY:-opea}/lvm-llava-svc:${TAG:-latest}
llava-hpu:
llava-gaudi:
build:
dockerfile: comps/lvms/llava/dependency/Dockerfile.intel_hpu
image: ${REGISTRY:-opea}/llava-hpu:${TAG:-latest}
image: ${REGISTRY:-opea}/llava-gaudi:${TAG:-latest}
lvm-predictionguard:
build:
dockerfile: comps/lvms/predictionguard/Dockerfile
4 changes: 4 additions & 0 deletions .github/workflows/docker/compose/retrievers-compose.yaml
@@ -7,6 +7,10 @@ services:
build:
dockerfile: comps/retrievers/redis/langchain/Dockerfile
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
retriever-opensearch:
build:
dockerfile: comps/retrievers/opensearch/langchain/Dockerfile
image: ${REGISTRY:-opea}/retriever-opensearch:${TAG:-latest}
retriever-qdrant:
build:
dockerfile: comps/retrievers/qdrant/haystack/Dockerfile
2 changes: 1 addition & 1 deletion .github/workflows/docker/compose/texttosql-compose.yaml
@@ -3,7 +3,7 @@

# this file should be run in the root of the repo
services:
texttosql-langchain:
texttosql:
build:
dockerfile: comps/texttosql/langchain/Dockerfile
image: ${REGISTRY:-opea}/texttosql:${TAG:-latest}
6 changes: 0 additions & 6 deletions .github/workflows/pr-dockerfile-path-scan.yaml
@@ -67,12 +67,6 @@ jobs:
if [ -n "$Dockerfiles" ]; then
for Dockerfile in $Dockerfiles; do
service=$(echo "$Dockerfile" | awk -F '/' '{print $2}')
if grep -q "$Dockerfile" ../GenAIExamples/**/*build.yaml*; then
mode="" #CI
else
mode="-cd" #CD
fi

yaml_file=${{github.workspace}}/.github/workflows/docker/compose/"$service"-compose
if ! grep -q "$Dockerfile" "$yaml_file"*yaml; then
echo "AR: Update $Dockerfile to .github/workflows/docker/compose/"$service"-compose.yaml. The yaml is used for release images build."
4 changes: 2 additions & 2 deletions comps/agent/langchain/README.md
@@ -93,10 +93,10 @@ export vllm_volume=${YOUR_LOCAL_DIR_FOR_MODELS}
# build vLLM image
git clone https://github.com/HabanaAI/vllm-fork.git
cd ./vllm-fork
docker build -f Dockerfile.hpu -t opea/vllm-hpu:latest --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy
docker build -f Dockerfile.hpu -t opea/vllm-gaudi:latest --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy

# TGI serving
docker run -d --runtime=habana --rm --name "comps-vllm-gaudi-service" -p 8080:80 -v $vllm_volume:/data -e HF_TOKEN=$HF_TOKEN -e HF_HOME=/data -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e VLLM_SKIP_WARMUP=true --cap-add=sys_nice --ipc=host opea/vllm-hpu:latest --model ${model} --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 4096 --max-seq_len-to-capture 8192 --enable-auto-tool-choice --tool-call-parser mistral
docker run -d --runtime=habana --rm --name "comps-vllm-gaudi-service" -p 8080:80 -v $vllm_volume:/data -e HF_TOKEN=$HF_TOKEN -e HF_HOME=/data -e OMPI_MCA_btl_vader_single_copy_mechanism=none -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e VLLM_SKIP_WARMUP=true --cap-add=sys_nice --ipc=host opea/vllm-gaudi:latest --model ${model} --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 4096 --max-seq_len-to-capture 8192 --enable-auto-tool-choice --tool-call-parser mistral

# check status
docker logs comps-vllm-gaudi-service
8 changes: 4 additions & 4 deletions comps/animation/wav2lip/dependency/entrypoint.sh
@@ -11,11 +11,11 @@ else
fi

# Download model weights
wget https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth -O Wav2Lip/face_detection/detection/sfd/s3fd.pth
wget --no-verbose https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth -O Wav2Lip/face_detection/detection/sfd/s3fd.pth
mkdir -p Wav2Lip/checkpoints
wget "https://iiitaphyd-my.sharepoint.com/:f:/g/personal/radrabha_m_research_iiit_ac_in/Eb3LEzbfuKlJiR600lQWRxgBIY27JZg80f7V9jtMfbNDaQ?download=1" -O Wav2Lip/checkpoints/wav2lip.pth
wget "https://iiitaphyd-my.sharepoint.com/:f:/g/personal/radrabha_m_research_iiit_ac_in/EdjI7bZlgApMqsVoEUUXpLsBxqXbn5z8VTmoxp55YNDcIA?download=1" -O Wav2Lip/checkpoints/wav2lip_gan.pth
wget https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.4.pth -P gfpgan/experiments/pretrained_models
wget --no-verbose "https://iiitaphyd-my.sharepoint.com/:f:/g/personal/radrabha_m_research_iiit_ac_in/Eb3LEzbfuKlJiR600lQWRxgBIY27JZg80f7V9jtMfbNDaQ?download=1" -O Wav2Lip/checkpoints/wav2lip.pth
wget --no-verbose "https://iiitaphyd-my.sharepoint.com/:f:/g/personal/radrabha_m_research_iiit_ac_in/EdjI7bZlgApMqsVoEUUXpLsBxqXbn5z8VTmoxp55YNDcIA?download=1" -O Wav2Lip/checkpoints/wav2lip_gan.pth
wget --no-verbose https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.4.pth -P gfpgan/experiments/pretrained_models
echo "Face Detector, Wav2Lip, GFPGAN weights downloaded."

# Environment variables
10 changes: 7 additions & 3 deletions comps/cores/mega/gateway.py
@@ -220,7 +220,11 @@ async def handle_request(self, request: Request):
repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03,
streaming=stream_opt,
chat_template=chat_request.chat_template if chat_request.chat_template else None,
model=chat_request.model if chat_request.model else None,
model=(
chat_request.model
if chat_request.model
else os.getenv("MODEL_ID") if os.getenv("MODEL_ID") else "Intel/neural-chat-7b-v3-3"
),
)
retriever_parameters = RetrieverParms(
search_type=chat_request.search_type if chat_request.search_type else "similarity",
@@ -769,7 +773,7 @@ def __init__(self, megaservice, host="0.0.0.0", port=8889):
host,
port,
str(MegaServiceEndpoint.RETRIEVALTOOL),
Union[TextDoc, EmbeddingRequest, ChatCompletionRequest],
Union[TextDoc, ChatCompletionRequest],
Union[RerankedDoc, LLMParamsDoc],
)

@@ -785,7 +789,7 @@ def parser_input(data, TypeClass, key):

data = await request.json()
query = None
for key, TypeClass in zip(["text", "input", "messages"], [TextDoc, EmbeddingRequest, ChatCompletionRequest]):
for key, TypeClass in zip(["text", "messages"], [TextDoc, ChatCompletionRequest]):
query, chat_request = parser_input(data, TypeClass, key)
if query is not None:
break
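The gateway hunk above changes model resolution: use the request's `model` if set, fall back to the `MODEL_ID` environment variable, then to the hardcoded default. A minimal sketch of that fallback chain (helper name is hypothetical; the real code inlines this in `handle_request`):

```python
import os
from typing import Optional


def resolve_model(requested: Optional[str]) -> str:
    """Resolution order from the gateway change: explicit request value,
    then the MODEL_ID environment variable, then the hardcoded default."""
    return requested or os.getenv("MODEL_ID") or "Intel/neural-chat-7b-v3-3"
```

Note this pairs with the `api_protocol.py` change below-in-spirit: the request schema's default becomes `None`, so an unset model reliably triggers the environment fallback instead of masking it with a schema-level default.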
2 changes: 1 addition & 1 deletion comps/cores/proto/api_protocol.py
@@ -155,7 +155,7 @@ class ChatCompletionRequest(BaseModel):
List[Dict[str, str]],
List[Dict[str, Union[str, List[Dict[str, Union[str, Dict[str, str]]]]]]],
]
model: Optional[str] = "Intel/neural-chat-7b-v3-3"
model: Optional[str] = None
frequency_penalty: Optional[float] = 0.0
logit_bias: Optional[Dict[str, float]] = None
logprobs: Optional[bool] = False
15 changes: 7 additions & 8 deletions comps/dataprep/milvus/langchain/prepare_doc_milvus.py
@@ -30,7 +30,7 @@
encode_filename,
get_separators,
get_tables_result,
parse_html,
parse_html_new,
remove_folder_with_ignore,
save_content_to_local_disk,
)
@@ -39,17 +39,16 @@
logflag = os.getenv("LOGFLAG", False)

# workaround notes: cp comps/dataprep/utils.py ./milvus/utils.py
# from utils import document_loader, get_tables_result, parse_html
index_params = {"index_type": "FLAT", "metric_type": "IP", "params": {}}
partition_field_name = "filename"
upload_folder = "./uploaded_files/"
milvus_uri = f"http://{MILVUS_HOST}:{MILVUS_PORT}"


class MosecEmbeddings(OpenAIEmbeddings):
def _get_len_safe_embeddings(
self, texts: List[str], *, engine: str, chunk_size: Optional[int] = None
) -> List[List[float]]:
_chunk_size = chunk_size or self.chunk_size
batched_embeddings: List[List[float]] = []
response = self.client.create(input=texts, **self._invocation_params)
if not isinstance(response, dict):
@@ -93,7 +92,7 @@ def ingest_chunks_to_milvus(file_name: str, chunks: List):
batch_docs,
embeddings,
collection_name=COLLECTION_NAME,
connection_args={"host": MILVUS_HOST, "port": MILVUS_PORT},
connection_args={"uri": milvus_uri},
partition_key_field=partition_field_name,
)
except Exception as e:
@@ -211,7 +210,7 @@ async def ingest_documents(
my_milvus = Milvus(
embedding_function=embeddings,
collection_name=COLLECTION_NAME,
connection_args={"host": MILVUS_HOST, "port": MILVUS_PORT},
connection_args={"uri": milvus_uri},
index_params=index_params,
auto_id=True,
)
@@ -318,7 +317,7 @@ async def ingest_documents(
)

save_path = upload_folder + encoded_link + ".txt"
content = parse_html([link])[0][0]
content = parse_html_new([link], chunk_size=chunk_size, chunk_overlap=chunk_overlap)
await save_content_to_local_disk(save_path, content)
ingest_data_to_milvus(
DocPath(
@@ -347,7 +346,7 @@ async def rag_get_file_structure():
my_milvus = Milvus(
embedding_function=embeddings,
collection_name=COLLECTION_NAME,
connection_args={"host": MILVUS_HOST, "port": MILVUS_PORT},
connection_args={"uri": milvus_uri},
index_params=index_params,
auto_id=True,
)
@@ -405,7 +404,7 @@ async def delete_single_file(file_path: str = Body(..., embed=True)):
my_milvus = Milvus(
embedding_function=embeddings,
collection_name=COLLECTION_NAME,
connection_args={"host": MILVUS_HOST, "port": MILVUS_PORT},
connection_args={"uri": milvus_uri},
index_params=index_params,
auto_id=True,
)
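The Milvus hunks above replace the `{"host": ..., "port": ...}` form of `connection_args` with a single precomputed `uri`, so every call site shares one connection string. A tiny sketch of the equivalent construction (helper name is hypothetical):

```python
def milvus_connection_args(host: str, port: int) -> dict:
    """Build the uri-style connection_args used after this change,
    replacing the older {"host": ..., "port": ...} form."""
    return {"uri": f"http://{host}:{port}"}
```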
1 change: 1 addition & 0 deletions comps/dataprep/milvus/langchain/requirements.txt
@@ -4,6 +4,7 @@ docarray[full]
docx2txt
easyocr
fastapi
html2text
huggingface_hub
langchain
langchain-community
4 changes: 2 additions & 2 deletions comps/dataprep/neo4j/llama_index/extract_graph_neo4j.py
@@ -48,7 +48,7 @@
encode_filename,
get_separators,
get_tables_result,
parse_html,
parse_html_new,
save_content_to_local_disk,
)

@@ -654,7 +654,7 @@ async def ingest_documents(
for link in link_list:
encoded_link = encode_filename(link)
save_path = upload_folder + encoded_link + ".txt"
content = parse_html([link])[0][0]
content = parse_html_new([link], chunk_size=chunk_size, chunk_overlap=chunk_overlap)
try:
await save_content_to_local_disk(save_path, content)
index = ingest_data_to_neo4j(
1 change: 1 addition & 0 deletions comps/dataprep/neo4j/llama_index/requirements.txt
@@ -6,6 +6,7 @@ easyocr
fastapi
future
graspologic
html2text
huggingface_hub
ipython
langchain