Add yamllint CI and add yamlfmt pre-commit hook #229

Open
wants to merge 3 commits into base: main
14 changes: 14 additions & 0 deletions .github/workflows/yamllint.yml
@@ -0,0 +1,14 @@
---
name: Yaml Lint
# yamllint disable-line rule:truthy
on:
workflow_dispatch:
push:
pull_request:
jobs:
lintAllTheThings:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: yaml-lint
uses: ibiqlik/action-yamllint@v3
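Context for the `# yamllint disable-line rule:truthy` comment in this workflow: YAML 1.1 loaders can parse the bare `on` trigger key as a boolean, which is exactly what yamllint's `truthy` rule warns about. A minimal Python sketch of that check (a hypothetical helper for illustration, not yamllint's actual implementation):

```python
# Scalars that a YAML 1.1 loader may silently parse as booleans.
# This is why yamllint's "truthy" rule flags the bare `on:` trigger key
# in GitHub Actions workflows. (Illustrative sketch, not yamllint's code.)
YAML_1_1_BOOLS = {"y", "yes", "n", "no", "true", "false", "on", "off"}

def truthy_violations(keys):
    """Return the keys a YAML 1.1 loader may coerce to booleans."""
    return [k for k in keys if k.lower() in YAML_1_1_BOOLS]

print(truthy_violations(["name", "on", "jobs"]))  # ['on']
```

Quoting the key (`"on":`) or a `disable-line` comment, as this PR uses, both silence the warning.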
6 changes: 4 additions & 2 deletions .pre-commit-config.yaml
@@ -23,5 +23,7 @@ repos:
files: ^RAG/
name: isort (python)
args: ["--multi-line=3", "--trailing-comma", "--force-grid-wrap=0", "--use-parentheses", "--line-width=119", "--ws"]


- repo: https://github.com/google/yamlfmt
rev: v0.13.0
hooks:
- id: yamlfmt
8 changes: 8 additions & 0 deletions .yamlfmt
@@ -0,0 +1,8 @@
formatter:
type: basic
eof_newline: true
max_line_length: 120
pad_line_comments: 2
retain_line_breaks_single: true
scan_folded_as_literal: true
trim_trailing_whitespace: true
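For reviewers unfamiliar with yamlfmt's options, the two whitespace-related settings above behave roughly as follows (a sketch of the assumed semantics, not yamlfmt's Go implementation):

```python
def basic_format(text, trim_trailing_whitespace=True, eof_newline=True):
    """Approximate two .yamlfmt options: strip trailing spaces on each
    line and guarantee the file ends with exactly one newline."""
    lines = text.split("\n")
    if trim_trailing_whitespace:
        lines = [line.rstrip() for line in lines]
    out = "\n".join(lines)
    if eof_newline:
        out = out.rstrip("\n") + "\n"
    return out

print(repr(basic_format("key: value   \nlist:\n  - a  ")))
# 'key: value\nlist:\n  - a\n'
```

These two settings explain most of the whitespace-only churn in the docker-compose diffs below.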
7 changes: 7 additions & 0 deletions .yamllint
@@ -0,0 +1,7 @@
---

extends: default

rules:
line-length: "disable"
document-start: "disable"
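The `.yamllint` file extends the default preset and switches two rules off. Conceptually, the effective configuration is a per-rule overlay, sketched below (assumed semantics with invented base values; yamllint's real defaults differ):

```python
# Hypothetical stand-in for yamllint's "default" preset (values invented).
DEFAULT_RULES = {
    "line-length": {"max": 80},
    "document-start": {"present": True},
    "trailing-spaces": {},
}

def effective_rules(base, overrides):
    """Overlay per-rule overrides on a base preset; 'disable' drops the rule."""
    merged = dict(base)
    for rule, setting in overrides.items():
        if setting == "disable":
            merged.pop(rule, None)  # disabled rules are removed entirely
        else:
            merged[rule] = setting
    return merged

active = effective_rules(DEFAULT_RULES, {"line-length": "disable",
                                         "document-start": "disable"})
print(sorted(active))  # ['trailing-spaces']
```

Disabling `line-length` and `document-start` keeps CI from fighting the long prompt strings and the optional `---` markers in this repo.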
16 changes: 8 additions & 8 deletions RAG/examples/advanced_rag/multi_turn_rag/docker-compose.yaml
@@ -1,7 +1,7 @@
include:
- path:
- ../../local_deploy/docker-compose-vectordb.yaml
- ../../local_deploy/docker-compose-nim-ms.yaml
- ../../local_deploy/docker-compose-vectordb.yaml
- ../../local_deploy/docker-compose-nim-ms.yaml

services:
chain-server:
@@ -28,7 +28,7 @@ services:
APP_TEXTSPLITTER_MODELNAME: Snowflake/snowflake-arctic-embed-l
APP_TEXTSPLITTER_CHUNKSIZE: 506
APP_TEXTSPLITTER_CHUNKOVERLAP: 200
APP_RANKING_MODELNAME: ${APP_RANKING_MODELNAME:-"nvidia/nv-rerankqa-mistral-4b-v3"} # Leave it blank to avoid using ranking
APP_RANKING_MODELNAME: ${APP_RANKING_MODELNAME:-"nvidia/nv-rerankqa-mistral-4b-v3"} # Leave it blank to avoid using ranking
APP_RANKING_MODELENGINE: ${APP_RANKING_MODELENGINE:-nvidia-ai-endpoints}
APP_RANKING_SERVERURL: ${APP_RANKING_SERVERURL:-""}
NVIDIA_API_KEY: ${NVIDIA_API_KEY}
@@ -43,9 +43,9 @@ services:
ENABLE_TRACING: false
LOGLEVEL: ${LOGLEVEL:-INFO}
ports:
- "8081:8081"
- "8081:8081"
expose:
- "8081"
- "8081"
shm_size: 5gb
depends_on:
nemollm-embedding:
@@ -75,11 +75,11 @@ services:
OTEL_EXPORTER_OTLP_PROTOCOL: grpc
ENABLE_TRACING: false
ports:
- "8090:8090"
- "8090:8090"
expose:
- "8090"
- "8090"
depends_on:
- chain-server
- chain-server

networks:
default:
36 changes: 18 additions & 18 deletions RAG/examples/advanced_rag/multi_turn_rag/prompt.yaml
@@ -1,22 +1,22 @@
chat_template: |
You are a helpful, respectful and honest assistant.
Always answer as helpfully as possible, while being safe.
Please ensure that your responses are positive in nature.
You are a helpful, respectful and honest assistant.
Always answer as helpfully as possible, while being safe.
Please ensure that your responses are positive in nature.

rag_template: |
<s>[INST] <<SYS>>
Use the following context to answer the user's question. If you don't know the answer,
just say that you don't know, don't try to make up an answer.
<</SYS>>
<s>[INST] Context: {context_str} Question: {query_str} Only return the helpful
answer below and nothing else. Helpful answer:[/INST]
<s>[INST] <<SYS>>
Use the following context to answer the user's question. If you don't know the answer,
just say that you don't know, don't try to make up an answer.
<</SYS>>
<s>[INST] Context: {context_str} Question: {query_str} Only return the helpful
answer below and nothing else. Helpful answer:[/INST]

multi_turn_rag_template: |
You are a document chatbot. Help the user as they ask questions about documents.
User message just asked: {input}\n\n
For this, we have retrieved the following potentially-useful info:
Conversation History Retrieved:
{history}\n\n
Document Retrieved:
{context}\n\n
Answer only from retrieved data. Make your response conversational.
multi_turn_rag_template: |-
You are a document chatbot. Help the user as they ask questions about documents.
User message just asked: {input}\n\n
For this, we have retrieved the following potentially-useful info:
Conversation History Retrieved:
{history}\n\n
Document Retrieved:
{context}\n\n
Answer only from retrieved data. Make your response conversational.
12 changes: 6 additions & 6 deletions RAG/examples/advanced_rag/multimodal_rag/docker-compose.yaml
@@ -1,7 +1,7 @@
include:
- path:
- ../../local_deploy/docker-compose-vectordb.yaml
- ../../local_deploy/docker-compose-nim-ms.yaml
- ../../local_deploy/docker-compose-vectordb.yaml
- ../../local_deploy/docker-compose-nim-ms.yaml

services:
chain-server:
@@ -37,9 +37,9 @@ services:
ENABLE_TRACING: false
LOGLEVEL: ${LOGLEVEL:-INFO}
ports:
- "8081:8081"
- "8081:8081"
expose:
- "8081"
- "8081"
shm_size: 5gb
depends_on:
nemollm-embedding:
@@ -66,9 +66,9 @@ services:
OTEL_EXPORTER_OTLP_PROTOCOL: grpc
ENABLE_TRACING: false
ports:
- "8090:8090"
- "8090:8090"
expose:
- "8090"
- "8090"
depends_on:
- chain-server

22 changes: 17 additions & 5 deletions RAG/examples/advanced_rag/multimodal_rag/prompt.yaml
@@ -1,11 +1,23 @@
chat_template: |
You are a helpful and friendly multimodal intelligent AI assistant named Multimodal Chatbot Assistant.
Always answer as helpfully as possible, while being safe.
Please ensure that your responses are positive in nature.
You are a helpful and friendly multimodal intelligent AI assistant named Multimodal Chatbot Assistant.
Always answer as helpfully as possible, while being safe.
Please ensure that your responses are positive in nature.

rag_template: "You are a helpful and friendly multimodal intelligent AI assistant named Multimodal Chatbot Assistant. You are an expert in the content of the document provided and can provide information using both text and images. The user may also provide an image input, and you will use the image description to retrieve similar images, tables and text. The context given below will provide some technical or financial documentation and whitepapers to help you answer the question. Based on this context, answer the question truthfully. If the question is not related to this, please refrain from answering. Most importantly, if the context provided does not include information about the question from the user, reply saying that you don't know. Do not utilize any information that is not provided in the documents below. All documents will be preceded by tags, for example [[DOCUMENT 1]], [[DOCUMENT 2]], and so on. You can reference them in your reply but without the brackets, so just say document 1 or 2. The question will be preceded by a [[QUESTION]] tag. Be succinct, clear, and helpful. Remember to describe everything in detail by using the knowledge provided, or reply that you don't know the answer. Do not fabricate any responses. Note that you have the ability to reference images, tables, and other multimodal elements when necessary. You can also refer to the image provided by the user, if any."
rag_template: "You are a helpful and friendly multimodal intelligent AI assistant named Multimodal Chatbot Assistant. You
are an expert in the content of the document provided and can provide information using both text and images. The user may
also provide an image input, and you will use the image description to retrieve similar images, tables and text. The context
given below will provide some technical or financial documentation and whitepapers to help you answer the question. Based
on this context, answer the question truthfully. If the question is not related to this, please refrain from answering.
Most importantly, if the context provided does not include information about the question from the user, reply saying that
you don't know. Do not utilize any information that is not provided in the documents below. All documents will be preceded
by tags, for example [[DOCUMENT 1]], [[DOCUMENT 2]], and so on. You can reference them in your reply but without the brackets,
so just say document 1 or 2. The question will be preceded by a [[QUESTION]] tag. Be succinct, clear, and helpful. Remember
to describe everything in detail by using the knowledge provided, or reply that you don't know the answer. Do not fabricate
any responses. Note that you have the ability to reference images, tables, and other multimodal elements when necessary.
You can also refer to the image provided by the user, if any."

describe_image_prompt: |
Describe this image in detail:

deplot_summarization_prompt: Your responsibility is to explain charts. You are an expert in describing the responses of linearized tables into plain English text for LLMs to use.
deplot_summarization_prompt: Your responsibility is to explain charts. You are an expert in describing the responses of linearized
tables into plain English text for LLMs to use.
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
include:
- path:
- ../../local_deploy/docker-compose-vectordb.yaml
- ../../local_deploy/docker-compose-nim-ms.yaml
- ../../local_deploy/docker-compose-vectordb.yaml
- ../../local_deploy/docker-compose-nim-ms.yaml

services:
chain-server:
@@ -40,9 +40,9 @@ services:
ENABLE_TRACING: false
LOGLEVEL: ${LOGLEVEL:-INFO}
ports:
- "8081:8081"
- "8081:8081"
expose:
- "8081"
- "8081"
shm_size: 5gb
depends_on:
nemollm-embedding:
@@ -69,11 +69,11 @@ services:
OTEL_EXPORTER_OTLP_PROTOCOL: grpc
ENABLE_TRACING: false
ports:
- "8090:8090"
- "8090:8090"
expose:
- "8090"
- "8090"
depends_on:
- chain-server
- chain-server

networks:
default:
86 changes: 44 additions & 42 deletions RAG/examples/advanced_rag/query_decomposition_rag/prompt.yaml
@@ -1,45 +1,47 @@
chat_template: "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Please ensure that your responses are positive in nature."
chat_template: "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe.
Please ensure that your responses are positive in nature."

rag_template: "You are a helpful AI assistant named Envie. You will reply to questions only based on the context that you are provided. If something is out of context, you will refrain from replying and politely decline to respond to the user."
rag_template: "You are a helpful AI assistant named Envie. You will reply to questions only based on the context that you
are provided. If something is out of context, you will refrain from replying and politely decline to respond to the user."

tool_selector_prompt: |
Your task is to answer questions. If you cannot answer the question, you can request the use of a tool and break the question into specific sub questions. Fill with Nil where no action is required. You should only return a JSON containing the tool and the generated sub questions. Consider the contextual information and only ask for information that you do not already have. Do not return any other explanations or text. The output should be a simple JSON structure! You are given two tools:
- Search
- Math
Search tool quickly finds and retrieves relevant answers from a given context, providing accurate and precise information to meet search needs.
Math tool performs essential operations, including multiplication, addition, subtraction, division, and greater-than or less-than comparisons, providing accurate results with ease. Use the Math tool when asked to find the sum or difference of values.
Do not pass sub questions to any tool if they already have an answer in the Contextual Information.
If you have all the information needed to answer the question, mark the Tool_Request as Nil.

Contextual Information:
{{ context }}

Question:
{{ question }}

{"Tool_Request": "<Fill>", "Generated Sub Questions": [<Fill>]}

math_tool_prompt: |
Your task is to identify 2 variables and an operation from given questions. If you cannot answer the question, you can simply return "Not Possible". You should only return a JSON containing the `IsPossible`, `variable1`, `variable2`, and `operation`. Do not return any other explanations or text. The output should be a simple JSON structure!
You are given two options for `IsPossible`:
- Possible
- Not Possible
`variable1` and `variable2` should be real floating point numbers.
You are given the following options for `operation symbols`:
- '+' (addition)
- '-' (subtraction)
- '*' (multiplication)
- '/' (division)
- '=' (equal to)
- '>' (greater than)
- '<' (less than)
- '>=' (greater than or equal to)
- '<=' (less than or equal to)
Only return the symbols for the specified operations and nothing else.
Contextual Information:
{{ context }}

Question:
{{ question }}

{"IsPossible": "<Fill>", "variable1": [<Fill>], "variable2": [<Fill>], "operation": [<Fill>]}
Your task is to answer questions. If you cannot answer the question, you can request the use of a tool and break the question into specific sub questions. Fill with Nil where no action is required. You should only return a JSON containing the tool and the generated sub questions. Consider the contextual information and only ask for information that you do not already have. Do not return any other explanations or text. The output should be a simple JSON structure! You are given two tools:
- Search
- Math
Search tool quickly finds and retrieves relevant answers from a given context, providing accurate and precise information to meet search needs.
Math tool performs essential operations, including multiplication, addition, subtraction, division, and greater-than or less-than comparisons, providing accurate results with ease. Use the Math tool when asked to find the sum or difference of values.
Do not pass sub questions to any tool if they already have an answer in the Contextual Information.
If you have all the information needed to answer the question, mark the Tool_Request as Nil.

Contextual Information:
{{ context }}

Question:
{{ question }}

{"Tool_Request": "<Fill>", "Generated Sub Questions": [<Fill>]}

math_tool_prompt: |-
Your task is to identify 2 variables and an operation from given questions. If you cannot answer the question, you can simply return "Not Possible". You should only return a JSON containing the `IsPossible`, `variable1`, `variable2`, and `operation`. Do not return any other explanations or text. The output should be a simple JSON structure!
You are given two options for `IsPossible`:
- Possible
- Not Possible
`variable1` and `variable2` should be real floating point numbers.
You are given the following options for `operation symbols`:
- '+' (addition)
- '-' (subtraction)
- '*' (multiplication)
- '/' (division)
- '=' (equal to)
- '>' (greater than)
- '<' (less than)
- '>=' (greater than or equal to)
- '<=' (less than or equal to)
Only return the symbols for the specified operations and nothing else.
Contextual Information:
{{ context }}

Question:
{{ question }}

{"IsPossible": "<Fill>", "variable1": [<Fill>], "variable2": [<Fill>], "operation": [<Fill>]}
19 changes: 11 additions & 8 deletions RAG/examples/advanced_rag/structured_data_rag/docker-compose.yaml
@@ -1,6 +1,6 @@
include:
- path:
- ../../local_deploy/docker-compose-nim-ms.yaml
- ../../local_deploy/docker-compose-nim-ms.yaml

services:
chain-server:
@@ -20,16 +20,19 @@ services:
APP_LLM_MODELENGINE: nvidia-ai-endpoints
APP_LLM_SERVERURL: ${APP_LLM_SERVERURL:-""}
APP_LLM_MODELNAMEPANDASAI: ${APP_LLM_MODELNAME:-meta/llama3-70b-instruct}
APP_PROMPTS_CHATTEMPLATE: "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Please ensure that your responses are positive in nature."
APP_PROMPTS_RAGTEMPLATE: "You are a helpful AI assistant named Envie. You will reply to questions only based on the context that you are provided. If something is out of context, you will refrain from replying and politely decline to respond to the user."
APP_PROMPTS_CHATTEMPLATE: "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible,
while being safe. Please ensure that your responses are positive in nature."
APP_PROMPTS_RAGTEMPLATE: "You are a helpful AI assistant named Envie. You will reply to questions only based on the
context that you are provided. If something is out of context, you will refrain from replying and politely decline
to respond to the user."
NVIDIA_API_KEY: ${NVIDIA_API_KEY}
COLLECTION_NAME: ${COLLECTION_NAME:-structured_data_rag}
CSV_NAME: PdM_machines
LOGLEVEL: ${LOGLEVEL:-INFO}
ports:
- "8081:8081"
- "8081:8081"
expose:
- "8081"
- "8081"
shm_size: 5gb
depends_on:
nemollm-inference:
@@ -50,11 +53,11 @@ services:
APP_SERVERPORT: 8081
APP_MODELNAME: ${APP_LLM_MODELNAME:-meta/llama3-70b-instruct}
ports:
- "8090:8090"
- "8090:8090"
expose:
- "8090"
- "8090"
depends_on:
- chain-server
- chain-server

networks:
default: