diff --git a/.github/workflows/yamllint.yml b/.github/workflows/yamllint.yml new file mode 100644 index 00000000..23dfbe6a --- /dev/null +++ b/.github/workflows/yamllint.yml @@ -0,0 +1,14 @@ +--- +name: Yaml Lint +# yamllint disable-line rule:truthy +on: + workflow_dispatch: + push: + pull_request: +jobs: + lintAllTheThings: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: yaml-lint + uses: ibiqlik/action-yamllint@v3 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 11fada31..5e43807c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,5 +23,7 @@ repos: files: ^RAG/ name: isort (python) args: ["--multi-line=3", "--trailing-comma", "--force-grid-wrap=0", "--use-parenthese", "--line-width=119", "--ws"] - - + - repo: https://github.com/google/yamlfmt + rev: v0.13.0 + hooks: + - id: yamlfmt diff --git a/.yamlfmt b/.yamlfmt new file mode 100644 index 00000000..0c4c3682 --- /dev/null +++ b/.yamlfmt @@ -0,0 +1,8 @@ +formatter: + type: basic + eof_newline: true + max_line_length: 120 + pad_line_comments: 2 + retain_line_breaks_single: true + scan_folded_as_literal: true + trim_trailing_whitespace: true diff --git a/.yamllint b/.yamllint new file mode 100644 index 00000000..5364339c --- /dev/null +++ b/.yamllint @@ -0,0 +1,7 @@ +--- + +extends: default + +rules: + line-length: "disable" + document-start: "disable" diff --git a/RAG/examples/advanced_rag/multi_turn_rag/docker-compose.yaml b/RAG/examples/advanced_rag/multi_turn_rag/docker-compose.yaml index b5e9c43a..79d76ec2 100644 --- a/RAG/examples/advanced_rag/multi_turn_rag/docker-compose.yaml +++ b/RAG/examples/advanced_rag/multi_turn_rag/docker-compose.yaml @@ -1,7 +1,7 @@ include: - path: - - ../../local_deploy/docker-compose-vectordb.yaml - - ../../local_deploy/docker-compose-nim-ms.yaml + - ../../local_deploy/docker-compose-vectordb.yaml + - ../../local_deploy/docker-compose-nim-ms.yaml services: chain-server: @@ -28,7 +28,7 @@ services: APP_TEXTSPLITTER_MODELNAME: Snowflake/snowflake-arctic-embed-l APP_TEXTSPLITTER_CHUNKSIZE: 506 APP_TEXTSPLITTER_CHUNKOVERLAP: 200 - APP_RANKING_MODELNAME: ${APP_RANKING_MODELNAME:-"nvidia/nv-rerankqa-mistral-4b-v3"} # Leave it blank to avoid using ranking + APP_RANKING_MODELNAME: ${APP_RANKING_MODELNAME:-"nvidia/nv-rerankqa-mistral-4b-v3"} # Leave it blank to avoid using ranking APP_RANKING_MODELENGINE: ${APP_RANKING_MODELENGINE:-nvidia-ai-endpoints} APP_RANKING_SERVERURL: ${APP_RANKING_SERVERURL:-""} NVIDIA_API_KEY: ${NVIDIA_API_KEY} @@ -43,9 +43,9 @@ services: ENABLE_TRACING: false LOGLEVEL: ${LOGLEVEL:-INFO} ports: - - "8081:8081" + - "8081:8081" expose: - - "8081" + - "8081" shm_size: 5gb depends_on: nemollm-embedding: @@ -75,11 +75,11 @@ services: OTEL_EXPORTER_OTLP_PROTOCOL: grpc ENABLE_TRACING: false ports: - - "8090:8090" + - "8090:8090" expose: - - "8090" + - "8090" depends_on: - - chain-server + - chain-server networks: default: diff --git a/RAG/examples/advanced_rag/multi_turn_rag/prompt.yaml b/RAG/examples/advanced_rag/multi_turn_rag/prompt.yaml index 5e63a644..60f7575a 100644 --- a/RAG/examples/advanced_rag/multi_turn_rag/prompt.yaml +++ b/RAG/examples/advanced_rag/multi_turn_rag/prompt.yaml @@ -1,22 +1,22 @@ chat_template: | - You are a helpful, respectful and honest assistant. - Always answer as helpfully as possible, while being safe. - Please ensure that your responses are positive in nature. + You are a helpful, respectful and honest assistant. + Always answer as helpfully as possible, while being safe. 
+    Please ensure that your responses are positive in nature.

 rag_template: |
-    [INST] <<SYS>>
-    Use the following context to answer the user's question. If you don't know the answer,
-    just say that you don't know, don't try to make up an answer.
-    <</SYS>>
-    [INST] Context: {context_str} Question: {query_str} Only return the helpful
-    answer below and nothing else. Helpful answer:[/INST]
+    [INST] <<SYS>>
+    Use the following context to answer the user's question. If you don't know the answer,
+    just say that you don't know, don't try to make up an answer.
+    <</SYS>>
+    [INST] Context: {context_str} Question: {query_str} Only return the helpful
+    answer below and nothing else. Helpful answer:[/INST]

-multi_turn_rag_template: |
-    You are a document chatbot. Help the user as they ask questions about documents.
-    User message just asked: {input}\n\n
-    For this, we have retrieved the following potentially-useful info:
-    Conversation History Retrieved:
-    {history}\n\n
-    Document Retrieved:
-    {context}\n\n
-    Answer only from retrieved data. Make your response conversational.
+multi_turn_rag_template: |-
+    You are a document chatbot. Help the user as they ask questions about documents.
+    User message just asked: {input}\n\n
+    For this, we have retrieved the following potentially-useful info:
+    Conversation History Retrieved:
+    {history}\n\n
+    Document Retrieved:
+    {context}\n\n
+    Answer only from retrieved data. Make your response conversational.
diff --git a/RAG/examples/advanced_rag/multimodal_rag/docker-compose.yaml b/RAG/examples/advanced_rag/multimodal_rag/docker-compose.yaml
index 948f2652..d8f0d934 100644
--- a/RAG/examples/advanced_rag/multimodal_rag/docker-compose.yaml
+++ b/RAG/examples/advanced_rag/multimodal_rag/docker-compose.yaml
@@ -1,7 +1,7 @@
 include:
   - path:
-    - ../../local_deploy/docker-compose-vectordb.yaml
-    - ../../local_deploy/docker-compose-nim-ms.yaml
+      - ../../local_deploy/docker-compose-vectordb.yaml
+      - ../../local_deploy/docker-compose-nim-ms.yaml

 services:
   chain-server:
@@ -37,9 +37,9 @@ services:
       ENABLE_TRACING: false
       LOGLEVEL: ${LOGLEVEL:-INFO}
     ports:
-    - "8081:8081"
+      - "8081:8081"
     expose:
-    - "8081"
+      - "8081"
     shm_size: 5gb
     depends_on:
       nemollm-embedding:
@@ -66,9 +66,9 @@ services:
       OTEL_EXPORTER_OTLP_PROTOCOL: grpc
       ENABLE_TRACING: false
     ports:
-    - "8090:8090"
+      - "8090:8090"
     expose:
-    - "8090"
+      - "8090"
     depends_on:
       - chain-server
diff --git a/RAG/examples/advanced_rag/multimodal_rag/prompt.yaml b/RAG/examples/advanced_rag/multimodal_rag/prompt.yaml
index aa63fa28..a62c5c30 100644
--- a/RAG/examples/advanced_rag/multimodal_rag/prompt.yaml
+++ b/RAG/examples/advanced_rag/multimodal_rag/prompt.yaml
@@ -1,11 +1,23 @@
 chat_template: |
-    You are a helpful and friendly multimodal intelligent AI assistant named Multimodal Chatbot Assistant.
-    Always answer as helpfully as possible, while being safe.
-    Please ensure that your responses are positive in nature.
+    You are a helpful and friendly multimodal intelligent AI assistant named Multimodal Chatbot Assistant.
+    Always answer as helpfully as possible, while being safe.
+    Please ensure that your responses are positive in nature.

-rag_template: "You are a helpful and friendly multimodal intelligent AI assistant named Multimodal Chatbot Assistant. You are an expert in the content of the document provided and can provide information using both text and images. The user may also provide an image input, and you will use the image description to retrieve similar images, tables and text.
The context given below will provide some technical or financial documentation and whitepapers to help you answer the question. Based on this context, answer the question truthfully. If the question is not related to this, please refrain from answering. Most importantly, if the context provided does not include information about the question from the user, reply saying that you don't know. Do not utilize any information that is not provided in the documents below. All documents will be preceded by tags, for example [[DOCUMENT 1]], [[DOCUMENT 2]], and so on. You can reference them in your reply but without the brackets, so just say document 1 or 2. The question will be preceded by a [[QUESTION]] tag. Be succinct, clear, and helpful. Remember to describe everything in detail by using the knowledge provided, or reply that you don't know the answer. Do not fabricate any responses. Note that you have the ability to reference images, tables, and other multimodal elements when necessary. You can also refer to the image provided by the user, if any." +rag_template: "You are a helpful and friendly multimodal intelligent AI assistant named Multimodal Chatbot Assistant. You + are an expert in the content of the document provided and can provide information using both text and images. The user may + also provide an image input, and you will use the image description to retrieve similar images, tables and text. The context + given below will provide some technical or financial documentation and whitepapers to help you answer the question. Based + on this context, answer the question truthfully. If the question is not related to this, please refrain from answering. + Most importantly, if the context provided does not include information about the question from the user, reply saying that + you don't know. Do not utilize any information that is not provided in the documents below. All documents will be preceded + by tags, for example [[DOCUMENT 1]], [[DOCUMENT 2]], and so on. You can reference them in your reply but without the brackets, + so just say document 1 or 2. The question will be preceded by a [[QUESTION]] tag. Be succinct, clear, and helpful. Remember + to describe everything in detail by using the knowledge provided, or reply that you don't know the answer. Do not fabricate + any responses. Note that you have the ability to reference images, tables, and other multimodal elements when necessary. + You can also refer to the image provided by the user, if any." describe_image_prompt: | Describe this image in detail: -deplot_summarization_prompt: Your responsibility is to explain charts. You are an expert in describing the responses of linearized tables into plain English text for LLMs to use. \ No newline at end of file +deplot_summarization_prompt: Your responsibility is to explain charts. You are an expert in describing the responses of linearized + tables into plain English text for LLMs to use. 
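A brief aside for reviewers, since several prompt files in this patch switch their last block scalar from `|` to `|-`: below is a minimal illustrative YAML sketch (not part of the diff) of the chomping indicators involved. `|` keeps the scalar's trailing newline, while `|-` strips it, so ending a file's final template with `|-` works cleanly with the `eof_newline: true` setting in the new .yamlfmt and with the missing-newline-at-EOF fixes elsewhere in this change.

# Illustrative only -- not part of this change.
keep: |      # loads as "line one\nline two\n" (trailing newline kept)
  line one
  line two
strip: |-    # loads as "line one\nline two" (trailing newline stripped)
  line one
  line two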
diff --git a/RAG/examples/advanced_rag/query_decomposition_rag/docker-compose.yaml b/RAG/examples/advanced_rag/query_decomposition_rag/docker-compose.yaml index aff87171..bffe3cc9 100644 --- a/RAG/examples/advanced_rag/query_decomposition_rag/docker-compose.yaml +++ b/RAG/examples/advanced_rag/query_decomposition_rag/docker-compose.yaml @@ -1,7 +1,7 @@ include: - path: - - ../../local_deploy/docker-compose-vectordb.yaml - - ../../local_deploy/docker-compose-nim-ms.yaml + - ../../local_deploy/docker-compose-vectordb.yaml + - ../../local_deploy/docker-compose-nim-ms.yaml services: chain-server: @@ -40,9 +40,9 @@ services: ENABLE_TRACING: false LOGLEVEL: ${LOGLEVEL:-INFO} ports: - - "8081:8081" + - "8081:8081" expose: - - "8081" + - "8081" shm_size: 5gb depends_on: nemollm-embedding: @@ -69,11 +69,11 @@ services: OTEL_EXPORTER_OTLP_PROTOCOL: grpc ENABLE_TRACING: false ports: - - "8090:8090" + - "8090:8090" expose: - - "8090" + - "8090" depends_on: - - chain-server + - chain-server networks: default: diff --git a/RAG/examples/advanced_rag/query_decomposition_rag/prompt.yaml b/RAG/examples/advanced_rag/query_decomposition_rag/prompt.yaml index cc38c8af..c77a9512 100644 --- a/RAG/examples/advanced_rag/query_decomposition_rag/prompt.yaml +++ b/RAG/examples/advanced_rag/query_decomposition_rag/prompt.yaml @@ -1,45 +1,47 @@ -chat_template: "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Please ensure that your responses are positive in nature." +chat_template: "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. + Please ensure that your responses are positive in nature." -rag_template: "You are a helpful AI assistant named Envie. You will reply to questions only based on the context that you are provided. If something is out of context, you will refrain from replying and politely decline to respond to the user." +rag_template: "You are a helpful AI assistant named Envie. You will reply to questions only based on the context that you + are provided. If something is out of context, you will refrain from replying and politely decline to respond to the user." tool_selector_prompt: | - Your task is to answer questions. If you cannot answer the question, you can request use for a tool and break the question into specific sub questions. Fill with Nil where no action is required. You should only return a JSON containing the tool and the generated sub questions. Consider the contextual information and only ask for information that you do not already have. Do not return any other explanations or text. The output should be a simple JSON structure! You are given two tools: - - Search - - Math - Search tool quickly finds and retrieves relevant answers from a given context, providing accurate and precise information to meet search needs. - Math tool performs essential operations, including multiplication, addition, subtraction, division, and greater than or less than comparisons, providing accurate results with ease. Utilize math tool when asked to find sum, difference of values. - Do not pass sub questions to any tool if they already have an answer in the Contextual Information. - If you have all the information needed to answer the question, mark the Tool_Request as Nil. 
-
-    Contextual Information:
-    {{ context }}
-
-    Question:
-    {{ question }}
-
-    {"Tool_Request": "", "Generated Sub Questions": []}
-
-math_tool_prompt: |
-    Your task is to identify 2 variables and an operation from given questions. If you cannot answer the question, you can simply return "Not Possible". You should only return a JSON containing the `IsPossible`, `variable1`, `variable2`, and `operation`. Do not return any other explanations or text. The output should be a simple JSON structure!
-    You are given two options for `IsPossible`:
-    - Possible
-    - Not Possible
-    `variable1` and `variable2` should be real floating point numbers.
-    You are given four options for `operation symbols`:
-    - '+' (addition)
-    - '-' (subtraction)
-    - '*' (multiplication)
-    - '/' (division)
-    - '=' (equal to)
-    - '>' (greater than)
-    - '<' (less than)
-    - '>=' (greater than or equal to)
-    - '<=' (less than or equal to)
-    Only return the symbols for the specified operations and nothing else.
-    Contextual Information:
-    {{ context }}
-
-    Question:
-    {{ question }}
-
-    {"IsPossible": "", "variable1": [], "variable2": [], "operation": []}
\ No newline at end of file
+    Your task is to answer questions. If you cannot answer the question, you can request the use of a tool and break the question into specific sub questions. Fill with Nil where no action is required. You should only return a JSON containing the tool and the generated sub questions. Consider the contextual information and only ask for information that you do not already have. Do not return any other explanations or text. The output should be a simple JSON structure! You are given two tools:
+    - Search
+    - Math
+    Search tool quickly finds and retrieves relevant answers from a given context, providing accurate and precise information to meet search needs.
+    Math tool performs essential operations, including multiplication, addition, subtraction, division, and greater than or less than comparisons, providing accurate results with ease. Utilize the math tool when asked to find the sum or difference of values.
+    Do not pass sub questions to any tool if they already have an answer in the Contextual Information.
+    If you have all the information needed to answer the question, mark the Tool_Request as Nil.
+
+    Contextual Information:
+    {{ context }}
+
+    Question:
+    {{ question }}
+
+    {"Tool_Request": "", "Generated Sub Questions": []}
+
+math_tool_prompt: |-
+    Your task is to identify 2 variables and an operation from given questions. If you cannot answer the question, you can simply return "Not Possible". You should only return a JSON containing the `IsPossible`, `variable1`, `variable2`, and `operation`. Do not return any other explanations or text. The output should be a simple JSON structure!
+    You are given two options for `IsPossible`:
+    - Possible
+    - Not Possible
+    `variable1` and `variable2` should be real floating point numbers.
+    You are given the following options for `operation symbols`:
+    - '+' (addition)
+    - '-' (subtraction)
+    - '*' (multiplication)
+    - '/' (division)
+    - '=' (equal to)
+    - '>' (greater than)
+    - '<' (less than)
+    - '>=' (greater than or equal to)
+    - '<=' (less than or equal to)
+    Only return the symbols for the specified operations and nothing else.
+ Contextual Information: + {{ context }} + + Question: + {{ question }} + + {"IsPossible": "", "variable1": [], "variable2": [], "operation": []} diff --git a/RAG/examples/advanced_rag/structured_data_rag/docker-compose.yaml b/RAG/examples/advanced_rag/structured_data_rag/docker-compose.yaml index e8ae62cb..4ce52a0f 100644 --- a/RAG/examples/advanced_rag/structured_data_rag/docker-compose.yaml +++ b/RAG/examples/advanced_rag/structured_data_rag/docker-compose.yaml @@ -1,6 +1,6 @@ include: - path: - - ../../local_deploy/docker-compose-nim-ms.yaml + - ../../local_deploy/docker-compose-nim-ms.yaml services: chain-server: @@ -20,16 +20,19 @@ services: APP_LLM_MODELENGINE: nvidia-ai-endpoints APP_LLM_SERVERURL: ${APP_LLM_SERVERURL:-""} APP_LLM_MODELNAMEPANDASAI: ${APP_LLM_MODELNAME:-meta/llama3-70b-instruct} - APP_PROMPTS_CHATTEMPLATE: "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Please ensure that your responses are positive in nature." - APP_PROMPTS_RAGTEMPLATE: "You are a helpful AI assistant named Envie. You will reply to questions only based on the context that you are provided. If something is out of context, you will refrain from replying and politely decline to respond to the user." + APP_PROMPTS_CHATTEMPLATE: "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, + while being safe. Please ensure that your responses are positive in nature." + APP_PROMPTS_RAGTEMPLATE: "You are a helpful AI assistant named Envie. You will reply to questions only based on the + context that you are provided. If something is out of context, you will refrain from replying and politely decline + to respond to the user." NVIDIA_API_KEY: ${NVIDIA_API_KEY} COLLECTION_NAME: ${COLLECTION_NAME:-structured_data_rag} CSV_NAME: PdM_machines LOGLEVEL: ${LOGLEVEL:-INFO} ports: - - "8081:8081" + - "8081:8081" expose: - - "8081" + - "8081" shm_size: 5gb depends_on: nemollm-inference: @@ -50,11 +53,11 @@ services: APP_SERVERPORT: 8081 APP_MODELNAME: ${APP_LLM_MODELNAME:-meta/llama3-70b-instruct} ports: - - "8090:8090" + - "8090:8090" expose: - - "8090" + - "8090" depends_on: - - chain-server + - chain-server networks: default: diff --git a/RAG/examples/advanced_rag/structured_data_rag/prompt.yaml b/RAG/examples/advanced_rag/structured_data_rag/prompt.yaml index a01430d5..c9d2a11d 100644 --- a/RAG/examples/advanced_rag/structured_data_rag/prompt.yaml +++ b/RAG/examples/advanced_rag/structured_data_rag/prompt.yaml @@ -1,11 +1,12 @@ prompts: - chat_template: You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Please ensure that your responses are positive in nature. + chat_template: You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. + Please ensure that your responses are positive in nature. - rag_template: You are a helpful AI assistant named Envie. You will reply to questions only based on the context that you are provided. If something is out of context, you will refrain from replying and politely decline to respond to the user. + rag_template: You are a helpful AI assistant named Envie. You will reply to questions only based on the context that you + are provided. If something is out of context, you will refrain from replying and politely decline to respond to the user. 
csv_data_retrieval_template: | - You are an expert data retrieval agent who writes functional python code and utilzes Pandas library in python for data retrieval. Provide a functional and accurate code based on the provided pandas dataframe for the user's query. @@ -19,7 +20,6 @@ prompts: - dfs is a list containing df a pandas dataframe. Always use the first entry from the list like df = dfs[0]. {instructions} - csv_response_template: | Provide a response to user's queries based on the given Data point. You are provided with the required data value and your job is to formulate a natural language response based on the data. @@ -48,14 +48,17 @@ prompts: df['age'] = df['age'].str.extract('(\d+)').astype(int) - For any age related query, always provide the response as string with the age and its unit together. Unit of age for all machines is in months. - name: PdM_errors - description: These are errors encountered by the machines while in operating condition. Since, these errors don't shut down the machines, these are not considered as failures. The error date and times are rounded to the closest hour since the telemetry data is collected at an hourly rate. + description: These are errors encountered by the machines while in operating condition. Since, these errors don't shut + down the machines, these are not considered as failures. The error date and times are rounded to the closest hour + since the telemetry data is collected at an hourly rate. instructions: | - Convert the datetime column to pandas datetime like df['datetime'] = pd.to_datetime(df['datetime']) - Use pandas datatime only for filtering date time columns based on date or time. Like df['datetime'].dt.day - If year is not mentioned explicitly in queries containing dates, then consider the year to be 2015 by default. - name: PdM_failures - description: Each record represents replacement of a component due to failure. This data is a subset of Maintenance data. This data is rounded to the closest hour since the telemetry data is collected at an hourly rate. - instructions: | + description: Each record represents replacement of a component due to failure. This data is a subset of Maintenance + data. This data is rounded to the closest hour since the telemetry data is collected at an hourly rate. + instructions: |- - Convert the datetime column to pandas datetime like df['datetime'] = pd.to_datetime(df['datetime']) - Use pandas datatime only for filtering date time columns based on date or time. Like df['datetime'].dt.day - - If year is not mentioned explicitly in queries containing dates, then consider the year to be 2015 by default. \ No newline at end of file + - If year is not mentioned explicitly in queries containing dates, then consider the year to be 2015 by default. 
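Another aside on the rewrapping pattern used throughout the prompt files in this patch: yamlfmt breaks long plain scalars across lines, which is safe because YAML folds the continuation lines of a plain scalar back into a single space-joined string. A minimal illustrative sketch (not part of the diff):

# Illustrative only: both keys below parse to the exact same string.
one_line: You are a helpful AI assistant named Envie.
wrapped: You are a helpful
  AI assistant named Envie.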
diff --git a/RAG/examples/basic_rag/langchain/docker-compose.yaml b/RAG/examples/basic_rag/langchain/docker-compose.yaml index 33483c56..39db3cb2 100644 --- a/RAG/examples/basic_rag/langchain/docker-compose.yaml +++ b/RAG/examples/basic_rag/langchain/docker-compose.yaml @@ -1,7 +1,7 @@ include: - path: - - ../../local_deploy/docker-compose-vectordb.yaml - - ../../local_deploy/docker-compose-nim-ms.yaml + - ../../local_deploy/docker-compose-vectordb.yaml + - ../../local_deploy/docker-compose-nim-ms.yaml services: chain-server: @@ -40,7 +40,7 @@ services: APP_TEXTSPLITTER_CHUNKSIZE: 506 APP_TEXTSPLITTER_CHUNKOVERLAP: 200 NVIDIA_API_KEY: ${NVIDIA_API_KEY} - # vectorstore collection name to store embeddings + # vectorstore collection name to store embeddings COLLECTION_NAME: ${COLLECTION_NAME:-nvidia_api_catalog} APP_RETRIEVER_TOPK: 4 APP_RETRIEVER_SCORETHRESHOLD: 0.25 @@ -52,9 +52,9 @@ services: # Log level for server, supported level NOTSET, DEBUG, INFO, WARN, ERROR, CRITICAL LOGLEVEL: ${LOGLEVEL:-INFO} ports: - - "8081:8081" + - "8081:8081" expose: - - "8081" + - "8081" shm_size: 5gb depends_on: nemollm-embedding: @@ -86,11 +86,11 @@ services: # enable observability in rag playground ENABLE_TRACING: false ports: - - "8090:8090" + - "8090:8090" expose: - - "8090" + - "8090" depends_on: - - chain-server + - chain-server networks: default: diff --git a/RAG/examples/basic_rag/langchain/prompt.yaml b/RAG/examples/basic_rag/langchain/prompt.yaml index ebbaa8d9..b64776ca 100644 --- a/RAG/examples/basic_rag/langchain/prompt.yaml +++ b/RAG/examples/basic_rag/langchain/prompt.yaml @@ -1,9 +1,9 @@ chat_template: | - You are a helpful, respectful and honest assistant. - Always answer as helpfully as possible, while being safe. - Please ensure that your responses are positive in nature. + You are a helpful, respectful and honest assistant. + Always answer as helpfully as possible, while being safe. + Please ensure that your responses are positive in nature. -rag_template: | - You are a helpful AI assistant named Envie. - You will reply to questions only based on the context that you are provided. - If something is out of context, you will refrain from replying and politely decline to respond to the user. \ No newline at end of file +rag_template: |- + You are a helpful AI assistant named Envie. + You will reply to questions only based on the context that you are provided. + If something is out of context, you will refrain from replying and politely decline to respond to the user. 
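One more note, since the `${VAR:-default}` form appears in nearly every compose file in this change: Docker Compose substitutes the default when the variable is unset or empty, so the examples start with sane settings even without a populated `.env` file. A hypothetical sketch follows (the `demo` service name is invented for illustration):

# Illustrative only; "demo" is not a service in this repository.
services:
  demo:
    image: busybox
    environment:
      LOGLEVEL: ${LOGLEVEL:-INFO}  # falls back to INFO when LOGLEVEL is unset or empty
      APP_LLM_SERVERURL: ${APP_LLM_SERVERURL:-""}  # falls back to an empty string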
diff --git a/RAG/examples/basic_rag/llamaindex/docker-compose.yaml b/RAG/examples/basic_rag/llamaindex/docker-compose.yaml index 520f7037..91213825 100644 --- a/RAG/examples/basic_rag/llamaindex/docker-compose.yaml +++ b/RAG/examples/basic_rag/llamaindex/docker-compose.yaml @@ -1,7 +1,7 @@ include: - path: - - ../../local_deploy/docker-compose-vectordb.yaml - - ../../local_deploy/docker-compose-nim-ms.yaml + - ../../local_deploy/docker-compose-vectordb.yaml + - ../../local_deploy/docker-compose-nim-ms.yaml services: chain-server: @@ -36,7 +36,7 @@ services: # embedding model engine used for inference, supported type nvidia-ai-endpoints APP_LLM_MODELENGINE: ${APP_LLM_MODELENGINE:-nvidia-ai-endpoints} NVIDIA_API_KEY: ${NVIDIA_API_KEY} - # vectorstore collection name to store embeddings + # vectorstore collection name to store embeddings COLLECTION_NAME: ${COLLECTION_NAME:-developer_rag} APP_RETRIEVER_TOPK: 4 APP_RETRIEVER_SCORETHRESHOLD: 0.25 @@ -52,9 +52,9 @@ services: # Log level for server, supported level NOTSET, DEBUG, INFO, WARN, ERROR, CRITICAL LOGLEVEL: ${LOGLEVEL:-INFO} ports: - - "8081:8081" + - "8081:8081" expose: - - "8081" + - "8081" shm_size: 5gb depends_on: nemollm-embedding: @@ -64,7 +64,6 @@ services: condition: service_healthy required: false - rag-playground: container_name: rag-playground image: rag-playground:${TAG:-latest} @@ -88,11 +87,11 @@ services: # enable observability in rag playground ENABLE_TRACING: false ports: - - "8090:8090" + - "8090:8090" expose: - - "8090" + - "8090" depends_on: - - chain-server + - chain-server networks: default: diff --git a/RAG/examples/basic_rag/llamaindex/prompt.yaml b/RAG/examples/basic_rag/llamaindex/prompt.yaml index 4bf764a0..7afefde6 100644 --- a/RAG/examples/basic_rag/llamaindex/prompt.yaml +++ b/RAG/examples/basic_rag/llamaindex/prompt.yaml @@ -1,10 +1,10 @@ chat_template: | - You are a helpful, respectful and honest assistant. - Always answer as helpfully as possible, while being safe. - Please ensure that your responses are positive in nature. + You are a helpful, respectful and honest assistant. + Always answer as helpfully as possible, while being safe. + Please ensure that your responses are positive in nature. -rag_template: | - Use the following context to answer the user's question. If you don't know the answer, - just say that you don't know, don't try to make up an answer. - Context: {context_str} Question: {query_str} Only return the helpful - answer below and nothing else. Helpful answer: \ No newline at end of file +rag_template: |- + Use the following context to answer the user's question. If you don't know the answer, + just say that you don't know, don't try to make up an answer. + Context: {context_str} Question: {query_str} Only return the helpful + answer below and nothing else. 
Helpful answer: diff --git a/RAG/examples/local_deploy/docker-compose-nim-ms.yaml b/RAG/examples/local_deploy/docker-compose-nim-ms.yaml index 6c73fff0..d06119c8 100644 --- a/RAG/examples/local_deploy/docker-compose-nim-ms.yaml +++ b/RAG/examples/local_deploy/docker-compose-nim-ms.yaml @@ -3,12 +3,12 @@ services: container_name: nemollm-inference-microservice image: nvcr.io/nim/meta/llama3-8b-instruct:1.0.3 volumes: - - ${MODEL_DIRECTORY}:/opt/nim/.cache + - ${MODEL_DIRECTORY}:/opt/nim/.cache user: "${USERID}" ports: - - "8000:8000" + - "8000:8000" expose: - - "8000" + - "8000" environment: NGC_API_KEY: ${NGC_API_KEY} shm_size: 20gb @@ -31,11 +31,11 @@ services: container_name: nemo-retriever-embedding-microservice image: nvcr.io/nim/nvidia/nv-embedqa-e5-v5:1.0.1 volumes: - - ${MODEL_DIRECTORY}:/opt/nim/.cache + - ${MODEL_DIRECTORY}:/opt/nim/.cache ports: - - "9080:8000" + - "9080:8000" expose: - - "8000" + - "8000" environment: NGC_API_KEY: ${NGC_API_KEY} user: "${USERID}" @@ -59,11 +59,11 @@ services: container_name: nemo-retriever-ranking-microservice image: nvcr.io/nim/nvidia/nv-rerankqa-mistral-4b-v3:1.0.1 volumes: - - ${MODEL_DIRECTORY}:/opt/nim/.cache + - ${MODEL_DIRECTORY}:/opt/nim/.cache ports: - - "1976:8000" + - "1976:8000" expose: - - "8000" + - "8000" environment: NGC_API_KEY: ${NGC_API_KEY} user: "${USERID}" diff --git a/RAG/examples/local_deploy/docker-compose-vectordb.yaml b/RAG/examples/local_deploy/docker-compose-vectordb.yaml index cd76bc98..4abf756a 100644 --- a/RAG/examples/local_deploy/docker-compose-vectordb.yaml +++ b/RAG/examples/local_deploy/docker-compose-vectordb.yaml @@ -3,18 +3,17 @@ services: container_name: pgvector image: pgvector/pgvector:pg16 ports: - - 5432:5432 + - 5432:5432 expose: - - "5432" + - "5432" volumes: - - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/data:/var/lib/postgresql/data + - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/data:/var/lib/postgresql/data environment: - - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password} - - POSTGRES_USER=${POSTGRES_USER:-postgres} - - POSTGRES_DB=${POSTGRES_DB:-api} + - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password} + - POSTGRES_USER=${POSTGRES_USER:-postgres} + - POSTGRES_DB=${POSTGRES_DB:-api} profiles: ["pgvector"] - etcd: container_name: milvus-etcd image: quay.io/coreos/etcd:v3.5.5 diff --git a/RAG/tools/evaluation/docker-compose.yaml b/RAG/tools/evaluation/docker-compose.yaml index d0b84f53..e6f085b3 100644 --- a/RAG/tools/evaluation/docker-compose.yaml +++ b/RAG/tools/evaluation/docker-compose.yaml @@ -10,12 +10,13 @@ services: volumes: - ${DATASET_DIRECTORY}:/data_dir - ${RESULT_DIRECTORY}:/result_dir - command: python3 /opt/tools/evaluation/main.py --generate_answer ${GENERATE_ANSWERS:-False} --base_url ${BASE_URL:-0.0.0.0:8081} --docs /data_dir --ga_input /result_dir/qna.json --ga_output /result_dir/eval.json --evaluate ${EVALUATE:-True} --metrics ${METRICS:-ragas} --judge_llm_model ${JUDGE_LLM_MODEL} --ev_input /result_dir/eval.json --ev_result /result_dir/result + command: python3 /opt/tools/evaluation/main.py --generate_answer ${GENERATE_ANSWERS:-False} --base_url ${BASE_URL:-0.0.0.0:8081} + --docs /data_dir --ga_input /result_dir/qna.json --ga_output /result_dir/eval.json --evaluate ${EVALUATE:-True} --metrics + ${METRICS:-ragas} --judge_llm_model ${JUDGE_LLM_MODEL} --ev_input /result_dir/eval.json --ev_result /result_dir/result environment: NVIDIA_API_KEY: ${NVIDIA_API_KEY} shm_size: 5gb - synthetic_data_generator: container_name: data-generator image: data-generator:${TAG:-latest} @@ -27,7 +28,7 @@ 
services: volumes: - ${DATASET_DIRECTORY}:/data_dir - ${RESULT_DIRECTORY}:/result_dir - command: python3 /opt/tools/evaluation/main.py --docs /data_dir --gd_output /result_dir/qna.json + command: python3 /opt/tools/evaluation/main.py --docs /data_dir --gd_output /result_dir/qna.json environment: NVIDIA_API_KEY: ${NVIDIA_API_KEY} diff --git a/RAG/tools/observability/configs/jaeger.yaml b/RAG/tools/observability/configs/jaeger.yaml index 64d3513c..41e3b326 100644 --- a/RAG/tools/observability/configs/jaeger.yaml +++ b/RAG/tools/observability/configs/jaeger.yaml @@ -1,3 +1,3 @@ query.base-path: /jaeger/ui cassandra.keyspace: jaeger_v1_dc1 -cassandra.servers: cassandra \ No newline at end of file +cassandra.servers: cassandra diff --git a/RAG/tools/observability/configs/otel-collector-config.yaml b/RAG/tools/observability/configs/otel-collector-config.yaml index 8e79ef48..8bf59d0a 100644 --- a/RAG/tools/observability/configs/otel-collector-config.yaml +++ b/RAG/tools/observability/configs/otel-collector-config.yaml @@ -4,7 +4,7 @@ receivers: grpc: endpoint: 0.0.0.0:4317 http: - # endpoint: 0.0.0.0:4318 + # endpoint: 0.0.0.0:4318 processors: tail_sampling: @@ -51,4 +51,3 @@ service: receivers: [otlp] exporters: [otlp] processors: [tail_sampling, transform] - diff --git a/RAG/tools/observability/docker-compose.yaml b/RAG/tools/observability/docker-compose.yaml index c5edab14..4ff48a79 100644 --- a/RAG/tools/observability/docker-compose.yaml +++ b/RAG/tools/observability/docker-compose.yaml @@ -1,4 +1,4 @@ -services: +services: otel-collector: container_name: otel-collector image: otel/opentelemetry-collector-contrib:0.102.0 @@ -30,7 +30,7 @@ services: - ${JAEGER_CONFIG_FILE}:/etc/jaeger.yaml depends_on: - cassandra-schema - + cassandra: image: cassandra:4.0 container_name: cassandra diff --git a/community/digital-human-security-analyst/conda_env.yml b/community/digital-human-security-analyst/conda_env.yml index 36ebd7d4..70431737 100644 --- a/community/digital-human-security-analyst/conda_env.yml +++ b/community/digital-human-security-analyst/conda_env.yml @@ -15,24 +15,24 @@ name: morpheus channels: - - rapidsai - - nvidia - - nvidia/label/dev # For pre-releases of MRC. Should still default to full releases if available - - conda-forge + - rapidsai + - nvidia + - nvidia/label/dev # For pre-releases of MRC. Should still default to full releases if available + - conda-forge dependencies: - ####### Morpheus Dependencies (keep sorted!) ####### - - boto3 - - dask - - dill - - distributed - - kfp - - librdkafka - - mlflow>=2.10.0,<3 - - nodejs=18.* - - nvtabular=23.06 - - papermill - - s3fs>=2023.6 + ####### Morpheus Dependencies (keep sorted!) ####### + - boto3 + - dask + - dill + - distributed + - kfp + - librdkafka + - mlflow>=2.10.0,<3 + - nodejs=18.* + - nvtabular=23.06 + - papermill + - s3fs>=2023.6 - ##### Pip Dependencies (keep sorted!) ####### - - pip: - - python-logging-loki + ##### Pip Dependencies (keep sorted!) ####### + - pip: + - python-logging-loki diff --git a/community/digital-human-security-analyst/docker-compose.yml b/community/digital-human-security-analyst/docker-compose.yml index b22b08a6..98c9781f 100644 --- a/community/digital-human-security-analyst/docker-compose.yml +++ b/community/digital-human-security-analyst/docker-compose.yml @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
- version: '3.3' services: @@ -28,7 +27,8 @@ services: networks: - frontend - backend - command: mlflow server --gunicorn-opts "--log-level debug" --backend-store-uri sqlite:////opt/mlflow/dbdata/mlflow.db --serve-artifacts --artifacts-destination /opt/mlflow/artifacts --host 0.0.0.0 + command: mlflow server --gunicorn-opts "--log-level debug" --backend-store-uri sqlite:////opt/mlflow/dbdata/mlflow.db + --serve-artifacts --artifacts-destination /opt/mlflow/artifacts --host 0.0.0.0 # Run the container with this command to upgrade if needed: mlflow db upgrade sqlite:////opt/mlflow/dbdata/mlflow.db volumes: - db_data:/opt/mlflow/dbdata @@ -37,7 +37,7 @@ services: # nim-llm: # image: nvcr.io/nim/meta/llama3-8b-instruct:1.0.0 # runtime: nvidia - # ulimits: + # ulimits: # memlock: -1 # stack: 67108864 # environment: @@ -65,9 +65,9 @@ services: resources: reservations: devices: - - driver: nvidia - device_ids: ['2'] - capabilities: [gpu] + - driver: nvidia + device_ids: ['2'] + capabilities: [gpu] image: morpheus_jupyter container_name: jupyter_security_analyst ports: diff --git a/community/event-driven-rag-cve-analysis/docker-compose.yml b/community/event-driven-rag-cve-analysis/docker-compose.yml index cba6e620..6036c714 100755 --- a/community/event-driven-rag-cve-analysis/docker-compose.yml +++ b/community/event-driven-rag-cve-analysis/docker-compose.yml @@ -38,7 +38,7 @@ services: reservations: devices: - driver: nvidia - capabilities: [ gpu ] + capabilities: [gpu] networks: - proxy # Uncomment if the .env file issue is resolved: https://github.com/docker/compose/issues/9181#issuecomment-1996016211 diff --git a/community/event-driven-rag-cve-analysis/requirements.yaml b/community/event-driven-rag-cve-analysis/requirements.yaml index 22d00294..6651e4a3 100644 --- a/community/event-driven-rag-cve-analysis/requirements.yaml +++ b/community/event-driven-rag-cve-analysis/requirements.yaml @@ -37,8 +37,8 @@ dependencies: ####### Pip Dependencies (keep sorted!) 
####### - pip - pip: - - google-search-results==2.4 - - langchain-nvidia-ai-endpoints==0.0.3 - - langchain==0.1.9 - - nemollm==0.3.5 - - pydpkg==1.9.2 + - google-search-results==2.4 + - langchain-nvidia-ai-endpoints==0.0.3 + - langchain==0.1.9 + - nemollm==0.3.5 + - pydpkg==1.9.2 diff --git a/community/fm-asr-streaming-rag/deploy/docker-compose-file-replay.yaml b/community/fm-asr-streaming-rag/deploy/docker-compose-file-replay.yaml index 6bc2d340..1ec2add4 100644 --- a/community/fm-asr-streaming-rag/deploy/docker-compose-file-replay.yaml +++ b/community/fm-asr-streaming-rag/deploy/docker-compose-file-replay.yaml @@ -51,4 +51,4 @@ services: devices: - driver: nvidia device_ids: ['${REPLAY_GPU:-0}'] - capabilities: [gpu] \ No newline at end of file + capabilities: [gpu] diff --git a/community/fm-asr-streaming-rag/deploy/docker-compose-fm-asr.yaml b/community/fm-asr-streaming-rag/deploy/docker-compose-fm-asr.yaml index d150d6b5..d188a7bb 100644 --- a/community/fm-asr-streaming-rag/deploy/docker-compose-fm-asr.yaml +++ b/community/fm-asr-streaming-rag/deploy/docker-compose-fm-asr.yaml @@ -76,9 +76,9 @@ services: resources: reservations: devices: - - driver: nvidia - device_ids: ['${FRONTEND_GPU:-0}'] - capabilities: [gpu] + - driver: nvidia + device_ids: ['${FRONTEND_GPU:-0}'] + capabilities: [gpu] server: container_name: fm-asr-chain-server @@ -110,6 +110,6 @@ services: resources: reservations: devices: - - driver: nvidia - device_ids: ['${CHAIN_GPU:-0}'] - capabilities: [gpu] \ No newline at end of file + - driver: nvidia + device_ids: ['${CHAIN_GPU:-0}'] + capabilities: [gpu] diff --git a/community/fm-asr-streaming-rag/deploy/docker-compose-milvus-standalone.yaml b/community/fm-asr-streaming-rag/deploy/docker-compose-milvus-standalone.yaml index 4664fda9..e8161a24 100644 --- a/community/fm-asr-streaming-rag/deploy/docker-compose-milvus-standalone.yaml +++ b/community/fm-asr-streaming-rag/deploy/docker-compose-milvus-standalone.yaml @@ -23,9 +23,7 @@ services: - ETCD_SNAPSHOT_COUNT=50000 volumes: - ${NEMO_RET_DIR?:source compose.env}/volumes/etcd:/etcd:Z - command: - etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls - http://0.0.0.0:2379 --data-dir /etcd + command: etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd healthcheck: test: ["CMD", "etcdctl", "endpoint", "health"] interval: 30s diff --git a/community/fm-asr-streaming-rag/deploy/docker-compose-nemo-retriever.yaml b/community/fm-asr-streaming-rag/deploy/docker-compose-nemo-retriever.yaml index 78fca84d..565f2304 100644 --- a/community/fm-asr-streaming-rag/deploy/docker-compose-nemo-retriever.yaml +++ b/community/fm-asr-streaming-rag/deploy/docker-compose-nemo-retriever.yaml @@ -76,8 +76,7 @@ services: command: - "/bin/sh" - "-c" - - "opentelemetry-instrument \ - uvicorn retrieval.main:app --host 0.0.0.0 --port 8000" + - "opentelemetry-instrument uvicorn retrieval.main:app --host 0.0.0.0 --port 8000" healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8000/health"] @@ -99,9 +98,7 @@ services: image: nvcr.io/ohlfw0olaadg/ea-participants/nemo-retriever-embedding-microservice:24.02 ports: - "${NEMO_EMBEDDING_PORT:-1985}:8080" - command: ./bin/web -p 8080 -n 1 -g - model_config_templates/NV-Embed-QA_template.yaml -c - /models/nv-embed-qa_v4/NV-Embed-QA-4.nemo + command: ./bin/web -p 8080 -n 1 -g model_config_templates/NV-Embed-QA_template.yaml -c /models/nv-embed-qa_v4/NV-Embed-QA-4.nemo volumes: - ${NEMO_RET_DIR?:source compose.env}/models:/models:ro 
healthcheck: @@ -149,9 +146,7 @@ services: - ETCD_SNAPSHOT_COUNT=50000 volumes: - ${NEMO_RET_DIR?:source compose.env}/volumes/etcd:/etcd:Z - command: - etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls - http://0.0.0.0:2379 --data-dir /etcd + command: etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd healthcheck: test: ["CMD", "etcdctl", "endpoint", "health"] interval: 30s @@ -254,11 +249,11 @@ services: volumes: - ${NEMO_RET_DIR?:source compose.env}/config/otel-collector-config.yaml:/etc/otel-collector-config.yaml ports: - - "13133:13133" # health check - - "4317:4317" # OTLP over gRPC receiver - - "55679:55679" # UI + - "13133:13133" # health check + - "4317:4317" # OTLP over gRPC receiver + - "55679:55679" # UI zipkin: image: openzipkin/zipkin:3.0.6 ports: - - "9411:9411" # Zipkin UI and API + - "9411:9411" # Zipkin UI and API diff --git a/community/fm-asr-streaming-rag/deploy/docker-compose-nim-build.yaml b/community/fm-asr-streaming-rag/deploy/docker-compose-nim-build.yaml index 76b4399d..b65312a3 100644 --- a/community/fm-asr-streaming-rag/deploy/docker-compose-nim-build.yaml +++ b/community/fm-asr-streaming-rag/deploy/docker-compose-nim-build.yaml @@ -51,4 +51,4 @@ services: devices: - driver: nvidia device_ids: ['${NIM_GPU:-0}'] - capabilities: [gpu] \ No newline at end of file + capabilities: [gpu] diff --git a/community/fm-asr-streaming-rag/deploy/docker-compose-nim-llm.yaml b/community/fm-asr-streaming-rag/deploy/docker-compose-nim-llm.yaml index e4404180..792a7aaa 100644 --- a/community/fm-asr-streaming-rag/deploy/docker-compose-nim-llm.yaml +++ b/community/fm-asr-streaming-rag/deploy/docker-compose-nim-llm.yaml @@ -50,4 +50,4 @@ services: devices: - driver: nvidia device_ids: ['${NIM_GPU:-0}'] - capabilities: [gpu] \ No newline at end of file + capabilities: [gpu] diff --git a/community/fm-asr-streaming-rag/nemo-retriever/config/milvus-config.yaml b/community/fm-asr-streaming-rag/nemo-retriever/config/milvus-config.yaml index 160b68d2..07b653f9 100644 --- a/community/fm-asr-streaming-rag/nemo-retriever/config/milvus-config.yaml +++ b/community/fm-asr-streaming-rag/nemo-retriever/config/milvus-config.yaml @@ -17,11 +17,11 @@ # Related configuration of etcd, used to store Milvus metadata & service discovery. etcd: endpoints: localhost:2379 - rootPath: by-dev # The root path where data is stored in etcd - metaSubPath: meta # metaRootPath = rootPath + '/' + metaSubPath - kvSubPath: kv # kvRootPath = rootPath + '/' + kvSubPath + rootPath: by-dev # The root path where data is stored in etcd + metaSubPath: meta # metaRootPath = rootPath + '/' + metaSubPath + kvSubPath: kv # kvRootPath = rootPath + '/' + kvSubPath log: - level: error # Only supports debug, info, warn, error, panic, or fatal. Default 'info'. + level: error # Only supports debug, info, warn, error, panic, or fatal. Default 'info'. 
# path is one of: # - "default" as os.Stderr, # - "stderr" as os.Stderr, @@ -30,18 +30,18 @@ etcd: # please adjust in embedded Milvus: /tmp/milvus/logs/etcd.log path: stdout ssl: - enabled: false # Whether to support ETCD secure connection mode - tlsCert: /path/to/etcd-client.pem # path to your cert file - tlsKey: /path/to/etcd-client-key.pem # path to your key file - tlsCACert: /path/to/ca.pem # path to your CACert file + enabled: false # Whether to support ETCD secure connection mode + tlsCert: /path/to/etcd-client.pem # path to your cert file + tlsKey: /path/to/etcd-client-key.pem # path to your key file + tlsCACert: /path/to/ca.pem # path to your CACert file # TLS min version # Optional values: 1.0, 1.1, 1.2, 1.3。 # We recommend using version 1.2 and above. tlsMinVersion: 1.3 use: - embed: false # Whether to enable embedded Etcd (an in-process EtcdServer). + embed: false # Whether to enable embedded Etcd (an in-process EtcdServer). data: - dir: default.etcd # Embedded Etcd only. please adjust in embedded Milvus: /tmp/milvus/etcdData/ + dir: default.etcd # Embedded Etcd only. please adjust in embedded Milvus: /tmp/milvus/etcdData/ metastore: # Default value: etcd @@ -54,23 +54,23 @@ metastore: tikv: # Note that the default pd port of tikv is 2379, which conflicts with etcd. endpoints: 127.0.0.1:2389 - rootPath: by-dev # The root path where data is stored - metaSubPath: meta # metaRootPath = rootPath + '/' + metaSubPath - kvSubPath: kv # kvRootPath = rootPath + '/' + kvSubPath + rootPath: by-dev # The root path where data is stored + metaSubPath: meta # metaRootPath = rootPath + '/' + metaSubPath + kvSubPath: kv # kvRootPath = rootPath + '/' + kvSubPath localStorage: - path: /var/lib/milvus/data/ # please adjust in embedded Milvus: /tmp/milvus/data/ + path: /var/lib/milvus/data/ # please adjust in embedded Milvus: /tmp/milvus/data/ # Related configuration of MinIO/S3/GCS or any other service supports S3 API, which is responsible for data persistence for Milvus. # We refer to the storage service as MinIO/S3 in the following description for simplicity. minio: - address: localhost # Address of MinIO/S3 - port: 9000 # Port of MinIO/S3 - accessKeyID: minioadmin # accessKeyID of MinIO/S3 - secretAccessKey: minioadmin # MinIO/S3 encryption string - useSSL: false # Access to MinIO/S3 with SSL - bucketName: a-bucket # Bucket name in MinIO/S3 - rootPath: files # The root path where the message is stored in MinIO/S3 + address: localhost # Address of MinIO/S3 + port: 9000 # Port of MinIO/S3 + accessKeyID: minioadmin # accessKeyID of MinIO/S3 + secretAccessKey: minioadmin # MinIO/S3 encryption string + useSSL: false # Access to MinIO/S3 with SSL + bucketName: a-bucket # Bucket name in MinIO/S3 + rootPath: files # The root path where the message is stored in MinIO/S3 # Whether to useIAM role to access S3/GCS instead of access/secret keys # For more information, refer to # aws: https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_use.html @@ -109,14 +109,14 @@ mq: # Related configuration of pulsar, used to manage Milvus logs of recent mutation operations, output streaming log, and provide log publish-subscribe services. pulsar: - address: localhost # Address of pulsar - port: 6650 # Port of Pulsar - webport: 80 # Web port of pulsar, if you connect directly without proxy, should use 8080 - maxMessageSize: 5242880 # 5 * 1024 * 1024 Bytes, Maximum size of each message in pulsar. 
+ address: localhost # Address of pulsar + port: 6650 # Port of Pulsar + webport: 80 # Web port of pulsar, if you connect directly without proxy, should use 8080 + maxMessageSize: 5242880 # 5 * 1024 * 1024 Bytes, Maximum size of each message in pulsar. tenant: public namespace: default - requestTimeout: 60 # pulsar client global request timeout in seconds - enableClientMetrics: false # Whether to register pulsar client metrics into milvus metrics path. + requestTimeout: 60 # pulsar client global request timeout in seconds + enableClientMetrics: false # Whether to register pulsar client metrics into milvus metrics path. # If you want to enable kafka, needs to comment the pulsar configs # kafka: @@ -131,11 +131,11 @@ rocksmq: # The path where the message is stored in rocksmq # please adjust in embedded Milvus: /tmp/milvus/rdb_data path: /var/lib/milvus/rdb_data - lrucacheratio: 0.06 # rocksdb cache memory ratio - rocksmqPageSize: 67108864 # 64 MB, 64 * 1024 * 1024 bytes, The size of each page of messages in rocksmq - retentionTimeInMinutes: 4320 # 3 days, 3 * 24 * 60 minutes, The retention time of the message in rocksmq. - retentionSizeInMB: 8192 # 8 GB, 8 * 1024 MB, The retention size of the message in rocksmq. - compactionInterval: 86400 # 1 day, trigger rocksdb compaction every day to remove deleted data + lrucacheratio: 0.06 # rocksdb cache memory ratio + rocksmqPageSize: 67108864 # 64 MB, 64 * 1024 * 1024 bytes, The size of each page of messages in rocksmq + retentionTimeInMinutes: 4320 # 3 days, 3 * 24 * 60 minutes, The retention time of the message in rocksmq. + retentionSizeInMB: 8192 # 8 GB, 8 * 1024 MB, The retention size of the message in rocksmq. + compactionInterval: 86400 # 1 day, trigger rocksdb compaction every day to remove deleted data # compaction compression type, only support use 0,7. # 0 means not compress, 7 will use zstd # len of types means num of rocksdb level. @@ -144,36 +144,36 @@ rocksmq: # natsmq configuration. # more detail: https://docs.nats.io/running-a-nats-service/configuration natsmq: - server: # server side configuration for natsmq. - port: 4222 # 4222 by default, Port for nats server listening. - storeDir: /var/lib/milvus/nats # /var/lib/milvus/nats by default, directory to use for JetStream storage of nats. - maxFileStore: 17179869184 # (B) 16GB by default, Maximum size of the 'file' storage. - maxPayload: 8388608 # (B) 8MB by default, Maximum number of bytes in a message payload. - maxPending: 67108864 # (B) 64MB by default, Maximum number of bytes buffered for a connection Applies to client connections. - initializeTimeout: 4000 # (ms) 4s by default, waiting for initialization of natsmq finished. + server: # server side configuration for natsmq. + port: 4222 # 4222 by default, Port for nats server listening. + storeDir: /var/lib/milvus/nats # /var/lib/milvus/nats by default, directory to use for JetStream storage of nats. + maxFileStore: 17179869184 # (B) 16GB by default, Maximum size of the 'file' storage. + maxPayload: 8388608 # (B) 8MB by default, Maximum number of bytes in a message payload. + maxPending: 67108864 # (B) 64MB by default, Maximum number of bytes buffered for a connection Applies to client connections. + initializeTimeout: 4000 # (ms) 4s by default, waiting for initialization of natsmq finished. monitor: - trace: false # false by default, If true enable protocol trace log messages. - debug: false # false by default, If true enable debug log messages. - logTime: true # true by default, If set to false, log without timestamps. 
- logFile: /tmp/milvus/logs/nats.log # /tmp/milvus/logs/nats.log by default, Log file path relative to .. of milvus binary if use relative path. - logSizeLimit: 536870912 # (B) 512MB by default, Size in bytes after the log file rolls over to a new one. + trace: false # false by default, If true enable protocol trace log messages. + debug: false # false by default, If true enable debug log messages. + logTime: true # true by default, If set to false, log without timestamps. + logFile: /tmp/milvus/logs/nats.log # /tmp/milvus/logs/nats.log by default, Log file path relative to .. of milvus binary if use relative path. + logSizeLimit: 536870912 # (B) 512MB by default, Size in bytes after the log file rolls over to a new one. retention: - maxAge: 4320 # (min) 3 days by default, Maximum age of any message in the P-channel. - maxBytes: # (B) None by default, How many bytes the single P-channel may contain. Removing oldest messages if the P-channel exceeds this size. - maxMsgs: # None by default, How many message the single P-channel may contain. Removing oldest messages if the P-channel exceeds this limit. + maxAge: 4320 # (min) 3 days by default, Maximum age of any message in the P-channel. + maxBytes: # (B) None by default, How many bytes the single P-channel may contain. Removing oldest messages if the P-channel exceeds this size. + maxMsgs: # None by default, How many message the single P-channel may contain. Removing oldest messages if the P-channel exceeds this limit. # Related configuration of rootCoord, used to handle data definition language (DDL) and data control language (DCL) requests rootCoord: - dmlChannelNum: 16 # The number of dml channels created at system startup - maxDatabaseNum: 64 # Maximum number of database - maxPartitionNum: 4096 # Maximum number of partitions in a collection - minSegmentSizeToEnableIndex: 1024 # It's a threshold. When the segment size is less than this value, the segment will not be indexed - importTaskExpiration: 900 # (in seconds) Duration after which an import task will expire (be killed). Default 900 seconds (15 minutes). - importTaskRetention: 86400 # (in seconds) Milvus will keep the record of import tasks for at least `importTaskRetention` seconds. Default 86400, seconds (24 hours). + dmlChannelNum: 16 # The number of dml channels created at system startup + maxDatabaseNum: 64 # Maximum number of database + maxPartitionNum: 4096 # Maximum number of partitions in a collection + minSegmentSizeToEnableIndex: 1024 # It's a threshold. When the segment size is less than this value, the segment will not be indexed + importTaskExpiration: 900 # (in seconds) Duration after which an import task will expire (be killed). Default 900 seconds (15 minutes). + importTaskRetention: 86400 # (in seconds) Milvus will keep the record of import tasks for at least `importTaskRetention` seconds. Default 86400, seconds (24 hours). enableActiveStandby: false # can specify ip for example # ip: 127.0.0.1 - ip: # if not specify address, will use the first unicastable address as local ip + ip: # if not specify address, will use the first unicastable address as local ip port: 53100 grpc: serverMaxSendSize: 536870912 @@ -183,33 +183,33 @@ rootCoord: # Related configuration of proxy, used to validate client requests and reduce the returned results. 
proxy: - timeTickInterval: 200 # ms, the interval that proxy synchronize the time tick - healthCheckTimeout: 3000 # ms, the interval that to do component healthy check + timeTickInterval: 200 # ms, the interval that proxy synchronize the time tick + healthCheckTimeout: 3000 # ms, the interval that to do component healthy check msgStream: timeTick: bufSize: 512 - maxNameLength: 255 # Maximum length of name for a collection or alias + maxNameLength: 255 # Maximum length of name for a collection or alias # Maximum number of fields in a collection. # As of today (2.2.0 and after) it is strongly DISCOURAGED to set maxFieldNum >= 64. # So adjust at your risk! maxFieldNum: 64 - maxShardNum: 16 # Maximum number of shards in a collection - maxDimension: 32768 # Maximum dimension of a vector + maxShardNum: 16 # Maximum number of shards in a collection + maxDimension: 32768 # Maximum dimension of a vector # Whether to produce gin logs.\n # please adjust in embedded Milvus: false ginLogging: true - maxTaskNum: 1024 # max task number of proxy task queue + maxTaskNum: 1024 # max task number of proxy task queue accessLog: enable: false - filename: "" # Log filename, leave empty to use stdout. + filename: "" # Log filename, leave empty to use stdout. # localPath: /tmp/milvus_accesslog // log file rootpath # maxSize: 64 # max log file size of singal log file to trigger rotate. http: - enabled: true # Whether to enable the http server - debug_mode: false # Whether to enable http server debug mode + enabled: true # Whether to enable the http server + debug_mode: false # Whether to enable http server debug mode # can specify ip for example # ip: 127.0.0.1 - ip: # if not specify address, will use the first unicastable address as local ip + ip: # if not specify address, will use the first unicastable address as local ip port: 19530 internalPort: 19529 grpc: @@ -220,25 +220,25 @@ proxy: # Related configuration of queryCoord, used to manage topology and load balancing for the query nodes, and handoff from growing segments to sealed segments. 
queryCoord: - autoHandoff: true # Enable auto handoff - autoBalance: false # Enable auto balance - balancer: ScoreBasedBalancer # Balancer to use - globalRowCountFactor: 0.1 # expert parameters, only used by scoreBasedBalancer - scoreUnbalanceTolerationFactor: 0.05 # expert parameters, only used by scoreBasedBalancer - reverseUnBalanceTolerationFactor: 1.3 #expert parameters, only used by scoreBasedBalancer - overloadedMemoryThresholdPercentage: 90 # The threshold percentage that memory overload + autoHandoff: true # Enable auto handoff + autoBalance: false # Enable auto balance + balancer: ScoreBasedBalancer # Balancer to use + globalRowCountFactor: 0.1 # expert parameters, only used by scoreBasedBalancer + scoreUnbalanceTolerationFactor: 0.05 # expert parameters, only used by scoreBasedBalancer + reverseUnBalanceTolerationFactor: 1.3 # expert parameters, only used by scoreBasedBalancer + overloadedMemoryThresholdPercentage: 90 # The threshold percentage that memory overload balanceIntervalSeconds: 60 memoryUsageMaxDifferencePercentage: 30 checkInterval: 1000 - channelTaskTimeout: 60000 # 1 minute - segmentTaskTimeout: 120000 # 2 minute + channelTaskTimeout: 60000 # 1 minute + segmentTaskTimeout: 120000 # 2 minute distPullInterval: 500 - heartbeatAvailableInterval: 10000 # 10s, Only QueryNodes which fetched heartbeats within the duration are available + heartbeatAvailableInterval: 10000 # 10s, Only QueryNodes which fetched heartbeats within the duration are available loadTimeoutSeconds: 600 checkHandoffInterval: 5000 # can specify ip for example # ip: 127.0.0.1 - ip: # if not specify address, will use the first unicastable address as local ip + ip: # if not specify address, will use the first unicastable address as local ip port: 19531 grpc: serverMaxSendSize: 536870912 @@ -247,35 +247,35 @@ queryCoord: clientMaxRecvSize: 268435456 taskMergeCap: 1 taskExecutionCap: 256 - enableActiveStandby: false # Enable active-standby - brokerTimeout: 5000 # broker rpc timeout in milliseconds + enableActiveStandby: false # Enable active-standby + brokerTimeout: 5000 # broker rpc timeout in milliseconds # Related configuration of queryNode, used to run hybrid search between vector and scalar data. queryNode: dataSync: flowGraph: - maxQueueLength: 16 # Maximum length of task queue in flowgraph - maxParallelism: 1024 # Maximum number of tasks executed in parallel in the flowgraph + maxQueueLength: 16 # Maximum length of task queue in flowgraph + maxParallelism: 1024 # Maximum number of tasks executed in parallel in the flowgraph stats: - publishInterval: 1000 # Interval for querynode to report node information (milliseconds) + publishInterval: 1000 # Interval for querynode to report node information (milliseconds) segcore: - cgoPoolSizeRatio: 2.0 # cgo pool size ratio to max read concurrency + cgoPoolSizeRatio: 2.0 # cgo pool size ratio to max read concurrency knowhereThreadPoolNumRatio: 4 # Use more threads to make better use of SSD throughput in disk index. # This parameter is only useful when enable-disk = true. # And this value should be a number greater than 1 and less than 32. - chunkRows: 1024 # The number of vectors in a chunk. - growing: # growing a vector index for growing segment to accelerate search + chunkRows: 1024 # The number of vectors in a chunk. 

# Related configuration of queryNode, used to run hybrid search between vector and scalar data.
queryNode:
  dataSync:
    flowGraph:
-      maxQueueLength: 16 # Maximum length of task queue in flowgraph
-      maxParallelism: 1024 # Maximum number of tasks executed in parallel in the flowgraph
+      maxQueueLength: 16  # Maximum length of the task queue in the flowgraph
+      maxParallelism: 1024  # Maximum number of tasks executed in parallel in the flowgraph
  stats:
-    publishInterval: 1000 # Interval for querynode to report node information (milliseconds)
+    publishInterval: 1000  # Interval at which the querynode reports node information (milliseconds)
  segcore:
-    cgoPoolSizeRatio: 2.0 # cgo pool size ratio to max read concurrency
+    cgoPoolSizeRatio: 2.0  # cgo pool size as a ratio of max read concurrency
    knowhereThreadPoolNumRatio: 4
    # Use more threads to make better use of SSD throughput in disk index.
    # This parameter is only useful when enable-disk = true.
    # And this value should be a number greater than 1 and less than 32.
-    chunkRows: 1024 # The number of vectors in a chunk.
-    growing: # growing a vector index for growing segment to accelerate search
+    chunkRows: 1024  # The number of vectors in a chunk.
+    growing:  # build a vector index on growing segments to accelerate search
      enableIndex: true
-      nlist: 128 # growing segment index nlist
-      nprobe: 16 # nprobe to search growing segment, based on your accuracy requirement, must smaller than nlist
-  loadMemoryUsageFactor: 1 # The multiply factor of calculating the memory usage while loading segments
-  enableDisk: false # enable querynode load disk index, and search on disk index
+      nlist: 128  # growing segment index nlist
+      nprobe: 16  # nprobe used to search growing segments; set it per your accuracy requirement, and it must be smaller than nlist
+  loadMemoryUsageFactor: 1  # multiplier used to estimate memory usage while loading segments
+  enableDisk: false  # allow the querynode to load disk indexes and search on them
  maxDiskUsagePercentage: 95
  cache:
-    enabled: true # deprecated, TODO: remove it
-    memoryLimit: 2147483648 # 2 GB, 2 * 1024 *1024 *1024 # deprecated, TODO: remove it
-    readAheadPolicy: willneed # The read ahead policy of chunk cache, options: `normal, random, sequential, willneed, dontneed`
+    enabled: true  # deprecated, TODO: remove it
+    memoryLimit: 2147483648  # 2 GB, 2 * 1024 * 1024 * 1024 # deprecated, TODO: remove it
+    readAheadPolicy: willneed  # The read-ahead policy of the chunk cache, options: `normal, random, sequential, willneed, dontneed`
  grouping:
    enabled: true
    maxNQ: 1000
@@ -289,7 +289,7 @@ queryNode:
  # Max read concurrency must be greater than or equal to 1, and less than or equal to runtime.NumCPU * 100.
  # (0, 100]
  maxReadConcurrentRatio: 1
-  cpuRatio: 10 # ratio used to estimate read task cpu usage.
+  cpuRatio: 10  # ratio used to estimate read task CPU usage
  maxTimestampLag: 86400
  # read task schedule policy: fifo(by default), user-task-polling.
  scheduleReadPolicy:
@@ -303,13 +303,13 @@ queryNode:
    name: fifo
    maxPendingTask: 10240
    # user-task-polling configure:
-    taskQueueExpire: 60 # 1 min by default, expire time of inner user task queue since queue is empty.
-    enableCrossUserGrouping: false # false by default Enable Cross user grouping when using user-task-polling policy. (close it if task of any user can not merge others).
-    maxPendingTaskPerUser: 1024 # 50 by default, max pending task in scheduler per user.
+    taskQueueExpire: 60  # 1 min by default; how long an inner user task queue lives after it becomes empty
+    enableCrossUserGrouping: false  # false by default; enable cross-user grouping under the user-task-polling policy (disable it if one user's tasks cannot be merged with others')
+    maxPendingTaskPerUser: 1024  # 50 by default, max pending tasks per user in the scheduler
  # can specify ip for example
  # ip: 127.0.0.1
-  ip: # if not specify address, will use the first unicastable address as local ip
+  ip:  # if not specified, the first unicastable address is used as the local IP
  port: 21123
  grpc:
    serverMaxSendSize: 536870912
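# Example (illustrative): letting the query node load and search on-disk vector
# indexes; both keys appear in the section above:
#
# queryNode:
#   enableDisk: true
#   maxDiskUsagePercentage: 95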
@@ -324,16 +324,16 @@ indexCoord:
    withCred: false
  nodeID: 0
  segment:
-    minSegmentNumRowsToEnableIndex: 1024 # It's a threshold. When the segment num rows is less than this value, the segment will not be indexed
+    minSegmentNumRowsToEnableIndex: 1024  # a threshold: when a segment has fewer rows than this value, it will not be indexed

indexNode:
  scheduler:
    buildParallel: 1
-  enableDisk: true # enable index node build disk vector index
+  enableDisk: true  # allow the index node to build disk vector indexes
  maxDiskUsagePercentage: 95
  # can specify ip for example
  # ip: 127.0.0.1
-  ip: # if not specify address, will use the first unicastable address as local ip
+  ip:  # if not specified, the first unicastable address is used as the local IP
  port: 21121
  grpc:
    serverMaxSendSize: 536870912
@@ -343,12 +343,12 @@ indexNode:

dataCoord:
  channel:
-    watchTimeoutInterval: 300 # Timeout on watching channels (in seconds). Datanode tickler update watch progress will reset timeout timer.
-    balanceSilentDuration: 300 # The duration before the channelBalancer on datacoord to run
-    balanceInterval: 360 #The interval for the channelBalancer on datacoord to check balance status
+    watchTimeoutInterval: 300  # Timeout on watching channels (in seconds); a DataNode tickler update to watch progress resets the timer
+    balanceSilentDuration: 300  # The silent duration before the channelBalancer on datacoord runs
+    balanceInterval: 360  # The interval at which the channelBalancer on datacoord checks balance status
  segment:
-    maxSize: 512 # Maximum size of a segment in MB
-    diskSegmentMaxSize: 2048 # Maximum size of a segment in MB for collection which has Disk index
+    maxSize: 512  # Maximum size of a segment in MB
+    diskSegmentMaxSize: 2048  # Maximum size in MB of a segment for collections that have a disk index
    sealProportion: 0.23
    # The time of the assignment expiration in ms
    # Warning! this parameter is an expert variable and closely related to data integrity. Without specific
    # target and solid understanding of the scenarios, it should not be changed. If it's necessary to alter
    # this parameter, make sure that the newly changed value is larger than the previous value used before restart
    # otherwise there could be a large possibility of data loss
    assignmentExpiration: 2000
-    maxLife: 86400 # The max lifetime of segment in seconds, 24*60*60
+    maxLife: 86400  # The max lifetime of a segment in seconds, 24*60*60
    # If a segment didn't accept dml records in maxIdleTime and the size of segment is greater than
    # minSizeFromIdleToSealed, Milvus will automatically seal it.
    # The max idle time of segment in seconds, 10*60.
    maxIdleTime: 600
-    minSizeFromIdleToSealed: 16 # The min size in MB of segment which can be idle from sealed.
+    minSizeFromIdleToSealed: 16  # The minimum size in MB at which an idle segment can be sealed
    # The max number of binlog file for one segment, the segment will be sealed if
    # the number of binlog file reaches to max value.
    maxBinlogFileNumber: 32
-    smallProportion: 0.5 # The segment is considered as "small segment" when its # of rows is smaller than
+    smallProportion: 0.5  # The segment is considered a "small segment" when its # of rows is smaller than
    # (smallProportion * segment max # of rows).
    # A compaction will happen on small segments if the segment after compaction will have
    compactableProportion: 0.85
@@ -373,22 +373,22 @@ dataCoord:
    # MUST BE GREATER THAN OR EQUAL TO smallProportion!!!
    # During compaction, the segment # of rows is able to exceed segment max # of rows by (expansionRate-1) * 100%.
    expansionRate: 1.25
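# Worked example (values from the section above): with maxSize: 512 and
# sealProportion: 0.23, a segment becomes eligible for sealing at roughly
# 512 MB * 0.23 ≈ 118 MB, or earlier if it stays idle past maxIdleTime (600 s)
# while already larger than minSizeFromIdleToSealed (16 MB).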
-  enableCompaction: true # Enable data segment compaction
+  enableCompaction: true  # Enable data segment compaction
  compaction:
    enableAutoCompaction: true
-    rpcTimeout: 10 # compaction rpc request timeout in seconds
-    maxParallelTaskNum: 10 # max parallel compaction task number
+    rpcTimeout: 10  # compaction RPC request timeout in seconds
+    maxParallelTaskNum: 10  # max number of parallel compaction tasks
    indexBasedCompaction: true
  enableGarbageCollection: true
  gc:
-    interval: 3600 # gc interval in seconds
-    missingTolerance: 3600 # file meta missing tolerance duration in seconds, 3600
-    dropTolerance: 10800 # file belongs to dropped entity tolerance duration in seconds. 10800
+    interval: 3600  # gc interval in seconds
+    missingTolerance: 3600  # tolerance duration in seconds for files with missing meta, 3600
+    dropTolerance: 10800  # tolerance duration in seconds for files belonging to dropped entities, 10800
  enableActiveStandby: false
  # can specify ip for example
  # ip: 127.0.0.1
-  ip: # if not specify address, will use the first unicastable address as local ip
+  ip:  # if not specified, the first unicastable address is used as the local IP
  port: 13333
  grpc:
    serverMaxSendSize: 536870912
@@ -399,16 +399,16 @@ dataCoord:

dataNode:
  dataSync:
    flowGraph:
-      maxQueueLength: 16 # Maximum length of task queue in flowgraph
-      maxParallelism: 1024 # Maximum number of tasks executed in parallel in the flowgraph
-      maxParallelSyncTaskNum: 6 # Maximum number of sync tasks executed in parallel in each flush manager
+      maxQueueLength: 16  # Maximum length of the task queue in the flowgraph
+      maxParallelism: 1024  # Maximum number of tasks executed in parallel in the flowgraph
+      maxParallelSyncTaskNum: 6  # Maximum number of sync tasks executed in parallel in each flush manager
  segment:
-    insertBufSize: 16777216 # Max buffer size to flush for a single segment.
-    deleteBufBytes: 67108864 # Max buffer size to flush del for a single channel
-    syncPeriod: 600 # The period to sync segments if buffer is not empty.
+    insertBufSize: 16777216  # Max buffer size to flush for a single segment
+    deleteBufBytes: 67108864  # Max buffer size to flush deletes for a single channel
+    syncPeriod: 600  # The period at which to sync segments if the buffer is not empty
  # can specify ip for example
  # ip: 127.0.0.1
-  ip: # if not specify address, will use the first unicastable address as local ip
+  ip:  # if not specified, the first unicastable address is used as the local IP
  port: 21124
  grpc:
    serverMaxSendSize: 536870912
@@ -416,10 +416,10 @@ dataNode:
    clientMaxSendSize: 268435456
    clientMaxRecvSize: 268435456
  memory:
-    forceSyncEnable: true # `true` to force sync if memory usage is too high
-    forceSyncSegmentNum: 1 # number of segments to sync, segments with top largest buffer will be synced.
-    watermarkStandalone: 0.2 # memory watermark for standalone, upon reaching this watermark, segments will be synced.
-    watermarkCluster: 0.5 # memory watermark for cluster, upon reaching this watermark, segments will be synced.
+    forceSyncEnable: true  # `true` to force a sync if memory usage is too high
+    forceSyncSegmentNum: 1  # number of segments to sync; the segments with the largest buffers are synced first
+    watermarkStandalone: 0.2  # memory watermark for standalone; upon reaching it, segments will be synced
+    watermarkCluster: 0.5  # memory watermark for cluster; upon reaching it, segments will be synced
  timetick:
    byRPC: true
  channel:
@@ -430,14 +430,14 @@

# Configures the system log output.
log:
-  level: error # Only supports debug, info, warn, error, panic, or fatal. Default 'info'.
+  level: error  # Only supports debug, info, warn, error, panic, or fatal. Default 'info'.
  file:
-    rootPath: # root dir path to put logs, default "" means no log file will print. please adjust in embedded Milvus: /tmp/milvus/logs
-    maxSize: 300 # MB
-    maxAge: 10 # Maximum time for log retention in day.
+    rootPath:  # root directory for log files; the default "" writes no log file. please adjust in embedded Milvus: /tmp/milvus/logs
+    maxSize: 300  # MB
+    maxAge: 10  # Maximum log retention time in days
    maxBackups: 20
-  format: text # text or json
-  stdout: true # Stdout enable or not
+  format: text  # text or json
+  stdout: true  # Whether to enable stdout output

grpc:
  log:
@@ -450,9 +450,9 @@ grpc:
  keepAliveTime: 10000
  keepAliveTimeout: 20000
  maxMaxAttempts: 10
-  initialBackOff: 0.2 # seconds
-  maxBackoff: 10 # seconds
-  backoffMultiplier: 2.0 # deprecated
+  initialBackOff: 0.2  # seconds
+  maxBackoff: 10  # seconds
+  backoffMultiplier: 2.0  # deprecated
  clientMaxSendSize: 268435456
  clientMaxRecvSize: 268435456
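# Example (illustrative): switching from stdout-only logging to rotated JSON
# log files under a placeholder directory:
#
# log:
#   level: info
#   file:
#     rootPath: /tmp/milvus/logs
#     maxSize: 300
#     maxAge: 10
#     maxBackups: 20
#   format: json
#   stdout: false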
@@ -482,14 +482,14 @@ common:
    queryNodeSubNamePrefix: queryNode
    dataCoordSubNamePrefix: dataCoord
    dataNodeSubNamePrefix: dataNode
-  defaultPartitionName: _default # default partition name for a collection
-  defaultIndexName: _default_idx # default index name
-  entityExpiration: -1 # Entity expiration in seconds, CAUTION -1 means never expire
-  indexSliceSize: 16 # MB
+  defaultPartitionName: _default  # default partition name for a collection
+  defaultIndexName: _default_idx  # default index name
+  entityExpiration: -1  # Entity expiration in seconds, CAUTION: -1 means never expire
+  indexSliceSize: 16  # MB
  threadCoreCoefficient:
-    highPriority: 10 # This parameter specify how many times the number of threads is the number of cores in high priority thread pool
-    middlePriority: 5 # This parameter specify how many times the number of threads is the number of cores in middle priority thread pool
-    lowPriority: 1 # This parameter specify how many times the number of threads is the number of cores in low priority thread pool
+    highPriority: 10  # thread count as a multiple of the core count in the high-priority thread pool
+    middlePriority: 5  # thread count as a multiple of the core count in the middle-priority thread pool
+    lowPriority: 1  # thread count as a multiple of the core count in the low-priority thread pool
  DiskIndex:
    MaxDegree: 56
    SearchListSize: 100
@@ -498,9 +498,9 @@ common:
    SearchCacheBudgetGBRatio: 0.1
    LoadNumThreadRatio: 8
    BeamWidthRatio: 4
-  gracefulTime: 5000 # milliseconds. it represents the interval (in ms) by which the request arrival time needs to be subtracted in the case of Bounded Consistency.
-  gracefulStopTimeout: 1800 # seconds. it will force quit the server if the graceful stop process is not completed during this time.
-  storageType: minio # please adjust in embedded Milvus: local
+  gracefulTime: 5000  # milliseconds. the interval (in ms) subtracted from the request arrival time under Bounded Consistency.
+  gracefulStopTimeout: 1800  # seconds. the server is force-quit if the graceful stop does not complete within this time.
+  storageType: minio  # please adjust in embedded Milvus: local
  # Default value: auto
  # Valid values: [auto, avx512, avx2, avx, sse4_2]
  # This configuration is only used by querynode and indexnode; it selects the CPU instruction set for searching and index-building.
@@ -512,8 +512,8 @@ common:
  # superUsers: root
  tlsMode: 0
  session:
-    ttl: 30 # ttl value when session granting a lease to register service
-    retryTimes: 30 # retry times when session sending etcd requests
+    ttl: 30  # TTL value for the lease the session grants when registering a service
+    retryTimes: 30  # number of retries when the session sends etcd requests

  # preCreatedTopic decides whether to use an existing topic
  preCreatedTopic:
@@ -524,16 +524,16 @@ common:
  # need to set a separate topic to represent the currently consumed timestamp for each channel
  timeticker: "timetick-channel"

-  ImportMaxFileSize: 17179869184 # 16 * 1024 * 1024 * 1024
+  ImportMaxFileSize: 17179869184  # 16 * 1024 * 1024 * 1024
  # max file size to import for bulkInsert

  locks:
    metrics:
      enable: false
    threshold:
-      info: 500 # minimum milliseconds for printing durations in info level
-      warn: 1000 # minimum milliseconds for printing durations in warn level
-  ttMsgEnabled: true # Whether the instance disable sending ts messages
+      info: 500  # minimum milliseconds for printing durations at info level
+      warn: 1000  # minimum milliseconds for printing durations at warn level
+  ttMsgEnabled: true  # Whether the instance disables sending ts messages

# QuotaConfig, configurations of Milvus quota and limits.
# By default, we enable:
@@ -547,7 +547,7 @@ common:
# 4. DQL result rate protection;
# If necessary, you can also manually force to deny RW requests.
quotaAndLimits:
-  enabled: true # `true` to enable quota and limits, `false` to disable.
+  enabled: true  # `true` to enable quotas and limits, `false` to disable
  limits:
    maxCollectionNum: 65536
    maxCollectionNumPerDB: 65536
@@ -557,49 +557,49 @@
  quotaCenterCollectInterval: 3
  ddl:
    enabled: false
-    collectionRate: -1 # qps, default no limit, rate for CreateCollection, DropCollection, LoadCollection, ReleaseCollection
-    partitionRate: -1 # qps, default no limit, rate for CreatePartition, DropPartition, LoadPartition, ReleasePartition
+    collectionRate: -1  # qps, default no limit, rate for CreateCollection, DropCollection, LoadCollection, ReleaseCollection
+    partitionRate: -1  # qps, default no limit, rate for CreatePartition, DropPartition, LoadPartition, ReleasePartition
  indexRate:
    enabled: false
-    max: -1 # qps, default no limit, rate for CreateIndex, DropIndex
+    max: -1  # qps, default no limit, rate for CreateIndex, DropIndex
  flushRate:
    enabled: false
-    max: -1 # qps, default no limit, rate for flush
+    max: -1  # qps, default no limit, rate for flush
  compactionRate:
    enabled: false
-    max: -1 # qps, default no limit, rate for manualCompaction
+    max: -1  # qps, default no limit, rate for manualCompaction
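# Example (illustrative): enabling the DDL limiter with placeholder rates;
# -1 leaves a rate unlimited:
#
#   ddl:
#     enabled: true
#     collectionRate: 10   # qps across collection-level DDL operations
#     partitionRate: 10    # qps across partition-level DDL operations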
  dml:
    # dml limit rates, default no limit.
    # The maximum rate will not be greater than max.
    enabled: false
    insertRate:
      collection:
-        max: -1 # MB/s, default no limit
-      max: -1 # MB/s, default no limit
+        max: -1  # MB/s, default no limit
+      max: -1  # MB/s, default no limit
    upsertRate:
      collection:
-        max: -1 # MB/s, default no limit
-      max: -1 # MB/s, default no limit
+        max: -1  # MB/s, default no limit
+      max: -1  # MB/s, default no limit
    deleteRate:
      collection:
-        max: -1 # MB/s, default no limit
-      max: -1 # MB/s, default no limit
+        max: -1  # MB/s, default no limit
+      max: -1  # MB/s, default no limit
    bulkLoadRate:
      collection:
-        max: -1 # MB/s, default no limit, not support yet. TODO: limit bulkLoad rate
-      max: -1 # MB/s, default no limit, not support yet. TODO: limit bulkLoad rate
+        max: -1  # MB/s, default no limit, not supported yet. TODO: limit bulkLoad rate
+      max: -1  # MB/s, default no limit, not supported yet. TODO: limit bulkLoad rate
  dql:
    # dql limit rates, default no limit.
    # The maximum rate will not be greater than max.
    enabled: false
    searchRate:
      collection:
-        max: -1 # vps (vectors per second), default no limit
-      max: -1 # vps (vectors per second), default no limit
+        max: -1  # vps (vectors per second), default no limit
+      max: -1  # vps (vectors per second), default no limit
    queryRate:
      collection:
-        max: -1 # qps, default no limit
-      max: -1 # qps, default no limit
+        max: -1  # qps, default no limit
+      max: -1  # qps, default no limit
  limitWriting:
    # forceDeny false means dml requests are allowed (except for some
    # specific conditions, such as the memory of nodes reaching the watermark), true means always reject all dml requests.
@@ -616,10 +616,10 @@ quotaAndLimits:
      # When memoryLowWaterLevel < memory usage < memoryHighWaterLevel, reduce the dml rate;
      # When memory usage < memoryLowWaterLevel, no action.
      enabled: true
-      dataNodeMemoryLowWaterLevel: 0.85 # (0, 1], memoryLowWaterLevel in DataNodes
-      dataNodeMemoryHighWaterLevel: 0.95 # (0, 1], memoryHighWaterLevel in DataNodes
-      queryNodeMemoryLowWaterLevel: 0.85 # (0, 1], memoryLowWaterLevel in QueryNodes
-      queryNodeMemoryHighWaterLevel: 0.95 # (0, 1], memoryHighWaterLevel in QueryNodes
+      dataNodeMemoryLowWaterLevel: 0.85  # (0, 1], memoryLowWaterLevel in DataNodes
+      dataNodeMemoryHighWaterLevel: 0.95  # (0, 1], memoryHighWaterLevel in DataNodes
+      queryNodeMemoryLowWaterLevel: 0.85  # (0, 1], memoryLowWaterLevel in QueryNodes
+      queryNodeMemoryHighWaterLevel: 0.95  # (0, 1], memoryHighWaterLevel in QueryNodes
    growingSegmentsSizeProtection:
      # No action will be taken if the growing segments size is less than the low watermark.
      # When the growing segments size exceeds the low watermark, the dml rate will be reduced,
@@ -629,9 +629,9 @@ quotaAndLimits:
      lowWaterLevel: 0.2
      highWaterLevel: 0.4
    diskProtection:
-      enabled: true # When the total file size of object storage is greater than `diskQuota`, all dml requests would be rejected;
-      diskQuota: -1 # MB, (0, +inf), default no limit
-      diskQuotaPerCollection: -1 # MB, (0, +inf), default no limit
+      enabled: true  # When the total file size in object storage exceeds `diskQuota`, all dml requests are rejected
+      diskQuota: -1  # MB, (0, +inf), default no limit
+      diskQuotaPerCollection: -1  # MB, (0, +inf), default no limit
  limitReading:
    # forceDeny false means dql requests are allowed (except for some
    # specific conditions, such as the collection being dropped), true means always reject all dql requests.
@@ -669,7 +669,7 @@ trace:
  # Fractions >= 1 will always sample. Fractions < 0 are treated as zero.
sampleFraction: 0 jaeger: - url: # "http://127.0.0.1:14268/api/traces" + url: # "http://127.0.0.1:14268/api/traces" # when exporter is jaeger should set the jaeger's URL autoIndex: diff --git a/community/fm-asr-streaming-rag/nemo-retriever/model_configs/nv-rerank-qa-mistral-4b-A6000.yaml b/community/fm-asr-streaming-rag/nemo-retriever/model_configs/nv-rerank-qa-mistral-4b-A6000.yaml index a88baa3f..6c9b550f 100644 --- a/community/fm-asr-streaming-rag/nemo-retriever/model_configs/nv-rerank-qa-mistral-4b-A6000.yaml +++ b/community/fm-asr-streaming-rag/nemo-retriever/model_configs/nv-rerank-qa-mistral-4b-A6000.yaml @@ -14,4 +14,4 @@ models: max_shapes: [64, 512] max_queue_delay_microseconds: 100 dtype: float16 - override_layernorm_precision_to_fp32: true \ No newline at end of file + override_layernorm_precision_to_fp32: true diff --git a/community/fm-asr-streaming-rag/nim/configs/mistral-7b.yaml b/community/fm-asr-streaming-rag/nim/configs/mistral-7b.yaml index b978ca5f..9756782c 100644 --- a/community/fm-asr-streaming-rag/nim/configs/mistral-7b.yaml +++ b/community/fm-asr-streaming-rag/nim/configs/mistral-7b.yaml @@ -14,7 +14,13 @@ pipeline: num_instances: 4 preprocessor: prompt_templates: - chat: "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'].strip() + '\n\n' %}{% else %}{% set loop_messages = messages %}{% set system_message = '' %}{% endif %}{{ '' }}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 %}{% set content = system_message + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + '' }}{% endif %}{% endfor %}" + chat: "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'].strip() + + '\n\n' %}{% else %}{% set loop_messages = messages %}{% set system_message = '' %}{% endif %}{{ '' }}{% for message + in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles + must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 %}{% set content = system_message + + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + + '' }}{% endif %}{% endfor %}" stop_words: [] trt_llm: use: true @@ -36,4 +42,4 @@ trt_llm: enable_custom_all_reduce: 0 per_column_scaling: false kv_cache_free_gpu_mem_fraction: 0.3 - max_tokens_in_paged_kv_cache: "12288" \ No newline at end of file + max_tokens_in_paged_kv_cache: "12288" diff --git a/community/fm-asr-streaming-rag/sdr-holoscan/params.yml b/community/fm-asr-streaming-rag/sdr-holoscan/params.yml index 8059054a..f189553a 100644 --- a/community/fm-asr-streaming-rag/sdr-holoscan/params.yml +++ b/community/fm-asr-streaming-rag/sdr-holoscan/params.yml @@ -14,30 +14,30 @@ # limitations under the License. 
sensor: - sample_rate: 1_000_000 # Sample rate of sensor (Hz) + sample_rate: 1_000_000 # Sample rate of sensor (Hz) network_rx: - ip_addr: "0.0.0.0" - dst_port: 5005 - l4_proto: "udp" - batch_size: 47104 # Bytes to accumulate before emitting - header_bytes: 8 - max_payload_size: 1472 + ip_addr: "0.0.0.0" + dst_port: 5005 + l4_proto: "udp" + batch_size: 47104 # Bytes to accumulate before emitting + header_bytes: 8 + max_payload_size: 1472 pkt_format: - log_period: 5 # Log bandwidth processed every N (seconds) + log_period: 5 # Log bandwidth processed every N (seconds) lowpassfilt: - cutoff: 100_000 # Cutoff frequency of filter (Hz) - numtaps: 101 + cutoff: 100_000 # Cutoff frequency of filter (Hz) + numtaps: 101 resample: - sample_rate_out: 16_000 # Sample rate required by Riva ASR (16KHz PCM) - gain: 10.0 + sample_rate_out: 16_000 # Sample rate required by Riva ASR (16KHz PCM) + gain: 10.0 riva: - src_lang_code: "en-US" - uri: "localhost:50051" # Riva server URI ("riva_speech_api_port" in riva/config.sh) - automatic_punctuation: true - verbatim_transcripts: false - sample_rate: 16000 # Sample rate required by Riva ASR (16KHz PCM) \ No newline at end of file + src_lang_code: "en-US" + uri: "localhost:50051" # Riva server URI ("riva_speech_api_port" in riva/config.sh) + automatic_punctuation: true + verbatim_transcripts: false + sample_rate: 16000 # Sample rate required by Riva ASR (16KHz PCM) diff --git a/community/llm-prompt-design-helper/config.yaml b/community/llm-prompt-design-helper/config.yaml index c806dcf4..a2eae6b7 100644 --- a/community/llm-prompt-design-helper/config.yaml +++ b/community/llm-prompt-design-helper/config.yaml @@ -1,66 +1,66 @@ default: &default - system_prompt: > - You are an assistant to help answer user's question. Politely answer the question based on your knowledge. - few_shot_examples: - # - role: user - # content: | - # The odd numbers in this group add up to an even number: 4, 8, 9, 15, 12, 2, 1. + system_prompt: > + You are an assistant to help answer user's question. Politely answer the question based on your knowledge. + few_shot_examples: + # - role: user + # content: | + # The odd numbers in this group add up to an even number: 4, 8, 9, 15, 12, 2, 1. - # - role: assistant - # content: | - # The answer is False. + # - role: assistant + # content: | + # The answer is False. - # - role: user - # content: | - # The odd numbers in this group add up to an even number: 17, 10, 19, 4, 8, 12, 24. + # - role: user + # content: | + # The odd numbers in this group add up to an even number: 17, 10, 19, 4, 8, 12, 24. - # - role: assistant - # content: | - # The answer is True. - temperature: 0.0 - top_p: 0.7 - max_tokens: 1024 - seed: 42 + # - role: assistant + # content: | + # The answer is True. + temperature: 0.0 + top_p: 0.7 + max_tokens: 1024 + seed: 42 -"meta/llama3-70b-instruct": - system_prompt: > - You are an assistant to help answer user's question. Politely answer the question based on your knowledge. - # few_shot_examples: - # - role: user - # content: | - # The odd numbers in this group add up to an even number: 4, 8, 9, 15, 12, 2, 1. +"meta/llama3-70b-instruct": + system_prompt: > + You are an assistant to help answer user's question. Politely answer the question based on your knowledge. + # few_shot_examples: + # - role: user + # content: | + # The odd numbers in this group add up to an even number: 4, 8, 9, 15, 12, 2, 1. - # - role: assistant - # content: | - # The answer is False. + # - role: assistant + # content: | + # The answer is False. 
- # - role: user - # content: | - # The odd numbers in this group add up to an even number: 17, 10, 19, 4, 8, 12, 24. + # - role: user + # content: | + # The odd numbers in this group add up to an even number: 17, 10, 19, 4, 8, 12, 24. - # - role: assistant - # content: | - # The answer is True. - temperature: 0.0 - top_p: 0.7 - max_tokens: 1024 - seed: 42 + # - role: assistant + # content: | + # The answer is True. + temperature: 0.0 + top_p: 0.7 + max_tokens: 1024 + seed: 42 "nvidia/nemotron-4-340b-instruct": - system_prompt: > - You are an assistant to help answer user's question. Politely answer the question based on your knowledge. - few_shot_examples: - - temperature: 0.1 - top_p: 0.7 - max_tokens: 1024 - seed: 42 + system_prompt: > + You are an assistant to help answer user's question. Politely answer the question based on your knowledge. + few_shot_examples: + + temperature: 0.1 + top_p: 0.7 + max_tokens: 1024 + seed: 42 "microsoft/phi-3-mini-128k-instruct": - system_prompt: > - You are an assistant to help answer user's question. Politely answer the question based on your knowledge. - few_shot_examples: - - temperature: 0.1 - top_p: 0.7 - max_tokens: 1024 - seed: 42 \ No newline at end of file + system_prompt: > + You are an assistant to help answer user's question. Politely answer the question based on your knowledge. + few_shot_examples: + + temperature: 0.1 + top_p: 0.7 + max_tokens: 1024 + seed: 42 diff --git a/community/oran-chatbot-multimodal/config.yaml b/community/oran-chatbot-multimodal/config.yaml index 08fcb163..754da5b9 100644 --- a/community/oran-chatbot-multimodal/config.yaml +++ b/community/oran-chatbot-multimodal/config.yaml @@ -1,6 +1,6 @@ ## Default settings nvidia_api_key: "nvapi--***" -## Set these to required models endpoints from NVIDIA NGC +## Set these to required models endpoints from NVIDIA NGC llm_model: "mistralai/mixtral-8x7b-instruct-v0.1" embedding_model: "nvidia/nv-embedqa-e5-v5" reranker_model: "cross-encoder/ms-marco-MiniLM-L-6-v2" diff --git a/community/rag-developer-chatbot/docker-compose-dev-rag.yaml b/community/rag-developer-chatbot/docker-compose-dev-rag.yaml index ac0e9286..ddfae574 100644 --- a/community/rag-developer-chatbot/docker-compose-dev-rag.yaml +++ b/community/rag-developer-chatbot/docker-compose-dev-rag.yaml @@ -7,13 +7,13 @@ services: context: . 
dockerfile: Dockerfile.notebook ports: - - "8888:8888" - - "7860:7860" + - "8888:8888" + - "7860:7860" expose: - - "8888" - - "7860" + - "8888" + - "7860" env_file: - - compose.env + - compose.env deploy: resources: reservations: diff --git a/community/streaming_ingest_rag/docker-compose.yml b/community/streaming_ingest_rag/docker-compose.yml index eb4946cf..d7111b26 100644 --- a/community/streaming_ingest_rag/docker-compose.yml +++ b/community/streaming_ingest_rag/docker-compose.yml @@ -64,7 +64,7 @@ services: command: ["milvus", "run", "standalone"] hostname: milvus security_opt: - - seccomp:unconfined + - seccomp:unconfined environment: ETCD_ENDPOINTS: etcd:2379 MINIO_ADDRESS: minio:9000 @@ -146,7 +146,7 @@ services: depends_on: - kafka container_name: init-kafka - entrypoint: [ '/bin/sh', '-c' ] + entrypoint: ['/bin/sh', '-c'] command: | " # blocks until kafka is reachable @@ -160,7 +160,7 @@ services: kafka-topics --bootstrap-server kafka:19092 --list " networks: - - default + - default streaming-ingest-dev: restart: always @@ -177,9 +177,9 @@ services: resources: reservations: devices: - - driver: nvidia - device_ids: ['0'] - capabilities: [gpu] + - driver: nvidia + device_ids: ['0'] + capabilities: [gpu] image: streaming_ingest_morpheus_jupyter container_name: streaming-ingest-dev ports: @@ -215,9 +215,9 @@ services: resources: reservations: devices: - - driver: nvidia - device_ids: ['0'] - capabilities: [gpu] + - driver: nvidia + device_ids: ['0'] + capabilities: [gpu] networks: - default environment: @@ -268,14 +268,14 @@ services: "--model-control-mode=explicit", "--load-model", "all-MiniLM-L6-v2", - ] + ] deploy: resources: reservations: devices: - - driver: nvidia - device_ids: ['0'] - capabilities: [gpu] + - driver: nvidia + device_ids: ['0'] + capabilities: [gpu] healthcheck: test: ["CMD", "curl", "-f", "localhost:8000/v2/health/ready"] interval: 30s diff --git a/community/streaming_ingest_rag/docker/conda/environments/conda_env.yaml b/community/streaming_ingest_rag/docker/conda/environments/conda_env.yaml index 3eee24c8..20c350f2 100644 --- a/community/streaming_ingest_rag/docker/conda/environments/conda_env.yaml +++ b/community/streaming_ingest_rag/docker/conda/environments/conda_env.yaml @@ -18,7 +18,7 @@ channels: - defaults dependencies: - arxiv=1.4 - - onnx # required for triton model export + - onnx # required for triton model export - pip - pypdf=3.16 - newspaper3k=0.2 @@ -35,9 +35,9 @@ dependencies: ####### Pip Dependencies (keep sorted!) 
####### - pip: - - farm-haystack[file-conversion] - - grpcio-status==1.58 # To keep in sync with 1.58 grpcio which is installed for Morpheus - - langchain==0.0.310 - - pymilvus==2.3.2 # The conda package is woefully out of date and incorrect - - sentence_transformers==2.3.0 - - PyMuPDF==1.23.21 + - farm-haystack[file-conversion] + - grpcio-status==1.58 # To keep in sync with 1.58 grpcio which is installed for Morpheus + - langchain==0.0.310 + - pymilvus==2.3.2 # The conda package is woefully out of date and incorrect + - sentence_transformers==2.3.0 + - PyMuPDF==1.23.21 diff --git a/community/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/kafka_config.yaml b/community/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/kafka_config.yaml index 95b77972..36026793 100644 --- a/community/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/kafka_config.yaml +++ b/community/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/kafka_config.yaml @@ -38,7 +38,7 @@ vdb_pipeline: module_output_id: "output" transform_type: web_scraper deserialize_config: - output_batch_size: 2048 # Number of chunked documents per output batch + output_batch_size: 2048 # Number of chunked documents per output batch kafka_config: max_batch_size: 64 bootstrap_servers: "kafka:19092" @@ -65,13 +65,13 @@ vdb_pipeline: config: stage_config: enable_monitor: true - run_indefinitely: true # TODO map to kafka source + run_indefinitely: true # TODO map to kafka source namespace: "morpheus_examples_llm" module_id: "kafka_source_pipe" module_output_id: "output" transform_type: raw_chunker deserialize_config: - output_batch_size: 2048 # Number of chunked documents per output batch + output_batch_size: 2048 # Number of chunked documents per output batch kafka_config: max_batch_size: 256 bootstrap_servers: "kafka:19092" @@ -88,7 +88,7 @@ vdb_pipeline: chunk_size: 512 payload_column: "payload" vdb_config: - vdb_resource_name: "vdb_kafka_raw" + vdb_resource_name: "vdb_kafka_raw" tokenizer: model_kwargs: @@ -100,11 +100,11 @@ vdb_pipeline: model_name: "bert-base-uncased-hash" vdb: - batch_size: 16384 # Vector DB max batch size + batch_size: 16384 # Vector DB max batch size resource_name: "vdb_kafka_raw" # Identifier for the resource in the vector database embedding_size: 384 - write_time_interval: 20 # Max time between successive uploads - recreate: False # Whether to recreate the resource if it already exists + write_time_interval: 20 # Max time between successive uploads + recreate: false # Whether to recreate the resource if it already exists service: "milvus" # Specify the type of vector database uri: "http://milvus:19530" # URI for connecting to the Vector Database server resource_schemas: @@ -153,45 +153,5 @@ vdb_pipeline: - name: embedding dtype: FLOAT_VECTOR description: Embedding vectors representing the data entry - dim: 384 # Size of the embeddings to store in the vector database - description: Collection schema for diverse data sources - - vdb_kafka_scrape: - index_conf: - field_name: embedding - metric_type: L2 - index_type: HNSW - params: - M: 8 - efConstruction: 64 - - schema_conf: - enable_dynamic_field: true - schema_fields: - - name: id - dtype: INT64 - description: Primary key for the collection - is_primary: true - auto_id: true - - name: title - dtype: VARCHAR - description: Title or heading of the data entry - max_length: 65_535 - - name: source - dtype: VARCHAR - description: Source or origin of the data entry - max_length: 65_535 - - 
name: summary - dtype: VARCHAR - description: Brief summary or abstract of the data content - max_length: 65_535 - - name: content - dtype: VARCHAR - description: Main content or body of the data entry - max_length: 65_535 - - name: embedding - dtype: FLOAT_VECTOR - description: Embedding vectors representing the data entry - dim: 384 # Size of the embeddings to store in the vector database + dim: 384 # Size of the embeddings to store in the vector database description: Collection schema for diverse data sources - \ No newline at end of file diff --git a/community/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/vdb_config.yaml b/community/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/vdb_config.yaml index 340cbe3c..f0b8d8c3 100644 --- a/community/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/vdb_config.yaml +++ b/community/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/vdb_config.yaml @@ -32,11 +32,11 @@ vdb_pipeline: - type: rss name: "rss_cve" config: - batch_size: 128 # Number of rss feeds per batch + batch_size: 128 # Number of rss feeds per batch cache_dir: "./.cache/http" cooldown_interval_sec: 600 - enable_cache: False - enable_monitor: True + enable_cache: false + enable_monitor: true feed_input: - "https://www.theregister.com/security/headlines.atom" - "https://isc.sans.edu/dailypodcast.xml" @@ -71,7 +71,7 @@ vdb_pipeline: - "https://blog.google/threat-analysis-group/rss/" - "https://intezer.com/feed/" interval_sec: 600 - output_batch_size: 2048 # Number of chunked documents per output batch + output_batch_size: 2048 # Number of chunked documents per output batch request_timeout_sec: 2.0 run_indefinitely: true stop_after_sec: 0 @@ -89,9 +89,9 @@ vdb_pipeline: chunk_size: 512 num_threads: 10 chunk_overlap: 51 - enable_monitor: True + enable_monitor: true filenames: - - "./morpheus/data/randomly_generated_cybersecurity_text.txt" # will need to supply + - "./morpheus/data/randomly_generated_cybersecurity_text.txt" # will need to supply vdb_resource_name: "vdb_pdf" watch: false @@ -105,10 +105,10 @@ vdb_pipeline: csv: chunk_overlap: 51 chunk_size: 1024 - text_column_names: # For CSV files, the data from each text_column_name will be concatenated together. + text_column_names: # For CSV files, the data from each text_column_name will be concatenated together. - "raw" # Requires same schema for all CSV files. 
- "request_header_referer" - enable_monitor: True + enable_monitor: true filenames: - "./models/datasets/training-data/log-parsing-training-data.csv" vdb_resource_name: "vdb_csv" @@ -118,7 +118,7 @@ vdb_pipeline: name: "custom_source_text" config: batch_size: 1024 - enable_monitor: True + enable_monitor: true extractor_config: chunk_size: 512 chunk_overlap: 51 @@ -144,7 +144,7 @@ vdb_pipeline: batch_size: 5120 resource_name: "VDBGENERAL" # Identifier for the resource in the vector database embedding_size: 384 - recreate: True # Whether to recreate the resource if it already exists + recreate: true # Whether to recreate the resource if it already exists service: "milvus" # Specify the type of vector database uri: "http://localhost:19530" # URI for connecting to the Vector Database server resource_schemas: @@ -184,7 +184,7 @@ vdb_pipeline: - name: embedding dtype: FLOAT_VECTOR description: Embedding vectors representing the data entry - dim: 384 # Size of the embeddings to store in the vector database + dim: 384 # Size of the embeddings to store in the vector database description: Collection schema for diverse data sources vdb_pdf: index_conf: @@ -222,7 +222,7 @@ vdb_pipeline: - name: embedding dtype: FLOAT_VECTOR description: Embedding vectors representing the data entry - dim: 384 # Size of the embeddings to store in the vector database + dim: 384 # Size of the embeddings to store in the vector database description: Collection schema for diverse data sources vdb_csv: index_conf: @@ -260,7 +260,7 @@ vdb_pipeline: - name: embedding dtype: FLOAT_VECTOR description: Embedding vectors representing the data entry - dim: 384 # Size of the embeddings to store in the vector database + dim: 384 # Size of the embeddings to store in the vector database description: Collection schema for diverse data sources vdb_rss: index_conf: @@ -298,5 +298,5 @@ vdb_pipeline: - name: embedding dtype: FLOAT_VECTOR description: Embedding vectors representing the data entry - dim: 384 # Size of the embeddings to store in the vector database - description: Collection schema for diverse data sources \ No newline at end of file + dim: 384 # Size of the embeddings to store in the vector database + description: Collection schema for diverse data sources diff --git a/industries/healthcare/agentic-healthcare-front-desk/docker-compose.yaml b/industries/healthcare/agentic-healthcare-front-desk/docker-compose.yaml index 8bc03afc..299bf046 100644 --- a/industries/healthcare/agentic-healthcare-front-desk/docker-compose.yaml +++ b/industries/healthcare/agentic-healthcare-front-desk/docker-compose.yaml @@ -3,54 +3,52 @@ services: container_name: chain-server-healthcare-assistant image: chain-server-healthcare-assistant:${TAG:-latest} env_file: - - path: ./vars.env - required: true + - path: ./vars.env + required: true build: context: ./ dockerfile: Dockerfile entrypoint: python3 chain_server/chain_server.py --assistant intake --port 8081 ports: - - "8081:8081" + - "8081:8081" expose: - - "8081" + - "8081" volumes: - ./graph_definitions/graph_images:/graph_images shm_size: 5gb - patient-intake-ui: container_name: patient-intake-ui image: patient-intake-ui:${TAG:-latest} env_file: - - path: ./vars.env - required: true + - path: ./vars.env + required: true build: context: ./ dockerfile: Dockerfile entrypoint: python3 graph_definitions/graph_patient_intake_only.py --port 7860 ports: - - "7860:7860" + - "7860:7860" expose: - - "7860" + - "7860" volumes: - ./graph_definitions/graph_images:/graph_images shm_size: 5gb - 
appointment-making-ui: container_name: appointment-making-ui image: appointment-making-ui:${TAG:-latest} env_file: - - path: ./vars.env - required: true + - path: ./vars.env + required: true build: context: ./ dockerfile: Dockerfile entrypoint: python3 graph_definitions/graph_appointment_making_only.py --port 7860 ports: - - "7860:7860" + - "7860:7860" expose: - - "7860" + - "7860" volumes: - ./graph_definitions/graph_images:/graph_images shm_size: 5gb @@ -59,16 +57,16 @@ services: container_name: medication-lookup-ui image: medication-lookup-ui:${TAG:-latest} env_file: - - path: ./vars.env - required: true + - path: ./vars.env + required: true build: context: ./ dockerfile: Dockerfile entrypoint: python3 graph_definitions/graph_medication_lookup_only.py --port 7860 ports: - - "7860:7860" + - "7860:7860" expose: - - "7860" + - "7860" volumes: - ./graph_definitions/graph_images:/graph_images shm_size: 5gb @@ -77,16 +75,16 @@ services: container_name: full-agent-ui image: full-agent-ui:${TAG:-latest} env_file: - - path: ./vars.env - required: true + - path: ./vars.env + required: true build: context: ./ dockerfile: Dockerfile entrypoint: python3 graph_definitions/graph.py --port 7860 ports: - - "7860:7860" + - "7860:7860" expose: - - "7860" + - "7860" volumes: - ./graph_definitions/graph_images:/graph_images - shm_size: 5gb \ No newline at end of file + shm_size: 5gb diff --git a/industries/healthcare/medical-device-training-assistant/docker-compose-nim-ms.yaml b/industries/healthcare/medical-device-training-assistant/docker-compose-nim-ms.yaml index 4387840e..5964261d 100644 --- a/industries/healthcare/medical-device-training-assistant/docker-compose-nim-ms.yaml +++ b/industries/healthcare/medical-device-training-assistant/docker-compose-nim-ms.yaml @@ -3,12 +3,12 @@ services: container_name: nemollm-inference-microservice image: nvcr.io/nim/meta/llama3-8b-instruct:1.0.0 volumes: - - ${MODEL_DIRECTORY}:/opt/nim/.cache + - ${MODEL_DIRECTORY}:/opt/nim/.cache user: "${USERID}" ports: - - "8000:8000" + - "8000:8000" expose: - - "8000" + - "8000" environment: NGC_API_KEY: ${NGC_API_KEY} shm_size: 20gb @@ -31,11 +31,11 @@ services: container_name: nemo-retriever-embedding-microservice image: nvcr.io/nim/nvidia/nv-embedqa-e5-v5:1.0.0 volumes: - - ${MODEL_DIRECTORY}:/opt/nim/.cache + - ${MODEL_DIRECTORY}:/opt/nim/.cache ports: - - "9080:8000" + - "9080:8000" expose: - - "8000" + - "8000" environment: NGC_API_KEY: ${NGC_API_KEY} user: "${USERID}" @@ -59,11 +59,11 @@ services: container_name: nemo-retriever-ranking-microservice image: nvcr.io/nim/nvidia/nv-rerankqa-mistral-4b-v3:1.0.0 volumes: - - ${MODEL_DIRECTORY}:/opt/nim/.cache + - ${MODEL_DIRECTORY}:/opt/nim/.cache ports: - - "1976:8000" + - "1976:8000" expose: - - "8000" + - "8000" environment: NGC_API_KEY: ${NGC_API_KEY} user: "${USERID}" diff --git a/industries/healthcare/medical-device-training-assistant/docker-compose-vectordb.yaml b/industries/healthcare/medical-device-training-assistant/docker-compose-vectordb.yaml index 7afb929e..4abf756a 100644 --- a/industries/healthcare/medical-device-training-assistant/docker-compose-vectordb.yaml +++ b/industries/healthcare/medical-device-training-assistant/docker-compose-vectordb.yaml @@ -3,18 +3,17 @@ services: container_name: pgvector image: pgvector/pgvector:pg16 ports: - - 5432:5432 + - 5432:5432 expose: - - "5432" + - "5432" volumes: - - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/data:/var/lib/postgresql/data + - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/data:/var/lib/postgresql/data environment: - - 
POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password} - - POSTGRES_USER=${POSTGRES_USER:-postgres} - - POSTGRES_DB=${POSTGRES_DB:-api} + - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password} + - POSTGRES_USER=${POSTGRES_USER:-postgres} + - POSTGRES_DB=${POSTGRES_DB:-api} profiles: ["pgvector"] - etcd: container_name: milvus-etcd image: quay.io/coreos/etcd:v3.5.5 @@ -73,7 +72,7 @@ services: - "9091:9091" depends_on: - "etcd" - - "minio" + - "minio" profiles: ["nemo-retriever", "milvus", ""] elasticsearch: diff --git a/industries/healthcare/medical-device-training-assistant/docker-compose.yaml b/industries/healthcare/medical-device-training-assistant/docker-compose.yaml index 993b6c5e..b93955c9 100644 --- a/industries/healthcare/medical-device-training-assistant/docker-compose.yaml +++ b/industries/healthcare/medical-device-training-assistant/docker-compose.yaml @@ -1,7 +1,7 @@ include: - path: - - docker-compose-vectordb.yaml - - docker-compose-nim-ms.yaml + - docker-compose-vectordb.yaml + - docker-compose-nim-ms.yaml services: chain-server: @@ -28,7 +28,7 @@ services: APP_TEXTSPLITTER_MODELNAME: Snowflake/snowflake-arctic-embed-l APP_TEXTSPLITTER_CHUNKSIZE: 506 APP_TEXTSPLITTER_CHUNKOVERLAP: 200 - APP_RANKING_MODELNAME: ${APP_RANKING_MODELNAME:-"nvidia/nv-rerankqa-mistral-4b-v3"} # Leave it blank to avoid using ranking + APP_RANKING_MODELNAME: ${APP_RANKING_MODELNAME:-"nvidia/nv-rerankqa-mistral-4b-v3"} # Leave it blank to avoid using ranking APP_RANKING_MODELENGINE: ${APP_RANKING_MODELENGINE:-nvidia-ai-endpoints} APP_RANKING_SERVERURL: ${APP_RANKING_SERVERURL:-""} NVIDIA_API_KEY: ${NVIDIA_API_KEY} @@ -42,9 +42,9 @@ services: OTEL_EXPORTER_OTLP_PROTOCOL: grpc LOGLEVEL: ${LOGLEVEL:-INFO} ports: - - "8081:8081" + - "8081:8081" expose: - - "8081" + - "8081" shm_size: 5gb depends_on: nemollm-embedding: @@ -57,7 +57,6 @@ services: condition: service_healthy required: false - rag-playground: container_name: rag-playground image: rag-playground:${TAG:-latest} @@ -75,8 +74,8 @@ services: OTEL_EXPORTER_OTLP_PROTOCOL: grpc ENABLE_TRACING: false # if locally hosting Riva: - #RIVA_API_URI: :50051 - #TTS_SAMPLE_RATE: 48000 + # RIVA_API_URI: :50051 + # TTS_SAMPLE_RATE: 48000 # if using Riva API Endpoint on NVIDIA API Catalog RIVA_API_URI: grpc.nvcf.nvidia.com:443 NVIDIA_API_KEY: ${NVIDIA_API_KEY} @@ -85,11 +84,11 @@ services: TTS_SAMPLE_RATE: 48000 ports: - - "8090:8090" + - "8090:8090" expose: - - "8090" + - "8090" depends_on: - - chain-server + - chain-server networks: default: diff --git a/industries/healthcare/medical-device-training-assistant/evaluation/docker-compose.yaml b/industries/healthcare/medical-device-training-assistant/evaluation/docker-compose.yaml index 7d28acd7..dc861a8a 100644 --- a/industries/healthcare/medical-device-training-assistant/evaluation/docker-compose.yaml +++ b/industries/healthcare/medical-device-training-assistant/evaluation/docker-compose.yaml @@ -9,15 +9,14 @@ services: - ${DATA_DIR}:/data - ${OUTPUT_DIR}:/output command: > - python3 /opt/evaluation/query_rag_with_gt_questions_and_evaluate.py - --gt_qa_pairs_doc /data/${GT_QA_DOC:-ELSA_IFU_RAG_Example_QandA.txt} + python3 /opt/evaluation/query_rag_with_gt_questions_and_evaluate.py + --gt_qa_pairs_doc /data/${GT_QA_DOC:-ELSA_IFU_RAG_Example_QandA.txt} --output_dir /output --eval_result_name eval_result environment: NVIDIA_API_KEY: ${NVIDIA_API_KEY} shm_size: 5gb - networks: default: name: nvidia-rag diff --git a/industries/healthcare/medical-device-training-assistant/prompt.yaml 
b/industries/healthcare/medical-device-training-assistant/prompt.yaml index 6234a38e..9c052fe9 100644 --- a/industries/healthcare/medical-device-training-assistant/prompt.yaml +++ b/industries/healthcare/medical-device-training-assistant/prompt.yaml @@ -1,22 +1,22 @@ chat_template: | - You are a helpful, respectful and honest assistant. - Always answer as helpfully as possible, while being safe. - Please ensure that your responses are positive in nature. + You are a helpful, respectful and honest assistant. + Always answer as helpfully as possible, while being safe. + Please ensure that your responses are positive in nature. rag_template: | - [INST] <> - Use the following context to answer the user's question. If you don't know the answer, - just say that you don't know, don't try to make up an answer. - <> - [INST] Context: {context_str} Question: {query_str} Only return the helpful - answer below and nothing else. Helpful answer:[/INST] + [INST] <> + Use the following context to answer the user's question. If you don't know the answer, + just say that you don't know, don't try to make up an answer. + <> + [INST] Context: {context_str} Question: {query_str} Only return the helpful + answer below and nothing else. Helpful answer:[/INST] -app_chain_template: | - You are a document chatbot. Help the user as they ask questions about documents. - User message just asked: {input}\n\n - For this, we have retrieved the following potentially-useful info: - Conversation History Retrieved: - {history}\n\n - Document Retrieved: - {context}\n\n - Answer only from retrieved data. Make your response conversational. +app_chain_template: |- + You are a document chatbot. Help the user as they ask questions about documents. + User message just asked: {input}\n\n + For this, we have retrieved the following potentially-useful info: + Conversation History Retrieved: + {history}\n\n + Document Retrieved: + {context}\n\n + Answer only from retrieved data. Make your response conversational. diff --git a/nemo/retriever-synthetic-data-generation/scripts/conf/config-fiqa.yaml b/nemo/retriever-synthetic-data-generation/scripts/conf/config-fiqa.yaml index 99c88eb2..507163e4 100644 --- a/nemo/retriever-synthetic-data-generation/scripts/conf/config-fiqa.yaml +++ b/nemo/retriever-synthetic-data-generation/scripts/conf/config-fiqa.yaml @@ -6,8 +6,7 @@ max_examples: 220 use_original: false # Set true if input file contains original questions and would like to evaluate using the original data pre_processors: - - - _target_: nemo_retriever_sdg.DummyPreprocessor + - _target_: nemo_retriever_sdg.DummyPreprocessor qa_generator: _target_: nemo_retriever_sdg.SimpleQAGenerator @@ -37,19 +36,19 @@ qa_generator: - Generate questions that are relevant to the idea expressed in the input document, and the input document contains the complete answer to your question. - Generate questions that provide specific context that can lead to the specific answer contained in the input document. - Generate questions that are varied and different from each other. You can change up the phrasing, vocabulary, complexity, and the type of questions you ask throughout the task. - - DO NOT copy and paste exact phrasing from the test. Formulate questions in your own words. + - DO NOT copy and paste exact phrasing from the test. Formulate questions in your own words. - Generate answers to the questions as well. - - Provide an explanation as to why the generated question is good. Use the following example questions and answers for reference. 
- - Generated Questions should start with Question: + - Provide an explanation as to why the generated question is good. Use the following example questions and answers for reference. + - Generated Questions should start with Question: - Generated Answers should start with Answer: - Generated Explanations should start with Explanation: - + Examples: Input document: Just have the associate sign the back and then deposit it. It's called a third party cheque and is perfectly legal. I wouldn't be surprised if it has a longer hold period and, as always, you don't get the money if the cheque doesn't clear. Now, you may have problems if it's a large amount or you're not very well known at the bank. In that case you can have the associate go to the bank and endorse it in front of the teller with some ID. You don't even technically have to be there. Anybody can deposit money to your account if they have the account number. He could also just deposit it in his account and write a cheque to the business. Have the check reissued to the proper payee. - + Question: How to deposit a cheque issued to an associate in my business into my business account? @@ -70,12 +69,12 @@ qa_generator: Question: Do I need a new EIN since I am hiring employees for my LLC? - + user_prompt_template: | Generate {num_questions} questions and corresponding answers based on Input Document. Input Document: - {document} + {document} easiness_filter: _target_: nemo_retriever_sdg.EasinessFilter @@ -97,7 +96,7 @@ answerability_filter: Criterion 2 - Is it clear what type of answer or information the question seeks? The question should convey its purpose without ambiguity, allowing for a direct and relevant response. Criterion 3 - Does the content in the context contain information that can answer the question or part of the question? Criterion 4 - Does the content in the context completely answer the question? - + Provide your response in a mandatory dictionary format, and a short explanation of the rating like { \"criterion_1_explanation\": "", @@ -109,28 +108,21 @@ answerability_filter: \"criterion_4_explanation\": "", \"criterion_4\": "" } - Provide only the dictionary response and nothing else. - + Provide only the dictionary response and nothing else. 
+ user_prompt_template: | Context Passage: {context} Question: {question} - + filters: - - - ${answerability_filter} - - - ${easiness_filter} - + - ${answerability_filter} + - ${easiness_filter} post_processors: - - - _target_: nemo_retriever_sdg.DivergenceCalculator + - _target_: nemo_retriever_sdg.DivergenceCalculator analyzers: - - - _target_: nemo_retriever_sdg.QuestionLengthAnalyzer - - - _target_: nemo_retriever_sdg.LexicalDivergenceAnalyzer - + - _target_: nemo_retriever_sdg.QuestionLengthAnalyzer + - _target_: nemo_retriever_sdg.LexicalDivergenceAnalyzer diff --git a/nemo/retriever-synthetic-data-generation/scripts/conf/config-nq.yaml b/nemo/retriever-synthetic-data-generation/scripts/conf/config-nq.yaml index 0cac031b..50398424 100644 --- a/nemo/retriever-synthetic-data-generation/scripts/conf/config-nq.yaml +++ b/nemo/retriever-synthetic-data-generation/scripts/conf/config-nq.yaml @@ -6,8 +6,7 @@ max_examples: 1500 use_original: false # Set true if input file contains original questions and would like to evaluate using the original data pre_processors: - - - _target_: nemo_retriever_sdg.DummyPreprocessor + - _target_: nemo_retriever_sdg.DummyPreprocessor qa_generator: _target_: nemo_retriever_sdg.SimpleQAGenerator @@ -28,13 +27,13 @@ qa_generator: num_questions: 3 squad_format: true system_prompt: | - Generate questions that are relevant to the input document provided. + Generate questions that are relevant to the input document provided. Follow these General Instructions: - Questions must be completely answered by the input document. - Questions must be relevant to the input document. - Do not generate questions which requires looking at the input document to comprehend the question - Generate questions and answers to the generated questions. - - Generated Questions should start with Question: + - Generated Questions should start with Question: - Generated Answers should start with Answer: Follow this chain of thought when formulating questions: Step 1: Identify key phrases and entities in the input document @@ -42,12 +41,12 @@ qa_generator: Compress any compounded questions to shorter questions to sound realistic. Questions can also be in the form of short phrases. Use the following examples as guidelines. - + Examples: Input document: In November 2013, Senate Democrats led by Harry Reid used the nuclear option to eliminate the 60 - vote rule on executive branch nominations and federal judicial appointments, but not for the Supreme Court. In April 2017, Senate Republicans led by Mitch McConnell extended the nuclear option to Supreme Court and the nomination of Neil Gorsuch ending the debate. - + Question: who changed the senate rules for supreme court nominees? @@ -74,12 +73,12 @@ qa_generator: Question: what is the symbol for hugs and kisses? - + user_prompt_template: | Generate {num_questions} questions and corresponding answers based on Input Document. Input Document: - {document} + {document} easiness_filter: _target_: nemo_retriever_sdg.EasinessFilter @@ -101,7 +100,7 @@ answerability_filter: Criterion 2 - Is it clear what type of answer or information the question seeks? The question should convey its purpose without ambiguity, allowing for a direct and relevant response. Criterion 3 - Does the content in the context contain information that can answer the question or part of the question? Criterion 4 - Does the content in the context completely answer the question? 
-      
+
       Provide your response in a mandatory dictionary format, and a short explanation of the rating like
       {
       \"criterion_1_explanation\": "",
@@ -113,28 +112,21 @@ answerability_filter:
       \"criterion_4_explanation\": "",
       \"criterion_4\": ""
       }
-      Provide only the dictionary response and nothing else.
-      
+      Provide only the dictionary response and nothing else.
+
   user_prompt_template: |
       Context Passage:
      {context}
       Question:
       {question}
-      
-filters: 
-  -
-  - ${answerability_filter}
-  -
-  - ${easiness_filter}
+filters:
+  - ${answerability_filter}
+  - ${easiness_filter}

 post_processors:
-  -
-  - _target_: nemo_retriever_sdg.DivergenceCalculator
+  - _target_: nemo_retriever_sdg.DivergenceCalculator

 analyzers:
-  -
-  - _target_: nemo_retriever_sdg.QuestionLengthAnalyzer
-  -
-  - _target_: nemo_retriever_sdg.LexicalDivergenceAnalyzer
-
+  - _target_: nemo_retriever_sdg.QuestionLengthAnalyzer
+  - _target_: nemo_retriever_sdg.LexicalDivergenceAnalyzer
diff --git a/nemo/retriever-synthetic-data-generation/scripts/conf/config.yaml b/nemo/retriever-synthetic-data-generation/scripts/conf/config.yaml
index 09128d5e..47b5c00f 100644
--- a/nemo/retriever-synthetic-data-generation/scripts/conf/config.yaml
+++ b/nemo/retriever-synthetic-data-generation/scripts/conf/config.yaml
@@ -2,12 +2,11 @@ input_file: ${input_file}
 input_format: ${input_format}  # squad or rawdoc
 output_dir: ${output_dir}
 api_key: ${api_key}
-max_examples: 20 # Remove this line to use the entire dataset
+max_examples: 20  # Remove this line to use the entire dataset
 use_original: false  # Set true if input file contains original questions and would like to evaluate using the original data

 pre_processors:
-  -
-  - _target_: nemo_retriever_sdg.DummyPreprocessor
+  - _target_: nemo_retriever_sdg.DummyPreprocessor

 qa_generator:
   _target_: nemo_retriever_sdg.SimpleQAGenerator
@@ -34,19 +33,18 @@ qa_generator:
       - Generate questions that are relevant to the idea expressed in the input document, and the input document contains the complete answer to your question.
       - Generate questions that provide specific context that can lead to the specific answer contained in the input document.
       - Generate questions that are varied and different from each other. You can change up the phrasing, vocabulary, complexity, and the type of questions you ask throughout the task.
-      - DO NOT copy and paste exact phrasing from the text. Formulate questions in your own words. 
+      - DO NOT copy and paste exact phrasing from the text. Formulate questions in your own words.
       - Generate answers to the questions as well.
-      - Provide an explanation as to why the generated question is good. Use the following example questions and answers for reference. 
-      - Generated Questions should start with Question: 
+      - Provide an explanation as to why the generated question is good. Use the following example questions and answers for reference.
+      - Generated Questions should start with Question:
       - Generated Answers should start with Answer:
       - Explanations should start with Explanation:
-      
+
       Examples:
       Input document:
       We witnessed a flurry of defaults in 2015-2016 dominated by aggressive 2012-2014 vintage energy sector issuance. High cost producers with inadequate liquidity found bankruptcy to be their only option in the face of $30 oil. 2016 was the fifth highest default volume year on record with 80% of defaults occurring in commodity credits. The default rate for energy issuers was approximately 20%.
      We believe that most of the aggressive credits in these sectors have now restructured (the average energy bond trades at $98 today, up from $56 in February, 2016).
-
       Question:
       Which year has the highest default volume of all time?
       Explanation:
@@ -81,12 +79,12 @@ qa_generator:
       What is the common statistic mentioned in the document?
       Explanation:
       This is a bad question! It's too generic and vague. It assumes that the document is being looked at when the question is being asked.
-      
+
   user_prompt_template: |
       Generate {num_questions} questions and corresponding answers based on Input Document.
       Input Document:
-      {document}
+      {document}

 easiness_filter:
   _target_: nemo_retriever_sdg.EasinessFilter
@@ -104,8 +102,8 @@ easiness_filter:
 #   filter_cfg:
 #     filter_threshold: 0.75
 #     embedding_model: "intfloat/e5-large-unsupervised"
-#     batch_size: 8 
-
+#     batch_size: 8
+
 answerability_filter:
   _target_: nemo_retriever_sdg.AnswerabilityFilter
   filter_cfg:
@@ -119,7 +117,7 @@ answerability_filter:
       Criterion 2 - Is it clear what type of answer or information the question seeks? The question should convey its purpose without ambiguity, allowing for a direct and relevant response.
       Criterion 3 - Does the content in the context contain information that can answer the question or part of the question?
       Criterion 4 - Does the content in the context completely answer the question?
-      
+
       Provide your response in a mandatory dictionary format, and a short explanation of the rating like
       {
       \"criterion_1_explanation\": "",
@@ -131,36 +129,30 @@ answerability_filter:
       \"criterion_4_explanation\": "",
       \"criterion_4\": ""
       }
-      Provide only the dictionary response and nothing else.
-      
+      Provide only the dictionary response and nothing else.
+
   user_prompt_template: |
       Context Passage:
      {context}
       Question:
       {question}
-      
+
 filters:
-  -
-  - ${easiness_filter}
-  -
-  - ${answerability_filter}
+  - ${easiness_filter}
+  - ${answerability_filter}

 post_processors:
-  -
-  - _target_: nemo_retriever_sdg.DivergenceCalculator
+  - _target_: nemo_retriever_sdg.DivergenceCalculator

 analyzers:
-  -
-  - _target_: nemo_retriever_sdg.QuestionLengthAnalyzer
-  -
-  - _target_: nemo_retriever_sdg.LexicalDivergenceAnalyzer
+  - _target_: nemo_retriever_sdg.QuestionLengthAnalyzer
+  - _target_: nemo_retriever_sdg.LexicalDivergenceAnalyzer

 evaluators:
-  -
-  - _target_: nemo_retriever_sdg.BEIREvaluator
+  - _target_: nemo_retriever_sdg.BEIREvaluator
     model_names:
       - "sentence-transformers/gtr-t5-large"
       - "BAAI/bge-large-en-v1.5"
       - "intfloat/e5-large-unsupervised"
-    score_function: "cos_sim" # "dot" or "cos_sim"
-    batch_size: 16
\ No newline at end of file
+    score_function: "cos_sim"  # "dot" or "cos_sim"
+    batch_size: 16
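
A note for reviewers on the sequence cleanups in these Hydra-style configs (filters, pre_processors, post_processors, analyzers, evaluators): in YAML, a bare "-" on a line of its own is a sequence entry whose value is null, so the old spelling did not merely look odd, it parsed as an extra null item in each list. A minimal standalone sketch of the two forms (key names are reused from the configs above purely for illustration; "filters_fixed" is a hypothetical key used only to keep the sketch loadable as one document):

    # Old spelling: the bare "-" is its own entry with a null value,
    # so this parses as a two-element list: [null, "${easiness_filter}"]
    filters:
      -
      - ${easiness_filter}

    # New spelling: a single-element list: ["${easiness_filter}"]
    filters_fixed:
      - ${easiness_filter}

Any loader that iterates these lists and instantiates each _target_ entry would presumably have to skip the null items or fail on them, so trimming them at the source, as this change does, is likely more than a cosmetic fix.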