diff --git a/.github/workflows/yamllint.yml b/.github/workflows/yamllint.yml new file mode 100644 index 00000000..23dfbe6a --- /dev/null +++ b/.github/workflows/yamllint.yml @@ -0,0 +1,14 @@ +--- +name: Yaml Lint +# yamllint disable-line rule:truthy +on: + workflow_dispatch: + push: + pull_request: +jobs: + lintAllTheThings: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: yaml-lint + uses: ibiqlik/action-yamllint@v3 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 11fada31..5e43807c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,5 +23,7 @@ repos: files: ^RAG/ name: isort (python) args: ["--multi-line=3", "--trailing-comma", "--force-grid-wrap=0", "--use-parenthese", "--line-width=119", "--ws"] - - + - repo: https://github.com/google/yamlfmt + rev: v0.13.0 + hooks: + - id: yamlfmt diff --git a/.yamlfmt b/.yamlfmt new file mode 100644 index 00000000..0c4c3682 --- /dev/null +++ b/.yamlfmt @@ -0,0 +1,8 @@ +formatter: + type: basic + eof_newline: true + max_line_length: 120 + pad_line_comments: 2 + retain_line_breaks_single: true + scan_folded_as_literal: true + trim_trailing_whitespace: true diff --git a/.yamllint b/.yamllint new file mode 100644 index 00000000..5364339c --- /dev/null +++ b/.yamllint @@ -0,0 +1,7 @@ +--- + +extends: default + +rules: + line-length: "disable" + document-start: "disable" diff --git a/RAG/examples/advanced_rag/multi_turn_rag/docker-compose.yaml b/RAG/examples/advanced_rag/multi_turn_rag/docker-compose.yaml index b5e9c43a..79d76ec2 100644 --- a/RAG/examples/advanced_rag/multi_turn_rag/docker-compose.yaml +++ b/RAG/examples/advanced_rag/multi_turn_rag/docker-compose.yaml @@ -1,7 +1,7 @@ include: - path: - - ../../local_deploy/docker-compose-vectordb.yaml - - ../../local_deploy/docker-compose-nim-ms.yaml + - ../../local_deploy/docker-compose-vectordb.yaml + - ../../local_deploy/docker-compose-nim-ms.yaml services: chain-server: @@ -28,7 +28,7 @@ services: APP_TEXTSPLITTER_MODELNAME: Snowflake/snowflake-arctic-embed-l APP_TEXTSPLITTER_CHUNKSIZE: 506 APP_TEXTSPLITTER_CHUNKOVERLAP: 200 - APP_RANKING_MODELNAME: ${APP_RANKING_MODELNAME:-"nvidia/nv-rerankqa-mistral-4b-v3"} # Leave it blank to avoid using ranking + APP_RANKING_MODELNAME: ${APP_RANKING_MODELNAME:-"nvidia/nv-rerankqa-mistral-4b-v3"} # Leave it blank to avoid using ranking APP_RANKING_MODELENGINE: ${APP_RANKING_MODELENGINE:-nvidia-ai-endpoints} APP_RANKING_SERVERURL: ${APP_RANKING_SERVERURL:-""} NVIDIA_API_KEY: ${NVIDIA_API_KEY} @@ -43,9 +43,9 @@ services: ENABLE_TRACING: false LOGLEVEL: ${LOGLEVEL:-INFO} ports: - - "8081:8081" + - "8081:8081" expose: - - "8081" + - "8081" shm_size: 5gb depends_on: nemollm-embedding: @@ -75,11 +75,11 @@ services: OTEL_EXPORTER_OTLP_PROTOCOL: grpc ENABLE_TRACING: false ports: - - "8090:8090" + - "8090:8090" expose: - - "8090" + - "8090" depends_on: - - chain-server + - chain-server networks: default: diff --git a/RAG/examples/advanced_rag/multi_turn_rag/prompt.yaml b/RAG/examples/advanced_rag/multi_turn_rag/prompt.yaml index 5e63a644..60f7575a 100644 --- a/RAG/examples/advanced_rag/multi_turn_rag/prompt.yaml +++ b/RAG/examples/advanced_rag/multi_turn_rag/prompt.yaml @@ -1,22 +1,22 @@ chat_template: | - You are a helpful, respectful and honest assistant. - Always answer as helpfully as possible, while being safe. - Please ensure that your responses are positive in nature. + You are a helpful, respectful and honest assistant. + Always answer as helpfully as possible, while being safe. 
+    Please ensure that your responses are positive in nature.

 rag_template: |
-    [INST] <<SYS>>
-    Use the following context to answer the user's question. If you don't know the answer,
-    just say that you don't know, don't try to make up an answer.
-    <</SYS>>
-    [INST] Context: {context_str} Question: {query_str} Only return the helpful
-    answer below and nothing else. Helpful answer:[/INST]
+    [INST] <<SYS>>
+    Use the following context to answer the user's question. If you don't know the answer,
+    just say that you don't know, don't try to make up an answer.
+    <</SYS>>
+    [INST] Context: {context_str} Question: {query_str} Only return the helpful
+    answer below and nothing else. Helpful answer:[/INST]

-multi_turn_rag_template: |
-    You are a document chatbot. Help the user as they ask questions about documents.
-    User message just asked: {input}\n\n
-    For this, we have retrieved the following potentially-useful info:
-    Conversation History Retrieved:
-    {history}\n\n
-    Document Retrieved:
-    {context}\n\n
-    Answer only from retrieved data. Make your response conversational.
+multi_turn_rag_template: |-
+    You are a document chatbot. Help the user as they ask questions about documents.
+    User message just asked: {input}\n\n
+    For this, we have retrieved the following potentially-useful info:
+    Conversation History Retrieved:
+    {history}\n\n
+    Document Retrieved:
+    {context}\n\n
+    Answer only from retrieved data. Make your response conversational.
diff --git a/RAG/examples/advanced_rag/multimodal_rag/docker-compose.yaml b/RAG/examples/advanced_rag/multimodal_rag/docker-compose.yaml
index 948f2652..d8f0d934 100644
--- a/RAG/examples/advanced_rag/multimodal_rag/docker-compose.yaml
+++ b/RAG/examples/advanced_rag/multimodal_rag/docker-compose.yaml
@@ -1,7 +1,7 @@
 include:
   - path:
-    - ../../local_deploy/docker-compose-vectordb.yaml
-    - ../../local_deploy/docker-compose-nim-ms.yaml
+      - ../../local_deploy/docker-compose-vectordb.yaml
+      - ../../local_deploy/docker-compose-nim-ms.yaml

 services:
   chain-server:
@@ -37,9 +37,9 @@ services:
       ENABLE_TRACING: false
       LOGLEVEL: ${LOGLEVEL:-INFO}
     ports:
-    - "8081:8081"
+      - "8081:8081"
     expose:
-    - "8081"
+      - "8081"
     shm_size: 5gb
     depends_on:
       nemollm-embedding:
@@ -66,9 +66,9 @@ services:
       OTEL_EXPORTER_OTLP_PROTOCOL: grpc
       ENABLE_TRACING: false
     ports:
-    - "8090:8090"
+      - "8090:8090"
     expose:
-    - "8090"
+      - "8090"
     depends_on:
       - chain-server
diff --git a/RAG/examples/advanced_rag/multimodal_rag/prompt.yaml b/RAG/examples/advanced_rag/multimodal_rag/prompt.yaml
index aa63fa28..a62c5c30 100644
--- a/RAG/examples/advanced_rag/multimodal_rag/prompt.yaml
+++ b/RAG/examples/advanced_rag/multimodal_rag/prompt.yaml
@@ -1,11 +1,23 @@
 chat_template: |
-    You are a helpful and friendly multimodal intelligent AI assistant named Multimodal Chatbot Assistant.
-    Always answer as helpfully as possible, while being safe.
-    Please ensure that your responses are positive in nature.
+    You are a helpful and friendly multimodal intelligent AI assistant named Multimodal Chatbot Assistant.
+    Always answer as helpfully as possible, while being safe.
+    Please ensure that your responses are positive in nature.

-rag_template: "You are a helpful and friendly multimodal intelligent AI assistant named Multimodal Chatbot Assistant. You are an expert in the content of the document provided and can provide information using both text and images. The user may also provide an image input, and you will use the image description to retrieve similar images, tables and text.
The context given below will provide some technical or financial documentation and whitepapers to help you answer the question. Based on this context, answer the question truthfully. If the question is not related to this, please refrain from answering. Most importantly, if the context provided does not include information about the question from the user, reply saying that you don't know. Do not utilize any information that is not provided in the documents below. All documents will be preceded by tags, for example [[DOCUMENT 1]], [[DOCUMENT 2]], and so on. You can reference them in your reply but without the brackets, so just say document 1 or 2. The question will be preceded by a [[QUESTION]] tag. Be succinct, clear, and helpful. Remember to describe everything in detail by using the knowledge provided, or reply that you don't know the answer. Do not fabricate any responses. Note that you have the ability to reference images, tables, and other multimodal elements when necessary. You can also refer to the image provided by the user, if any." +rag_template: "You are a helpful and friendly multimodal intelligent AI assistant named Multimodal Chatbot Assistant. You + are an expert in the content of the document provided and can provide information using both text and images. The user may + also provide an image input, and you will use the image description to retrieve similar images, tables and text. The context + given below will provide some technical or financial documentation and whitepapers to help you answer the question. Based + on this context, answer the question truthfully. If the question is not related to this, please refrain from answering. + Most importantly, if the context provided does not include information about the question from the user, reply saying that + you don't know. Do not utilize any information that is not provided in the documents below. All documents will be preceded + by tags, for example [[DOCUMENT 1]], [[DOCUMENT 2]], and so on. You can reference them in your reply but without the brackets, + so just say document 1 or 2. The question will be preceded by a [[QUESTION]] tag. Be succinct, clear, and helpful. Remember + to describe everything in detail by using the knowledge provided, or reply that you don't know the answer. Do not fabricate + any responses. Note that you have the ability to reference images, tables, and other multimodal elements when necessary. + You can also refer to the image provided by the user, if any." describe_image_prompt: | Describe this image in detail: -deplot_summarization_prompt: Your responsibility is to explain charts. You are an expert in describing the responses of linearized tables into plain English text for LLMs to use. \ No newline at end of file +deplot_summarization_prompt: Your responsibility is to explain charts. You are an expert in describing the responses of linearized + tables into plain English text for LLMs to use. 
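A brief aside for reviewers, since several prompt files in this patch switch their last block scalar from `|` to `|-`: below is a minimal illustrative YAML sketch (not part of the diff) of the chomping indicators involved. `|` keeps the scalar's trailing newline, while `|-` strips it, so ending a file's final template with `|-` works cleanly with the `eof_newline: true` setting in the new .yamlfmt and with the missing-newline-at-EOF fixes elsewhere in this change.

# Illustrative only -- not part of this change.
keep: |      # loads as "line one\nline two\n" (trailing newline kept)
  line one
  line two
strip: |-    # loads as "line one\nline two" (trailing newline stripped)
  line one
  line two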
diff --git a/RAG/examples/advanced_rag/query_decomposition_rag/docker-compose.yaml b/RAG/examples/advanced_rag/query_decomposition_rag/docker-compose.yaml index aff87171..bffe3cc9 100644 --- a/RAG/examples/advanced_rag/query_decomposition_rag/docker-compose.yaml +++ b/RAG/examples/advanced_rag/query_decomposition_rag/docker-compose.yaml @@ -1,7 +1,7 @@ include: - path: - - ../../local_deploy/docker-compose-vectordb.yaml - - ../../local_deploy/docker-compose-nim-ms.yaml + - ../../local_deploy/docker-compose-vectordb.yaml + - ../../local_deploy/docker-compose-nim-ms.yaml services: chain-server: @@ -40,9 +40,9 @@ services: ENABLE_TRACING: false LOGLEVEL: ${LOGLEVEL:-INFO} ports: - - "8081:8081" + - "8081:8081" expose: - - "8081" + - "8081" shm_size: 5gb depends_on: nemollm-embedding: @@ -69,11 +69,11 @@ services: OTEL_EXPORTER_OTLP_PROTOCOL: grpc ENABLE_TRACING: false ports: - - "8090:8090" + - "8090:8090" expose: - - "8090" + - "8090" depends_on: - - chain-server + - chain-server networks: default: diff --git a/RAG/examples/advanced_rag/query_decomposition_rag/prompt.yaml b/RAG/examples/advanced_rag/query_decomposition_rag/prompt.yaml index cc38c8af..c77a9512 100644 --- a/RAG/examples/advanced_rag/query_decomposition_rag/prompt.yaml +++ b/RAG/examples/advanced_rag/query_decomposition_rag/prompt.yaml @@ -1,45 +1,47 @@ -chat_template: "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Please ensure that your responses are positive in nature." +chat_template: "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. + Please ensure that your responses are positive in nature." -rag_template: "You are a helpful AI assistant named Envie. You will reply to questions only based on the context that you are provided. If something is out of context, you will refrain from replying and politely decline to respond to the user." +rag_template: "You are a helpful AI assistant named Envie. You will reply to questions only based on the context that you + are provided. If something is out of context, you will refrain from replying and politely decline to respond to the user." tool_selector_prompt: | - Your task is to answer questions. If you cannot answer the question, you can request use for a tool and break the question into specific sub questions. Fill with Nil where no action is required. You should only return a JSON containing the tool and the generated sub questions. Consider the contextual information and only ask for information that you do not already have. Do not return any other explanations or text. The output should be a simple JSON structure! You are given two tools: - - Search - - Math - Search tool quickly finds and retrieves relevant answers from a given context, providing accurate and precise information to meet search needs. - Math tool performs essential operations, including multiplication, addition, subtraction, division, and greater than or less than comparisons, providing accurate results with ease. Utilize math tool when asked to find sum, difference of values. - Do not pass sub questions to any tool if they already have an answer in the Contextual Information. - If you have all the information needed to answer the question, mark the Tool_Request as Nil. 
-
-    Contextual Information:
-    {{ context }}
-
-    Question:
-    {{ question }}
-
-    {"Tool_Request": "", "Generated Sub Questions": []}
-
-math_tool_prompt: |
-    Your task is to identify 2 variables and an operation from given questions. If you cannot answer the question, you can simply return "Not Possible". You should only return a JSON containing the `IsPossible`, `variable1`, `variable2`, and `operation`. Do not return any other explanations or text. The output should be a simple JSON structure!
-    You are given two options for `IsPossible`:
-    - Possible
-    - Not Possible
-    `variable1` and `variable2` should be real floating point numbers.
-    You are given four options for `operation symbols`:
-    - '+' (addition)
-    - '-' (subtraction)
-    - '*' (multiplication)
-    - '/' (division)
-    - '=' (equal to)
-    - '>' (greater than)
-    - '<' (less than)
-    - '>=' (greater than or equal to)
-    - '<=' (less than or equal to)
-    Only return the symbols for the specified operations and nothing else.
-    Contextual Information:
-    {{ context }}
-
-    Question:
-    {{ question }}
-
-    {"IsPossible": "", "variable1": [], "variable2": [], "operation": []}
\ No newline at end of file
+    Your task is to answer questions. If you cannot answer the question, you can request the use of a tool and break the question into specific sub questions. Fill with Nil where no action is required. You should only return a JSON containing the tool and the generated sub questions. Consider the contextual information and only ask for information that you do not already have. Do not return any other explanations or text. The output should be a simple JSON structure! You are given two tools:
+    - Search
+    - Math
+    Search tool quickly finds and retrieves relevant answers from a given context, providing accurate and precise information to meet search needs.
+    Math tool performs essential operations, including multiplication, addition, subtraction, division, and greater than or less than comparisons, providing accurate results with ease. Utilize the math tool when asked to find the sum or difference of values.
+    Do not pass sub questions to any tool if they already have an answer in the Contextual Information.
+    If you have all the information needed to answer the question, mark the Tool_Request as Nil.
+
+    Contextual Information:
+    {{ context }}
+
+    Question:
+    {{ question }}
+
+    {"Tool_Request": "", "Generated Sub Questions": []}
+
+math_tool_prompt: |-
+    Your task is to identify 2 variables and an operation from given questions. If you cannot answer the question, you can simply return "Not Possible". You should only return a JSON containing the `IsPossible`, `variable1`, `variable2`, and `operation`. Do not return any other explanations or text. The output should be a simple JSON structure!
+    You are given two options for `IsPossible`:
+    - Possible
+    - Not Possible
+    `variable1` and `variable2` should be real floating point numbers.
+    You are given the following options for `operation symbols`:
+    - '+' (addition)
+    - '-' (subtraction)
+    - '*' (multiplication)
+    - '/' (division)
+    - '=' (equal to)
+    - '>' (greater than)
+    - '<' (less than)
+    - '>=' (greater than or equal to)
+    - '<=' (less than or equal to)
+    Only return the symbols for the specified operations and nothing else.
+ Contextual Information: + {{ context }} + + Question: + {{ question }} + + {"IsPossible": "", "variable1": [], "variable2": [], "operation": []} diff --git a/RAG/examples/advanced_rag/structured_data_rag/docker-compose.yaml b/RAG/examples/advanced_rag/structured_data_rag/docker-compose.yaml index e8ae62cb..4ce52a0f 100644 --- a/RAG/examples/advanced_rag/structured_data_rag/docker-compose.yaml +++ b/RAG/examples/advanced_rag/structured_data_rag/docker-compose.yaml @@ -1,6 +1,6 @@ include: - path: - - ../../local_deploy/docker-compose-nim-ms.yaml + - ../../local_deploy/docker-compose-nim-ms.yaml services: chain-server: @@ -20,16 +20,19 @@ services: APP_LLM_MODELENGINE: nvidia-ai-endpoints APP_LLM_SERVERURL: ${APP_LLM_SERVERURL:-""} APP_LLM_MODELNAMEPANDASAI: ${APP_LLM_MODELNAME:-meta/llama3-70b-instruct} - APP_PROMPTS_CHATTEMPLATE: "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Please ensure that your responses are positive in nature." - APP_PROMPTS_RAGTEMPLATE: "You are a helpful AI assistant named Envie. You will reply to questions only based on the context that you are provided. If something is out of context, you will refrain from replying and politely decline to respond to the user." + APP_PROMPTS_CHATTEMPLATE: "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, + while being safe. Please ensure that your responses are positive in nature." + APP_PROMPTS_RAGTEMPLATE: "You are a helpful AI assistant named Envie. You will reply to questions only based on the + context that you are provided. If something is out of context, you will refrain from replying and politely decline + to respond to the user." NVIDIA_API_KEY: ${NVIDIA_API_KEY} COLLECTION_NAME: ${COLLECTION_NAME:-structured_data_rag} CSV_NAME: PdM_machines LOGLEVEL: ${LOGLEVEL:-INFO} ports: - - "8081:8081" + - "8081:8081" expose: - - "8081" + - "8081" shm_size: 5gb depends_on: nemollm-inference: @@ -50,11 +53,11 @@ services: APP_SERVERPORT: 8081 APP_MODELNAME: ${APP_LLM_MODELNAME:-meta/llama3-70b-instruct} ports: - - "8090:8090" + - "8090:8090" expose: - - "8090" + - "8090" depends_on: - - chain-server + - chain-server networks: default: diff --git a/RAG/examples/advanced_rag/structured_data_rag/prompt.yaml b/RAG/examples/advanced_rag/structured_data_rag/prompt.yaml index a01430d5..c9d2a11d 100644 --- a/RAG/examples/advanced_rag/structured_data_rag/prompt.yaml +++ b/RAG/examples/advanced_rag/structured_data_rag/prompt.yaml @@ -1,11 +1,12 @@ prompts: - chat_template: You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Please ensure that your responses are positive in nature. + chat_template: You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. + Please ensure that your responses are positive in nature. - rag_template: You are a helpful AI assistant named Envie. You will reply to questions only based on the context that you are provided. If something is out of context, you will refrain from replying and politely decline to respond to the user. + rag_template: You are a helpful AI assistant named Envie. You will reply to questions only based on the context that you + are provided. If something is out of context, you will refrain from replying and politely decline to respond to the user. 
csv_data_retrieval_template: | - You are an expert data retrieval agent who writes functional python code and utilzes Pandas library in python for data retrieval. Provide a functional and accurate code based on the provided pandas dataframe for the user's query. @@ -19,7 +20,6 @@ prompts: - dfs is a list containing df a pandas dataframe. Always use the first entry from the list like df = dfs[0]. {instructions} - csv_response_template: | Provide a response to user's queries based on the given Data point. You are provided with the required data value and your job is to formulate a natural language response based on the data. @@ -48,14 +48,17 @@ prompts: df['age'] = df['age'].str.extract('(\d+)').astype(int) - For any age related query, always provide the response as string with the age and its unit together. Unit of age for all machines is in months. - name: PdM_errors - description: These are errors encountered by the machines while in operating condition. Since, these errors don't shut down the machines, these are not considered as failures. The error date and times are rounded to the closest hour since the telemetry data is collected at an hourly rate. + description: These are errors encountered by the machines while in operating condition. Since, these errors don't shut + down the machines, these are not considered as failures. The error date and times are rounded to the closest hour + since the telemetry data is collected at an hourly rate. instructions: | - Convert the datetime column to pandas datetime like df['datetime'] = pd.to_datetime(df['datetime']) - Use pandas datatime only for filtering date time columns based on date or time. Like df['datetime'].dt.day - If year is not mentioned explicitly in queries containing dates, then consider the year to be 2015 by default. - name: PdM_failures - description: Each record represents replacement of a component due to failure. This data is a subset of Maintenance data. This data is rounded to the closest hour since the telemetry data is collected at an hourly rate. - instructions: | + description: Each record represents replacement of a component due to failure. This data is a subset of Maintenance + data. This data is rounded to the closest hour since the telemetry data is collected at an hourly rate. + instructions: |- - Convert the datetime column to pandas datetime like df['datetime'] = pd.to_datetime(df['datetime']) - Use pandas datatime only for filtering date time columns based on date or time. Like df['datetime'].dt.day - - If year is not mentioned explicitly in queries containing dates, then consider the year to be 2015 by default. \ No newline at end of file + - If year is not mentioned explicitly in queries containing dates, then consider the year to be 2015 by default. 
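Another aside on the rewrapping pattern used throughout the prompt files in this patch: yamlfmt breaks long plain scalars across lines, which is safe because YAML folds the continuation lines of a plain scalar back into a single space-joined string. A minimal illustrative sketch (not part of the diff):

# Illustrative only: both keys below parse to the exact same string.
one_line: You are a helpful AI assistant named Envie.
wrapped: You are a helpful
  AI assistant named Envie.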
diff --git a/RAG/examples/basic_rag/langchain/docker-compose.yaml b/RAG/examples/basic_rag/langchain/docker-compose.yaml index 33483c56..39db3cb2 100644 --- a/RAG/examples/basic_rag/langchain/docker-compose.yaml +++ b/RAG/examples/basic_rag/langchain/docker-compose.yaml @@ -1,7 +1,7 @@ include: - path: - - ../../local_deploy/docker-compose-vectordb.yaml - - ../../local_deploy/docker-compose-nim-ms.yaml + - ../../local_deploy/docker-compose-vectordb.yaml + - ../../local_deploy/docker-compose-nim-ms.yaml services: chain-server: @@ -40,7 +40,7 @@ services: APP_TEXTSPLITTER_CHUNKSIZE: 506 APP_TEXTSPLITTER_CHUNKOVERLAP: 200 NVIDIA_API_KEY: ${NVIDIA_API_KEY} - # vectorstore collection name to store embeddings + # vectorstore collection name to store embeddings COLLECTION_NAME: ${COLLECTION_NAME:-nvidia_api_catalog} APP_RETRIEVER_TOPK: 4 APP_RETRIEVER_SCORETHRESHOLD: 0.25 @@ -52,9 +52,9 @@ services: # Log level for server, supported level NOTSET, DEBUG, INFO, WARN, ERROR, CRITICAL LOGLEVEL: ${LOGLEVEL:-INFO} ports: - - "8081:8081" + - "8081:8081" expose: - - "8081" + - "8081" shm_size: 5gb depends_on: nemollm-embedding: @@ -86,11 +86,11 @@ services: # enable observability in rag playground ENABLE_TRACING: false ports: - - "8090:8090" + - "8090:8090" expose: - - "8090" + - "8090" depends_on: - - chain-server + - chain-server networks: default: diff --git a/RAG/examples/basic_rag/langchain/prompt.yaml b/RAG/examples/basic_rag/langchain/prompt.yaml index ebbaa8d9..b64776ca 100644 --- a/RAG/examples/basic_rag/langchain/prompt.yaml +++ b/RAG/examples/basic_rag/langchain/prompt.yaml @@ -1,9 +1,9 @@ chat_template: | - You are a helpful, respectful and honest assistant. - Always answer as helpfully as possible, while being safe. - Please ensure that your responses are positive in nature. + You are a helpful, respectful and honest assistant. + Always answer as helpfully as possible, while being safe. + Please ensure that your responses are positive in nature. -rag_template: | - You are a helpful AI assistant named Envie. - You will reply to questions only based on the context that you are provided. - If something is out of context, you will refrain from replying and politely decline to respond to the user. \ No newline at end of file +rag_template: |- + You are a helpful AI assistant named Envie. + You will reply to questions only based on the context that you are provided. + If something is out of context, you will refrain from replying and politely decline to respond to the user. 
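One more note, since the `${VAR:-default}` form appears in nearly every compose file in this change: Docker Compose substitutes the default when the variable is unset or empty, so the examples start with sane settings even without a populated `.env` file. A hypothetical sketch follows (the `demo` service name is invented for illustration):

# Illustrative only; "demo" is not a service in this repository.
services:
  demo:
    image: busybox
    environment:
      LOGLEVEL: ${LOGLEVEL:-INFO}  # falls back to INFO when LOGLEVEL is unset or empty
      APP_LLM_SERVERURL: ${APP_LLM_SERVERURL:-""}  # falls back to an empty string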
diff --git a/RAG/examples/basic_rag/llamaindex/docker-compose.yaml b/RAG/examples/basic_rag/llamaindex/docker-compose.yaml index 520f7037..91213825 100644 --- a/RAG/examples/basic_rag/llamaindex/docker-compose.yaml +++ b/RAG/examples/basic_rag/llamaindex/docker-compose.yaml @@ -1,7 +1,7 @@ include: - path: - - ../../local_deploy/docker-compose-vectordb.yaml - - ../../local_deploy/docker-compose-nim-ms.yaml + - ../../local_deploy/docker-compose-vectordb.yaml + - ../../local_deploy/docker-compose-nim-ms.yaml services: chain-server: @@ -36,7 +36,7 @@ services: # embedding model engine used for inference, supported type nvidia-ai-endpoints APP_LLM_MODELENGINE: ${APP_LLM_MODELENGINE:-nvidia-ai-endpoints} NVIDIA_API_KEY: ${NVIDIA_API_KEY} - # vectorstore collection name to store embeddings + # vectorstore collection name to store embeddings COLLECTION_NAME: ${COLLECTION_NAME:-developer_rag} APP_RETRIEVER_TOPK: 4 APP_RETRIEVER_SCORETHRESHOLD: 0.25 @@ -52,9 +52,9 @@ services: # Log level for server, supported level NOTSET, DEBUG, INFO, WARN, ERROR, CRITICAL LOGLEVEL: ${LOGLEVEL:-INFO} ports: - - "8081:8081" + - "8081:8081" expose: - - "8081" + - "8081" shm_size: 5gb depends_on: nemollm-embedding: @@ -64,7 +64,6 @@ services: condition: service_healthy required: false - rag-playground: container_name: rag-playground image: rag-playground:${TAG:-latest} @@ -88,11 +87,11 @@ services: # enable observability in rag playground ENABLE_TRACING: false ports: - - "8090:8090" + - "8090:8090" expose: - - "8090" + - "8090" depends_on: - - chain-server + - chain-server networks: default: diff --git a/RAG/examples/basic_rag/llamaindex/prompt.yaml b/RAG/examples/basic_rag/llamaindex/prompt.yaml index 4bf764a0..7afefde6 100644 --- a/RAG/examples/basic_rag/llamaindex/prompt.yaml +++ b/RAG/examples/basic_rag/llamaindex/prompt.yaml @@ -1,10 +1,10 @@ chat_template: | - You are a helpful, respectful and honest assistant. - Always answer as helpfully as possible, while being safe. - Please ensure that your responses are positive in nature. + You are a helpful, respectful and honest assistant. + Always answer as helpfully as possible, while being safe. + Please ensure that your responses are positive in nature. -rag_template: | - Use the following context to answer the user's question. If you don't know the answer, - just say that you don't know, don't try to make up an answer. - Context: {context_str} Question: {query_str} Only return the helpful - answer below and nothing else. Helpful answer: \ No newline at end of file +rag_template: |- + Use the following context to answer the user's question. If you don't know the answer, + just say that you don't know, don't try to make up an answer. + Context: {context_str} Question: {query_str} Only return the helpful + answer below and nothing else. 
Helpful answer: diff --git a/RAG/examples/local_deploy/docker-compose-nim-ms.yaml b/RAG/examples/local_deploy/docker-compose-nim-ms.yaml index 6c73fff0..d06119c8 100644 --- a/RAG/examples/local_deploy/docker-compose-nim-ms.yaml +++ b/RAG/examples/local_deploy/docker-compose-nim-ms.yaml @@ -3,12 +3,12 @@ services: container_name: nemollm-inference-microservice image: nvcr.io/nim/meta/llama3-8b-instruct:1.0.3 volumes: - - ${MODEL_DIRECTORY}:/opt/nim/.cache + - ${MODEL_DIRECTORY}:/opt/nim/.cache user: "${USERID}" ports: - - "8000:8000" + - "8000:8000" expose: - - "8000" + - "8000" environment: NGC_API_KEY: ${NGC_API_KEY} shm_size: 20gb @@ -31,11 +31,11 @@ services: container_name: nemo-retriever-embedding-microservice image: nvcr.io/nim/nvidia/nv-embedqa-e5-v5:1.0.1 volumes: - - ${MODEL_DIRECTORY}:/opt/nim/.cache + - ${MODEL_DIRECTORY}:/opt/nim/.cache ports: - - "9080:8000" + - "9080:8000" expose: - - "8000" + - "8000" environment: NGC_API_KEY: ${NGC_API_KEY} user: "${USERID}" @@ -59,11 +59,11 @@ services: container_name: nemo-retriever-ranking-microservice image: nvcr.io/nim/nvidia/nv-rerankqa-mistral-4b-v3:1.0.1 volumes: - - ${MODEL_DIRECTORY}:/opt/nim/.cache + - ${MODEL_DIRECTORY}:/opt/nim/.cache ports: - - "1976:8000" + - "1976:8000" expose: - - "8000" + - "8000" environment: NGC_API_KEY: ${NGC_API_KEY} user: "${USERID}" diff --git a/RAG/examples/local_deploy/docker-compose-vectordb.yaml b/RAG/examples/local_deploy/docker-compose-vectordb.yaml index cd76bc98..4abf756a 100644 --- a/RAG/examples/local_deploy/docker-compose-vectordb.yaml +++ b/RAG/examples/local_deploy/docker-compose-vectordb.yaml @@ -3,18 +3,17 @@ services: container_name: pgvector image: pgvector/pgvector:pg16 ports: - - 5432:5432 + - 5432:5432 expose: - - "5432" + - "5432" volumes: - - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/data:/var/lib/postgresql/data + - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/data:/var/lib/postgresql/data environment: - - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password} - - POSTGRES_USER=${POSTGRES_USER:-postgres} - - POSTGRES_DB=${POSTGRES_DB:-api} + - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password} + - POSTGRES_USER=${POSTGRES_USER:-postgres} + - POSTGRES_DB=${POSTGRES_DB:-api} profiles: ["pgvector"] - etcd: container_name: milvus-etcd image: quay.io/coreos/etcd:v3.5.5 diff --git a/RAG/tools/evaluation/docker-compose.yaml b/RAG/tools/evaluation/docker-compose.yaml index d0b84f53..e6f085b3 100644 --- a/RAG/tools/evaluation/docker-compose.yaml +++ b/RAG/tools/evaluation/docker-compose.yaml @@ -10,12 +10,13 @@ services: volumes: - ${DATASET_DIRECTORY}:/data_dir - ${RESULT_DIRECTORY}:/result_dir - command: python3 /opt/tools/evaluation/main.py --generate_answer ${GENERATE_ANSWERS:-False} --base_url ${BASE_URL:-0.0.0.0:8081} --docs /data_dir --ga_input /result_dir/qna.json --ga_output /result_dir/eval.json --evaluate ${EVALUATE:-True} --metrics ${METRICS:-ragas} --judge_llm_model ${JUDGE_LLM_MODEL} --ev_input /result_dir/eval.json --ev_result /result_dir/result + command: python3 /opt/tools/evaluation/main.py --generate_answer ${GENERATE_ANSWERS:-False} --base_url ${BASE_URL:-0.0.0.0:8081} + --docs /data_dir --ga_input /result_dir/qna.json --ga_output /result_dir/eval.json --evaluate ${EVALUATE:-True} --metrics + ${METRICS:-ragas} --judge_llm_model ${JUDGE_LLM_MODEL} --ev_input /result_dir/eval.json --ev_result /result_dir/result environment: NVIDIA_API_KEY: ${NVIDIA_API_KEY} shm_size: 5gb - synthetic_data_generator: container_name: data-generator image: data-generator:${TAG:-latest} @@ -27,7 +28,7 @@ 
services: volumes: - ${DATASET_DIRECTORY}:/data_dir - ${RESULT_DIRECTORY}:/result_dir - command: python3 /opt/tools/evaluation/main.py --docs /data_dir --gd_output /result_dir/qna.json + command: python3 /opt/tools/evaluation/main.py --docs /data_dir --gd_output /result_dir/qna.json environment: NVIDIA_API_KEY: ${NVIDIA_API_KEY} diff --git a/RAG/tools/observability/configs/jaeger.yaml b/RAG/tools/observability/configs/jaeger.yaml index 64d3513c..41e3b326 100644 --- a/RAG/tools/observability/configs/jaeger.yaml +++ b/RAG/tools/observability/configs/jaeger.yaml @@ -1,3 +1,3 @@ query.base-path: /jaeger/ui cassandra.keyspace: jaeger_v1_dc1 -cassandra.servers: cassandra \ No newline at end of file +cassandra.servers: cassandra diff --git a/RAG/tools/observability/configs/otel-collector-config.yaml b/RAG/tools/observability/configs/otel-collector-config.yaml index 8e79ef48..8bf59d0a 100644 --- a/RAG/tools/observability/configs/otel-collector-config.yaml +++ b/RAG/tools/observability/configs/otel-collector-config.yaml @@ -4,7 +4,7 @@ receivers: grpc: endpoint: 0.0.0.0:4317 http: - # endpoint: 0.0.0.0:4318 + # endpoint: 0.0.0.0:4318 processors: tail_sampling: @@ -51,4 +51,3 @@ service: receivers: [otlp] exporters: [otlp] processors: [tail_sampling, transform] - diff --git a/RAG/tools/observability/docker-compose.yaml b/RAG/tools/observability/docker-compose.yaml index c5edab14..4ff48a79 100644 --- a/RAG/tools/observability/docker-compose.yaml +++ b/RAG/tools/observability/docker-compose.yaml @@ -1,4 +1,4 @@ -services: +services: otel-collector: container_name: otel-collector image: otel/opentelemetry-collector-contrib:0.102.0 @@ -30,7 +30,7 @@ services: - ${JAEGER_CONFIG_FILE}:/etc/jaeger.yaml depends_on: - cassandra-schema - + cassandra: image: cassandra:4.0 container_name: cassandra diff --git a/community/digital-human-security-analyst/conda_env.yml b/community/digital-human-security-analyst/conda_env.yml index 36ebd7d4..70431737 100644 --- a/community/digital-human-security-analyst/conda_env.yml +++ b/community/digital-human-security-analyst/conda_env.yml @@ -15,24 +15,24 @@ name: morpheus channels: - - rapidsai - - nvidia - - nvidia/label/dev # For pre-releases of MRC. Should still default to full releases if available - - conda-forge + - rapidsai + - nvidia + - nvidia/label/dev # For pre-releases of MRC. Should still default to full releases if available + - conda-forge dependencies: - ####### Morpheus Dependencies (keep sorted!) ####### - - boto3 - - dask - - dill - - distributed - - kfp - - librdkafka - - mlflow>=2.10.0,<3 - - nodejs=18.* - - nvtabular=23.06 - - papermill - - s3fs>=2023.6 + ####### Morpheus Dependencies (keep sorted!) ####### + - boto3 + - dask + - dill + - distributed + - kfp + - librdkafka + - mlflow>=2.10.0,<3 + - nodejs=18.* + - nvtabular=23.06 + - papermill + - s3fs>=2023.6 - ##### Pip Dependencies (keep sorted!) ####### - - pip: - - python-logging-loki + ##### Pip Dependencies (keep sorted!) ####### + - pip: + - python-logging-loki diff --git a/community/digital-human-security-analyst/docker-compose.yml b/community/digital-human-security-analyst/docker-compose.yml index b22b08a6..98c9781f 100644 --- a/community/digital-human-security-analyst/docker-compose.yml +++ b/community/digital-human-security-analyst/docker-compose.yml @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
- version: '3.3' services: @@ -28,7 +27,8 @@ services: networks: - frontend - backend - command: mlflow server --gunicorn-opts "--log-level debug" --backend-store-uri sqlite:////opt/mlflow/dbdata/mlflow.db --serve-artifacts --artifacts-destination /opt/mlflow/artifacts --host 0.0.0.0 + command: mlflow server --gunicorn-opts "--log-level debug" --backend-store-uri sqlite:////opt/mlflow/dbdata/mlflow.db + --serve-artifacts --artifacts-destination /opt/mlflow/artifacts --host 0.0.0.0 # Run the container with this command to upgrade if needed: mlflow db upgrade sqlite:////opt/mlflow/dbdata/mlflow.db volumes: - db_data:/opt/mlflow/dbdata @@ -37,7 +37,7 @@ services: # nim-llm: # image: nvcr.io/nim/meta/llama3-8b-instruct:1.0.0 # runtime: nvidia - # ulimits: + # ulimits: # memlock: -1 # stack: 67108864 # environment: @@ -65,9 +65,9 @@ services: resources: reservations: devices: - - driver: nvidia - device_ids: ['2'] - capabilities: [gpu] + - driver: nvidia + device_ids: ['2'] + capabilities: [gpu] image: morpheus_jupyter container_name: jupyter_security_analyst ports: diff --git a/community/event-driven-rag-cve-analysis/docker-compose.yml b/community/event-driven-rag-cve-analysis/docker-compose.yml index cba6e620..6036c714 100755 --- a/community/event-driven-rag-cve-analysis/docker-compose.yml +++ b/community/event-driven-rag-cve-analysis/docker-compose.yml @@ -38,7 +38,7 @@ services: reservations: devices: - driver: nvidia - capabilities: [ gpu ] + capabilities: [gpu] networks: - proxy # Uncomment if the .env file issue is resolved: https://github.com/docker/compose/issues/9181#issuecomment-1996016211 diff --git a/community/event-driven-rag-cve-analysis/requirements.yaml b/community/event-driven-rag-cve-analysis/requirements.yaml index 22d00294..6651e4a3 100644 --- a/community/event-driven-rag-cve-analysis/requirements.yaml +++ b/community/event-driven-rag-cve-analysis/requirements.yaml @@ -37,8 +37,8 @@ dependencies: ####### Pip Dependencies (keep sorted!) 
####### - pip - pip: - - google-search-results==2.4 - - langchain-nvidia-ai-endpoints==0.0.3 - - langchain==0.1.9 - - nemollm==0.3.5 - - pydpkg==1.9.2 + - google-search-results==2.4 + - langchain-nvidia-ai-endpoints==0.0.3 + - langchain==0.1.9 + - nemollm==0.3.5 + - pydpkg==1.9.2 diff --git a/community/fm-asr-streaming-rag/deploy/docker-compose-file-replay.yaml b/community/fm-asr-streaming-rag/deploy/docker-compose-file-replay.yaml index 6bc2d340..1ec2add4 100644 --- a/community/fm-asr-streaming-rag/deploy/docker-compose-file-replay.yaml +++ b/community/fm-asr-streaming-rag/deploy/docker-compose-file-replay.yaml @@ -51,4 +51,4 @@ services: devices: - driver: nvidia device_ids: ['${REPLAY_GPU:-0}'] - capabilities: [gpu] \ No newline at end of file + capabilities: [gpu] diff --git a/community/fm-asr-streaming-rag/deploy/docker-compose-fm-asr.yaml b/community/fm-asr-streaming-rag/deploy/docker-compose-fm-asr.yaml index d150d6b5..d188a7bb 100644 --- a/community/fm-asr-streaming-rag/deploy/docker-compose-fm-asr.yaml +++ b/community/fm-asr-streaming-rag/deploy/docker-compose-fm-asr.yaml @@ -76,9 +76,9 @@ services: resources: reservations: devices: - - driver: nvidia - device_ids: ['${FRONTEND_GPU:-0}'] - capabilities: [gpu] + - driver: nvidia + device_ids: ['${FRONTEND_GPU:-0}'] + capabilities: [gpu] server: container_name: fm-asr-chain-server @@ -110,6 +110,6 @@ services: resources: reservations: devices: - - driver: nvidia - device_ids: ['${CHAIN_GPU:-0}'] - capabilities: [gpu] \ No newline at end of file + - driver: nvidia + device_ids: ['${CHAIN_GPU:-0}'] + capabilities: [gpu] diff --git a/community/fm-asr-streaming-rag/deploy/docker-compose-milvus-standalone.yaml b/community/fm-asr-streaming-rag/deploy/docker-compose-milvus-standalone.yaml index 4664fda9..e8161a24 100644 --- a/community/fm-asr-streaming-rag/deploy/docker-compose-milvus-standalone.yaml +++ b/community/fm-asr-streaming-rag/deploy/docker-compose-milvus-standalone.yaml @@ -23,9 +23,7 @@ services: - ETCD_SNAPSHOT_COUNT=50000 volumes: - ${NEMO_RET_DIR?:source compose.env}/volumes/etcd:/etcd:Z - command: - etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls - http://0.0.0.0:2379 --data-dir /etcd + command: etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd healthcheck: test: ["CMD", "etcdctl", "endpoint", "health"] interval: 30s diff --git a/community/fm-asr-streaming-rag/deploy/docker-compose-nemo-retriever.yaml b/community/fm-asr-streaming-rag/deploy/docker-compose-nemo-retriever.yaml index 78fca84d..565f2304 100644 --- a/community/fm-asr-streaming-rag/deploy/docker-compose-nemo-retriever.yaml +++ b/community/fm-asr-streaming-rag/deploy/docker-compose-nemo-retriever.yaml @@ -76,8 +76,7 @@ services: command: - "/bin/sh" - "-c" - - "opentelemetry-instrument \ - uvicorn retrieval.main:app --host 0.0.0.0 --port 8000" + - "opentelemetry-instrument uvicorn retrieval.main:app --host 0.0.0.0 --port 8000" healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8000/health"] @@ -99,9 +98,7 @@ services: image: nvcr.io/ohlfw0olaadg/ea-participants/nemo-retriever-embedding-microservice:24.02 ports: - "${NEMO_EMBEDDING_PORT:-1985}:8080" - command: ./bin/web -p 8080 -n 1 -g - model_config_templates/NV-Embed-QA_template.yaml -c - /models/nv-embed-qa_v4/NV-Embed-QA-4.nemo + command: ./bin/web -p 8080 -n 1 -g model_config_templates/NV-Embed-QA_template.yaml -c /models/nv-embed-qa_v4/NV-Embed-QA-4.nemo volumes: - ${NEMO_RET_DIR?:source compose.env}/models:/models:ro 
healthcheck: @@ -149,9 +146,7 @@ services: - ETCD_SNAPSHOT_COUNT=50000 volumes: - ${NEMO_RET_DIR?:source compose.env}/volumes/etcd:/etcd:Z - command: - etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls - http://0.0.0.0:2379 --data-dir /etcd + command: etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd healthcheck: test: ["CMD", "etcdctl", "endpoint", "health"] interval: 30s @@ -254,11 +249,11 @@ services: volumes: - ${NEMO_RET_DIR?:source compose.env}/config/otel-collector-config.yaml:/etc/otel-collector-config.yaml ports: - - "13133:13133" # health check - - "4317:4317" # OTLP over gRPC receiver - - "55679:55679" # UI + - "13133:13133" # health check + - "4317:4317" # OTLP over gRPC receiver + - "55679:55679" # UI zipkin: image: openzipkin/zipkin:3.0.6 ports: - - "9411:9411" # Zipkin UI and API + - "9411:9411" # Zipkin UI and API diff --git a/community/fm-asr-streaming-rag/deploy/docker-compose-nim-build.yaml b/community/fm-asr-streaming-rag/deploy/docker-compose-nim-build.yaml index 76b4399d..b65312a3 100644 --- a/community/fm-asr-streaming-rag/deploy/docker-compose-nim-build.yaml +++ b/community/fm-asr-streaming-rag/deploy/docker-compose-nim-build.yaml @@ -51,4 +51,4 @@ services: devices: - driver: nvidia device_ids: ['${NIM_GPU:-0}'] - capabilities: [gpu] \ No newline at end of file + capabilities: [gpu] diff --git a/community/fm-asr-streaming-rag/deploy/docker-compose-nim-llm.yaml b/community/fm-asr-streaming-rag/deploy/docker-compose-nim-llm.yaml index e4404180..792a7aaa 100644 --- a/community/fm-asr-streaming-rag/deploy/docker-compose-nim-llm.yaml +++ b/community/fm-asr-streaming-rag/deploy/docker-compose-nim-llm.yaml @@ -50,4 +50,4 @@ services: devices: - driver: nvidia device_ids: ['${NIM_GPU:-0}'] - capabilities: [gpu] \ No newline at end of file + capabilities: [gpu] diff --git a/community/fm-asr-streaming-rag/nemo-retriever/config/milvus-config.yaml b/community/fm-asr-streaming-rag/nemo-retriever/config/milvus-config.yaml index 160b68d2..07b653f9 100644 --- a/community/fm-asr-streaming-rag/nemo-retriever/config/milvus-config.yaml +++ b/community/fm-asr-streaming-rag/nemo-retriever/config/milvus-config.yaml @@ -17,11 +17,11 @@ # Related configuration of etcd, used to store Milvus metadata & service discovery. etcd: endpoints: localhost:2379 - rootPath: by-dev # The root path where data is stored in etcd - metaSubPath: meta # metaRootPath = rootPath + '/' + metaSubPath - kvSubPath: kv # kvRootPath = rootPath + '/' + kvSubPath + rootPath: by-dev # The root path where data is stored in etcd + metaSubPath: meta # metaRootPath = rootPath + '/' + metaSubPath + kvSubPath: kv # kvRootPath = rootPath + '/' + kvSubPath log: - level: error # Only supports debug, info, warn, error, panic, or fatal. Default 'info'. + level: error # Only supports debug, info, warn, error, panic, or fatal. Default 'info'. 
# path is one of: # - "default" as os.Stderr, # - "stderr" as os.Stderr, @@ -30,18 +30,18 @@ etcd: # please adjust in embedded Milvus: /tmp/milvus/logs/etcd.log path: stdout ssl: - enabled: false # Whether to support ETCD secure connection mode - tlsCert: /path/to/etcd-client.pem # path to your cert file - tlsKey: /path/to/etcd-client-key.pem # path to your key file - tlsCACert: /path/to/ca.pem # path to your CACert file + enabled: false # Whether to support ETCD secure connection mode + tlsCert: /path/to/etcd-client.pem # path to your cert file + tlsKey: /path/to/etcd-client-key.pem # path to your key file + tlsCACert: /path/to/ca.pem # path to your CACert file # TLS min version # Optional values: 1.0, 1.1, 1.2, 1.3。 # We recommend using version 1.2 and above. tlsMinVersion: 1.3 use: - embed: false # Whether to enable embedded Etcd (an in-process EtcdServer). + embed: false # Whether to enable embedded Etcd (an in-process EtcdServer). data: - dir: default.etcd # Embedded Etcd only. please adjust in embedded Milvus: /tmp/milvus/etcdData/ + dir: default.etcd # Embedded Etcd only. please adjust in embedded Milvus: /tmp/milvus/etcdData/ metastore: # Default value: etcd @@ -54,23 +54,23 @@ metastore: tikv: # Note that the default pd port of tikv is 2379, which conflicts with etcd. endpoints: 127.0.0.1:2389 - rootPath: by-dev # The root path where data is stored - metaSubPath: meta # metaRootPath = rootPath + '/' + metaSubPath - kvSubPath: kv # kvRootPath = rootPath + '/' + kvSubPath + rootPath: by-dev # The root path where data is stored + metaSubPath: meta # metaRootPath = rootPath + '/' + metaSubPath + kvSubPath: kv # kvRootPath = rootPath + '/' + kvSubPath localStorage: - path: /var/lib/milvus/data/ # please adjust in embedded Milvus: /tmp/milvus/data/ + path: /var/lib/milvus/data/ # please adjust in embedded Milvus: /tmp/milvus/data/ # Related configuration of MinIO/S3/GCS or any other service supports S3 API, which is responsible for data persistence for Milvus. # We refer to the storage service as MinIO/S3 in the following description for simplicity. minio: - address: localhost # Address of MinIO/S3 - port: 9000 # Port of MinIO/S3 - accessKeyID: minioadmin # accessKeyID of MinIO/S3 - secretAccessKey: minioadmin # MinIO/S3 encryption string - useSSL: false # Access to MinIO/S3 with SSL - bucketName: a-bucket # Bucket name in MinIO/S3 - rootPath: files # The root path where the message is stored in MinIO/S3 + address: localhost # Address of MinIO/S3 + port: 9000 # Port of MinIO/S3 + accessKeyID: minioadmin # accessKeyID of MinIO/S3 + secretAccessKey: minioadmin # MinIO/S3 encryption string + useSSL: false # Access to MinIO/S3 with SSL + bucketName: a-bucket # Bucket name in MinIO/S3 + rootPath: files # The root path where the message is stored in MinIO/S3 # Whether to useIAM role to access S3/GCS instead of access/secret keys # For more information, refer to # aws: https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_use.html @@ -109,14 +109,14 @@ mq: # Related configuration of pulsar, used to manage Milvus logs of recent mutation operations, output streaming log, and provide log publish-subscribe services. pulsar: - address: localhost # Address of pulsar - port: 6650 # Port of Pulsar - webport: 80 # Web port of pulsar, if you connect directly without proxy, should use 8080 - maxMessageSize: 5242880 # 5 * 1024 * 1024 Bytes, Maximum size of each message in pulsar. 
+ address: localhost # Address of pulsar + port: 6650 # Port of Pulsar + webport: 80 # Web port of pulsar, if you connect directly without proxy, should use 8080 + maxMessageSize: 5242880 # 5 * 1024 * 1024 Bytes, Maximum size of each message in pulsar. tenant: public namespace: default - requestTimeout: 60 # pulsar client global request timeout in seconds - enableClientMetrics: false # Whether to register pulsar client metrics into milvus metrics path. + requestTimeout: 60 # pulsar client global request timeout in seconds + enableClientMetrics: false # Whether to register pulsar client metrics into milvus metrics path. # If you want to enable kafka, needs to comment the pulsar configs # kafka: @@ -131,11 +131,11 @@ rocksmq: # The path where the message is stored in rocksmq # please adjust in embedded Milvus: /tmp/milvus/rdb_data path: /var/lib/milvus/rdb_data - lrucacheratio: 0.06 # rocksdb cache memory ratio - rocksmqPageSize: 67108864 # 64 MB, 64 * 1024 * 1024 bytes, The size of each page of messages in rocksmq - retentionTimeInMinutes: 4320 # 3 days, 3 * 24 * 60 minutes, The retention time of the message in rocksmq. - retentionSizeInMB: 8192 # 8 GB, 8 * 1024 MB, The retention size of the message in rocksmq. - compactionInterval: 86400 # 1 day, trigger rocksdb compaction every day to remove deleted data + lrucacheratio: 0.06 # rocksdb cache memory ratio + rocksmqPageSize: 67108864 # 64 MB, 64 * 1024 * 1024 bytes, The size of each page of messages in rocksmq + retentionTimeInMinutes: 4320 # 3 days, 3 * 24 * 60 minutes, The retention time of the message in rocksmq. + retentionSizeInMB: 8192 # 8 GB, 8 * 1024 MB, The retention size of the message in rocksmq. + compactionInterval: 86400 # 1 day, trigger rocksdb compaction every day to remove deleted data # compaction compression type, only support use 0,7. # 0 means not compress, 7 will use zstd # len of types means num of rocksdb level. @@ -144,36 +144,36 @@ rocksmq: # natsmq configuration. # more detail: https://docs.nats.io/running-a-nats-service/configuration natsmq: - server: # server side configuration for natsmq. - port: 4222 # 4222 by default, Port for nats server listening. - storeDir: /var/lib/milvus/nats # /var/lib/milvus/nats by default, directory to use for JetStream storage of nats. - maxFileStore: 17179869184 # (B) 16GB by default, Maximum size of the 'file' storage. - maxPayload: 8388608 # (B) 8MB by default, Maximum number of bytes in a message payload. - maxPending: 67108864 # (B) 64MB by default, Maximum number of bytes buffered for a connection Applies to client connections. - initializeTimeout: 4000 # (ms) 4s by default, waiting for initialization of natsmq finished. + server: # server side configuration for natsmq. + port: 4222 # 4222 by default, Port for nats server listening. + storeDir: /var/lib/milvus/nats # /var/lib/milvus/nats by default, directory to use for JetStream storage of nats. + maxFileStore: 17179869184 # (B) 16GB by default, Maximum size of the 'file' storage. + maxPayload: 8388608 # (B) 8MB by default, Maximum number of bytes in a message payload. + maxPending: 67108864 # (B) 64MB by default, Maximum number of bytes buffered for a connection Applies to client connections. + initializeTimeout: 4000 # (ms) 4s by default, waiting for initialization of natsmq finished. monitor: - trace: false # false by default, If true enable protocol trace log messages. - debug: false # false by default, If true enable debug log messages. - logTime: true # true by default, If set to false, log without timestamps. 
- logFile: /tmp/milvus/logs/nats.log # /tmp/milvus/logs/nats.log by default, Log file path relative to .. of milvus binary if use relative path. - logSizeLimit: 536870912 # (B) 512MB by default, Size in bytes after the log file rolls over to a new one. + trace: false # false by default, If true enable protocol trace log messages. + debug: false # false by default, If true enable debug log messages. + logTime: true # true by default, If set to false, log without timestamps. + logFile: /tmp/milvus/logs/nats.log # /tmp/milvus/logs/nats.log by default, Log file path relative to .. of milvus binary if use relative path. + logSizeLimit: 536870912 # (B) 512MB by default, Size in bytes after the log file rolls over to a new one. retention: - maxAge: 4320 # (min) 3 days by default, Maximum age of any message in the P-channel. - maxBytes: # (B) None by default, How many bytes the single P-channel may contain. Removing oldest messages if the P-channel exceeds this size. - maxMsgs: # None by default, How many message the single P-channel may contain. Removing oldest messages if the P-channel exceeds this limit. + maxAge: 4320 # (min) 3 days by default, Maximum age of any message in the P-channel. + maxBytes: # (B) None by default, How many bytes the single P-channel may contain. Removing oldest messages if the P-channel exceeds this size. + maxMsgs: # None by default, How many message the single P-channel may contain. Removing oldest messages if the P-channel exceeds this limit. # Related configuration of rootCoord, used to handle data definition language (DDL) and data control language (DCL) requests rootCoord: - dmlChannelNum: 16 # The number of dml channels created at system startup - maxDatabaseNum: 64 # Maximum number of database - maxPartitionNum: 4096 # Maximum number of partitions in a collection - minSegmentSizeToEnableIndex: 1024 # It's a threshold. When the segment size is less than this value, the segment will not be indexed - importTaskExpiration: 900 # (in seconds) Duration after which an import task will expire (be killed). Default 900 seconds (15 minutes). - importTaskRetention: 86400 # (in seconds) Milvus will keep the record of import tasks for at least `importTaskRetention` seconds. Default 86400, seconds (24 hours). + dmlChannelNum: 16 # The number of dml channels created at system startup + maxDatabaseNum: 64 # Maximum number of database + maxPartitionNum: 4096 # Maximum number of partitions in a collection + minSegmentSizeToEnableIndex: 1024 # It's a threshold. When the segment size is less than this value, the segment will not be indexed + importTaskExpiration: 900 # (in seconds) Duration after which an import task will expire (be killed). Default 900 seconds (15 minutes). + importTaskRetention: 86400 # (in seconds) Milvus will keep the record of import tasks for at least `importTaskRetention` seconds. Default 86400, seconds (24 hours). enableActiveStandby: false # can specify ip for example # ip: 127.0.0.1 - ip: # if not specify address, will use the first unicastable address as local ip + ip: # if not specify address, will use the first unicastable address as local ip port: 53100 grpc: serverMaxSendSize: 536870912 @@ -183,33 +183,33 @@ rootCoord: # Related configuration of proxy, used to validate client requests and reduce the returned results. 
proxy: - timeTickInterval: 200 # ms, the interval that proxy synchronize the time tick - healthCheckTimeout: 3000 # ms, the interval that to do component healthy check + timeTickInterval: 200 # ms, the interval that proxy synchronize the time tick + healthCheckTimeout: 3000 # ms, the interval that to do component healthy check msgStream: timeTick: bufSize: 512 - maxNameLength: 255 # Maximum length of name for a collection or alias + maxNameLength: 255 # Maximum length of name for a collection or alias # Maximum number of fields in a collection. # As of today (2.2.0 and after) it is strongly DISCOURAGED to set maxFieldNum >= 64. # So adjust at your risk! maxFieldNum: 64 - maxShardNum: 16 # Maximum number of shards in a collection - maxDimension: 32768 # Maximum dimension of a vector + maxShardNum: 16 # Maximum number of shards in a collection + maxDimension: 32768 # Maximum dimension of a vector # Whether to produce gin logs.\n # please adjust in embedded Milvus: false ginLogging: true - maxTaskNum: 1024 # max task number of proxy task queue + maxTaskNum: 1024 # max task number of proxy task queue accessLog: enable: false - filename: "" # Log filename, leave empty to use stdout. + filename: "" # Log filename, leave empty to use stdout. # localPath: /tmp/milvus_accesslog // log file rootpath # maxSize: 64 # max log file size of singal log file to trigger rotate. http: - enabled: true # Whether to enable the http server - debug_mode: false # Whether to enable http server debug mode + enabled: true # Whether to enable the http server + debug_mode: false # Whether to enable http server debug mode # can specify ip for example # ip: 127.0.0.1 - ip: # if not specify address, will use the first unicastable address as local ip + ip: # if not specify address, will use the first unicastable address as local ip port: 19530 internalPort: 19529 grpc: @@ -220,25 +220,25 @@ proxy: # Related configuration of queryCoord, used to manage topology and load balancing for the query nodes, and handoff from growing segments to sealed segments. 
queryCoord: - autoHandoff: true # Enable auto handoff - autoBalance: false # Enable auto balance - balancer: ScoreBasedBalancer # Balancer to use - globalRowCountFactor: 0.1 # expert parameters, only used by scoreBasedBalancer - scoreUnbalanceTolerationFactor: 0.05 # expert parameters, only used by scoreBasedBalancer - reverseUnBalanceTolerationFactor: 1.3 #expert parameters, only used by scoreBasedBalancer - overloadedMemoryThresholdPercentage: 90 # The threshold percentage that memory overload + autoHandoff: true # Enable auto handoff + autoBalance: false # Enable auto balance + balancer: ScoreBasedBalancer # Balancer to use + globalRowCountFactor: 0.1 # expert parameters, only used by scoreBasedBalancer + scoreUnbalanceTolerationFactor: 0.05 # expert parameters, only used by scoreBasedBalancer + reverseUnBalanceTolerationFactor: 1.3 # expert parameters, only used by scoreBasedBalancer + overloadedMemoryThresholdPercentage: 90 # The threshold percentage that memory overload balanceIntervalSeconds: 60 memoryUsageMaxDifferencePercentage: 30 checkInterval: 1000 - channelTaskTimeout: 60000 # 1 minute - segmentTaskTimeout: 120000 # 2 minute + channelTaskTimeout: 60000 # 1 minute + segmentTaskTimeout: 120000 # 2 minute distPullInterval: 500 - heartbeatAvailableInterval: 10000 # 10s, Only QueryNodes which fetched heartbeats within the duration are available + heartbeatAvailableInterval: 10000 # 10s, Only QueryNodes which fetched heartbeats within the duration are available loadTimeoutSeconds: 600 checkHandoffInterval: 5000 # can specify ip for example # ip: 127.0.0.1 - ip: # if not specify address, will use the first unicastable address as local ip + ip: # if not specify address, will use the first unicastable address as local ip port: 19531 grpc: serverMaxSendSize: 536870912 @@ -247,35 +247,35 @@ queryCoord: clientMaxRecvSize: 268435456 taskMergeCap: 1 taskExecutionCap: 256 - enableActiveStandby: false # Enable active-standby - brokerTimeout: 5000 # broker rpc timeout in milliseconds + enableActiveStandby: false # Enable active-standby + brokerTimeout: 5000 # broker rpc timeout in milliseconds # Related configuration of queryNode, used to run hybrid search between vector and scalar data. queryNode: dataSync: flowGraph: - maxQueueLength: 16 # Maximum length of task queue in flowgraph - maxParallelism: 1024 # Maximum number of tasks executed in parallel in the flowgraph + maxQueueLength: 16 # Maximum length of task queue in flowgraph + maxParallelism: 1024 # Maximum number of tasks executed in parallel in the flowgraph stats: - publishInterval: 1000 # Interval for querynode to report node information (milliseconds) + publishInterval: 1000 # Interval for querynode to report node information (milliseconds) segcore: - cgoPoolSizeRatio: 2.0 # cgo pool size ratio to max read concurrency + cgoPoolSizeRatio: 2.0 # cgo pool size ratio to max read concurrency knowhereThreadPoolNumRatio: 4 # Use more threads to make better use of SSD throughput in disk index. # This parameter is only useful when enable-disk = true. # And this value should be a number greater than 1 and less than 32. - chunkRows: 1024 # The number of vectors in a chunk. - growing: # growing a vector index for growing segment to accelerate search + chunkRows: 1024 # The number of vectors in a chunk. 

# Related configuration of queryNode, used to run hybrid search between vector and scalar data.
queryNode:
  dataSync:
    flowGraph:
-      maxQueueLength: 16 # Maximum length of task queue in flowgraph
-      maxParallelism: 1024 # Maximum number of tasks executed in parallel in the flowgraph
+      maxQueueLength: 16  # Maximum length of the task queue in the flowgraph
+      maxParallelism: 1024  # Maximum number of tasks executed in parallel in the flowgraph
  stats:
-    publishInterval: 1000 # Interval for querynode to report node information (milliseconds)
+    publishInterval: 1000  # Interval at which the querynode reports node information (milliseconds)
  segcore:
-    cgoPoolSizeRatio: 2.0 # cgo pool size ratio to max read concurrency
+    cgoPoolSizeRatio: 2.0  # cgo pool size as a ratio of max read concurrency
    knowhereThreadPoolNumRatio: 4
    # Use more threads to make better use of SSD throughput in disk index.
    # This parameter is only useful when enable-disk = true.
    # And this value should be a number greater than 1 and less than 32.
-    chunkRows: 1024 # The number of vectors in a chunk.
-    growing: # growing a vector index for growing segment to accelerate search
+    chunkRows: 1024  # The number of vectors in a chunk.
+    growing:  # build a vector index on growing segments to accelerate search
      enableIndex: true
-      nlist: 128 # growing segment index nlist
-      nprobe: 16 # nprobe to search growing segment, based on your accuracy requirement, must smaller than nlist
-  loadMemoryUsageFactor: 1 # The multiply factor of calculating the memory usage while loading segments
-  enableDisk: false # enable querynode load disk index, and search on disk index
+      nlist: 128  # growing segment index nlist
+      nprobe: 16  # nprobe used to search growing segments; set it per your accuracy requirement, and it must be smaller than nlist
+  loadMemoryUsageFactor: 1  # multiplier used to estimate memory usage while loading segments
+  enableDisk: false  # allow the querynode to load disk indexes and search on them
  maxDiskUsagePercentage: 95
  cache:
-    enabled: true # deprecated, TODO: remove it
-    memoryLimit: 2147483648 # 2 GB, 2 * 1024 *1024 *1024 # deprecated, TODO: remove it
-    readAheadPolicy: willneed # The read ahead policy of chunk cache, options: `normal, random, sequential, willneed, dontneed`
+    enabled: true  # deprecated, TODO: remove it
+    memoryLimit: 2147483648  # 2 GB, 2 * 1024 * 1024 * 1024 # deprecated, TODO: remove it
+    readAheadPolicy: willneed  # The read-ahead policy of the chunk cache, options: `normal, random, sequential, willneed, dontneed`
  grouping:
    enabled: true
    maxNQ: 1000
@@ -289,7 +289,7 @@ queryNode:
  # Max read concurrency must be greater than or equal to 1, and less than or equal to runtime.NumCPU * 100.
  # (0, 100]
  maxReadConcurrentRatio: 1
-  cpuRatio: 10 # ratio used to estimate read task cpu usage.
+  cpuRatio: 10  # ratio used to estimate read task CPU usage
  maxTimestampLag: 86400
  # read task schedule policy: fifo(by default), user-task-polling.
  scheduleReadPolicy:
@@ -303,13 +303,13 @@ queryNode:
    name: fifo
    maxPendingTask: 10240
    # user-task-polling configure:
-    taskQueueExpire: 60 # 1 min by default, expire time of inner user task queue since queue is empty.
-    enableCrossUserGrouping: false # false by default Enable Cross user grouping when using user-task-polling policy. (close it if task of any user can not merge others).
-    maxPendingTaskPerUser: 1024 # 50 by default, max pending task in scheduler per user.
+    taskQueueExpire: 60  # 1 min by default; how long an inner user task queue lives after it becomes empty
+    enableCrossUserGrouping: false  # false by default; enable cross-user grouping under the user-task-polling policy (disable it if one user's tasks cannot be merged with others')
+    maxPendingTaskPerUser: 1024  # 50 by default, max pending tasks per user in the scheduler
  # can specify ip for example
  # ip: 127.0.0.1
-  ip: # if not specify address, will use the first unicastable address as local ip
+  ip:  # if not specified, the first unicastable address is used as the local IP
  port: 21123
  grpc:
    serverMaxSendSize: 536870912
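# Example (illustrative): letting the query node load and search on-disk vector
# indexes; both keys appear in the section above:
#
# queryNode:
#   enableDisk: true
#   maxDiskUsagePercentage: 95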
@@ -324,16 +324,16 @@ indexCoord:
    withCred: false
  nodeID: 0
  segment:
-    minSegmentNumRowsToEnableIndex: 1024 # It's a threshold. When the segment num rows is less than this value, the segment will not be indexed
+    minSegmentNumRowsToEnableIndex: 1024  # a threshold: when a segment has fewer rows than this value, it will not be indexed

indexNode:
  scheduler:
    buildParallel: 1
-  enableDisk: true # enable index node build disk vector index
+  enableDisk: true  # allow the index node to build disk vector indexes
  maxDiskUsagePercentage: 95
  # can specify ip for example
  # ip: 127.0.0.1
-  ip: # if not specify address, will use the first unicastable address as local ip
+  ip:  # if not specified, the first unicastable address is used as the local IP
  port: 21121
  grpc:
    serverMaxSendSize: 536870912
@@ -343,12 +343,12 @@ indexNode:

dataCoord:
  channel:
-    watchTimeoutInterval: 300 # Timeout on watching channels (in seconds). Datanode tickler update watch progress will reset timeout timer.
-    balanceSilentDuration: 300 # The duration before the channelBalancer on datacoord to run
-    balanceInterval: 360 #The interval for the channelBalancer on datacoord to check balance status
+    watchTimeoutInterval: 300  # Timeout on watching channels (in seconds); a DataNode tickler update to watch progress resets the timer
+    balanceSilentDuration: 300  # The silent duration before the channelBalancer on datacoord runs
+    balanceInterval: 360  # The interval at which the channelBalancer on datacoord checks balance status
  segment:
-    maxSize: 512 # Maximum size of a segment in MB
-    diskSegmentMaxSize: 2048 # Maximum size of a segment in MB for collection which has Disk index
+    maxSize: 512  # Maximum size of a segment in MB
+    diskSegmentMaxSize: 2048  # Maximum size in MB of a segment for collections that have a disk index
    sealProportion: 0.23
    # The time of the assignment expiration in ms
    # Warning! this parameter is an expert variable and closely related to data integrity. Without specific
    # target and solid understanding of the scenarios, it should not be changed. If it's necessary to alter
    # this parameter, make sure that the newly changed value is larger than the previous value used before restart
    # otherwise there could be a large possibility of data loss
    assignmentExpiration: 2000
-    maxLife: 86400 # The max lifetime of segment in seconds, 24*60*60
+    maxLife: 86400  # The max lifetime of a segment in seconds, 24*60*60
    # If a segment didn't accept dml records in maxIdleTime and the size of segment is greater than
    # minSizeFromIdleToSealed, Milvus will automatically seal it.
    # The max idle time of segment in seconds, 10*60.
    maxIdleTime: 600
-    minSizeFromIdleToSealed: 16 # The min size in MB of segment which can be idle from sealed.
+    minSizeFromIdleToSealed: 16  # The minimum size in MB at which an idle segment can be sealed
    # The max number of binlog file for one segment, the segment will be sealed if
    # the number of binlog file reaches to max value.
    maxBinlogFileNumber: 32
-    smallProportion: 0.5 # The segment is considered as "small segment" when its # of rows is smaller than
+    smallProportion: 0.5  # The segment is considered a "small segment" when its # of rows is smaller than
    # (smallProportion * segment max # of rows).
    # A compaction will happen on small segments if the segment after compaction will have
    compactableProportion: 0.85
@@ -373,22 +373,22 @@ dataCoord:
    # MUST BE GREATER THAN OR EQUAL TO smallProportion!!!
    # During compaction, the segment # of rows is able to exceed segment max # of rows by (expansionRate-1) * 100%.
    expansionRate: 1.25
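# Worked example (values from the section above): with maxSize: 512 and
# sealProportion: 0.23, a segment becomes eligible for sealing at roughly
# 512 MB * 0.23 ≈ 118 MB, or earlier if it stays idle past maxIdleTime (600 s)
# while already larger than minSizeFromIdleToSealed (16 MB).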
-  enableCompaction: true # Enable data segment compaction
+  enableCompaction: true  # Enable data segment compaction
  compaction:
    enableAutoCompaction: true
-    rpcTimeout: 10 # compaction rpc request timeout in seconds
-    maxParallelTaskNum: 10 # max parallel compaction task number
+    rpcTimeout: 10  # compaction RPC request timeout in seconds
+    maxParallelTaskNum: 10  # max number of parallel compaction tasks
    indexBasedCompaction: true
  enableGarbageCollection: true
  gc:
-    interval: 3600 # gc interval in seconds
-    missingTolerance: 3600 # file meta missing tolerance duration in seconds, 3600
-    dropTolerance: 10800 # file belongs to dropped entity tolerance duration in seconds. 10800
+    interval: 3600  # gc interval in seconds
+    missingTolerance: 3600  # tolerance duration in seconds for files with missing meta, 3600
+    dropTolerance: 10800  # tolerance duration in seconds for files belonging to dropped entities, 10800
  enableActiveStandby: false
  # can specify ip for example
  # ip: 127.0.0.1
-  ip: # if not specify address, will use the first unicastable address as local ip
+  ip:  # if not specified, the first unicastable address is used as the local IP
  port: 13333
  grpc:
    serverMaxSendSize: 536870912
@@ -399,16 +399,16 @@ dataCoord:

dataNode:
  dataSync:
    flowGraph:
-      maxQueueLength: 16 # Maximum length of task queue in flowgraph
-      maxParallelism: 1024 # Maximum number of tasks executed in parallel in the flowgraph
-      maxParallelSyncTaskNum: 6 # Maximum number of sync tasks executed in parallel in each flush manager
+      maxQueueLength: 16  # Maximum length of the task queue in the flowgraph
+      maxParallelism: 1024  # Maximum number of tasks executed in parallel in the flowgraph
+      maxParallelSyncTaskNum: 6  # Maximum number of sync tasks executed in parallel in each flush manager
  segment:
-    insertBufSize: 16777216 # Max buffer size to flush for a single segment.
-    deleteBufBytes: 67108864 # Max buffer size to flush del for a single channel
-    syncPeriod: 600 # The period to sync segments if buffer is not empty.
+    insertBufSize: 16777216  # Max buffer size to flush for a single segment
+    deleteBufBytes: 67108864  # Max buffer size to flush deletes for a single channel
+    syncPeriod: 600  # The period at which to sync segments if the buffer is not empty
  # can specify ip for example
  # ip: 127.0.0.1
-  ip: # if not specify address, will use the first unicastable address as local ip
+  ip:  # if not specified, the first unicastable address is used as the local IP
  port: 21124
  grpc:
    serverMaxSendSize: 536870912
@@ -416,10 +416,10 @@ dataNode:
    clientMaxSendSize: 268435456
    clientMaxRecvSize: 268435456
  memory:
-    forceSyncEnable: true # `true` to force sync if memory usage is too high
-    forceSyncSegmentNum: 1 # number of segments to sync, segments with top largest buffer will be synced.
-    watermarkStandalone: 0.2 # memory watermark for standalone, upon reaching this watermark, segments will be synced.
-    watermarkCluster: 0.5 # memory watermark for cluster, upon reaching this watermark, segments will be synced.
+    forceSyncEnable: true  # `true` to force a sync if memory usage is too high
+    forceSyncSegmentNum: 1  # number of segments to sync; the segments with the largest buffers are synced first
+    watermarkStandalone: 0.2  # memory watermark for standalone; upon reaching it, segments will be synced
+    watermarkCluster: 0.5  # memory watermark for cluster; upon reaching it, segments will be synced
  timetick:
    byRPC: true
  channel:
@@ -430,14 +430,14 @@

# Configures the system log output.
log:
-  level: error # Only supports debug, info, warn, error, panic, or fatal. Default 'info'.
+  level: error  # Only supports debug, info, warn, error, panic, or fatal. Default 'info'.
  file:
-    rootPath: # root dir path to put logs, default "" means no log file will print. please adjust in embedded Milvus: /tmp/milvus/logs
-    maxSize: 300 # MB
-    maxAge: 10 # Maximum time for log retention in day.
+    rootPath:  # root directory for log files; the default "" writes no log file. please adjust in embedded Milvus: /tmp/milvus/logs
+    maxSize: 300  # MB
+    maxAge: 10  # Maximum log retention time in days
    maxBackups: 20
-  format: text # text or json
-  stdout: true # Stdout enable or not
+  format: text  # text or json
+  stdout: true  # Whether to enable stdout output

grpc:
  log:
@@ -450,9 +450,9 @@ grpc:
  keepAliveTime: 10000
  keepAliveTimeout: 20000
  maxMaxAttempts: 10
-  initialBackOff: 0.2 # seconds
-  maxBackoff: 10 # seconds
-  backoffMultiplier: 2.0 # deprecated
+  initialBackOff: 0.2  # seconds
+  maxBackoff: 10  # seconds
+  backoffMultiplier: 2.0  # deprecated
  clientMaxSendSize: 268435456
  clientMaxRecvSize: 268435456
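# Example (illustrative): switching from stdout-only logging to rotated JSON
# log files under a placeholder directory:
#
# log:
#   level: info
#   file:
#     rootPath: /tmp/milvus/logs
#     maxSize: 300
#     maxAge: 10
#     maxBackups: 20
#   format: json
#   stdout: false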
@@ -482,14 +482,14 @@ common:
    queryNodeSubNamePrefix: queryNode
    dataCoordSubNamePrefix: dataCoord
    dataNodeSubNamePrefix: dataNode
-  defaultPartitionName: _default # default partition name for a collection
-  defaultIndexName: _default_idx # default index name
-  entityExpiration: -1 # Entity expiration in seconds, CAUTION -1 means never expire
-  indexSliceSize: 16 # MB
+  defaultPartitionName: _default  # default partition name for a collection
+  defaultIndexName: _default_idx  # default index name
+  entityExpiration: -1  # Entity expiration in seconds, CAUTION: -1 means never expire
+  indexSliceSize: 16  # MB
  threadCoreCoefficient:
-    highPriority: 10 # This parameter specify how many times the number of threads is the number of cores in high priority thread pool
-    middlePriority: 5 # This parameter specify how many times the number of threads is the number of cores in middle priority thread pool
-    lowPriority: 1 # This parameter specify how many times the number of threads is the number of cores in low priority thread pool
+    highPriority: 10  # thread count as a multiple of the core count in the high-priority thread pool
+    middlePriority: 5  # thread count as a multiple of the core count in the middle-priority thread pool
+    lowPriority: 1  # thread count as a multiple of the core count in the low-priority thread pool
  DiskIndex:
    MaxDegree: 56
    SearchListSize: 100
@@ -498,9 +498,9 @@ common:
    SearchCacheBudgetGBRatio: 0.1
    LoadNumThreadRatio: 8
    BeamWidthRatio: 4
-  gracefulTime: 5000 # milliseconds. it represents the interval (in ms) by which the request arrival time needs to be subtracted in the case of Bounded Consistency.
-  gracefulStopTimeout: 1800 # seconds. it will force quit the server if the graceful stop process is not completed during this time.
-  storageType: minio # please adjust in embedded Milvus: local
+  gracefulTime: 5000  # milliseconds. the interval (in ms) subtracted from the request arrival time under Bounded Consistency.
+  gracefulStopTimeout: 1800  # seconds. the server is force-quit if the graceful stop does not complete within this time.
+  storageType: minio  # please adjust in embedded Milvus: local
  # Default value: auto
  # Valid values: [auto, avx512, avx2, avx, sse4_2]
  # This configuration is only used by querynode and indexnode; it selects the CPU instruction set for searching and index-building.
@@ -512,8 +512,8 @@ common:
  # superUsers: root
  tlsMode: 0
  session:
-    ttl: 30 # ttl value when session granting a lease to register service
-    retryTimes: 30 # retry times when session sending etcd requests
+    ttl: 30  # TTL value for the lease the session grants when registering a service
+    retryTimes: 30  # number of retries when the session sends etcd requests

  # preCreatedTopic decides whether to use an existing topic
  preCreatedTopic:
@@ -524,16 +524,16 @@ common:
  # need to set a separate topic to represent the currently consumed timestamp for each channel
  timeticker: "timetick-channel"

-  ImportMaxFileSize: 17179869184 # 16 * 1024 * 1024 * 1024
+  ImportMaxFileSize: 17179869184  # 16 * 1024 * 1024 * 1024
  # max file size to import for bulkInsert

  locks:
    metrics:
      enable: false
    threshold:
-      info: 500 # minimum milliseconds for printing durations in info level
-      warn: 1000 # minimum milliseconds for printing durations in warn level
-  ttMsgEnabled: true # Whether the instance disable sending ts messages
+      info: 500  # minimum milliseconds for printing durations at info level
+      warn: 1000  # minimum milliseconds for printing durations at warn level
+  ttMsgEnabled: true  # Whether the instance disables sending ts messages

# QuotaConfig, configurations of Milvus quota and limits.
# By default, we enable:
@@ -547,7 +547,7 @@ common:
# 4. DQL result rate protection;
# If necessary, you can also manually force to deny RW requests.
quotaAndLimits:
-  enabled: true # `true` to enable quota and limits, `false` to disable.
+  enabled: true  # `true` to enable quotas and limits, `false` to disable
  limits:
    maxCollectionNum: 65536
    maxCollectionNumPerDB: 65536
@@ -557,49 +557,49 @@
  quotaCenterCollectInterval: 3
  ddl:
    enabled: false
-    collectionRate: -1 # qps, default no limit, rate for CreateCollection, DropCollection, LoadCollection, ReleaseCollection
-    partitionRate: -1 # qps, default no limit, rate for CreatePartition, DropPartition, LoadPartition, ReleasePartition
+    collectionRate: -1  # qps, default no limit, rate for CreateCollection, DropCollection, LoadCollection, ReleaseCollection
+    partitionRate: -1  # qps, default no limit, rate for CreatePartition, DropPartition, LoadPartition, ReleasePartition
  indexRate:
    enabled: false
-    max: -1 # qps, default no limit, rate for CreateIndex, DropIndex
+    max: -1  # qps, default no limit, rate for CreateIndex, DropIndex
  flushRate:
    enabled: false
-    max: -1 # qps, default no limit, rate for flush
+    max: -1  # qps, default no limit, rate for flush
  compactionRate:
    enabled: false
-    max: -1 # qps, default no limit, rate for manualCompaction
+    max: -1  # qps, default no limit, rate for manualCompaction
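# Example (illustrative): enabling the DDL limiter with placeholder rates;
# -1 leaves a rate unlimited:
#
#   ddl:
#     enabled: true
#     collectionRate: 10   # qps across collection-level DDL operations
#     partitionRate: 10    # qps across partition-level DDL operations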
  dml:
    # dml limit rates, default no limit.
    # The maximum rate will not be greater than max.
    enabled: false
    insertRate:
      collection:
-        max: -1 # MB/s, default no limit
-      max: -1 # MB/s, default no limit
+        max: -1  # MB/s, default no limit
+      max: -1  # MB/s, default no limit
    upsertRate:
      collection:
-        max: -1 # MB/s, default no limit
-      max: -1 # MB/s, default no limit
+        max: -1  # MB/s, default no limit
+      max: -1  # MB/s, default no limit
    deleteRate:
      collection:
-        max: -1 # MB/s, default no limit
-      max: -1 # MB/s, default no limit
+        max: -1  # MB/s, default no limit
+      max: -1  # MB/s, default no limit
    bulkLoadRate:
      collection:
-        max: -1 # MB/s, default no limit, not support yet. TODO: limit bulkLoad rate
-      max: -1 # MB/s, default no limit, not support yet. TODO: limit bulkLoad rate
+        max: -1  # MB/s, default no limit, not supported yet. TODO: limit bulkLoad rate
+      max: -1  # MB/s, default no limit, not supported yet. TODO: limit bulkLoad rate
  dql:
    # dql limit rates, default no limit.
    # The maximum rate will not be greater than max.
    enabled: false
    searchRate:
      collection:
-        max: -1 # vps (vectors per second), default no limit
-      max: -1 # vps (vectors per second), default no limit
+        max: -1  # vps (vectors per second), default no limit
+      max: -1  # vps (vectors per second), default no limit
    queryRate:
      collection:
-        max: -1 # qps, default no limit
-      max: -1 # qps, default no limit
+        max: -1  # qps, default no limit
+      max: -1  # qps, default no limit
  limitWriting:
    # forceDeny false means dml requests are allowed (except for some
    # specific conditions, such as the memory of nodes reaching the watermark), true means always reject all dml requests.
@@ -616,10 +616,10 @@ quotaAndLimits:
      # When memoryLowWaterLevel < memory usage < memoryHighWaterLevel, reduce the dml rate;
      # When memory usage < memoryLowWaterLevel, no action.
      enabled: true
-      dataNodeMemoryLowWaterLevel: 0.85 # (0, 1], memoryLowWaterLevel in DataNodes
-      dataNodeMemoryHighWaterLevel: 0.95 # (0, 1], memoryHighWaterLevel in DataNodes
-      queryNodeMemoryLowWaterLevel: 0.85 # (0, 1], memoryLowWaterLevel in QueryNodes
-      queryNodeMemoryHighWaterLevel: 0.95 # (0, 1], memoryHighWaterLevel in QueryNodes
+      dataNodeMemoryLowWaterLevel: 0.85  # (0, 1], memoryLowWaterLevel in DataNodes
+      dataNodeMemoryHighWaterLevel: 0.95  # (0, 1], memoryHighWaterLevel in DataNodes
+      queryNodeMemoryLowWaterLevel: 0.85  # (0, 1], memoryLowWaterLevel in QueryNodes
+      queryNodeMemoryHighWaterLevel: 0.95  # (0, 1], memoryHighWaterLevel in QueryNodes
    growingSegmentsSizeProtection:
      # No action will be taken if the growing segments size is less than the low watermark.
      # When the growing segments size exceeds the low watermark, the dml rate will be reduced,
@@ -629,9 +629,9 @@ quotaAndLimits:
      lowWaterLevel: 0.2
      highWaterLevel: 0.4
    diskProtection:
-      enabled: true # When the total file size of object storage is greater than `diskQuota`, all dml requests would be rejected;
-      diskQuota: -1 # MB, (0, +inf), default no limit
-      diskQuotaPerCollection: -1 # MB, (0, +inf), default no limit
+      enabled: true  # When the total file size in object storage exceeds `diskQuota`, all dml requests are rejected
+      diskQuota: -1  # MB, (0, +inf), default no limit
+      diskQuotaPerCollection: -1  # MB, (0, +inf), default no limit
  limitReading:
    # forceDeny false means dql requests are allowed (except for some
    # specific conditions, such as the collection being dropped), true means always reject all dql requests.
@@ -669,7 +669,7 @@ trace:
  # Fractions >= 1 will always sample. Fractions < 0 are treated as zero.
sampleFraction: 0 jaeger: - url: # "http://127.0.0.1:14268/api/traces" + url: # "http://127.0.0.1:14268/api/traces" # when exporter is jaeger should set the jaeger's URL autoIndex: diff --git a/community/fm-asr-streaming-rag/nemo-retriever/model_configs/nv-rerank-qa-mistral-4b-A6000.yaml b/community/fm-asr-streaming-rag/nemo-retriever/model_configs/nv-rerank-qa-mistral-4b-A6000.yaml index a88baa3f..6c9b550f 100644 --- a/community/fm-asr-streaming-rag/nemo-retriever/model_configs/nv-rerank-qa-mistral-4b-A6000.yaml +++ b/community/fm-asr-streaming-rag/nemo-retriever/model_configs/nv-rerank-qa-mistral-4b-A6000.yaml @@ -14,4 +14,4 @@ models: max_shapes: [64, 512] max_queue_delay_microseconds: 100 dtype: float16 - override_layernorm_precision_to_fp32: true \ No newline at end of file + override_layernorm_precision_to_fp32: true diff --git a/community/fm-asr-streaming-rag/nim/configs/mistral-7b.yaml b/community/fm-asr-streaming-rag/nim/configs/mistral-7b.yaml index b978ca5f..9756782c 100644 --- a/community/fm-asr-streaming-rag/nim/configs/mistral-7b.yaml +++ b/community/fm-asr-streaming-rag/nim/configs/mistral-7b.yaml @@ -14,7 +14,13 @@ pipeline: num_instances: 4 preprocessor: prompt_templates: - chat: "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'].strip() + '\n\n' %}{% else %}{% set loop_messages = messages %}{% set system_message = '' %}{% endif %}{{ '' }}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 %}{% set content = system_message + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + '' }}{% endif %}{% endfor %}" + chat: "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'].strip() + + '\n\n' %}{% else %}{% set loop_messages = messages %}{% set system_message = '' %}{% endif %}{{ '' }}{% for message + in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles + must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 %}{% set content = system_message + + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + + '' }}{% endif %}{% endfor %}" stop_words: [] trt_llm: use: true @@ -36,4 +42,4 @@ trt_llm: enable_custom_all_reduce: 0 per_column_scaling: false kv_cache_free_gpu_mem_fraction: 0.3 - max_tokens_in_paged_kv_cache: "12288" \ No newline at end of file + max_tokens_in_paged_kv_cache: "12288" diff --git a/community/fm-asr-streaming-rag/sdr-holoscan/params.yml b/community/fm-asr-streaming-rag/sdr-holoscan/params.yml index 8059054a..f189553a 100644 --- a/community/fm-asr-streaming-rag/sdr-holoscan/params.yml +++ b/community/fm-asr-streaming-rag/sdr-holoscan/params.yml @@ -14,30 +14,30 @@ # limitations under the License. 
sensor: - sample_rate: 1_000_000 # Sample rate of sensor (Hz) + sample_rate: 1_000_000 # Sample rate of sensor (Hz) network_rx: - ip_addr: "0.0.0.0" - dst_port: 5005 - l4_proto: "udp" - batch_size: 47104 # Bytes to accumulate before emitting - header_bytes: 8 - max_payload_size: 1472 + ip_addr: "0.0.0.0" + dst_port: 5005 + l4_proto: "udp" + batch_size: 47104 # Bytes to accumulate before emitting + header_bytes: 8 + max_payload_size: 1472 pkt_format: - log_period: 5 # Log bandwidth processed every N (seconds) + log_period: 5 # Log bandwidth processed every N (seconds) lowpassfilt: - cutoff: 100_000 # Cutoff frequency of filter (Hz) - numtaps: 101 + cutoff: 100_000 # Cutoff frequency of filter (Hz) + numtaps: 101 resample: - sample_rate_out: 16_000 # Sample rate required by Riva ASR (16KHz PCM) - gain: 10.0 + sample_rate_out: 16_000 # Sample rate required by Riva ASR (16KHz PCM) + gain: 10.0 riva: - src_lang_code: "en-US" - uri: "localhost:50051" # Riva server URI ("riva_speech_api_port" in riva/config.sh) - automatic_punctuation: true - verbatim_transcripts: false - sample_rate: 16000 # Sample rate required by Riva ASR (16KHz PCM) \ No newline at end of file + src_lang_code: "en-US" + uri: "localhost:50051" # Riva server URI ("riva_speech_api_port" in riva/config.sh) + automatic_punctuation: true + verbatim_transcripts: false + sample_rate: 16000 # Sample rate required by Riva ASR (16KHz PCM) diff --git a/community/llm-prompt-design-helper/config.yaml b/community/llm-prompt-design-helper/config.yaml index c806dcf4..a2eae6b7 100644 --- a/community/llm-prompt-design-helper/config.yaml +++ b/community/llm-prompt-design-helper/config.yaml @@ -1,66 +1,66 @@ default: &default - system_prompt: > - You are an assistant to help answer user's question. Politely answer the question based on your knowledge. - few_shot_examples: - # - role: user - # content: | - # The odd numbers in this group add up to an even number: 4, 8, 9, 15, 12, 2, 1. + system_prompt: > + You are an assistant to help answer user's question. Politely answer the question based on your knowledge. + few_shot_examples: + # - role: user + # content: | + # The odd numbers in this group add up to an even number: 4, 8, 9, 15, 12, 2, 1. - # - role: assistant - # content: | - # The answer is False. + # - role: assistant + # content: | + # The answer is False. - # - role: user - # content: | - # The odd numbers in this group add up to an even number: 17, 10, 19, 4, 8, 12, 24. + # - role: user + # content: | + # The odd numbers in this group add up to an even number: 17, 10, 19, 4, 8, 12, 24. - # - role: assistant - # content: | - # The answer is True. - temperature: 0.0 - top_p: 0.7 - max_tokens: 1024 - seed: 42 + # - role: assistant + # content: | + # The answer is True. + temperature: 0.0 + top_p: 0.7 + max_tokens: 1024 + seed: 42 -"meta/llama3-70b-instruct": - system_prompt: > - You are an assistant to help answer user's question. Politely answer the question based on your knowledge. - # few_shot_examples: - # - role: user - # content: | - # The odd numbers in this group add up to an even number: 4, 8, 9, 15, 12, 2, 1. +"meta/llama3-70b-instruct": + system_prompt: > + You are an assistant to help answer user's question. Politely answer the question based on your knowledge. + # few_shot_examples: + # - role: user + # content: | + # The odd numbers in this group add up to an even number: 4, 8, 9, 15, 12, 2, 1. - # - role: assistant - # content: | - # The answer is False. + # - role: assistant + # content: | + # The answer is False. 
- # - role: user - # content: | - # The odd numbers in this group add up to an even number: 17, 10, 19, 4, 8, 12, 24. + # - role: user + # content: | + # The odd numbers in this group add up to an even number: 17, 10, 19, 4, 8, 12, 24. - # - role: assistant - # content: | - # The answer is True. - temperature: 0.0 - top_p: 0.7 - max_tokens: 1024 - seed: 42 + # - role: assistant + # content: | + # The answer is True. + temperature: 0.0 + top_p: 0.7 + max_tokens: 1024 + seed: 42 "nvidia/nemotron-4-340b-instruct": - system_prompt: > - You are an assistant to help answer user's question. Politely answer the question based on your knowledge. - few_shot_examples: - - temperature: 0.1 - top_p: 0.7 - max_tokens: 1024 - seed: 42 + system_prompt: > + You are an assistant to help answer user's question. Politely answer the question based on your knowledge. + few_shot_examples: + + temperature: 0.1 + top_p: 0.7 + max_tokens: 1024 + seed: 42 "microsoft/phi-3-mini-128k-instruct": - system_prompt: > - You are an assistant to help answer user's question. Politely answer the question based on your knowledge. - few_shot_examples: - - temperature: 0.1 - top_p: 0.7 - max_tokens: 1024 - seed: 42 \ No newline at end of file + system_prompt: > + You are an assistant to help answer user's question. Politely answer the question based on your knowledge. + few_shot_examples: + + temperature: 0.1 + top_p: 0.7 + max_tokens: 1024 + seed: 42 diff --git a/community/oran-chatbot-multimodal/config.yaml b/community/oran-chatbot-multimodal/config.yaml index 08fcb163..754da5b9 100644 --- a/community/oran-chatbot-multimodal/config.yaml +++ b/community/oran-chatbot-multimodal/config.yaml @@ -1,6 +1,6 @@ ## Default settings nvidia_api_key: "nvapi--***" -## Set these to required models endpoints from NVIDIA NGC +## Set these to required models endpoints from NVIDIA NGC llm_model: "mistralai/mixtral-8x7b-instruct-v0.1" embedding_model: "nvidia/nv-embedqa-e5-v5" reranker_model: "cross-encoder/ms-marco-MiniLM-L-6-v2" diff --git a/community/rag-developer-chatbot/docker-compose-dev-rag.yaml b/community/rag-developer-chatbot/docker-compose-dev-rag.yaml index ac0e9286..ddfae574 100644 --- a/community/rag-developer-chatbot/docker-compose-dev-rag.yaml +++ b/community/rag-developer-chatbot/docker-compose-dev-rag.yaml @@ -7,13 +7,13 @@ services: context: . 
dockerfile: Dockerfile.notebook ports: - - "8888:8888" - - "7860:7860" + - "8888:8888" + - "7860:7860" expose: - - "8888" - - "7860" + - "8888" + - "7860" env_file: - - compose.env + - compose.env deploy: resources: reservations: diff --git a/community/streaming_ingest_rag/docker-compose.yml b/community/streaming_ingest_rag/docker-compose.yml index eb4946cf..d7111b26 100644 --- a/community/streaming_ingest_rag/docker-compose.yml +++ b/community/streaming_ingest_rag/docker-compose.yml @@ -64,7 +64,7 @@ services: command: ["milvus", "run", "standalone"] hostname: milvus security_opt: - - seccomp:unconfined + - seccomp:unconfined environment: ETCD_ENDPOINTS: etcd:2379 MINIO_ADDRESS: minio:9000 @@ -146,7 +146,7 @@ services: depends_on: - kafka container_name: init-kafka - entrypoint: [ '/bin/sh', '-c' ] + entrypoint: ['/bin/sh', '-c'] command: | " # blocks until kafka is reachable @@ -160,7 +160,7 @@ services: kafka-topics --bootstrap-server kafka:19092 --list " networks: - - default + - default streaming-ingest-dev: restart: always @@ -177,9 +177,9 @@ services: resources: reservations: devices: - - driver: nvidia - device_ids: ['0'] - capabilities: [gpu] + - driver: nvidia + device_ids: ['0'] + capabilities: [gpu] image: streaming_ingest_morpheus_jupyter container_name: streaming-ingest-dev ports: @@ -215,9 +215,9 @@ services: resources: reservations: devices: - - driver: nvidia - device_ids: ['0'] - capabilities: [gpu] + - driver: nvidia + device_ids: ['0'] + capabilities: [gpu] networks: - default environment: @@ -268,14 +268,14 @@ services: "--model-control-mode=explicit", "--load-model", "all-MiniLM-L6-v2", - ] + ] deploy: resources: reservations: devices: - - driver: nvidia - device_ids: ['0'] - capabilities: [gpu] + - driver: nvidia + device_ids: ['0'] + capabilities: [gpu] healthcheck: test: ["CMD", "curl", "-f", "localhost:8000/v2/health/ready"] interval: 30s diff --git a/community/streaming_ingest_rag/docker/conda/environments/conda_env.yaml b/community/streaming_ingest_rag/docker/conda/environments/conda_env.yaml index 3eee24c8..20c350f2 100644 --- a/community/streaming_ingest_rag/docker/conda/environments/conda_env.yaml +++ b/community/streaming_ingest_rag/docker/conda/environments/conda_env.yaml @@ -18,7 +18,7 @@ channels: - defaults dependencies: - arxiv=1.4 - - onnx # required for triton model export + - onnx # required for triton model export - pip - pypdf=3.16 - newspaper3k=0.2 @@ -35,9 +35,9 @@ dependencies: ####### Pip Dependencies (keep sorted!) 
####### - pip: - - farm-haystack[file-conversion] - - grpcio-status==1.58 # To keep in sync with 1.58 grpcio which is installed for Morpheus - - langchain==0.0.310 - - pymilvus==2.3.2 # The conda package is woefully out of date and incorrect - - sentence_transformers==2.3.0 - - PyMuPDF==1.23.21 + - farm-haystack[file-conversion] + - grpcio-status==1.58 # To keep in sync with 1.58 grpcio which is installed for Morpheus + - langchain==0.0.310 + - pymilvus==2.3.2 # The conda package is woefully out of date and incorrect + - sentence_transformers==2.3.0 + - PyMuPDF==1.23.21 diff --git a/community/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/kafka_config.yaml b/community/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/kafka_config.yaml index 95b77972..36026793 100644 --- a/community/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/kafka_config.yaml +++ b/community/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/kafka_config.yaml @@ -38,7 +38,7 @@ vdb_pipeline: module_output_id: "output" transform_type: web_scraper deserialize_config: - output_batch_size: 2048 # Number of chunked documents per output batch + output_batch_size: 2048 # Number of chunked documents per output batch kafka_config: max_batch_size: 64 bootstrap_servers: "kafka:19092" @@ -65,13 +65,13 @@ vdb_pipeline: config: stage_config: enable_monitor: true - run_indefinitely: true # TODO map to kafka source + run_indefinitely: true # TODO map to kafka source namespace: "morpheus_examples_llm" module_id: "kafka_source_pipe" module_output_id: "output" transform_type: raw_chunker deserialize_config: - output_batch_size: 2048 # Number of chunked documents per output batch + output_batch_size: 2048 # Number of chunked documents per output batch kafka_config: max_batch_size: 256 bootstrap_servers: "kafka:19092" @@ -88,7 +88,7 @@ vdb_pipeline: chunk_size: 512 payload_column: "payload" vdb_config: - vdb_resource_name: "vdb_kafka_raw" + vdb_resource_name: "vdb_kafka_raw" tokenizer: model_kwargs: @@ -100,11 +100,11 @@ vdb_pipeline: model_name: "bert-base-uncased-hash" vdb: - batch_size: 16384 # Vector DB max batch size + batch_size: 16384 # Vector DB max batch size resource_name: "vdb_kafka_raw" # Identifier for the resource in the vector database embedding_size: 384 - write_time_interval: 20 # Max time between successive uploads - recreate: False # Whether to recreate the resource if it already exists + write_time_interval: 20 # Max time between successive uploads + recreate: false # Whether to recreate the resource if it already exists service: "milvus" # Specify the type of vector database uri: "http://milvus:19530" # URI for connecting to the Vector Database server resource_schemas: @@ -153,45 +153,5 @@ vdb_pipeline: - name: embedding dtype: FLOAT_VECTOR description: Embedding vectors representing the data entry - dim: 384 # Size of the embeddings to store in the vector database - description: Collection schema for diverse data sources - - vdb_kafka_scrape: - index_conf: - field_name: embedding - metric_type: L2 - index_type: HNSW - params: - M: 8 - efConstruction: 64 - - schema_conf: - enable_dynamic_field: true - schema_fields: - - name: id - dtype: INT64 - description: Primary key for the collection - is_primary: true - auto_id: true - - name: title - dtype: VARCHAR - description: Title or heading of the data entry - max_length: 65_535 - - name: source - dtype: VARCHAR - description: Source or origin of the data entry - max_length: 65_535 - - 
name: summary - dtype: VARCHAR - description: Brief summary or abstract of the data content - max_length: 65_535 - - name: content - dtype: VARCHAR - description: Main content or body of the data entry - max_length: 65_535 - - name: embedding - dtype: FLOAT_VECTOR - description: Embedding vectors representing the data entry - dim: 384 # Size of the embeddings to store in the vector database + dim: 384 # Size of the embeddings to store in the vector database description: Collection schema for diverse data sources - \ No newline at end of file diff --git a/community/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/vdb_config.yaml b/community/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/vdb_config.yaml index 340cbe3c..f0b8d8c3 100644 --- a/community/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/vdb_config.yaml +++ b/community/streaming_ingest_rag/morpheus_examples/streaming_ingest_rag/vdb_upload/vdb_config.yaml @@ -32,11 +32,11 @@ vdb_pipeline: - type: rss name: "rss_cve" config: - batch_size: 128 # Number of rss feeds per batch + batch_size: 128 # Number of rss feeds per batch cache_dir: "./.cache/http" cooldown_interval_sec: 600 - enable_cache: False - enable_monitor: True + enable_cache: false + enable_monitor: true feed_input: - "https://www.theregister.com/security/headlines.atom" - "https://isc.sans.edu/dailypodcast.xml" @@ -71,7 +71,7 @@ vdb_pipeline: - "https://blog.google/threat-analysis-group/rss/" - "https://intezer.com/feed/" interval_sec: 600 - output_batch_size: 2048 # Number of chunked documents per output batch + output_batch_size: 2048 # Number of chunked documents per output batch request_timeout_sec: 2.0 run_indefinitely: true stop_after_sec: 0 @@ -89,9 +89,9 @@ vdb_pipeline: chunk_size: 512 num_threads: 10 chunk_overlap: 51 - enable_monitor: True + enable_monitor: true filenames: - - "./morpheus/data/randomly_generated_cybersecurity_text.txt" # will need to supply + - "./morpheus/data/randomly_generated_cybersecurity_text.txt" # will need to supply vdb_resource_name: "vdb_pdf" watch: false @@ -105,10 +105,10 @@ vdb_pipeline: csv: chunk_overlap: 51 chunk_size: 1024 - text_column_names: # For CSV files, the data from each text_column_name will be concatenated together. + text_column_names: # For CSV files, the data from each text_column_name will be concatenated together. - "raw" # Requires same schema for all CSV files. 
- "request_header_referer" - enable_monitor: True + enable_monitor: true filenames: - "./models/datasets/training-data/log-parsing-training-data.csv" vdb_resource_name: "vdb_csv" @@ -118,7 +118,7 @@ vdb_pipeline: name: "custom_source_text" config: batch_size: 1024 - enable_monitor: True + enable_monitor: true extractor_config: chunk_size: 512 chunk_overlap: 51 @@ -144,7 +144,7 @@ vdb_pipeline: batch_size: 5120 resource_name: "VDBGENERAL" # Identifier for the resource in the vector database embedding_size: 384 - recreate: True # Whether to recreate the resource if it already exists + recreate: true # Whether to recreate the resource if it already exists service: "milvus" # Specify the type of vector database uri: "http://localhost:19530" # URI for connecting to the Vector Database server resource_schemas: @@ -184,7 +184,7 @@ vdb_pipeline: - name: embedding dtype: FLOAT_VECTOR description: Embedding vectors representing the data entry - dim: 384 # Size of the embeddings to store in the vector database + dim: 384 # Size of the embeddings to store in the vector database description: Collection schema for diverse data sources vdb_pdf: index_conf: @@ -222,7 +222,7 @@ vdb_pipeline: - name: embedding dtype: FLOAT_VECTOR description: Embedding vectors representing the data entry - dim: 384 # Size of the embeddings to store in the vector database + dim: 384 # Size of the embeddings to store in the vector database description: Collection schema for diverse data sources vdb_csv: index_conf: @@ -260,7 +260,7 @@ vdb_pipeline: - name: embedding dtype: FLOAT_VECTOR description: Embedding vectors representing the data entry - dim: 384 # Size of the embeddings to store in the vector database + dim: 384 # Size of the embeddings to store in the vector database description: Collection schema for diverse data sources vdb_rss: index_conf: @@ -298,5 +298,5 @@ vdb_pipeline: - name: embedding dtype: FLOAT_VECTOR description: Embedding vectors representing the data entry - dim: 384 # Size of the embeddings to store in the vector database - description: Collection schema for diverse data sources \ No newline at end of file + dim: 384 # Size of the embeddings to store in the vector database + description: Collection schema for diverse data sources diff --git a/industries/healthcare/agentic-healthcare-front-desk/docker-compose.yaml b/industries/healthcare/agentic-healthcare-front-desk/docker-compose.yaml index 8bc03afc..299bf046 100644 --- a/industries/healthcare/agentic-healthcare-front-desk/docker-compose.yaml +++ b/industries/healthcare/agentic-healthcare-front-desk/docker-compose.yaml @@ -3,54 +3,52 @@ services: container_name: chain-server-healthcare-assistant image: chain-server-healthcare-assistant:${TAG:-latest} env_file: - - path: ./vars.env - required: true + - path: ./vars.env + required: true build: context: ./ dockerfile: Dockerfile entrypoint: python3 chain_server/chain_server.py --assistant intake --port 8081 ports: - - "8081:8081" + - "8081:8081" expose: - - "8081" + - "8081" volumes: - ./graph_definitions/graph_images:/graph_images shm_size: 5gb - patient-intake-ui: container_name: patient-intake-ui image: patient-intake-ui:${TAG:-latest} env_file: - - path: ./vars.env - required: true + - path: ./vars.env + required: true build: context: ./ dockerfile: Dockerfile entrypoint: python3 graph_definitions/graph_patient_intake_only.py --port 7860 ports: - - "7860:7860" + - "7860:7860" expose: - - "7860" + - "7860" volumes: - ./graph_definitions/graph_images:/graph_images shm_size: 5gb - 
appointment-making-ui: container_name: appointment-making-ui image: appointment-making-ui:${TAG:-latest} env_file: - - path: ./vars.env - required: true + - path: ./vars.env + required: true build: context: ./ dockerfile: Dockerfile entrypoint: python3 graph_definitions/graph_appointment_making_only.py --port 7860 ports: - - "7860:7860" + - "7860:7860" expose: - - "7860" + - "7860" volumes: - ./graph_definitions/graph_images:/graph_images shm_size: 5gb @@ -59,16 +57,16 @@ services: container_name: medication-lookup-ui image: medication-lookup-ui:${TAG:-latest} env_file: - - path: ./vars.env - required: true + - path: ./vars.env + required: true build: context: ./ dockerfile: Dockerfile entrypoint: python3 graph_definitions/graph_medication_lookup_only.py --port 7860 ports: - - "7860:7860" + - "7860:7860" expose: - - "7860" + - "7860" volumes: - ./graph_definitions/graph_images:/graph_images shm_size: 5gb @@ -77,16 +75,16 @@ services: container_name: full-agent-ui image: full-agent-ui:${TAG:-latest} env_file: - - path: ./vars.env - required: true + - path: ./vars.env + required: true build: context: ./ dockerfile: Dockerfile entrypoint: python3 graph_definitions/graph.py --port 7860 ports: - - "7860:7860" + - "7860:7860" expose: - - "7860" + - "7860" volumes: - ./graph_definitions/graph_images:/graph_images - shm_size: 5gb \ No newline at end of file + shm_size: 5gb diff --git a/industries/healthcare/medical-device-training-assistant/docker-compose-nim-ms.yaml b/industries/healthcare/medical-device-training-assistant/docker-compose-nim-ms.yaml index 4387840e..5964261d 100644 --- a/industries/healthcare/medical-device-training-assistant/docker-compose-nim-ms.yaml +++ b/industries/healthcare/medical-device-training-assistant/docker-compose-nim-ms.yaml @@ -3,12 +3,12 @@ services: container_name: nemollm-inference-microservice image: nvcr.io/nim/meta/llama3-8b-instruct:1.0.0 volumes: - - ${MODEL_DIRECTORY}:/opt/nim/.cache + - ${MODEL_DIRECTORY}:/opt/nim/.cache user: "${USERID}" ports: - - "8000:8000" + - "8000:8000" expose: - - "8000" + - "8000" environment: NGC_API_KEY: ${NGC_API_KEY} shm_size: 20gb @@ -31,11 +31,11 @@ services: container_name: nemo-retriever-embedding-microservice image: nvcr.io/nim/nvidia/nv-embedqa-e5-v5:1.0.0 volumes: - - ${MODEL_DIRECTORY}:/opt/nim/.cache + - ${MODEL_DIRECTORY}:/opt/nim/.cache ports: - - "9080:8000" + - "9080:8000" expose: - - "8000" + - "8000" environment: NGC_API_KEY: ${NGC_API_KEY} user: "${USERID}" @@ -59,11 +59,11 @@ services: container_name: nemo-retriever-ranking-microservice image: nvcr.io/nim/nvidia/nv-rerankqa-mistral-4b-v3:1.0.0 volumes: - - ${MODEL_DIRECTORY}:/opt/nim/.cache + - ${MODEL_DIRECTORY}:/opt/nim/.cache ports: - - "1976:8000" + - "1976:8000" expose: - - "8000" + - "8000" environment: NGC_API_KEY: ${NGC_API_KEY} user: "${USERID}" diff --git a/industries/healthcare/medical-device-training-assistant/docker-compose-vectordb.yaml b/industries/healthcare/medical-device-training-assistant/docker-compose-vectordb.yaml index 7afb929e..4abf756a 100644 --- a/industries/healthcare/medical-device-training-assistant/docker-compose-vectordb.yaml +++ b/industries/healthcare/medical-device-training-assistant/docker-compose-vectordb.yaml @@ -3,18 +3,17 @@ services: container_name: pgvector image: pgvector/pgvector:pg16 ports: - - 5432:5432 + - 5432:5432 expose: - - "5432" + - "5432" volumes: - - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/data:/var/lib/postgresql/data + - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/data:/var/lib/postgresql/data environment: - - 
POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password} - - POSTGRES_USER=${POSTGRES_USER:-postgres} - - POSTGRES_DB=${POSTGRES_DB:-api} + - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password} + - POSTGRES_USER=${POSTGRES_USER:-postgres} + - POSTGRES_DB=${POSTGRES_DB:-api} profiles: ["pgvector"] - etcd: container_name: milvus-etcd image: quay.io/coreos/etcd:v3.5.5 @@ -73,7 +72,7 @@ services: - "9091:9091" depends_on: - "etcd" - - "minio" + - "minio" profiles: ["nemo-retriever", "milvus", ""] elasticsearch: diff --git a/industries/healthcare/medical-device-training-assistant/docker-compose.yaml b/industries/healthcare/medical-device-training-assistant/docker-compose.yaml index 993b6c5e..b93955c9 100644 --- a/industries/healthcare/medical-device-training-assistant/docker-compose.yaml +++ b/industries/healthcare/medical-device-training-assistant/docker-compose.yaml @@ -1,7 +1,7 @@ include: - path: - - docker-compose-vectordb.yaml - - docker-compose-nim-ms.yaml + - docker-compose-vectordb.yaml + - docker-compose-nim-ms.yaml services: chain-server: @@ -28,7 +28,7 @@ services: APP_TEXTSPLITTER_MODELNAME: Snowflake/snowflake-arctic-embed-l APP_TEXTSPLITTER_CHUNKSIZE: 506 APP_TEXTSPLITTER_CHUNKOVERLAP: 200 - APP_RANKING_MODELNAME: ${APP_RANKING_MODELNAME:-"nvidia/nv-rerankqa-mistral-4b-v3"} # Leave it blank to avoid using ranking + APP_RANKING_MODELNAME: ${APP_RANKING_MODELNAME:-"nvidia/nv-rerankqa-mistral-4b-v3"} # Leave it blank to avoid using ranking APP_RANKING_MODELENGINE: ${APP_RANKING_MODELENGINE:-nvidia-ai-endpoints} APP_RANKING_SERVERURL: ${APP_RANKING_SERVERURL:-""} NVIDIA_API_KEY: ${NVIDIA_API_KEY} @@ -42,9 +42,9 @@ services: OTEL_EXPORTER_OTLP_PROTOCOL: grpc LOGLEVEL: ${LOGLEVEL:-INFO} ports: - - "8081:8081" + - "8081:8081" expose: - - "8081" + - "8081" shm_size: 5gb depends_on: nemollm-embedding: @@ -57,7 +57,6 @@ services: condition: service_healthy required: false - rag-playground: container_name: rag-playground image: rag-playground:${TAG:-latest} @@ -75,8 +74,8 @@ services: OTEL_EXPORTER_OTLP_PROTOCOL: grpc ENABLE_TRACING: false # if locally hosting Riva: - #RIVA_API_URI: :50051 - #TTS_SAMPLE_RATE: 48000 + # RIVA_API_URI: :50051 + # TTS_SAMPLE_RATE: 48000 # if using Riva API Endpoint on NVIDIA API Catalog RIVA_API_URI: grpc.nvcf.nvidia.com:443 NVIDIA_API_KEY: ${NVIDIA_API_KEY} @@ -85,11 +84,11 @@ services: TTS_SAMPLE_RATE: 48000 ports: - - "8090:8090" + - "8090:8090" expose: - - "8090" + - "8090" depends_on: - - chain-server + - chain-server networks: default: diff --git a/industries/healthcare/medical-device-training-assistant/evaluation/docker-compose.yaml b/industries/healthcare/medical-device-training-assistant/evaluation/docker-compose.yaml index 7d28acd7..dc861a8a 100644 --- a/industries/healthcare/medical-device-training-assistant/evaluation/docker-compose.yaml +++ b/industries/healthcare/medical-device-training-assistant/evaluation/docker-compose.yaml @@ -9,15 +9,14 @@ services: - ${DATA_DIR}:/data - ${OUTPUT_DIR}:/output command: > - python3 /opt/evaluation/query_rag_with_gt_questions_and_evaluate.py - --gt_qa_pairs_doc /data/${GT_QA_DOC:-ELSA_IFU_RAG_Example_QandA.txt} + python3 /opt/evaluation/query_rag_with_gt_questions_and_evaluate.py + --gt_qa_pairs_doc /data/${GT_QA_DOC:-ELSA_IFU_RAG_Example_QandA.txt} --output_dir /output --eval_result_name eval_result environment: NVIDIA_API_KEY: ${NVIDIA_API_KEY} shm_size: 5gb - networks: default: name: nvidia-rag diff --git a/industries/healthcare/medical-device-training-assistant/prompt.yaml 
b/industries/healthcare/medical-device-training-assistant/prompt.yaml index 6234a38e..9c052fe9 100644 --- a/industries/healthcare/medical-device-training-assistant/prompt.yaml +++ b/industries/healthcare/medical-device-training-assistant/prompt.yaml @@ -1,22 +1,22 @@ chat_template: | - You are a helpful, respectful and honest assistant. - Always answer as helpfully as possible, while being safe. - Please ensure that your responses are positive in nature. + You are a helpful, respectful and honest assistant. + Always answer as helpfully as possible, while being safe. + Please ensure that your responses are positive in nature. rag_template: | - [INST] <> - Use the following context to answer the user's question. If you don't know the answer, - just say that you don't know, don't try to make up an answer. - <> - [INST] Context: {context_str} Question: {query_str} Only return the helpful - answer below and nothing else. Helpful answer:[/INST] + [INST] <> + Use the following context to answer the user's question. If you don't know the answer, + just say that you don't know, don't try to make up an answer. + <> + [INST] Context: {context_str} Question: {query_str} Only return the helpful + answer below and nothing else. Helpful answer:[/INST] -app_chain_template: | - You are a document chatbot. Help the user as they ask questions about documents. - User message just asked: {input}\n\n - For this, we have retrieved the following potentially-useful info: - Conversation History Retrieved: - {history}\n\n - Document Retrieved: - {context}\n\n - Answer only from retrieved data. Make your response conversational. +app_chain_template: |- + You are a document chatbot. Help the user as they ask questions about documents. + User message just asked: {input}\n\n + For this, we have retrieved the following potentially-useful info: + Conversation History Retrieved: + {history}\n\n + Document Retrieved: + {context}\n\n + Answer only from retrieved data. Make your response conversational. diff --git a/nemo/retriever-synthetic-data-generation/scripts/conf/config-fiqa.yaml b/nemo/retriever-synthetic-data-generation/scripts/conf/config-fiqa.yaml index 99c88eb2..507163e4 100644 --- a/nemo/retriever-synthetic-data-generation/scripts/conf/config-fiqa.yaml +++ b/nemo/retriever-synthetic-data-generation/scripts/conf/config-fiqa.yaml @@ -6,8 +6,7 @@ max_examples: 220 use_original: false # Set true if input file contains original questions and would like to evaluate using the original data pre_processors: - - - _target_: nemo_retriever_sdg.DummyPreprocessor + - _target_: nemo_retriever_sdg.DummyPreprocessor qa_generator: _target_: nemo_retriever_sdg.SimpleQAGenerator @@ -37,19 +36,19 @@ qa_generator: - Generate questions that are relevant to the idea expressed in the input document, and the input document contains the complete answer to your question. - Generate questions that provide specific context that can lead to the specific answer contained in the input document. - Generate questions that are varied and different from each other. You can change up the phrasing, vocabulary, complexity, and the type of questions you ask throughout the task. - - DO NOT copy and paste exact phrasing from the test. Formulate questions in your own words. + - DO NOT copy and paste exact phrasing from the test. Formulate questions in your own words. - Generate answers to the questions as well. - - Provide an explanation as to why the generated question is good. Use the following example questions and answers for reference. 
- - Generated Questions should start with Question: + - Provide an explanation as to why the generated question is good. Use the following example questions and answers for reference. + - Generated Questions should start with Question: - Generated Answers should start with Answer: - Generated Explanations should start with Explanation: - + Examples: Input document: Just have the associate sign the back and then deposit it. It's called a third party cheque and is perfectly legal. I wouldn't be surprised if it has a longer hold period and, as always, you don't get the money if the cheque doesn't clear. Now, you may have problems if it's a large amount or you're not very well known at the bank. In that case you can have the associate go to the bank and endorse it in front of the teller with some ID. You don't even technically have to be there. Anybody can deposit money to your account if they have the account number. He could also just deposit it in his account and write a cheque to the business. Have the check reissued to the proper payee. - + Question: How to deposit a cheque issued to an associate in my business into my business account? @@ -70,12 +69,12 @@ qa_generator: Question: Do I need a new EIN since I am hiring employees for my LLC? - + user_prompt_template: | Generate {num_questions} questions and corresponding answers based on Input Document. Input Document: - {document} + {document} easiness_filter: _target_: nemo_retriever_sdg.EasinessFilter @@ -97,7 +96,7 @@ answerability_filter: Criterion 2 - Is it clear what type of answer or information the question seeks? The question should convey its purpose without ambiguity, allowing for a direct and relevant response. Criterion 3 - Does the content in the context contain information that can answer the question or part of the question? Criterion 4 - Does the content in the context completely answer the question? - + Provide your response in a mandatory dictionary format, and a short explanation of the rating like { \"criterion_1_explanation\": "", @@ -109,28 +108,21 @@ answerability_filter: \"criterion_4_explanation\": "", \"criterion_4\": "" } - Provide only the dictionary response and nothing else. - + Provide only the dictionary response and nothing else. 
+ user_prompt_template: | Context Passage: {context} Question: {question} - + filters: - - - ${answerability_filter} - - - ${easiness_filter} - + - ${answerability_filter} + - ${easiness_filter} post_processors: - - - _target_: nemo_retriever_sdg.DivergenceCalculator + - _target_: nemo_retriever_sdg.DivergenceCalculator analyzers: - - - _target_: nemo_retriever_sdg.QuestionLengthAnalyzer - - - _target_: nemo_retriever_sdg.LexicalDivergenceAnalyzer - + - _target_: nemo_retriever_sdg.QuestionLengthAnalyzer + - _target_: nemo_retriever_sdg.LexicalDivergenceAnalyzer diff --git a/nemo/retriever-synthetic-data-generation/scripts/conf/config-nq.yaml b/nemo/retriever-synthetic-data-generation/scripts/conf/config-nq.yaml index 0cac031b..50398424 100644 --- a/nemo/retriever-synthetic-data-generation/scripts/conf/config-nq.yaml +++ b/nemo/retriever-synthetic-data-generation/scripts/conf/config-nq.yaml @@ -6,8 +6,7 @@ max_examples: 1500 use_original: false # Set true if input file contains original questions and would like to evaluate using the original data pre_processors: - - - _target_: nemo_retriever_sdg.DummyPreprocessor + - _target_: nemo_retriever_sdg.DummyPreprocessor qa_generator: _target_: nemo_retriever_sdg.SimpleQAGenerator @@ -28,13 +27,13 @@ qa_generator: num_questions: 3 squad_format: true system_prompt: | - Generate questions that are relevant to the input document provided. + Generate questions that are relevant to the input document provided. Follow these General Instructions: - Questions must be completely answered by the input document. - Questions must be relevant to the input document. - Do not generate questions which requires looking at the input document to comprehend the question - Generate questions and answers to the generated questions. - - Generated Questions should start with Question: + - Generated Questions should start with Question: - Generated Answers should start with Answer: Follow this chain of thought when formulating questions: Step 1: Identify key phrases and entities in the input document @@ -42,12 +41,12 @@ qa_generator: Compress any compounded questions to shorter questions to sound realistic. Questions can also be in the form of short phrases. Use the following examples as guidelines. - + Examples: Input document: In November 2013, Senate Democrats led by Harry Reid used the nuclear option to eliminate the 60 - vote rule on executive branch nominations and federal judicial appointments, but not for the Supreme Court. In April 2017, Senate Republicans led by Mitch McConnell extended the nuclear option to Supreme Court and the nomination of Neil Gorsuch ending the debate. - + Question: who changed the senate rules for supreme court nominees? @@ -74,12 +73,12 @@ qa_generator: Question: what is the symbol for hugs and kisses? - + user_prompt_template: | Generate {num_questions} questions and corresponding answers based on Input Document. Input Document: - {document} + {document} easiness_filter: _target_: nemo_retriever_sdg.EasinessFilter @@ -101,7 +100,7 @@ answerability_filter: Criterion 2 - Is it clear what type of answer or information the question seeks? The question should convey its purpose without ambiguity, allowing for a direct and relevant response. Criterion 3 - Does the content in the context contain information that can answer the question or part of the question? Criterion 4 - Does the content in the context completely answer the question? 
-      
+
       Provide your response in a mandatory dictionary format, and a short explanation of the rating like
       {
       \"criterion_1_explanation\": "",
@@ -113,28 +112,21 @@ answerability_filter:
       \"criterion_4_explanation\": "",
       \"criterion_4\": ""
       }
-      Provide only the dictionary response and nothing else.
-      
+      Provide only the dictionary response and nothing else.
+
   user_prompt_template: |
       Context Passage:
      {context}
       Question:
       {question}
-      
-filters: 
-  -
-  - ${answerability_filter}
-  -
-  - ${easiness_filter}
+filters:
+  - ${answerability_filter}
+  - ${easiness_filter}

 post_processors:
-  -
-  - _target_: nemo_retriever_sdg.DivergenceCalculator
+  - _target_: nemo_retriever_sdg.DivergenceCalculator

 analyzers:
-  -
-  - _target_: nemo_retriever_sdg.QuestionLengthAnalyzer
-  -
-  - _target_: nemo_retriever_sdg.LexicalDivergenceAnalyzer
-
+  - _target_: nemo_retriever_sdg.QuestionLengthAnalyzer
+  - _target_: nemo_retriever_sdg.LexicalDivergenceAnalyzer
diff --git a/nemo/retriever-synthetic-data-generation/scripts/conf/config.yaml b/nemo/retriever-synthetic-data-generation/scripts/conf/config.yaml
index 09128d5e..47b5c00f 100644
--- a/nemo/retriever-synthetic-data-generation/scripts/conf/config.yaml
+++ b/nemo/retriever-synthetic-data-generation/scripts/conf/config.yaml
@@ -2,12 +2,11 @@ input_file: ${input_file}
 input_format: ${input_format}  # squad or rawdoc
 output_dir: ${output_dir}
 api_key: ${api_key}
-max_examples: 20 # Remove this line to use the entire dataset
+max_examples: 20  # Remove this line to use the entire dataset
 use_original: false  # Set true if input file contains original questions and would like to evaluate using the original data

 pre_processors:
-  -
-  - _target_: nemo_retriever_sdg.DummyPreprocessor
+  - _target_: nemo_retriever_sdg.DummyPreprocessor

 qa_generator:
   _target_: nemo_retriever_sdg.SimpleQAGenerator
@@ -34,19 +33,18 @@ qa_generator:
       - Generate questions that are relevant to the idea expressed in the input document, and the input document contains the complete answer to your question.
       - Generate questions that provide specific context that can lead to the specific answer contained in the input document.
       - Generate questions that are varied and different from each other. You can change up the phrasing, vocabulary, complexity, and the type of questions you ask throughout the task.
-      - DO NOT copy and paste exact phrasing from the text. Formulate questions in your own words. 
+      - DO NOT copy and paste exact phrasing from the text. Formulate questions in your own words.
       - Generate answers to the questions as well.
-      - Provide an explanation as to why the generated question is good. Use the following example questions and answers for reference. 
-      - Generated Questions should start with Question: 
+      - Provide an explanation as to why the generated question is good. Use the following example questions and answers for reference.
+      - Generated Questions should start with Question:
       - Generated Answers should start with Answer:
       - Explanations should start with Explanation:
-      
+
       Examples:
       Input document:
       We witnessed a flurry of defaults in 2015-2016 dominated by aggressive 2012-2014 vintage energy sector issuance. High cost producers with inadequate liquidity found bankruptcy to be their only option in the face of $30 oil. 2016 was the fifth highest default volume year on record with 80% of defaults occurring in commodity credits. The default rate for energy issuers was approximately 20%.
      We believe that most of the aggressive credits in these sectors have now restructured (the average energy bond trades at $98 today, up from $56 in February, 2016).
-
       Question:
       Which year has the highest default volume of all time?
       Explanation:
@@ -81,12 +79,12 @@ qa_generator:
       What is the common statistic mentioned in the document?
       Explanation:
       This is a bad question! It's too generic and vague. It assumes that the document is being looked at when the question is being asked.
-      
+
   user_prompt_template: |
       Generate {num_questions} questions and corresponding answers based on Input Document.
       Input Document:
-      {document}
+      {document}

 easiness_filter:
   _target_: nemo_retriever_sdg.EasinessFilter
@@ -104,8 +102,8 @@ easiness_filter:
 #   filter_cfg:
 #     filter_threshold: 0.75
 #     embedding_model: "intfloat/e5-large-unsupervised"
-#     batch_size: 8 
-
+#     batch_size: 8
+
 answerability_filter:
   _target_: nemo_retriever_sdg.AnswerabilityFilter
   filter_cfg:
@@ -119,7 +117,7 @@ answerability_filter:
       Criterion 2 - Is it clear what type of answer or information the question seeks? The question should convey its purpose without ambiguity, allowing for a direct and relevant response.
       Criterion 3 - Does the content in the context contain information that can answer the question or part of the question?
       Criterion 4 - Does the content in the context completely answer the question?
-      
+
       Provide your response in a mandatory dictionary format, and a short explanation of the rating like
       {
       \"criterion_1_explanation\": "",
@@ -131,36 +129,30 @@ answerability_filter:
       \"criterion_4_explanation\": "",
       \"criterion_4\": ""
       }
-      Provide only the dictionary response and nothing else.
-      
+      Provide only the dictionary response and nothing else.
+
   user_prompt_template: |
       Context Passage:
      {context}
       Question:
       {question}
-      
+
 filters:
-  -
-  - ${easiness_filter}
-  -
-  - ${answerability_filter}
+  - ${easiness_filter}
+  - ${answerability_filter}

 post_processors:
-  -
-  - _target_: nemo_retriever_sdg.DivergenceCalculator
+  - _target_: nemo_retriever_sdg.DivergenceCalculator

 analyzers:
-  -
-  - _target_: nemo_retriever_sdg.QuestionLengthAnalyzer
-  -
-  - _target_: nemo_retriever_sdg.LexicalDivergenceAnalyzer
+  - _target_: nemo_retriever_sdg.QuestionLengthAnalyzer
+  - _target_: nemo_retriever_sdg.LexicalDivergenceAnalyzer

 evaluators:
-  -
-  - _target_: nemo_retriever_sdg.BEIREvaluator
+  - _target_: nemo_retriever_sdg.BEIREvaluator
     model_names:
       - "sentence-transformers/gtr-t5-large"
       - "BAAI/bge-large-en-v1.5"
       - "intfloat/e5-large-unsupervised"
-    score_function: "cos_sim" # "dot" or "cos_sim"
-    batch_size: 16
\ No newline at end of file
+    score_function: "cos_sim"  # "dot" or "cos_sim"
+    batch_size: 16
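
A note for reviewers on the sequence cleanups in these Hydra-style configs (filters, pre_processors, post_processors, analyzers, evaluators): in YAML, a bare "-" on a line of its own is a sequence entry whose value is null, so the old spelling did not merely look odd, it parsed as an extra null item in each list. A minimal standalone sketch of the two forms (key names are reused from the configs above purely for illustration; "filters_fixed" is a hypothetical key used only to keep the sketch loadable as one document):

    # Old spelling: the bare "-" is its own entry with a null value,
    # so this parses as a two-element list: [null, "${easiness_filter}"]
    filters:
      -
      - ${easiness_filter}

    # New spelling: a single-element list: ["${easiness_filter}"]
    filters_fixed:
      - ${easiness_filter}

Any loader that iterates these lists and instantiates each _target_ entry would presumably have to skip the null items or fail on them, so trimming them at the source, as this change does, is likely more than a cosmetic fix.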