diff --git a/.dockerignore b/.dockerignore index b4d50964..91540e39 100644 --- a/.dockerignore +++ b/.dockerignore @@ -5,7 +5,7 @@ .gitmodules # Ignore temperory volumes -deploy/compose/volumes +RAG/examples/**/volumes # creating a docker image .dockerignore diff --git a/.gitattributes b/.gitattributes index c8a8d73b..82f8bfb0 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1 @@ -notebooks/dataset.zip filter=lfs diff=lfs merge=lfs -text +notebooks/dataset.zip filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/.github/workflows/docs-build.yaml b/.github/workflows/docs-build.yaml deleted file mode 100644 index c0abe122..00000000 --- a/.github/workflows/docs-build.yaml +++ /dev/null @@ -1,153 +0,0 @@ -name: docs-build - -on: - pull_request: - branches: [ main, release-* ] - types: [ opened, synchronize ] - - push: - branches: [ main ] - tags: - - v* - workflow_dispatch: - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true - -defaults: - run: - shell: bash - -jobs: - build-docs: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Build image - run: | - docker build --pull --tag docs-builder:latest --file docs/Dockerfile . - - name: Build docs - run: | - docker run -v $(pwd):/work -w /work docs-builder:latest sphinx-build -b html -d /tmp docs docs/_build/output - - name: Delete unnecessary files - run: | - sudo rm -rf docs/_build/jupyter_execute - sudo rm -rf docs/_build/.buildinfo - - name: Upload HTML - uses: actions/upload-artifact@v4 - with: - name: html-build-artifact - path: docs/_build/ - if-no-files-found: error - retention-days: 1 - - name: Store PR information - if: ${{ github.event_name == 'pull_request' }} - run: | - mkdir ./pr - echo ${{ github.event.number }} > ./pr/pr.txt - echo ${{ github.event.pull_request.merged }} > ./pr/merged.txt - echo ${{ github.event.action }} > ./pr/action.txt - - name: Upload PR information - if: ${{ github.event_name == 'pull_request' }} - uses: actions/upload-artifact@v4 - with: - name: pr - path: pr/ - - store-html: - needs: [ build-docs ] - if: ${{ github.event_name == 'push' }} - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - with: - ref: "gh-pages" - - name: Initialize Git configuration - run: | - git config user.name docs-build - git config user.email do-not-send@github.com - - name: Download artifacts - uses: actions/download-artifact@v4 - with: - name: html-build-artifact - - name: Copy HTML directories - run: | - ls -asl - - name: Store bleeding edge docs from main - if: ${{ github.ref == 'refs/heads/main' }} - run: | - mkdir main || true - rsync -av --progress --delete output/ main/ - git add main - - name: Store docs for a release tag - if: ${{ startsWith(github.ref, 'refs/tags/v') }} - env: - LATEST: ${{ contains(github.event.head_commit.message, '/not-latest') && 'not-true' || 'true' }} - run: | - printenv LATEST - if [[ "${GITHUB_REF}" =~ "-rc" ]]; then - echo "Not saving documents for release candidates." - exit 0 - fi - if [[ "${GITHUB_REF}" =~ v([0-9]+\.[0-9]+\.[0-9]+) ]]; then - TAG="${BASH_REMATCH[1]}" - mkdir "${TAG}" || true - rsync -av --progress --delete output/ "${TAG}/" - git add "${TAG}/" - if [[ "${LATEST}" == 'true' ]]; then - mkdir latest || true - rsync -av --progress --delete output/ latest/ - cp output/versions.json . 
- git add latest - git add versions.json - fi - fi - - name: Check or create dot-no-jekyll file - run: | - if [ -f ".nojekyll" ]; then - echo "The dot-no-jekyll file already exists." - exit 0 - fi - touch .nojekyll - git add .nojekyll - - name: Check or create redirect page - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - resp=$(grep 'http-equiv="refresh"' index.html 2>/dev/null) || true - if [ -n "${resp}" ]; then - echo "The redirect file already exists." - exit 0 - fi - # If any of these commands fail, fail the build. - html_url=$(gh api "repos/${GITHUB_REPOSITORY}/pages" --jq ".html_url") - # Beware ugly quotation mark avoidance in the foll lines. - echo '' > index.html - echo '' >> index.html - echo ' ' >> index.html - echo ' Redirect to documentation' >> index.html - echo ' ' >> index.html - echo ' ' >> index.html - echo ' ' >> index.html - echo ' ' >> index.html - echo ' ' >> index.html - echo ' ' >> index.html - echo '
Please follow the link to the ' >> index.html - echo 'latest documentation.
' >> index.html - echo ' ' >> index.html - echo '' >> index.html - git add index.html - - name: Commit changes to the GitHub Pages branch - run: | - git status - if git commit -m 'Pushing changes to GitHub Pages.'; then - git push -f - else - echo "Nothing changed." - fi diff --git a/.github/workflows/docs-preview-pr.yaml b/.github/workflows/docs-preview-pr.yaml deleted file mode 100644 index 362db16e..00000000 --- a/.github/workflows/docs-preview-pr.yaml +++ /dev/null @@ -1,117 +0,0 @@ -name: docs-preview-pr - -on: - workflow_run: - workflows: [docs-build] - types: [completed] - -env: - WF_ID: ${{ github.event.workflow_run.id }} - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - -jobs: - # Always determine if GitHub Pages are configured for this repo. - get-gh-pages-url: - if: - github.event.workflow_run.event == 'pull_request' && - github.event.workflow_run.conclusion == 'success' - runs-on: ubuntu-latest - outputs: - url: ${{ steps.api-resp.outputs.html_url || '' }} - branch: ${{ steps.api-resp.outputs.branch || '' }} - steps: - - name: Check for GitHub Pages - id: api-resp - run: | - has_pages=$(gh api "repos/${GITHUB_REPOSITORY}" -q '.has_pages') - if [ "true" != "${has_pages}" ]; then - echo "GitHub pages is not active for the repository. Quitting." - return - fi - - url=$(gh api "repos/${GITHUB_REPOSITORY}/pages" -q '.html_url') - branch=$(gh api "repos/${GITHUB_REPOSITORY}/pages" -q '.source.branch') - - echo "html_url=${url}" >> $GITHUB_OUTPUT - echo "branch=${branch}" >> $GITHUB_OUTPUT - - # Identify the dir for the HTML. - store-html: - runs-on: ubuntu-latest - needs: [get-gh-pages-url] - if: needs.get-gh-pages-url.outputs.url != '' - steps: - - uses: actions/checkout@v4 - with: - ref: ${{ needs.get-gh-pages-url.outputs.branch }} - - name: Initialize Git configuration - run: | - git config user.name docs-preview - git config user.email do-not-send-@github.com - - name: Download artifacts - run: | - gh run view "${WF_ID}" - gh run download "${WF_ID}" - PR=$(cat ./pr/pr.txt) - MERGED=$(cat ./pr/merged.txt) - ACTION=$(cat ./pr/action.txt) - echo "PR_NO=${PR}" >> $GITHUB_ENV - echo "MERGE_STATUS=${MERGED}" >> $GITHUB_ENV - echo "PR_ACTION=${ACTION}" >> $GITHUB_ENV - echo "REVIEW_DIR=review/" >> $GITHUB_ENV - echo "PR_REVIEW_DIR=review/pr-${PR}" >> $GITHUB_ENV - - # Remove the pr artifact directory so that it does not - # appear in listings or confuse git with untracked files. - rm -rf ./pr - - # Permutations: - # - PR was updated, PR_ACTION is !closed, need to delete review directory and update it. - # - PR was closed (regardless of merge), PR_ACTION is closed, need to delete review directory. - - # If this PR is still open, store HTML in a review directory. - - name: Handle HTML review directory for open PRs and updates to PRs - if: env.MERGE_STATUS == 'false' && env.PR_ACTION != 'closed' - run: | - rm -rf "${{ env.PR_REVIEW_DIR }}" 2>/dev/null || true - if [ ! -d "${{ env.REVIEW_DIR }}" ]; then - mkdir "${{ env.REVIEW_DIR }}" - fi - mv ./html-build-artifact/latest/ "${{ env.PR_REVIEW_DIR }}" - git add "${{ env.PR_REVIEW_DIR }}" - # If the PR was closed, merged or not, delete review directory. 
- - name: Delete HTML review directory for closed PRs - if: env.PR_ACTION == 'closed' - run: | - if [ -d ./html-build-artifact/ ]; then - rm -rf ./html-build-artifact/ 2>/dev/null - fi - if [ -d "${{ env.PR_REVIEW_DIR }}" ]; then - git rm -rf "${{ env.PR_REVIEW_DIR }}" - fi - - name: Commit changes to the GitHub Pages branch - run: | - git status - if git commit -m 'Pushing changes to GitHub Pages.'; then - git push -f - else - echo "Nothing changed." - fi - - name: Check for existing documentation review comment - run: | - result=$(gh pr view ${{ env.PR_NO }} --json comments -q 'any(.comments[].body; contains("Documentation preview"))') - echo "COMMENT_EXISTS=${result}" >> $GITHUB_ENV - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Add HTML review URL comment to a newly opened PR - if: env.MERGE_STATUS == 'false' && env.COMMENT_EXISTS == 'false' - env: - URL: ${{ needs.get-gh-pages-url.outputs.url }} - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - shell: bash - run: | - echo -e "## Documentation preview" > body - echo -e "" >> body - echo -e "<${{ env.URL }}${{ env.PR_REVIEW_DIR }}>" >> body - cat body - gh pr comment ${{ env.PR_NO }} --body-file body diff --git a/.github/workflows/docs-remove-stale-reviews.yaml b/.github/workflows/docs-remove-stale-reviews.yaml deleted file mode 100644 index 8b758c37..00000000 --- a/.github/workflows/docs-remove-stale-reviews.yaml +++ /dev/null @@ -1,11 +0,0 @@ -name: docs-remove-stale-reviews - -on: - schedule: - # 42 minutes after 0:00 UTC on Sundays - - cron: "42 0 * * 0" - workflow_dispatch: - -jobs: - remove: - uses: nvidia-merlin/.github/.github/workflows/docs-remove-stale-reviews-common.yaml@main diff --git a/.gitignore b/.gitignore index 7094b42f..9d14994c 100644 --- a/.gitignore +++ b/.gitignore @@ -3,14 +3,10 @@ **__pycache__** # Helm Exclusions -**/charts/*.tgz - -# project temp files -deploy/*.log -deploy/*.txt +**/helm-charts/*.tgz # Docker Compose exclusions -volumes/ +RAG/examples/**/volumes uploaded_files/ # Visual Studio Code @@ -26,5 +22,10 @@ docs/experimental docs/tools # Developing examples -RetrievalAugmentedGeneration/examples/simple_rag_api_catalog/ -deploy/compose/simple-rag-api-catalog.yaml +RAG/examples/simple_rag_api_catalog/ +RAG/examples/simple-rag-api-catalog.yaml + +# Notebook checkpoints +RAG/notebooks/langchain/.ipynb_checkpoints +RAG/notebooks/langchain/data/nv_embedding +RAG/notebooks/langchain/data/save_embedding \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ab5a3e8b..11fada31 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -13,11 +13,15 @@ repos: rev: 19.10b0 hooks: - id: black + files: ^RAG/ args: ["--skip-string-normalization", "--line-length=119"] additional_dependencies: ['click==8.0.4'] - repo: https://github.com/pycqa/isort rev: 5.12.0 hooks: - id: isort + files: ^RAG/ name: isort (python) args: ["--multi-line=3", "--trailing-comma", "--force-grid-wrap=0", "--use-parenthese", "--line-width=119", "--ws"] + + diff --git a/CHANGELOG.md b/CHANGELOG.md index 6112a884..810186d3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,60 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.8.0] - 2024-08-19 + +This release completely refactors the directory structure of the repository for a more seamless and intuitive developer journey. 
It also adds support to deploy latest accelerated embedding and reranking models across the cloud, data center, and workstation using [NVIDIA NeMo Retriever NIM microservices](https://docs.nvidia.com/nim/index.html#nemo-retriever). + +### Added +- [End-to-end RAG examples](./RAG/examples/) enhancements + - [Single-command deployment](./README.md#try-it-now) for all the examples using Docker Compose. + - All end to end RAG examples are now more encapsulated with documentation, code and deployment assets residing in dedicated example specific directory. + - Segregated examples into [basic and advanced RAG](./RAG/examples/) with dedicated READMEs. + - Added reranker model support to [multi-turn RAG example](./RAG/examples/advanced_rag/multi_turn_rag/). + - Added [dedicated prompt configuration file for every example](./docs/prompt-customization.md). + - Removed Python dev packages from containers to enhance security. + - Updated to latest version of [langchain-nvidia-ai-endpoints](https://python.langchain.com/v0.2/docs/integrations/providers/nvidia/). +- [Speech support using RAG Playground]((./docs/riva-asr-tts.md)) + - Added support to access [RIVA speech models from NVIDIA API Catalog](https://build.nvidia.com/explore/speech). + - Speech support in RAG Playground is opt-in. +- Documentation enhancements + - Added more comprehensive [how-to guides](./README.md#how-to-guides) for end to end RAG examples. + - Added [example specific architecture diagrams](./RAG/examples/basic_rag/langchain/) in each example directory. +- Added a new industry specific [top level directory](./industries/) + - Added [health care domain specific Medical Device Training Assistant RAG](./industries/healthcare/medical-device-training-assistant/). +- Added notebooks showcasing new usecases + - [Basic langchain based RAG pipeline](./RAG/notebooks/langchain/langchain_basic_RAG.ipynb) using latest NVIDIA API Catalog connectors. + - [Basic llamaindex based RAG pipeline](./RAG/notebooks/llamaindex/llamaindex_basic_RAG.ipynb) using latest NVIDIA API Catalog connectors. + - [NeMo Guardrails with basic langchain RAG](./RAG/notebooks/langchain/NeMo_Guardrails_with_LangChain_RAG/). + - [NVIDIA NIM microservices using NeMo Guardrails based RAG](./RAG/notebooks/langchain/Using_NVIDIA_NIMs_with_NeMo_Guardrails/). + - [Using NeMo Evaluator using Llama 3.1 8B Instruct](./RAG/notebooks/nemo/Nemo%20Evaluator%20Llama%203.1%20Workbook/). + - [Agentic RAG pipeline with Nemo Retriever and NIM for LLMs](./RAG/notebooks/langchain/agentic_rag_with_nemo_retriever_nim.ipynb). +- Added new `community` (before `experimental`) example + - Create a simple web interface to interact with different [selectable NIM endpoints](./community/llm-prompt-design-helper/). The provided interface of this project supports designing a system prompt to call the LLM. + +### Changed +- Major restructuring and reorganisation of the assets within the repository + - Top level `experimental` directory has been renamed as `community`. + - Top level `RetrievalAugmentedGeneration` directory has been renamed as just `RAG`. + - The Docker Compose files inside top level `deploy` directory has been migrated to example-specific directories under `RAG/examples`. The vector database and on-prem NIM microservices deployment files are under `RAG/examples/local_deploy`. + - Top level `models` has been renamed to `finetuning`. + - Top level `notebooks` directory has been moved to under `RAG/notebooks` and has been organised framework wise. 
+ - Top level `tools` directory has been migrated to `RAG/tools`. + - Top level `integrations` directory has been moved into `RAG/src`. + - `RetreivalAugmentedGeneration/common` is now residing under `RAG/src/chain_server`. + - `RetreivalAugmentedGeneration/frontend` is now residing under `RAG/src/rag_playground/default`. + - `5 mins RAG No GPU` example under top level `examples` directory, is now under `community`. + +### Deprecated + - Github pages based documentation is now replaced with markdown based documentation. + - Top level `examples` directory has been removed. + - Following notebooks were removed + - [02_Option(1)_NVIDIA_AI_endpoint_simple.ipynb](https://github.com/NVIDIA/GenerativeAIExamples/blob/v0.7.0/notebooks/02_Option(1)_NVIDIA_AI_endpoint_simple.ipynb) + - [notebooks/02_Option(2)_minimalistic_RAG_with_langchain_local_HF_LLM.ipynb](https://github.com/NVIDIA/GenerativeAIExamples/blob/v0.7.0/notebooks/02_Option(2)_minimalistic_RAG_with_langchain_local_HF_LLM.ipynb) + - [notebooks/03_Option(1)_llama_index_with_NVIDIA_AI_endpoint.ipynb](https://github.com/NVIDIA/GenerativeAIExamples/blob/v0.7.0/notebooks/03_Option(1)_llama_index_with_NVIDIA_AI_endpoint.ipynb) + - [notebooks/03_Option(2)_llama_index_with_HF_local_LLM.ipynb](https://github.com/NVIDIA/GenerativeAIExamples/blob/v0.7.0/notebooks/03_Option(2)_llama_index_with_HF_local_LLM.ipynb) + + ## [0.7.0] - 2024-06-18 This release switches all examples to use cloud hosted GPU accelerated LLM and embedding models from [Nvidia API Catalog](https://build.nvidia.com) as default. It also deprecates support to deploy on-prem models using NeMo Inference Framework Container and adds support to deploy accelerated generative AI models across the cloud, data center, and workstation using [latest Nvidia NIM-LLM](https://docs.nvidia.com/nim/large-language-models/latest/introduction.html). @@ -17,7 +71,7 @@ This release switches all examples to use cloud hosted GPU accelerated LLM and e - Improved accuracy of image parsing by using [tesseract-ocr](https://pypi.org/project/tesseract-ocr/) - Added a [new notebook showcasing RAG usecase using accelerated NIM based on-prem deployed models](./notebooks/08_RAG_Langchain_with_Local_NIM.ipynb) - Added a [new experimental example](./experimental/rag-developer-chatbot/) showcasing how to create a developer-focused RAG chatbot using RAPIDS cuDF source code and API documentation. -- Added a [new experimental example](./experimental/event-driven-rag-cve-analysis/) demonstrating how NVIDIA Morpheus, NIMs, and RAG pipelines can be integrated to create LLM-based agent pipelines. +- Added a [new experimental example](./experimental/event-driven-rag-cve-analysis/) demonstrating how NVIDIA Morpheus, NIM microservices, and RAG pipelines can be integrated to create LLM-based agent pipelines. ### Changed - All examples now use llama3 models from [Nvidia API Catalog](https://build.nvidia.com/search?term=llama3) as default. Summary of updated examples and the model it uses is available [here](https://nvidia.github.io/GenerativeAIExamples/latest/index.html#developer-rag-examples). 
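Since the examples now default to API Catalog hosted models through `langchain-nvidia-ai-endpoints`, the following is a minimal, illustrative sketch of that connector used in isolation. The model name mirrors the compose defaults elsewhere in this change set; the prompt text and generation parameters are placeholders, and `NVIDIA_API_KEY` is assumed to be exported as shown in the example READMEs.

```python
# Minimal sketch of the NVIDIA API Catalog connector the examples default to.
# Assumes NVIDIA_API_KEY is set in the environment; prompt and parameters are illustrative.
from langchain_nvidia_ai_endpoints import ChatNVIDIA

llm = ChatNVIDIA(model="meta/llama3-8b-instruct", temperature=0.2, max_tokens=256)

# invoke() returns a chat message; .content holds the generated text.
print(llm.invoke("Summarize what retrieval augmented generation does in one sentence.").content)
```

The chain servers read the same connector settings through the `APP_LLM_*` environment variables in the Docker Compose files, so the model name and server URL can be switched per deployment without code changes.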
diff --git a/RetrievalAugmentedGeneration/common/__init__.py b/RAG/__init__.py similarity index 100% rename from RetrievalAugmentedGeneration/common/__init__.py rename to RAG/__init__.py diff --git a/RetrievalAugmentedGeneration/examples/multimodal_rag/llm/__init__.py b/RAG/examples/README.md similarity index 100% rename from RetrievalAugmentedGeneration/examples/multimodal_rag/llm/__init__.py rename to RAG/examples/README.md diff --git a/RetrievalAugmentedGeneration/examples/developer_rag/__init__.py b/RAG/examples/__init__.py similarity index 100% rename from RetrievalAugmentedGeneration/examples/developer_rag/__init__.py rename to RAG/examples/__init__.py diff --git a/RAG/examples/advanced_rag/multi_turn_rag/README.md b/RAG/examples/advanced_rag/multi_turn_rag/README.md new file mode 100644 index 00000000..59d9dbc8 --- /dev/null +++ b/RAG/examples/advanced_rag/multi_turn_rag/README.md @@ -0,0 +1,85 @@ + + +# Multi-Turn RAG Example + +## Example Features + +This example showcases multi-turn conversational AI in a RAG pipeline. +The chain server stores the conversation history and knowledge base in a vector database and retrieves them at runtime to understand contextual queries. + +The example supports ingestion of PDF and text files. +The documents are ingested in a dedicated document vector store, multi_turn_rag. +The prompt for the example is tuned to act as a document chat bot. +To maintain the conversation history, the chain server stores the previously asked query and the model's generated answer as a text entry in a different and dedicated vector store for conversation history, conv_store. +Both of these vector stores are part of a LangChain [LCEL](https://python.langchain.com/docs/expression_language/) chain as LangChain Retrievers. +When the chain is invoked with a query, the query passes through both the retrievers. +The retriever retrieves context from the document vector store and the closest-matching conversation history from conversation history vector store. The document chunks retrieved from the document vector store are then passed through a reranker model to determine the most relevant top_k context. The context is then passed onto the LLM prompt for response generation. +Afterward, the chunks are added into the LLM prompt as part of the chain. + +| Model | Embedding | Ranking (Optional) | Framework | Vector Database | File Types | +| ----------------------- | ----------------------- | -------------------------------- | --------- | --------------- | ------------ | +| meta/llama3-8b-instruct | nvidia/nv-embedqa-e5-v5 | nvidia/nv-rerankqa-mistral-4b-v3 | LangChain | Milvus | TXT, PDF, MD | + +![Diagram](../../../../docs/images/multiturn_rag_arch.png) + +## Prerequisites + +Complete the [common prerequisites](../../../../docs/common-prerequisites.md). + +## Build and Start the Containers + +1. Export your NVIDIA API key as an environment variable: + + ```text + export NVIDIA_API_KEY="nvapi-<...>" + ``` + +1. Start the containers: + + ```console + cd RAG/examples/advanced_rag/multi_turn_rag/ + docker compose up -d --build + ``` + + *Example Output* + + ```output + ✔ Network nvidia-rag Created + ✔ Container milvus-etcd Running + ✔ Container milvus-minio Running + ✔ Container milvus-standalone Running + ✔ Container chain-server Started + ✔ Container rag-playground Started + ``` + +1. 
Confirm the containers are running: + + ```console + docker ps --format "table {{.ID}}\t{{.Names}}\t{{.Status}}" + ``` + + *Example Output* + + ```output + CONTAINER ID NAMES STATUS + dd4fc3da6c9c rag-playground Up About a minute + ac1f039a1db8 chain-server Up About a minute + cd0a57ee20e0 milvus-standalone Up 2 hours + a36370e7ed75 milvus-minio Up 2 hours (healthy) + a796a4e59b68 milvus-etcd Up 2 hours (healthy) + ``` + +1. Open a web browser and access to use the RAG Playground. + + Refer to [Using the Sample Web Application](../../../../docs/using-sample-web-application.md) + for information about uploading documents and using the web interface. + +## Next Steps + +- [Vector Database Customizations](../../../../docs/vector-database.md) +- Stop the containers by running `docker compose down`. +- Use the [RAG Application: Multi Turn Agent](https://registry.ngc.nvidia.com/orgs/ohlfw0olaadg/teams/ea-participants/helm-charts/rag-app-multiturn-chatbot) + Helm Chart to deploy this example in Kubernetes. diff --git a/RAG/examples/advanced_rag/multi_turn_rag/chains.py b/RAG/examples/advanced_rag/multi_turn_rag/chains.py new file mode 100644 index 00000000..dd466496 --- /dev/null +++ b/RAG/examples/advanced_rag/multi_turn_rag/chains.py @@ -0,0 +1,366 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""RAG example showcasing multi-turn conversation.""" +import logging +import os +from operator import itemgetter +from typing import Any, Dict, Generator, List + +from langchain.text_splitter import RecursiveCharacterTextSplitter +from langchain_community.document_loaders import UnstructuredFileLoader +from langchain_core.output_parsers.string import StrOutputParser +from langchain_core.prompts.chat import ChatPromptTemplate +from langchain_core.prompts.prompt import PromptTemplate +from langchain_core.runnables.passthrough import RunnableAssign + +from RAG.src.chain_server.base import BaseExample +from RAG.src.chain_server.tracing import langchain_instrumentation_class_wrapper + +# pylint: disable=no-name-in-module, disable=import-error +from RAG.src.chain_server.utils import ( + create_vectorstore_langchain, + del_docs_vectorstore_langchain, + get_config, + get_docs_vectorstore_langchain, + get_embedding_model, + get_llm, + get_prompts, + get_ranking_model, + get_text_splitter, + get_vectorstore, +) + +document_embedder = get_embedding_model() +text_splitter = None +settings = get_config() +ranker = get_ranking_model() +logger = logging.getLogger(__name__) + +prompts = get_prompts() + +try: + docstore = create_vectorstore_langchain(document_embedder=document_embedder) +except Exception as e: + docstore = None + logger.info(f"Unable to connect to vector store during initialization: {e}") + + +@langchain_instrumentation_class_wrapper +class MultiTurnChatbot(BaseExample): + def save_memory_and_get_output(self, d, vstore): + """Accepts 'input'/'output' dictionary and saves to convstore""" + vstore.add_texts( + [f"User previously responded with {d.get('input')}", f"Agent previously responded with {d.get('output')}",] + ) + return d.get("output") + + def ingest_docs(self, filepath: str, filename: str): + """Ingest documents to the VectorDB.""" + if not filename.endswith((".txt", ".pdf", ".md")): + raise ValueError(f"{filename} is not a valid Text, PDF or Markdown file") + try: + # Load raw documents from the directory + _path = filepath + raw_documents = UnstructuredFileLoader(_path).load() + + if raw_documents: + global text_splitter + if not text_splitter: + text_splitter = get_text_splitter() + + documents = text_splitter.split_documents(raw_documents) + ds = get_vectorstore(docstore, document_embedder) + ds.add_documents(documents) + else: + logger.warning("No documents available to process!") + except Exception as e: + logger.error(f"Failed to ingest document due to exception {e}") + raise ValueError("Failed to upload document. Please upload an unstructured text document.") + + def llm_chain(self, query: str, chat_history: List["Message"], **kwargs) -> Generator[str, None, None]: + """Execute a simple LLM chain using the components defined above.""" + + logger.info("Using llm to generate response directly without knowledge base.") + # WAR: Disable chat history (UI consistency). 
+ chat_history = [] + conversation_history = [(msg.role, msg.content) for msg in chat_history] + system_message = [("system", prompts.get("chat_template", ""))] + user_message = [("user", "{query_str}")] + + # TODO: Enable this block once conversation history is enabled for llm chain + # Checking if conversation_history is not None and not empty + # prompt_template = ChatPromptTemplate.from_messages( + # system_message + conversation_history + user_message + # ) if conversation_history else ChatPromptTemplate.from_messages( + # system_message + user_message + # ) + prompt_template = ChatPromptTemplate.from_messages(system_message + user_message) + + llm = get_llm(**kwargs) + chain = prompt_template | llm | StrOutputParser() + + logger.info(f"Prompt used for response generation: {prompt_template.format(query_str=query)}") + return chain.stream({"query_str": query}, config={"callbacks": [self.cb_handler]}) + + def rag_chain(self, query: str, chat_history: List["Message"], **kwargs) -> Generator[str, None, None]: + """Execute a Retrieval Augmented Generation chain using the components defined above.""" + + logger.info("Using rag to generate response from document") + + # chat_prompt = ChatPromptTemplate.from_messages( + # [ + # ("system", settings.prompts.multi_turn_rag_template), + # ("user", "{input}"), + # ] + # ) + + # This is a workaround Prompt Template + chat_prompt = ChatPromptTemplate.from_messages( + [("user", prompts.get("multi_turn_rag_template") + "User Query: {input}"),] + ) + + llm = get_llm(**kwargs) + stream_chain = chat_prompt | llm | StrOutputParser() + + convstore = create_vectorstore_langchain(document_embedder, collection_name="conv_store") + + resp_str = "" + # TODO Integrate chat_history + try: + ds = get_vectorstore(docstore, document_embedder) + if ds: + + # Get 40 results from vector databases and compress them to 4 (top_k) using a ranker + top_k = 40 if ranker else settings.retriever.top_k + + try: + logger.info( + f"Getting retrieved top k values: {top_k} with confidence threshold: {settings.retriever.score_threshold}" + ) + + context_chain = RunnableAssign( + { + "context": itemgetter("input") + | ds.as_retriever( + search_type="similarity_score_threshold", + search_kwargs={"score_threshold": settings.retriever.score_threshold, "k": top_k}, + ) + } + ) + + history_chain = RunnableAssign( + { + "history": itemgetter("input") + | convstore.as_retriever( + search_type="similarity_score_threshold", + search_kwargs={"score_threshold": settings.retriever.score_threshold, "k": top_k}, + ) + } + ) + if ranker: + logger.info( + f"Narrowing the collection from {top_k} results and further narrowing it to {settings.retriever.top_k} with the reranker." + ) + context_reranker = RunnableAssign( + { + "context": lambda input: ranker.compress_documents( + query=input['input'], documents=input['context'] + ) + } + ) + history_reranker = RunnableAssign( + { + "history": lambda input: ranker.compress_documents( + query=input['input'], documents=input['history'] + ) + } + ) + + retrieval_chain = context_chain | context_reranker | history_chain | history_reranker + else: + retrieval_chain = context_chain | history_chain + # Handling Retrieval failure + docs = retrieval_chain.invoke({"input": query}, config={"callbacks": [self.cb_handler]}) + if not docs: + logger.warning("Retrieval failed to get any relevant context") + return iter( + [ + "No response generated from LLM, make sure your query is relavent to the ingested document." 
+ ] + ) + + logger.debug(f"Retrieved docs are: {docs}") + + chain = retrieval_chain | stream_chain + + for chunk in chain.stream({"input": query}, config={"callbacks": [self.cb_handler]}): + yield chunk + resp_str += chunk + + self.save_memory_and_get_output({"input": query, "output": resp_str}, convstore) + + return chain.stream(query, config={"callbacks": [self.cb_handler]}) + + except NotImplementedError: + # TODO: Optimize it, currently error is raised during stream + # check if there is better way to handle this similarity case + logger.info(f"Skipping similarity score as it's not supported by retriever") + # Some retriever like milvus don't have similarity score threshold implemented + context_chain = RunnableAssign( + {"context": itemgetter("input") | ds.as_retriever(search_kwargs={"k": top_k})} + ) + + history_chain = RunnableAssign( + {"history": itemgetter("input") | convstore.as_retriever(search_kwargs={"k": top_k})} + ) + if ranker: + logger.info( + f"Narrowing the collection from {top_k} results and further narrowing it to {settings.retriever.top_k} with the reranker." + ) + context_reranker = RunnableAssign( + { + "context": lambda input: ranker.compress_documents( + query=input['input'], documents=input['context'] + ) + } + ) + history_reranker = RunnableAssign( + { + "history": lambda input: ranker.compress_documents( + query=input['input'], documents=input['history'] + ) + } + ) + + retrieval_chain = context_chain | context_reranker | history_chain | history_reranker + else: + retrieval_chain = context_chain | history_chain + + # Handling Retrieval failure + docs = retrieval_chain.invoke({"input": query}, config={"callbacks": [self.cb_handler]}) + if not docs: + logger.warning("Retrieval failed to get any relevant context") + return iter( + [ + "No response generated from LLM, make sure your query is relavent to the ingested document." + ] + ) + + logger.debug(f"Retrieved documents are: {docs}") + chain = retrieval_chain | stream_chain + for chunk in chain.stream({"input": query}, config={"callbacks": [self.cb_handler]}): + yield chunk + resp_str += chunk + + self.save_memory_and_get_output({"input": query, "output": resp_str}, convstore) + + return chain.stream(query, config={"callbacks": [self.cb_handler]}) + + except Exception as e: + logger.warning(f"Failed to generate response due to exception {e}") + logger.warning("No response generated from LLM, make sure you've ingested document.") + return iter( + ["No response generated from LLM, make sure you have ingested document from the Knowledge Base Tab."] + ) + + def document_search(self, content: str, num_docs: int) -> List[Dict[str, Any]]: + """Search for the most relevant documents for the given search parameters.""" + + try: + ds = get_vectorstore(docstore, document_embedder) + if ds != None: + # Get 40 results from vector databases and compress them to 4 (top_k) using a ranker + top_k = 40 if ranker else settings.retriever.top_k + + try: + context_chain = RunnableAssign( + { + "context": itemgetter("input") + | ds.as_retriever( + search_type="similarity_score_threshold", + search_kwargs={"score_threshold": settings.retriever.score_threshold, "k": top_k}, + ) + } + ) + if ranker: + logger.info( + f"Narrowing the collection from {top_k} results and further narrowing it to {settings.retriever.top_k} with the reranker." 
+ ) + context_reranker = RunnableAssign( + { + "context": lambda input: ranker.compress_documents( + query=input['input'], documents=input['context'] + ) + } + ) + retriever = context_chain | context_reranker + else: + retriever = context_chain + docs = retriever.invoke({"input": content}, config={"callbacks": [self.cb_handler]}) + except NotImplementedError: + # Some retriever like milvus don't have similarity score threshold implemented + context_chain = RunnableAssign( + {"context": itemgetter("input") | ds.as_retriever(search_kwargs={"k": top_k})} + ) + if ranker: + logger.info( + f"Narrowing the collection from {top_k} results and further narrowing it to {settings.retriever.top_k} with the reranker." + ) + context_reranker = RunnableAssign( + { + "context": lambda input: ranker.compress_documents( + query=input['input'], documents=input['context'] + ) + } + ) + retriever = context_chain | context_reranker + else: + retriever = context_chain + docs = retriever.invoke({"input": content}, config={"callbacks": [self.cb_handler]}) + + result = [] + for doc in docs.get("context"): + result.append( + { + "source": os.path.basename(doc.metadata.get("source", "")), + "content": doc.page_content, + "score": doc.metadata.get("relevance_score", 0), + } + ) + return result + return [] + except Exception as e: + logger.error(f"Error from /documentSearch endpoint. Error details: {e}") + return [] + + def get_documents(self) -> List[str]: + """Retrieves filenames stored in the vector store.""" + try: + ds = get_vectorstore(docstore, document_embedder) + if ds: + return get_docs_vectorstore_langchain(ds) + except Exception as e: + logger.error(f"Vectorstore not initialized. Error details: {e}") + return [] + + def delete_documents(self, filenames: List[str]): + """Delete documents from the vector index.""" + try: + ds = get_vectorstore(docstore, document_embedder) + if ds: + return del_docs_vectorstore_langchain(ds, filenames) + except Exception as e: + logger.error(f"Vectorstore not initialized. 
Error details: {e}") diff --git a/deploy/compose/rag-app-text-chatbot.yaml b/RAG/examples/advanced_rag/multi_turn_rag/docker-compose.yaml similarity index 58% rename from deploy/compose/rag-app-text-chatbot.yaml rename to RAG/examples/advanced_rag/multi_turn_rag/docker-compose.yaml index 1342367e..b5e9c43a 100644 --- a/deploy/compose/rag-app-text-chatbot.yaml +++ b/RAG/examples/advanced_rag/multi_turn_rag/docker-compose.yaml @@ -1,73 +1,71 @@ -services: - jupyter-server: - container_name: notebook-server - image: notebook-server:${TAG:-latest} - build: - context: ../../ - dockerfile: ./notebooks/Dockerfile.notebooks # replace GPU enabled Dockerfile ./notebooks/Dockerfile.gpu_notebook - ports: - - "8888:8888" - expose: - - "8888" - deploy: - resources: - reservations: - devices: - - driver: nvidia - count: 1 - capabilities: [gpu] +include: + - path: + - ../../local_deploy/docker-compose-vectordb.yaml + - ../../local_deploy/docker-compose-nim-ms.yaml +services: chain-server: container_name: chain-server image: chain-server:${TAG:-latest} build: - context: ../../ - dockerfile: ./RetrievalAugmentedGeneration/Dockerfile + context: ../../../../ + dockerfile: RAG/src/chain_server/Dockerfile args: - EXAMPLE_NAME: developer_rag + EXAMPLE_PATH: 'advanced_rag/multi_turn_rag' + volumes: + - ./prompt.yaml:/prompt.yaml command: --port 8081 --host 0.0.0.0 environment: + EXAMPLE_PATH: 'advanced_rag/multi_turn_rag' APP_VECTORSTORE_URL: "http://milvus:19530" APP_VECTORSTORE_NAME: "milvus" - APP_EMBEDDINGS_MODELNAME: ${APP_EMBEDDINGS_MODELNAME:-snowflake/arctic-embed-l} + APP_LLM_MODELNAME: ${APP_LLM_MODELNAME:-"meta/llama3-8b-instruct"} + APP_LLM_MODELENGINE: nvidia-ai-endpoints + APP_LLM_SERVERURL: ${APP_LLM_SERVERURL:-""} + APP_EMBEDDINGS_MODELNAME: ${APP_EMBEDDINGS_MODELNAME:-nvidia/nv-embedqa-e5-v5} APP_EMBEDDINGS_MODELENGINE: ${APP_EMBEDDINGS_MODELENGINE:-nvidia-ai-endpoints} APP_EMBEDDINGS_SERVERURL: ${APP_EMBEDDINGS_SERVERURL:-""} - APP_LLM_SERVERURL: ${APP_LLM_SERVERURL:-""} - APP_LLM_MODELNAME: ${APP_LLM_MODELNAME:-"meta/llama3-8b-instruct"} - APP_LLM_MODELENGINE: ${APP_LLM_MODELENGINE:-nvidia-ai-endpoints} + APP_TEXTSPLITTER_MODELNAME: Snowflake/snowflake-arctic-embed-l + APP_TEXTSPLITTER_CHUNKSIZE: 506 + APP_TEXTSPLITTER_CHUNKOVERLAP: 200 + APP_RANKING_MODELNAME: ${APP_RANKING_MODELNAME:-"nvidia/nv-rerankqa-mistral-4b-v3"} # Leave it blank to avoid using ranking + APP_RANKING_MODELENGINE: ${APP_RANKING_MODELENGINE:-nvidia-ai-endpoints} + APP_RANKING_SERVERURL: ${APP_RANKING_SERVERURL:-""} NVIDIA_API_KEY: ${NVIDIA_API_KEY} + APP_RETRIEVER_TOPK: 4 + APP_RETRIEVER_SCORETHRESHOLD: 0.25 POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-password} POSTGRES_USER: ${POSTGRES_USER:-postgres} POSTGRES_DB: ${POSTGRES_DB:-api} - COLLECTION_NAME: ${COLLECTION_NAME:-developer_rag} - APP_RETRIEVER_TOPK: 4 - APP_RETRIEVER_SCORETHRESHOLD: 0.25 + COLLECTION_NAME: ${COLLECTION_NAME:-multi_turn_rag} OTEL_EXPORTER_OTLP_ENDPOINT: http://otel-collector:4317 OTEL_EXPORTER_OTLP_PROTOCOL: grpc ENABLE_TRACING: false - APP_TEXTSPLITTER_MODELNAME: Snowflake/snowflake-arctic-embed-l - APP_TEXTSPLITTER_CHUNKSIZE: 506 - APP_TEXTSPLITTER_CHUNKOVERLAP: 200 LOGLEVEL: ${LOGLEVEL:-INFO} ports: - "8081:8081" expose: - "8081" shm_size: 5gb - deploy: - resources: - reservations: - devices: - - driver: nvidia - count: 1 - capabilities: [gpu] + depends_on: + nemollm-embedding: + condition: service_healthy + required: false + nemollm-inference: + condition: service_healthy + required: false + ranking-ms: + condition: service_healthy + required: 
false rag-playground: container_name: rag-playground image: rag-playground:${TAG:-latest} build: - context: ../.././RetrievalAugmentedGeneration/frontend/ + context: ../../../../RAG/src/rag_playground/ dockerfile: Dockerfile + args: + PLAYGROUND_MODE: ${PLAYGROUND_MODE:-default} command: --port 8090 environment: APP_SERVERURL: http://chain-server @@ -76,16 +74,12 @@ services: OTEL_EXPORTER_OTLP_ENDPOINT: http://otel-collector:4317 OTEL_EXPORTER_OTLP_PROTOCOL: grpc ENABLE_TRACING: false - RIVA_API_URI: ${RIVA_API_URI:-} - RIVA_API_KEY: ${RIVA_API_KEY:-} - RIVA_FUNCTION_ID: ${RIVA_FUNCTION_ID:-} - TTS_SAMPLE_RATE: ${TTS_SAMPLE_RATE:-48000} ports: - "8090:8090" expose: - "8090" depends_on: - - chain-server + - chain-server networks: default: diff --git a/RAG/examples/advanced_rag/multi_turn_rag/prompt.yaml b/RAG/examples/advanced_rag/multi_turn_rag/prompt.yaml new file mode 100644 index 00000000..5e63a644 --- /dev/null +++ b/RAG/examples/advanced_rag/multi_turn_rag/prompt.yaml @@ -0,0 +1,22 @@ +chat_template: | + You are a helpful, respectful and honest assistant. + Always answer as helpfully as possible, while being safe. + Please ensure that your responses are positive in nature. + +rag_template: | + [INST] <> + Use the following context to answer the user's question. If you don't know the answer, + just say that you don't know, don't try to make up an answer. + <> + [INST] Context: {context_str} Question: {query_str} Only return the helpful + answer below and nothing else. Helpful answer:[/INST] + +multi_turn_rag_template: | + You are a document chatbot. Help the user as they ask questions about documents. + User message just asked: {input}\n\n + For this, we have retrieved the following potentially-useful info: + Conversation History Retrieved: + {history}\n\n + Document Retrieved: + {context}\n\n + Answer only from retrieved data. Make your response conversational. diff --git a/RAG/examples/advanced_rag/multimodal_rag/README.md b/RAG/examples/advanced_rag/multimodal_rag/README.md new file mode 100644 index 00000000..fa8573ef --- /dev/null +++ b/RAG/examples/advanced_rag/multimodal_rag/README.md @@ -0,0 +1,78 @@ + + +# Multimodal RAG Example + +## Example Features + +This example demonstrates how work with multimodal data. +It showcases multimodal parsing of documents - images, tables, text through multimodal LLM APIs residing in Nvidia API Catalog. The example generates image descriptions using VLMs as shown in the diagram below. +The example works with PDF, PPTX, and PNG files. +The chain server extracts information from the files such as graphs and plots, as well as text and tables. + +| Model | Embedding | Framework | Vector Database | File Types | +| ----------------------- | ------------------------ | --------- | --------------- | -------------- | +| meta/llama3-8b-instruct for response generation, google/Deplot for graph to text convertion and Neva-22B for image to text convertion | nvidia/nv-embedqa-e5-v5 | LangChain | Milvus | PDF, PPTX, PNG | + +![Diagram](../../../../docs/images/multimodal_rag_arch.png) + +## Prerequisites + +Complete the [common prerequisites](../../../../docs/common-prerequisites.md). + +## Build and Start the Containers + +1. Export your NVIDIA API key as an environment variable: + + ```text + export NVIDIA_API_KEY="nvapi-<...>" + ``` + +1. 
Start the containers: + + ```console + cd RAG/examples/advanced_rag/multimodal_rag/ + docker compose up -d --build + ``` + + *Example Output* + + ```output + ✔ Network nvidia-rag Created + ✔ Container rag-playground Started + ✔ Container milvus-minio Started + ✔ Container chain-server Started + ✔ Container milvus-etcd Started + ✔ Container milvus-standalone Started + ``` + +1. Confirm the containers are running: + + ```console + docker ps --format "table {{.ID}}\t{{.Names}}\t{{.Status}}" + ``` + + *Example Output* + + ```output + CONTAINER ID NAMES STATUS + 39a8524829da rag-playground Up 2 minutes + bfbd0193dbd2 chain-server Up 2 minutes + ec02ff3cc58b milvus-standalone Up 3 minutes + 6969cf5b4342 milvus-minio Up 3 minutes (healthy) + 57a068d62fbb milvus-etcd Up 3 minutes (healthy) + ``` + +1. Open a web browser and access to use the RAG Playground. + + Refer to [Using the Sample Web Application](../../../../docs/using-sample-web-application.md) + for information about uploading documents and using the web interface. + +## Next Steps + +- [Vector Database Customizations](../../../../docs/vector-database.md) +- Stop the containers by running `docker compose down`. +- Use the [RAG Application: Multimodal Chatbot](https://registry.ngc.nvidia.com/orgs/ohlfw0olaadg/teams/ea-participants/helm-charts/rag-app-multimodal-chatbot) + Helm chart to deploy this example in Kubernetes. \ No newline at end of file diff --git a/RetrievalAugmentedGeneration/examples/multimodal_rag/__init__.py b/RAG/examples/advanced_rag/multimodal_rag/__init__.py similarity index 100% rename from RetrievalAugmentedGeneration/examples/multimodal_rag/__init__.py rename to RAG/examples/advanced_rag/multimodal_rag/__init__.py diff --git a/RetrievalAugmentedGeneration/examples/multimodal_rag/chains.py b/RAG/examples/advanced_rag/multimodal_rag/chains.py similarity index 58% rename from RetrievalAugmentedGeneration/examples/multimodal_rag/chains.py rename to RAG/examples/advanced_rag/multimodal_rag/chains.py index bcd68698..f135f0ad 100644 --- a/RetrievalAugmentedGeneration/examples/multimodal_rag/chains.py +++ b/RAG/examples/advanced_rag/multimodal_rag/chains.py @@ -15,32 +15,35 @@ import logging import os -from typing import Generator, List, Dict, Any from functools import lru_cache from traceback import print_exc +from typing import Any, Dict, Generator, List + from langchain_community.document_loaders import UnstructuredFileLoader -from RetrievalAugmentedGeneration.common.utils import utils_cache +from RAG.src.chain_server.utils import utils_cache logger = logging.getLogger(__name__) -from RetrievalAugmentedGeneration.common.base import BaseExample -from RetrievalAugmentedGeneration.example.llm.llm_client import LLMClient -from RetrievalAugmentedGeneration.example.vectorstore.vectorstore_updater import update_vectorstore -from RetrievalAugmentedGeneration.common.utils import ( - get_config, +from RAG.examples.advanced_rag.multimodal_rag.llm.llm_client import LLMClient +from RAG.examples.advanced_rag.multimodal_rag.vectorstore.vectorstore_updater import update_vectorstore +from RAG.src.chain_server.base import BaseExample +from RAG.src.chain_server.tracing import langchain_instrumentation_class_wrapper +from RAG.src.chain_server.utils import ( create_vectorstore_langchain, + del_docs_vectorstore_langchain, + get_config, + get_docs_vectorstore_langchain, get_embedding_model, + get_prompts, get_text_splitter, - get_docs_vectorstore_langchain, - del_docs_vectorstore_langchain, - get_vectorstore + get_vectorstore, ) -from 
RetrievalAugmentedGeneration.common.tracing import langchain_instrumentation_class_wrapper document_embedder = get_embedding_model() text_splitter = None settings = get_config() +prompts = get_prompts() sources = [] RESPONSE_PARAPHRASING_MODEL = settings.llm.model_name @@ -50,42 +53,42 @@ docstore = None logger.info(f"Unable to connect to vector store during initialization: {e}") + @utils_cache @lru_cache() def get_llm(model_name, cb_handler, is_response_generator=False, **kwargs): - return LLMClient(model_name=model_name, is_response_generator=is_response_generator, cb_handler=cb_handler, **kwargs) + return LLMClient( + model_name=model_name, is_response_generator=is_response_generator, cb_handler=cb_handler, **kwargs + ) @langchain_instrumentation_class_wrapper class MultimodalRAG(BaseExample): - def ingest_docs(self, filepath: str, filename: str): """Ingest documents to the VectorDB.""" - if not filename.endswith((".pdf",".pptx")): - raise ValueError(f"{filename} is not a valid PDF/PPTX file. Only PDF/PPTX files are supported for multimodal rag. The PDF/PPTX files can contain multimodal data.") + if not filename.endswith((".pdf", ".pptx", ".png")): + raise ValueError( + f"{filename} is not a valid PDF/PPTX/PNG file. Only PDF/PPTX/PNG files are supported for multimodal rag. The PDF/PPTX/PNG files can contain multimodal data." + ) try: _path = filepath ds = get_vectorstore(docstore, document_embedder) - update_vectorstore(_path,ds,document_embedder,os.getenv('COLLECTION_NAME', "vector_db")) + update_vectorstore(_path, ds, document_embedder, os.getenv('COLLECTION_NAME', "vector_db")) except Exception as e: logger.error(f"Failed to ingest document due to exception {e}") - raise ValueError( - "Failed to upload document. Please upload an unstructured text document." - ) - + raise ValueError("Failed to upload document. 
Please upload an unstructured text document.") - def llm_chain( - self, query: str, chat_history: List["Message"], **kwargs - ) -> Generator[str, None, None]: + def llm_chain(self, query: str, chat_history: List["Message"], **kwargs) -> Generator[str, None, None]: """Execute a simple LLM chain using the components defined above.""" # TODO integrate chat_history logger.info("Using llm to generate response directly without knowledge base.") - response = get_llm(model_name=RESPONSE_PARAPHRASING_MODEL, cb_handler=self.cb_handler, is_response_generator=True, **kwargs).chat_with_prompt(settings.prompts.chat_template, query) + response = get_llm( + model_name=RESPONSE_PARAPHRASING_MODEL, cb_handler=self.cb_handler, is_response_generator=True, **kwargs + ).chat_with_prompt(prompts.get("chat_template", ""), query) return response - def rag_chain(self, query: str, chat_history: List["Message"], **kwargs) -> Generator[str, None, None]: """Execute a Retrieval Augmented Generation chain using the components defined above.""" @@ -95,41 +98,63 @@ def rag_chain(self, query: str, chat_history: List["Message"], **kwargs) -> Gene ds = get_vectorstore(docstore, document_embedder) if ds: try: - logger.info(f"Getting retrieved top k values: {settings.retriever.top_k} with confidence threshold: {settings.retriever.score_threshold}") - retriever = ds.as_retriever(search_type="similarity_score_threshold",search_kwargs={"score_threshold": settings.retriever.score_threshold,"k": settings.retriever.top_k}) - docs = retriever.invoke(input=query, config={"callbacks":[self.cb_handler]}) + logger.info( + f"Getting retrieved top k values: {settings.retriever.top_k} with confidence threshold: {settings.retriever.score_threshold}" + ) + retriever = ds.as_retriever( + search_type="similarity_score_threshold", + search_kwargs={ + "score_threshold": settings.retriever.score_threshold, + "k": settings.retriever.top_k, + }, + ) + docs = retriever.invoke(input=query, config={"callbacks": [self.cb_handler]}) if not docs: logger.warning("Retrieval failed to get any relevant context") - return iter(["No response generated from LLM, make sure your query is relavent to the ingested document."]) + return iter( + [ + "No response generated from LLM, make sure your query is relavent to the ingested document." 
+ ] + ) augmented_prompt = "Relevant documents:" + docs + "\n\n[[QUESTION]]\n\n" + query - system_prompt = settings.prompts.rag_template + system_prompt = prompts.get("rag_template", "") logger.info(f"Formulated prompt for RAG chain: {system_prompt}\n{augmented_prompt}") - response = get_llm(model_name=RESPONSE_PARAPHRASING_MODEL, cb_handler=self.cb_handler, is_response_generator=True, **kwargs).chat_with_prompt(settings.prompts.rag_template, augmented_prompt) + response = get_llm( + model_name=RESPONSE_PARAPHRASING_MODEL, + cb_handler=self.cb_handler, + is_response_generator=True, + **kwargs, + ).chat_with_prompt(prompts.get("rag_template", ""), augmented_prompt) return response except Exception as e: logger.info(f"Skipping similarity score as it's not supported by retriever") retriever = ds.as_retriever() - docs = retriever.invoke(input=query, config={"callbacks":[self.cb_handler]}) + docs = retriever.invoke(input=query, config={"callbacks": [self.cb_handler]}) if not docs: logger.warning("Retrieval failed to get any relevant context") - return iter(["No response generated from LLM, make sure your query is relavent to the ingested document."]) - docs=[doc.page_content for doc in docs] + return iter( + [ + "No response generated from LLM, make sure your query is relavent to the ingested document." + ] + ) + docs = [doc.page_content for doc in docs] docs = " ".join(docs) augmented_prompt = "Relevant documents:" + docs + "\n\n[[QUESTION]]\n\n" + query - system_prompt = settings.prompts.rag_template + system_prompt = prompts.get("rag_template", "") logger.info(f"Formulated prompt for RAG chain: {system_prompt}\n{augmented_prompt}") - response = get_llm(model_name=RESPONSE_PARAPHRASING_MODEL, cb_handler=self.cb_handler, is_response_generator=True, **kwargs).chat_with_prompt(settings.prompts.rag_template, augmented_prompt) + response = get_llm( + model_name=RESPONSE_PARAPHRASING_MODEL, + cb_handler=self.cb_handler, + is_response_generator=True, + **kwargs, + ).chat_with_prompt(prompts.get("rag_template", ""), augmented_prompt) return response except Exception as e: logger.warning(f"Failed to generate response due to exception {e}") - logger.warning( - "No response generated from LLM, make sure you've ingested document." - ) + logger.warning("No response generated from LLM, make sure you've ingested document.") return iter( - [ - "No response generated from LLM, make sure you have ingested document from the Knowledge Base Tab." 
- ] + ["No response generated from LLM, make sure you have ingested document from the Knowledge Base Tab."] ) def document_search(self, content: str, num_docs: int) -> List[Dict[str, Any]]: @@ -138,7 +163,7 @@ def document_search(self, content: str, num_docs: int) -> List[Dict[str, Any]]: try: ds = get_vectorstore(docstore, document_embedder) retriever = ds.as_retriever() - sources = retriever.invoke(input=content, limit=settings.retriever.top_k, config={"callbacks":[self.cb_handler]}) + sources = retriever.invoke(input=content, limit=num_docs, config={"callbacks": [self.cb_handler]}) output = [] for every_chunk in sources: entry = {"source": every_chunk.metadata['filename'], "content": every_chunk.page_content} diff --git a/deploy/compose/rag-app-multiturn-chatbot.yaml b/RAG/examples/advanced_rag/multimodal_rag/docker-compose.yaml similarity index 65% rename from deploy/compose/rag-app-multiturn-chatbot.yaml rename to RAG/examples/advanced_rag/multimodal_rag/docker-compose.yaml index c8251e87..948f2652 100644 --- a/deploy/compose/rag-app-multiturn-chatbot.yaml +++ b/RAG/examples/advanced_rag/multimodal_rag/docker-compose.yaml @@ -1,32 +1,37 @@ +include: + - path: + - ../../local_deploy/docker-compose-vectordb.yaml + - ../../local_deploy/docker-compose-nim-ms.yaml + services: chain-server: container_name: chain-server image: chain-server:${TAG:-latest} build: - context: ../../ - dockerfile: ./RetrievalAugmentedGeneration/Dockerfile + context: ../../../../ + dockerfile: RAG/src/chain_server/Dockerfile args: - EXAMPLE_NAME: multi_turn_rag + EXAMPLE_PATH: 'advanced_rag/multimodal_rag' + volumes: + - ./prompt.yaml:/prompt.yaml command: --port 8081 --host 0.0.0.0 environment: - APP_VECTORSTORE_URL: "http://milvus:19530" - APP_VECTORSTORE_NAME: "milvus" + EXAMPLE_PATH: 'advanced_rag/multimodal_rag' APP_LLM_MODELNAME: ${APP_LLM_MODELNAME:-"meta/llama3-8b-instruct"} APP_LLM_MODELENGINE: nvidia-ai-endpoints APP_LLM_SERVERURL: ${APP_LLM_SERVERURL:-""} - APP_EMBEDDINGS_MODELNAME: ${APP_EMBEDDINGS_MODELNAME:-snowflake/arctic-embed-l} + APP_EMBEDDINGS_MODELNAME: ${APP_EMBEDDINGS_MODELNAME:-nvidia/nv-embedqa-e5-v5} APP_EMBEDDINGS_MODELENGINE: ${APP_EMBEDDINGS_MODELENGINE:-nvidia-ai-endpoints} APP_EMBEDDINGS_SERVERURL: ${APP_EMBEDDINGS_SERVERURL:-""} APP_TEXTSPLITTER_MODELNAME: Snowflake/snowflake-arctic-embed-l - APP_TEXTSPLITTER_CHUNKSIZE: 506 + APP_TEXTSPLITTER_CHUNKSIZE: 510 APP_TEXTSPLITTER_CHUNKOVERLAP: 200 NVIDIA_API_KEY: ${NVIDIA_API_KEY} APP_RETRIEVER_TOPK: 4 APP_RETRIEVER_SCORETHRESHOLD: 0.25 - POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-password} - POSTGRES_USER: ${POSTGRES_USER:-postgres} - POSTGRES_DB: ${POSTGRES_DB:-api} - COLLECTION_NAME: ${COLLECTION_NAME:-multi_turn_rag} + APP_VECTORSTORE_URL: "http://milvus:19530" + APP_VECTORSTORE_NAME: "milvus" + COLLECTION_NAME: ${COLLECTION_NAME:-multimodal_rag} OTEL_EXPORTER_OTLP_ENDPOINT: http://otel-collector:4317 OTEL_EXPORTER_OTLP_PROTOCOL: grpc ENABLE_TRACING: false @@ -36,29 +41,27 @@ services: expose: - "8081" shm_size: 5gb - deploy: - resources: - reservations: - devices: - - driver: nvidia - count: 1 - capabilities: [gpu] + depends_on: + nemollm-embedding: + condition: service_healthy + required: false + nemollm-inference: + condition: service_healthy + required: false rag-playground: container_name: rag-playground image: rag-playground:${TAG:-latest} build: - context: ../.././RetrievalAugmentedGeneration/frontend/ + context: ../../../../RAG/src/rag_playground/ dockerfile: Dockerfile + args: + PLAYGROUND_MODE: ${PLAYGROUND_MODE:-default} 
command: --port 8090 environment: APP_SERVERURL: http://chain-server APP_SERVERPORT: 8081 APP_MODELNAME: ${APP_LLM_MODELNAME:-"meta/llama3-8b-instruct"} - RIVA_API_URI: ${RIVA_API_URI:-} - RIVA_API_KEY: ${RIVA_API_KEY:-} - RIVA_FUNCTION_ID: ${RIVA_FUNCTION_ID:-} - TTS_SAMPLE_RATE: ${TTS_SAMPLE_RATE:-48000} OTEL_EXPORTER_OTLP_ENDPOINT: http://otel-collector:4317 OTEL_EXPORTER_OTLP_PROTOCOL: grpc ENABLE_TRACING: false @@ -67,7 +70,7 @@ services: expose: - "8090" depends_on: - - chain-server + - chain-server networks: default: diff --git a/RetrievalAugmentedGeneration/examples/multimodal_rag/retriever/__init__.py b/RAG/examples/advanced_rag/multimodal_rag/llm/__init__.py similarity index 100% rename from RetrievalAugmentedGeneration/examples/multimodal_rag/retriever/__init__.py rename to RAG/examples/advanced_rag/multimodal_rag/llm/__init__.py diff --git a/RetrievalAugmentedGeneration/examples/multimodal_rag/llm/llm.py b/RAG/examples/advanced_rag/multimodal_rag/llm/llm.py similarity index 80% rename from RetrievalAugmentedGeneration/examples/multimodal_rag/llm/llm.py rename to RAG/examples/advanced_rag/multimodal_rag/llm/llm.py index 629a170c..49fb1301 100644 --- a/RetrievalAugmentedGeneration/examples/multimodal_rag/llm/llm.py +++ b/RAG/examples/advanced_rag/multimodal_rag/llm/llm.py @@ -13,14 +13,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import requests import json + +import requests import torch -from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline from langchain_community.llms import HuggingFacePipeline from langchain_nvidia_ai_endpoints import ChatNVIDIA +from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline -from RetrievalAugmentedGeneration.common.utils import get_llm, get_config +from RAG.src.chain_server.utils import get_config, get_llm class NvidiaLLM: @@ -31,29 +32,29 @@ def __init__(self, model_name, is_response_generator: bool = False, **kwargs): if is_response_generator: self.llm = get_llm(**kwargs) else: - self.llm = ChatNVIDIA(model=model_name, - temperature = kwargs.get('temperature', None), - top_p = kwargs.get('top_p', None), - max_tokens = kwargs.get('max_tokens', None)) + self.llm = ChatNVIDIA( + model=model_name, + temperature=kwargs.get('temperature', None), + top_p=kwargs.get('top_p', None), + max_tokens=kwargs.get('max_tokens', None), + ) + class LocalLLM: def __init__(self, model_path, **kwargs): tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True) model = AutoModelForCausalLM.from_pretrained( - model_path, - torch_dtype=torch.float16, - trust_remote_code=True, - device_map="auto" - ) + model_path, torch_dtype=torch.float16, trust_remote_code=True, device_map="auto" + ) pipe = pipeline( "text-generation", model=model, tokenizer=tokenizer, - max_length=kwargs.get('max_tokens',1024), + max_length=kwargs.get('max_tokens', 1024), temperature=kwargs.get('temperature', 0.6), top_p=kwargs.get('top_p', 0.3), - repetition_penalty=1.0 + repetition_penalty=1.0, ) self.llm = HuggingFacePipeline(pipeline=pipe) @@ -75,9 +76,9 @@ def create_llm(model_name, model_type="NVIDIA", is_response_generator=False, **k if __name__ == "__main__": llm = create_llm("gpt2", "LOCAL") + from langchain import LLMChain from langchain_core.output_parsers import StrOutputParser from langchain_core.prompts import ChatPromptTemplate - from langchain import LLMChain system_prompt = "" prompt = "who are you" @@ -88,4 +89,3 @@ def create_llm(model_name, model_type="NVIDIA", 
is_response_generator=False, **k for chunk in response: print(chunk) - diff --git a/RetrievalAugmentedGeneration/examples/multimodal_rag/llm/llm_client.py b/RAG/examples/advanced_rag/multimodal_rag/llm/llm_client.py similarity index 63% rename from RetrievalAugmentedGeneration/examples/multimodal_rag/llm/llm_client.py rename to RAG/examples/advanced_rag/multimodal_rag/llm/llm_client.py index 14997d0f..276e2569 100644 --- a/RetrievalAugmentedGeneration/examples/multimodal_rag/llm/llm_client.py +++ b/RAG/examples/advanced_rag/multimodal_rag/llm/llm_client.py @@ -14,16 +14,27 @@ # limitations under the License. import logging + logger = logging.getLogger(__name__) -from RetrievalAugmentedGeneration.example.llm.llm import create_llm +from langchain.callbacks.base import BaseCallbackHandler +from langchain_core.messages import HumanMessage from langchain_core.output_parsers import StrOutputParser from langchain_core.prompts import ChatPromptTemplate -from langchain_core.messages import HumanMessage -from langchain.callbacks.base import BaseCallbackHandler + +from RAG.examples.advanced_rag.multimodal_rag.llm.llm import create_llm +from RAG.src.chain_server.utils import get_prompts + class LLMClient: - def __init__(self, model_name="mixtral_8x7b", model_type="NVIDIA", is_response_generator=False, cb_handler=BaseCallbackHandler, **kwargs): + def __init__( + self, + model_name="mixtral_8x7b", + model_type="NVIDIA", + is_response_generator=False, + cb_handler=BaseCallbackHandler, + **kwargs, + ): self.llm = create_llm(model_name, model_type, is_response_generator, **kwargs) self.cb_handler = cb_handler @@ -35,9 +46,22 @@ def chat_with_prompt(self, system_prompt, prompt): return response def multimodal_invoke(self, b64_string, steer=False, creativity=0, quality=9, complexity=0, verbosity=8): - message = HumanMessage(content=[{"type": "text", "text": "Describe this image in detail:"}, - {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64_string}"},}]) + message = HumanMessage( + content=[ + {"type": "text", "text": get_prompts().get("describe_image_prompt", "")}, + {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64_string}"},}, + ] + ) if steer: - return self.llm.invoke([message], labels={"creativity": creativity, "quality": quality, "complexity": complexity, "verbosity": verbosity}, callbacks=[self.cb_handler]) + return self.llm.invoke( + [message], + labels={ + "creativity": creativity, + "quality": quality, + "complexity": complexity, + "verbosity": verbosity, + }, + callbacks=[self.cb_handler], + ) else: - return self.llm.invoke([message]) \ No newline at end of file + return self.llm.invoke([message]) diff --git a/RAG/examples/advanced_rag/multimodal_rag/prompt.yaml b/RAG/examples/advanced_rag/multimodal_rag/prompt.yaml new file mode 100644 index 00000000..aa63fa28 --- /dev/null +++ b/RAG/examples/advanced_rag/multimodal_rag/prompt.yaml @@ -0,0 +1,11 @@ +chat_template: | + You are a helpful and friendly multimodal intelligent AI assistant named Multimodal Chatbot Assistant. + Always answer as helpfully as possible, while being safe. + Please ensure that your responses are positive in nature. + +rag_template: "You are a helpful and friendly multimodal intelligent AI assistant named Multimodal Chatbot Assistant. You are an expert in the content of the document provided and can provide information using both text and images. The user may also provide an image input, and you will use the image description to retrieve similar images, tables and text. 
The context given below will provide some technical or financial documentation and whitepapers to help you answer the question. Based on this context, answer the question truthfully. If the question is not related to this, please refrain from answering. Most importantly, if the context provided does not include information about the question from the user, reply saying that you don't know. Do not utilize any information that is not provided in the documents below. All documents will be preceded by tags, for example [[DOCUMENT 1]], [[DOCUMENT 2]], and so on. You can reference them in your reply but without the brackets, so just say document 1 or 2. The question will be preceded by a [[QUESTION]] tag. Be succinct, clear, and helpful. Remember to describe everything in detail by using the knowledge provided, or reply that you don't know the answer. Do not fabricate any responses. Note that you have the ability to reference images, tables, and other multimodal elements when necessary. You can also refer to the image provided by the user, if any." + +describe_image_prompt: | + Describe this image in detail: + +deplot_summarization_prompt: Your responsibility is to explain charts. You are an expert in describing the responses of linearized tables into plain English text for LLMs to use. \ No newline at end of file diff --git a/RetrievalAugmentedGeneration/examples/multimodal_rag/requirements.txt b/RAG/examples/advanced_rag/multimodal_rag/requirements.txt similarity index 100% rename from RetrievalAugmentedGeneration/examples/multimodal_rag/requirements.txt rename to RAG/examples/advanced_rag/multimodal_rag/requirements.txt diff --git a/experimental/multimodal_assistant/guardrails/__init__.py b/RAG/examples/advanced_rag/multimodal_rag/retriever/__init__.py similarity index 100% rename from experimental/multimodal_assistant/guardrails/__init__.py rename to RAG/examples/advanced_rag/multimodal_rag/retriever/__init__.py diff --git a/RetrievalAugmentedGeneration/examples/multimodal_rag/retriever/embedder.py b/RAG/examples/advanced_rag/multimodal_rag/retriever/embedder.py similarity index 92% rename from RetrievalAugmentedGeneration/examples/multimodal_rag/retriever/embedder.py rename to RAG/examples/advanced_rag/multimodal_rag/retriever/embedder.py index 183448ad..27590a3a 100644 --- a/RetrievalAugmentedGeneration/examples/multimodal_rag/retriever/embedder.py +++ b/RAG/examples/advanced_rag/multimodal_rag/retriever/embedder.py @@ -14,16 +14,15 @@ # limitations under the License. from abc import ABC, abstractmethod -from pydantic import BaseModel from typing import Any, Optional from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings +from pydantic import BaseModel -from RetrievalAugmentedGeneration.common.utils import get_embedding_model - -class Embedder(ABC, BaseModel): +from RAG.src.chain_server.utils import get_embedding_model +class Embedder(ABC, BaseModel): @abstractmethod def embed_query(self, text): ... 
@@ -37,20 +36,19 @@ def get_embedding_size(self): sample_embedding = self.embedder.embed_query(sample_text) return len(sample_embedding) + class NVIDIAEmbedders(Embedder): - name : str - type : str - embedder : Optional[Any] = None + name: str + type: str + embedder: Optional[Any] = None def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.embedder = get_embedding_model() - def embed_query(self, text): return self.embedder.embed_query(text) - def embed_documents(self, documents, batch_size=10): output = [] batch_documents = [] @@ -62,4 +60,4 @@ def embed_documents(self, documents, batch_size=10): else: if len(batch_documents) > 0: output.extend(self.embedder.embed_documents(batch_documents)) - return output \ No newline at end of file + return output diff --git a/RetrievalAugmentedGeneration/examples/multimodal_rag/retriever/retriever.py b/RAG/examples/advanced_rag/multimodal_rag/retriever/retriever.py similarity index 80% rename from RetrievalAugmentedGeneration/examples/multimodal_rag/retriever/retriever.py rename to RAG/examples/advanced_rag/multimodal_rag/retriever/retriever.py index 804014bd..7cdabac5 100644 --- a/RetrievalAugmentedGeneration/examples/multimodal_rag/retriever/retriever.py +++ b/RAG/examples/advanced_rag/multimodal_rag/retriever/retriever.py @@ -14,14 +14,15 @@ # limitations under the License. from pydantic import BaseModel -from RetrievalAugmentedGeneration.example.retriever.embedder import Embedder -from RetrievalAugmentedGeneration.example.retriever.vector import VectorClient +from RAG.examples.advanced_rag.multimodal_rag.retriever.embedder import Embedder +from RAG.examples.advanced_rag.multimodal_rag.retriever.vector import VectorClient + class Retriever(BaseModel): - embedder : Embedder - vector_client : VectorClient - search_limit : int = 4 + embedder: Embedder + vector_client: VectorClient + search_limit: int = 4 def get_relevant_docs(self, text, limit=None): if limit is None: @@ -29,4 +30,3 @@ def get_relevant_docs(self, text, limit=None): query_vector = self.embedder.embed_query(text) concatdocs, sources = self.vector_client.search([query_vector], limit) return concatdocs, sources - diff --git a/RetrievalAugmentedGeneration/examples/multimodal_rag/retriever/vector.py b/RAG/examples/advanced_rag/multimodal_rag/retriever/vector.py similarity index 81% rename from RetrievalAugmentedGeneration/examples/multimodal_rag/retriever/vector.py rename to RAG/examples/advanced_rag/multimodal_rag/retriever/vector.py index a8af767e..15a402ee 100644 --- a/RetrievalAugmentedGeneration/examples/multimodal_rag/retriever/vector.py +++ b/RAG/examples/advanced_rag/multimodal_rag/retriever/vector.py @@ -13,17 +13,19 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import uuid from abc import ABC, abstractmethod from typing import Any -import uuid + from pydantic import BaseModel -from pymilvus import connections, Collection, FieldSchema, CollectionSchema, DataType, utility +from pymilvus import Collection, CollectionSchema, DataType, FieldSchema, connections, utility + class VectorClient(ABC, BaseModel): - hostname : str - port : str - collection_name : str + hostname: str + port: str + collection_name: str @abstractmethod def connect(self): @@ -43,19 +45,21 @@ def update(self): class MilvusVectorClient(VectorClient): - hostname : str = "milvus" - port : str = "19530" - metric_type : str = "L2" - index_type : str = "GPU_IVF_FLAT" - nlist : int = 100 - index_field_name : str = "embedding" - nprobe : int = 5 - vector_db : Any = None + hostname: str = "milvus" + port: str = "19530" + metric_type: str = "L2" + index_type: str = "GPU_IVF_FLAT" + nlist: int = 100 + index_field_name: str = "embedding" + nprobe: int = 5 + vector_db: Any = None embedding_size: int = 1024 def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.vector_db = self.connect(self.collection_name, self.hostname, self.port, embedding_size=self.embedding_size) + self.vector_db = self.connect( + self.collection_name, self.hostname, self.port, embedding_size=self.embedding_size + ) self._create_index(self.metric_type, self.index_type, self.index_field_name, self.nlist) self.vector_db.load() @@ -64,7 +68,7 @@ def _create_index(self, metric_type, index_type, field_name, nlist=100): index_params = { "metric_type": metric_type, # or "IP" depending on your requirement "index_type": index_type, # You can choose other types like IVF_PQ based on your need - "params": {"nlist": nlist} # Adjust the nlist parameter as per your requirements + "params": {"nlist": nlist}, # Adjust the nlist parameter as per your requirements } self.vector_db.create_index(field_name=field_name, index_params=index_params) @@ -82,17 +86,14 @@ def disconnect(self, alias="default"): connections.disconnect(alias) def search(self, query_vectors, limit=5): - search_params = { - "metric_type": self.metric_type, - "params": {"nprobe": self.nprobe} - } + search_params = {"metric_type": self.metric_type, "params": {"nprobe": self.nprobe}} search_results = self.vector_db.search( data=query_vectors, anns_field=self.index_field_name, # Replace with your vector field name param=search_params, output_fields=["content", "metadata"], - limit=limit + limit=limit, ) concatdocs = "" sources = {} @@ -116,19 +117,14 @@ def __del__(self): self.disconnect() def update(self, documents, embeddings, collection_name): - # Processing each document + # Processing each document insert_data = [] for i, doc in enumerate(documents): # If 'doc' has unique identifier or create one for the document doc_id = doc.id if hasattr(doc, 'id') else str(uuid.uuid4()) # Prepare data for insertion - example = { - "id": doc_id, - "content": doc.page_content, - "embedding": embeddings[i], - "metadata": doc.metadata - } + example = {"id": doc_id, "content": doc.page_content, "embedding": embeddings[i], "metadata": doc.metadata} insert_data.append(example) self.vector_db.insert(insert_data) @@ -137,9 +133,11 @@ def get_schema(self, embedding_size): # Define the primary key field along with other fields fields = [ FieldSchema(name="id", dtype=DataType.VARCHAR, max_length=100, is_primary=True), # Primary key field - FieldSchema(name="content", dtype=DataType.VARCHAR, max_length=10000), # Text field with up to 10000 characters + FieldSchema( + 
name="content", dtype=DataType.VARCHAR, max_length=10000 + ), # Text field with up to 10000 characters FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=embedding_size), - FieldSchema(name="metadata", dtype=DataType.JSON) + FieldSchema(name="metadata", dtype=DataType.JSON), ] schema = CollectionSchema(fields, "Collection for storing document embeddings and metadata") @@ -155,7 +153,7 @@ def list_filenames(self): List all filenames in the collection. """ # Assuming 'filename' is a field in the metadata - + expr = "metadata['filename'] != ''" # Expression to match all entities with a non-empty filename entities = self.vector_db.query(expr, output_fields=["metadata"]) filenames = list(set([entity['metadata']['filename'] for entity in entities])) @@ -169,4 +167,4 @@ def delete_by_filename(self, filename): expr = f"metadata['filename'] == '{filename}'" self.vector_db.delete(expr) # Load the collection to make the deletion take effect - self.vector_db.load() + self.vector_db.load() diff --git a/RAG/examples/advanced_rag/multimodal_rag/vectorstore/custom_img_parser.py b/RAG/examples/advanced_rag/multimodal_rag/vectorstore/custom_img_parser.py new file mode 100644 index 00000000..592c4865 --- /dev/null +++ b/RAG/examples/advanced_rag/multimodal_rag/vectorstore/custom_img_parser.py @@ -0,0 +1,60 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import cv2 +import pytesseract +from langchain.docstore.document import Document + +from RAG.examples.advanced_rag.multimodal_rag.vectorstore.custom_pdf_parser import ( + is_graph, + process_graph, + process_image, +) + + +def get_ocr_text(image_path, frame_num): + img = cv2.imread(image_path) + ocr_text = pytesseract.image_to_string(img) + return ocr_text + + +def process_img_file(img_path): + # convert to png file -> can check if this is needed + # if 1 img -> send to NeVa and get caption + OCR needs to be performed -> save img doc in processed data + # if giphy or gif image -> get all frames and save caption+OCR for each frame. 
+ processed_data = [] + ocr_text = get_ocr_text(img_path, 1) + image_description = "" + if is_graph(img_path): + image_description = process_graph(img_path) + else: + image_description = process_image(img_path) + caption = image_description + f"This image contains text: {ocr_text}" + image_metadata = { + "x1": 0, + "y1": 0, + "x2": 0, + "x3": 0, + "source": f"{os.path.basename(img_path)}", + "image": img_path, + "caption": caption, + "type": "image", + "page_num": 1, + } + processed_data.append(Document(page_content=caption, metadata=image_metadata)) + + return processed_data diff --git a/RetrievalAugmentedGeneration/examples/multimodal_rag/vectorstore/custom_pdf_parser.py b/RAG/examples/advanced_rag/multimodal_rag/vectorstore/custom_pdf_parser.py similarity index 52% rename from RetrievalAugmentedGeneration/examples/multimodal_rag/vectorstore/custom_pdf_parser.py rename to RAG/examples/advanced_rag/multimodal_rag/vectorstore/custom_pdf_parser.py index cec1b40f..4d10ba77 100644 --- a/RetrievalAugmentedGeneration/examples/multimodal_rag/vectorstore/custom_pdf_parser.py +++ b/RAG/examples/advanced_rag/multimodal_rag/vectorstore/custom_pdf_parser.py @@ -13,18 +13,21 @@ # See the License for the specific language governing permissions and # limitations under the License. -from pdfplumber import open as pdf_open -import pandas as pd +import base64 import os -from langchain.docstore.document import Document -from RetrievalAugmentedGeneration.example.llm.llm_client import LLMClient -from RetrievalAugmentedGeneration.common.utils import get_config -from PIL import Image from io import BytesIO -import base64 + import cv2 +import pandas as pd import pytesseract -from RetrievalAugmentedGeneration.common.tracing import langchain_instrumentation_method_wrapper +from langchain.docstore.document import Document +from pdfplumber import open as pdf_open +from PIL import Image + +from RAG.examples.advanced_rag.multimodal_rag.llm.llm_client import LLMClient +from RAG.src.chain_server.tracing import langchain_instrumentation_method_wrapper +from RAG.src.chain_server.utils import get_config, get_prompts + def get_b64_image(image_path): image = Image.open(image_path).convert("RGB") @@ -33,42 +36,48 @@ def get_b64_image(image_path): b64_string = base64.b64encode(buffered.getvalue()).decode("utf-8") return b64_string -def is_bbox_overlapping(bbox1, bbox2): - return (bbox1[0]bbox2[0] and bbox1[1]>bbox2[3] and bbox1[3]bbox2[0] and bbox1[1]>bbox2[3] and bbox1[3] bbox2[0] and bbox1[1] > bbox2[3] and bbox1[3] < bbox2[1] -@langchain_instrumentation_method_wrapper -def is_graph(cb_handler, image_path): - # Placeholder function for graph detection logic - # Implement graph detection algorithm here - neva = LLMClient("ai-neva-22b", cb_handler=cb_handler) - b64_string = get_b64_image(image_path) - res = neva.multimodal_invoke(b64_string, creativity = 0, quality = 9, complexity = 0, verbosity = 9).content + +def is_graph(image_path): + res = process_image(image_path) if "graph" in res or "plot" in res or "chart" in res: return True else: return False + +@langchain_instrumentation_method_wrapper +def process_image(cb_handler, image_path): + neva = LLMClient("ai-neva-22b", cb_handler=cb_handler) + b64_string = get_b64_image(image_path) + res = neva.multimodal_invoke(b64_string, creativity=0, quality=9, complexity=0, verbosity=9).content + print(res) + return res + + @langchain_instrumentation_method_wrapper def process_graph(cb_handler, image_path): - # Placeholder function for graph processing logic - # Implement graph 
processing algorithm here # Call DePlot through the API - deplot = LLMClient("ai-google-deplot") + deplot = LLMClient("google/deplot") b64_string = get_b64_image(image_path) res = deplot.multimodal_invoke(b64_string) deplot_description = res.content # Accessing the model name environment variable settings = get_config() mixtral = LLMClient(model_name=settings.llm.model_name, is_response_generator=True, cb_handler=cb_handler) - response = mixtral.chat_with_prompt(system_prompt="Your responsibility is to explain charts. You are an expert in describing the responses of linearized tables into plain English text for LLMs to use.", - prompt="Explain the following linearized table. " + deplot_description) + response = mixtral.chat_with_prompt( + system_prompt=get_prompts().get("deplot_summarization_prompt", ""), + prompt="Explain the following linearized table. " + deplot_description, + ) full_response = "" for chunk in response: full_response += chunk return full_response + + def stringify_table(table_text): ans = "" for i in range(len(table_text)): @@ -76,27 +85,31 @@ def stringify_table(table_text): ans += table_text[i][j] + "," ans += "\n" return ans + + def text_to_table(table_text): columns = [] rows = [] try: for i in range(len(table_text)): columns.append(table_text[i][0]) - for i in range(1,len(table_text[0])): - row=[] + for i in range(1, len(table_text[0])): + row = [] for j in range(len(columns)): row.append(table_text[j][i]) rows.append(row) except Exception as e: print(f"Exception occured while converting extracted table text to Dataframe object : {e}") - return pd.DataFrame(rows,columns=columns) - - + return pd.DataFrame(rows, columns=columns) + + def extract_text_around_item(text_blocks, bbox, page_height, threshold_percentage=0.1): before_text, after_text = "", "" vertical_threshold_distance = page_height * threshold_percentage - horizontal_threshold_distance = (bbox[2]-bbox[0]) * threshold_percentage # Assuming similar threshold for horizontal distance + horizontal_threshold_distance = ( + bbox[2] - bbox[0] + ) * threshold_percentage # Assuming similar threshold for horizontal distance for block in text_blocks: vertical_distance = min(abs(block['y1'] - bbox[1]), abs(block['y0'] - bbox[3])) @@ -112,13 +125,14 @@ def extract_text_around_item(text_blocks, bbox, page_height, threshold_percentag return before_text, after_text + def process_text_blocks(text_blocks): char_count_threshold = 500 # Threshold for the number of characters in a group current_group = [] grouped_blocks = [] current_char_count = 0 for block in text_blocks: - if block['object_type'] in ('char','str'): # Check if the block is of text type + if block['object_type'] in ('char', 'str'): # Check if the block is of text type block_text = block['text'] block_char_count = len(block_text) @@ -139,84 +153,99 @@ def process_text_blocks(text_blocks): return grouped_blocks + def parse_via_ocr(filename, page, pagenum): ocr_docs = [] ocr_image = page.to_image(resolution=109) imgrefpath = os.path.join("/tmp-data", "multimodal/ocr_references") if not os.path.exists(imgrefpath): os.makedirs(imgrefpath) - image_path = os.path.join(imgrefpath, f"page{pagenum}.png") + image_path = os.path.join(imgrefpath, f"file{os.path.basename(filename).split('.')[0]}-page{pagenum}.png") ocr_image.save(image_path) img = cv2.imread(image_path) ocr_text = pytesseract.image_to_string(img) ocr_metadata = { - "x1":0, - "y1":0, - "x2":0, - "x3":0, - "source": f"{os.path.basename(filename)}", - "image": image_path, - "caption": ocr_text, - "type": 
"image", - "page_num": pagenum - } + "x1": 0, + "y1": 0, + "x2": 0, + "x3": 0, + "source": f"{os.path.basename(filename)}", + "image": image_path, + "caption": ocr_text, + "type": "image", + "page_num": pagenum, + } ocr_docs.append(Document(page_content="This is a page with text: " + ocr_text, metadata=ocr_metadata)) return ocr_docs + def parse_all_tables(filename, page, pagenum, text_blocks, ongoing_tables): table_docs = [] table_bboxes = [] ctr = 1 - try: - tables = page.find_tables(table_settings={"horizontal_strategy":"lines_strict", "vertical_strategy":"lines_strict"}) + try: + tables = page.find_tables( + table_settings={"horizontal_strategy": "lines_strict", "vertical_strategy": "lines_strict"} + ) except Exception as e: print(f"Error during table extraction: {e}") return table_docs, table_bboxes, ongoing_tables if tables: for table_num, table in enumerate(tables, start=1): - try: - tablerefdir = os.path.join("/tmp-data", "vectorstore/table_references") - if not os.path.exists(tablerefdir): - os.makedirs(tablerefdir) - df_xlsx_path = os.path.join(tablerefdir, f"table{table_num}-page{pagenum}.xlsx") - page_crop=page.crop(table.bbox) - if len(page_crop.extract_tables())>0: - table_df_text = page_crop.extract_tables()[0] - table_df = text_to_table(table_df_text) - table_df.to_excel(df_xlsx_path) - # Find text around the table - table_bbox = table.bbox - before_text, after_text = extract_text_around_item(text_blocks, table_bbox, page.height) - # Save table image - table_img_path = os.path.join(tablerefdir, f"table{table_num}-page{pagenum}.jpg") - img = page_crop.to_image(resolution=109) - img.save(table_img_path) - description = process_graph(table_img_path) - ctr +=1 - caption = before_text.replace("\n", " ") + description + after_text.replace("\n", " ") - if before_text == "" and after_text == "": - caption = " ".join(table_df.columns) - table_data_text = stringify_table(table_df_text) - table_metadata = { - "x1":0, - "y1":0, - "x2":0, - "x3":0, - "source": f"{os.path.basename(filename)}", - "dataframe": df_xlsx_path, - "image": table_img_path, - "caption": caption, - "type": "table", - "page_num": pagenum + 1 - } - all_cols = ", ".join(list(table_df.columns.values)) - doc = Document(page_content="This is a table with the caption: " + caption + f"\nThe columns are {all_cols} and the table data is {table_data_text}", metadata=table_metadata) - table_docs.append(doc) - except: - print(f"Skipping Table {table_num} due to Exception {e}") + try: + tablerefdir = os.path.join("/tmp-data", "vectorstore/table_references") + if not os.path.exists(tablerefdir): + os.makedirs(tablerefdir) + df_xlsx_path = os.path.join( + tablerefdir, f"file{os.path.basename(filename).split('.')[0]}-table{table_num}-page{pagenum}.xlsx" + ) + page_crop = page.crop(table.bbox) + if len(page_crop.extract_tables()) > 0: + table_df_text = page_crop.extract_tables()[0] + table_df = text_to_table(table_df_text) + table_df.to_excel(df_xlsx_path) + # Find text around the table + table_bbox = table.bbox + before_text, after_text = extract_text_around_item(text_blocks, table_bbox, page.height) + # Save table image + table_img_path = os.path.join( + tablerefdir, + f"file{os.path.basename(filename).split('.')[0]}-table{table_num}-page{pagenum}.jpg", + ) + img = page_crop.to_image(resolution=109) + img.save(table_img_path) + description = process_graph(table_img_path) + ctr += 1 + caption = before_text.replace("\n", " ") + description + after_text.replace("\n", " ") + if before_text == "" and after_text == "": + caption = " 
".join(table_df.columns) + table_data_text = stringify_table(table_df_text) + table_metadata = { + "x1": 0, + "y1": 0, + "x2": 0, + "x3": 0, + "source": f"{os.path.basename(filename)}", + "dataframe": df_xlsx_path, + "image": table_img_path, + "caption": caption, + "type": "table", + "page_num": pagenum + 1, + } + all_cols = ", ".join(list(table_df.columns.values)) + doc = Document( + page_content="This is a table with the caption: " + + caption + + f"\nThe columns are {all_cols} and the table data is {table_data_text}", + metadata=table_metadata, + ) + table_docs.append(doc) + except: + print(f"Skipping Table {table_num} due to Exception {e}") return table_docs, table_bboxes, ongoing_tables + def parse_all_images(filename, page, pagenum, text_blocks): image_docs = [] image_list = page.images @@ -227,49 +256,59 @@ def parse_all_images(filename, page, pagenum, text_blocks): # xref = image_info['xref'] # if xref == 0: # continue # Skip inline images or undetectable images - image_bbox = (image['x0'],image['y0'], image['x1'],image['y1']) - # Check if the image size is at least 5% of the page size in any dimension - if image["width"] < page.width / 20 or image["height"] < page.height / 20: - continue # Skip very small images - - # Extract and save the image - page_crop = page.crop(image_bbox,strict=False) - image_data = page_crop.to_image() - imgrefpath = os.path.join("/tmp-data", "multimodal/image_references") - if not os.path.exists(imgrefpath): - os.makedirs(imgrefpath) - image_path = os.path.join(imgrefpath, f"image{image_num}-page{pagenum}.png") - image_data.save(image_path) - # Find text around the image - before_text, after_text = extract_text_around_item(text_blocks, image_bbox, page.height) - # skip images without a caption, they are likely just some logo or graphics - if before_text == "" and after_text == "": - continue - - # Process the image if it's a graph - image_description = " " - if is_graph(image_path): - image_description = process_graph(image_path) - - # Combine the texts to form a caption + # image_bbox = (image['x0'], image['y0'], image['x1'], image['y1']) + image_bbox = (image['x0'], page.cropbox[3] - image['y1'], image['x1'], page.cropbox[3] - image['y0']) + # Check if the image size is at least 5% of the page size in any dimension + if image["width"] < page.width / 20 or image["height"] < page.height / 20: + continue # Skip very small images + + # Extract and save the image + page_crop = page.crop(image_bbox, strict=False) + image_data = page_crop.to_image(resolution=109) + imgrefpath = os.path.join("/tmp-data", "multimodal/image_references") + if not os.path.exists(imgrefpath): + os.makedirs(imgrefpath) + image_path = os.path.join( + imgrefpath, f"file{os.path.basename(filename).split('.')[0]}-image{image_num}-page{pagenum}.png" + ) + image_data.save(image_path) + # Find text around the image + before_text, after_text = extract_text_around_item(text_blocks, image_bbox, page.height) + + # Process the image if it's a graph + image_description = " " + if is_graph(image_path): + image_description = process_graph(image_path) + + caption = "" + # Combine the texts to form a caption + if before_text == "" and after_text == "": + caption = image_description + else: caption = before_text.replace("\n", " ") + image_description + after_text.replace("\n", " ") - image_metadata = { - "x1":0, - "y1":0, - "x2":0, - "x3":0, - "source": f"{os.path.basename(filename)}", - "image": image_path, - "caption": caption, - "type": "image", - "page_num": pagenum - } + if caption == " ": + 
continue - image_docs.append(Document(page_content="This is an image with the caption: " + caption, metadata=image_metadata)) + image_metadata = { + "x1": 0, + "y1": 0, + "x2": 0, + "x3": 0, + "source": f"{os.path.basename(filename)}", + "image": image_path, + "caption": caption, + "type": "image", + "page_num": pagenum, + } + + image_docs.append( + Document(page_content="This is an image with the caption: " + caption, metadata=image_metadata) + ) return image_docs + def get_pdf_documents(filepath): all_pdf_documents = [] ongoing_tables = {} @@ -288,16 +327,18 @@ def get_pdf_documents(filepath): footer_threshold = page_height * 0.9 # Crop out page to remove footers and headers - page_crop = page.crop([0,header_threshold,page.width,footer_threshold]) + page_crop = page.crop([0, header_threshold, page.width, footer_threshold]) text_blocks = [obj for obj in page_crop.chars if obj['object_type'] == 'char'] grouped_text_blocks = process_text_blocks(text_blocks) - if len(grouped_text_blocks)==0: + if len(grouped_text_blocks) == 0: # Perform OCR on PDF pages ocr_docs = parse_via_ocr(filepath, page_crop, page_num) - page_docs.extend(ocr_docs) + page_docs.extend(ocr_docs) # Extract tables and their bounding boxes - table_docs, table_bboxes, ongoing_tables = parse_all_tables(filepath, page, page_num, text_blocks, ongoing_tables) + table_docs, table_bboxes, ongoing_tables = parse_all_tables( + filepath, page, page_num, text_blocks, ongoing_tables + ) page_docs.extend(table_docs) # Extract and process images @@ -307,12 +348,20 @@ def get_pdf_documents(filepath): # Process text blocks text_block_ctr = 0 for heading_block, content in grouped_text_blocks: - text_block_ctr +=1 - heading_bbox = (heading_block['x0'],heading_block['y0'],heading_block['x1'],heading_block['y1']) + text_block_ctr += 1 + heading_bbox = (heading_block['x0'], heading_block['y0'], heading_block['x1'], heading_block['y1']) # Check if the heading or its content overlaps with table or image bounding boxes - if not any(is_bbox_overlapping(heading_bbox,table_bbox) for table_bbox in table_bboxes): + if not any(is_bbox_overlapping(heading_bbox, table_bbox) for table_bbox in table_bboxes): bbox = {"x1": heading_bbox[0], "y1": heading_bbox[1], "x2": heading_bbox[2], "x3": heading_bbox[3]} - text_doc = Document(page_content=f"{heading_block['text']}\n{content}", metadata={**bbox, "type": "text", "page_num": page_num, "source": f"{os.path.basename(filepath)}"}) + text_doc = Document( + page_content=f"{heading_block['text']}\n{content}", + metadata={ + **bbox, + "type": "text", + "page_num": page_num, + "source": f"{os.path.basename(filepath)}", + }, + ) page_docs.append(text_doc) all_pdf_documents.append(page_docs) except Exception as e: diff --git a/RetrievalAugmentedGeneration/examples/multimodal_rag/vectorstore/custom_powerpoint_parser.py b/RAG/examples/advanced_rag/multimodal_rag/vectorstore/custom_powerpoint_parser.py similarity index 82% rename from RetrievalAugmentedGeneration/examples/multimodal_rag/vectorstore/custom_powerpoint_parser.py rename to RAG/examples/advanced_rag/multimodal_rag/vectorstore/custom_powerpoint_parser.py index 791a7d5d..0f26190d 100644 --- a/RetrievalAugmentedGeneration/examples/multimodal_rag/vectorstore/custom_powerpoint_parser.py +++ b/RAG/examples/advanced_rag/multimodal_rag/vectorstore/custom_powerpoint_parser.py @@ -15,10 +15,12 @@ import os import subprocess -from pptx import Presentation -from pdfplumber import open as pdf_open + from langchain.docstore.document import Document -from 
RetrievalAugmentedGeneration.example.vectorstore.custom_pdf_parser import is_graph, process_graph +from pdfplumber import open as pdf_open +from pptx import Presentation + +from RAG.examples.advanced_rag.multimodal_rag.vectorstore.custom_pdf_parser import is_graph, process_graph def convert_ppt_to_pdf(ppt_path): @@ -80,7 +82,11 @@ def extract_text_and_notes_from_ppt(ppt_path): def process_ppt_file(ppt_path): """Process a PowerPoint file.""" - pdf_path = os.path.join(os.getcwd(), "multimodal/ppt_references", os.path.basename(ppt_path).replace('.pptx', '.pdf').replace('.ppt', '.pdf')) + pdf_path = os.path.join( + os.getcwd(), + "multimodal/ppt_references", + os.path.basename(ppt_path).replace('.pptx', '.pdf').replace('.ppt', '.pdf'), + ) convert_ppt_to_pdf(ppt_path) images_data = convert_pdf_to_images(pdf_path) slide_texts = extract_text_and_notes_from_ppt(ppt_path) @@ -96,16 +102,21 @@ def process_ppt_file(ppt_path): image_description = process_graph(image_path) caption = slide_text + image_description + notes image_metadata = { - "x1":0, - "y1":0, - "x2":0, - "x3":0, - "source": f"{os.path.basename(ppt_path)}", - "image": image_path, - "caption": caption, - "type": "image", - "page_num": page_num + "x1": 0, + "y1": 0, + "x2": 0, + "x3": 0, + "source": f"{os.path.basename(ppt_path)}", + "image": image_path, + "caption": caption, + "type": "image", + "page_num": page_num, } - processed_data.append(Document(page_content = "This is a slide with the text: " + slide_text + image_description, metadata = image_metadata)) - - return processed_data \ No newline at end of file + processed_data.append( + Document( + page_content="This is a slide with the text: " + slide_text + image_description, + metadata=image_metadata, + ) + ) + + return processed_data diff --git a/RetrievalAugmentedGeneration/examples/multimodal_rag/vectorstore/vectorstore_updater.py b/RAG/examples/advanced_rag/multimodal_rag/vectorstore/vectorstore_updater.py similarity index 84% rename from RetrievalAugmentedGeneration/examples/multimodal_rag/vectorstore/vectorstore_updater.py rename to RAG/examples/advanced_rag/multimodal_rag/vectorstore/vectorstore_updater.py index 9e833db4..6d0181df 100644 --- a/RetrievalAugmentedGeneration/examples/multimodal_rag/vectorstore/vectorstore_updater.py +++ b/RAG/examples/advanced_rag/multimodal_rag/vectorstore/vectorstore_updater.py @@ -15,15 +15,19 @@ import logging import os + from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain_community.document_loaders import UnstructuredFileLoader -from RetrievalAugmentedGeneration.example.vectorstore.custom_powerpoint_parser import process_ppt_file -from RetrievalAugmentedGeneration.example.vectorstore.custom_pdf_parser import get_pdf_documents + +from RAG.examples.advanced_rag.multimodal_rag.vectorstore.custom_img_parser import process_img_file +from RAG.examples.advanced_rag.multimodal_rag.vectorstore.custom_pdf_parser import get_pdf_documents +from RAG.examples.advanced_rag.multimodal_rag.vectorstore.custom_powerpoint_parser import process_ppt_file logger = logging.getLogger(__name__) CUSTOM_PROCESSING = True + def load_documents(file): """Load documents from the specified folder.""" raw_documents = [] @@ -38,6 +42,9 @@ def load_documents(file): elif file.endswith("ppt") or file.endswith("pptx"): pptx_docs = process_ppt_file(file) raw_documents.extend(pptx_docs) + elif file.endswith("png"): + img_docs = process_img_file(file) + raw_documents.extend(img_docs) else: # Load unstructured files and add them individually loader = 
UnstructuredFileLoader(file) @@ -50,10 +57,10 @@ def split_text(documents): """Split text documents into chunks.""" text_splitter = RecursiveCharacterTextSplitter( # Set a really small chunk size, just to show. - chunk_size = 1000, - chunk_overlap = 100, - length_function = len, - is_separator_regex = False, + chunk_size=1000, + chunk_overlap=100, + length_function=len, + is_separator_regex=False, ) split_docs = text_splitter.split_documents(documents) return split_docs @@ -75,7 +82,7 @@ def update_vectorstore(file_path, vector_client, embedder, config_name): # Adding file name to the metadata for document in documents: document.metadata["filename"] = os.path.basename(file_path) - + logger.info("[Step 3/4] Inserting documents into the vector store...") # Batch insert into Milvus collection vector_client.add_documents(documents) diff --git a/RAG/examples/advanced_rag/query_decomposition_rag/README.md b/RAG/examples/advanced_rag/query_decomposition_rag/README.md new file mode 100644 index 00000000..0a29073a --- /dev/null +++ b/RAG/examples/advanced_rag/query_decomposition_rag/README.md @@ -0,0 +1,87 @@ + + +# Query Decomposition RAG + +## Example Features + +This example deploys a recursive query decomposition example for chat Q&A. + +Query decomposition can perform RAG when the agent needs to access information from several different documents +(also referred to as _chunks_) or to perform some computation on the answers. +This example uses a custom LangChain agent that recursively breaks down the questions into subquestions. +The agent then attempts to answer the subquestions. + +The agent has access to two tools: + +- search: to perform standard RAG on a subquestion. +- math: to pose a math question to the LLM. + +The agent continues to break down the question into subquestions until it has the answers that it needs to form the final answer. + +| Model | Embedding | Framework | Vector Database | File Types | +| ------------------------ | ------------------------ | --------- | --------------- | ------------ | +| meta/llama3-70b-instruct | nvidia/nv-embedqa-e5-v5 | LangChain | Milvus | TXT, PDF, MD | + +![Diagram](../../../../docs/images/query_decomposition_rag_arch.png) + +## Prerequisites + +Complete the [common prerequisites](../../../../docs/common-prerequisites.md). + +## Build and Start the Containers + +1. Export your NVIDIA API key as an environment variable: + + ```text + export NVIDIA_API_KEY="nvapi-<...>" + ``` + +1. Start the containers: + + ```console + cd RAG/examples/advanced_rag/query_decomposition_rag/ + docker compose up -d --build + ``` + + *Example Output* + + ```output + ✔ Network nvidia-rag Created + ✔ Container rag-playground Started + ✔ Container milvus-minio Started + ✔ Container chain-server Started + ✔ Container milvus-etcd Started + ✔ Container milvus-standalone Started + ``` + +1. Confirm the containers are running: + + ```console + docker ps --format "table {{.ID}}\t{{.Names}}\t{{.Status}}" + ``` + + *Example Output* + + ```output + CONTAINER ID NAMES STATUS + 39a8524829da rag-playground Up 2 minutes + bfbd0193dbd2 chain-server Up 2 minutes + ec02ff3cc58b milvus-standalone Up 3 minutes + 6969cf5b4342 milvus-minio Up 3 minutes (healthy) + 57a068d62fbb milvus-etcd Up 3 minutes (healthy) + ``` + +1. Open a web browser and access to use the RAG Playground. + + Refer to [Using the Sample Web Application](../../../../docs/using-sample-web-application.md) + for information about uploading documents and using the web interface. 
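How the recursive decomposition agent behaves at runtime can be summarized with a short sketch. The following is a simplified, hypothetical illustration, not the code this example ships: `select_tool` and `answer_subquestion` stand in for the agent LLM and the Search/Math tools, and only the ledger bookkeeping mirrors the actual `chains.py` logic.

```python
# Simplified, hypothetical sketch of the recursive decomposition loop that the
# agent in chains.py implements. select_tool and answer_subquestion are
# placeholder stand-ins for the real LLM and tool calls; only the Ledger
# bookkeeping mirrors the actual code in this example.
from dataclasses import dataclass, field
from typing import Dict, List


@dataclass
class Ledger:
    # Accumulated sub-questions and answers, plus recursion depth and a done flag.
    question_trace: List[str] = field(default_factory=list)
    answer_trace: List[str] = field(default_factory=list)
    trace: int = 0
    done: bool = False


def fetch_context(ledger: Ledger) -> str:
    # Render previously answered sub-questions as context for the next LLM call.
    return "\n".join(
        f"Question: {q}\nAnswer: {a}"
        for q, a in zip(ledger.question_trace, ledger.answer_trace)
    )


def select_tool(question: str, context: str) -> Dict:
    # Placeholder for the tool-selector LLM call, which in the example returns
    # JSON of the form {"Tool_Request": ..., "Generated Sub Questions": [...]}.
    # Here: one Search round, then stop.
    if not context:
        return {
            "Tool_Request": "Search",
            "Generated Sub Questions": [f"Find the facts needed for: {question}"],
        }
    return {"Tool_Request": "Nil", "Generated Sub Questions": []}


def answer_subquestion(tool: str, sub_question: str) -> str:
    # Placeholder for the Search (RAG retrieval) or Math (LLM calculation) tool.
    return "42"


def decompose_and_answer(question: str, max_depth: int = 3) -> str:
    ledger = Ledger()
    while not ledger.done and ledger.trace <= max_depth:
        decision = select_tool(question, fetch_context(ledger))
        if decision["Tool_Request"] == "Nil" or not decision["Generated Sub Questions"]:
            ledger.done = True
            break
        for sub_q in decision["Generated Sub Questions"]:
            ledger.question_trace.append(sub_q)
            ledger.answer_trace.append(answer_subquestion(decision["Tool_Request"], sub_q))
        ledger.trace += 1
    # The accumulated question/answer context is finally passed to the LLM,
    # which generates the answer to the original question.
    return fetch_context(ledger)


if __name__ == "__main__":
    print(decompose_and_answer("How many more errors did machine 5 log than machine 7?"))
```

In the shipped example, the tool-selection and math prompts live in `prompt.yaml`, and the Search tool performs standard RAG retrieval against the Milvus vector store.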
+ +## Next Steps + +- [Vector Database Customizations](../../../../docs/vector-database.md) +- Stop the containers by running `docker compose down`. +- Use the [RAG Application: Query Decomposition Agent](https://registry.ngc.nvidia.com/orgs/ohlfw0olaadg/teams/ea-participants/helm-charts/rag-app-query-decomposition-agent) + Helm chart to deploy this example in Kubernetes. diff --git a/RetrievalAugmentedGeneration/examples/query_decomposition_rag/__init__.py b/RAG/examples/advanced_rag/query_decomposition_rag/__init__.py similarity index 100% rename from RetrievalAugmentedGeneration/examples/query_decomposition_rag/__init__.py rename to RAG/examples/advanced_rag/query_decomposition_rag/__init__.py diff --git a/RetrievalAugmentedGeneration/examples/query_decomposition_rag/chains.py b/RAG/examples/advanced_rag/query_decomposition_rag/chains.py similarity index 72% rename from RetrievalAugmentedGeneration/examples/query_decomposition_rag/chains.py rename to RAG/examples/advanced_rag/query_decomposition_rag/chains.py index 1a5b9a08..05e74c65 100644 --- a/RetrievalAugmentedGeneration/examples/query_decomposition_rag/chains.py +++ b/RAG/examples/advanced_rag/query_decomposition_rag/chains.py @@ -20,44 +20,45 @@ Search tool is a RAG pipeline, whereas the math tool uses an LLM call to perform mathematical calculations. """ -from langchain_community.vectorstores.faiss import FAISS -from langchain_community.document_loaders import UnstructuredFileLoader -from langchain.text_splitter import CharacterTextSplitter -from langchain_core.prompts.chat import ChatPromptTemplate -from langchain_core.output_parsers.string import StrOutputParser -from langchain.chains.llm import LLMChain -from langchain_core.prompts.chat import BaseChatPromptTemplate -from langchain_core.messages.human import HumanMessage -from langchain.agents.agent import LLMSingleActionAgent, AgentOutputParser, AgentExecutor -from langchain.tools import Tool -from langchain.schema.agent import AgentFinish, AgentAction -from typing import List, Union, Dict, Any -import json -import jinja2 -import os import base64 +import json import logging -from typing import Generator, List -from RetrievalAugmentedGeneration.common.tracing import langchain_instrumentation_class_wrapper +import os +from typing import Any, Dict, Generator, List, Union -from RetrievalAugmentedGeneration.common.utils import ( +import jinja2 +from langchain.agents.agent import AgentExecutor, AgentOutputParser, LLMSingleActionAgent +from langchain.chains.llm import LLMChain +from langchain.schema.agent import AgentAction, AgentFinish +from langchain.text_splitter import CharacterTextSplitter +from langchain.tools import Tool +from langchain_community.document_loaders import UnstructuredFileLoader +from langchain_community.vectorstores.faiss import FAISS +from langchain_core.messages.human import HumanMessage +from langchain_core.output_parsers.string import StrOutputParser +from langchain_core.prompts.chat import BaseChatPromptTemplate, ChatPromptTemplate + +from RAG.src.chain_server.base import BaseExample +from RAG.src.chain_server.tracing import langchain_instrumentation_class_wrapper +from RAG.src.chain_server.utils import ( + create_vectorstore_langchain, + del_docs_vectorstore_langchain, get_config, - get_llm, - set_service_context, - get_embedding_model, get_doc_retriever, - create_vectorstore_langchain, get_docs_vectorstore_langchain, - del_docs_vectorstore_langchain, - get_vectorstore + get_embedding_model, + get_llm, + get_prompts, + get_vectorstore, + 
set_service_context, ) -from RetrievalAugmentedGeneration.common.base import BaseExample logger = logging.getLogger(__name__) vector_store_path = "vectorstore.pkl" document_embedder = get_embedding_model() settings = get_config() +prompts = get_prompts() try: vectorstore = create_vectorstore_langchain(document_embedder=document_embedder) @@ -67,6 +68,7 @@ ##### Helper methods and tools ##### + class Ledger: # Stores the state of the recursive decomposition def __init__(self): self.question_trace = [] @@ -87,47 +89,10 @@ def fetch_context(ledger: Ledger) -> str: return context -template = """Your task is to answer questions. If you cannot answer the question, you can request use for a tool and break the question into specific sub questions. Fill with Nil where no action is required. You should only return a JSON containing the tool and the generated sub questions. Consider the contextual information and only ask for information that you do not already have. Do not return any other explanations or text. The output should be a simple JSON structure! You are given two tools: -- Search -- Math -Search tool quickly finds and retrieves relevant answers from a given context, providing accurate and precise information to meet search needs. -Math tool performs essential operations, including multiplication, addition, subtraction, division, and greater than or less than comparisons, providing accurate results with ease. Utilize math tool when asked to find sum, difference of values. -Do not pass sub questions to any tool if they already have an answer in the Contextual Information. -If you have all the information needed to answer the question, mark the Tool_Request as Nil. -Contextual Information: -{{ context }} +template = prompts.get("tool_selector_prompt", "") -Question: -{{ question }} - -{"Tool_Request": "", "Generated Sub Questions": []} -""" - -math_tool_prompt = """Your task is to identify 2 variables and an operation from given questions. If you cannot answer the question, you can simply return "Not Possible". You should only return a JSON containing the `IsPossible`, `variable1`, `variable2`, and `operation`. Do not return any other explanations or text. The output should be a simple JSON structure! - You are given two options for `IsPossible`: -- Possible -- Not Possible - `variable1` and `variable2` should be real floating point numbers. - You are given four options for `operation symbols`: -- '+' (addition) -- '-' (subtraction) -- '*' (multiplication) -- '/' (division) -- '=' (equal to) -- '>' (greater than) -- '<' (less than) -- '>=' (greater than or equal to) -- '<=' (less than or equal to) - Only return the symbols for the specified operations and nothing else. 
-Contextual Information: -{{ context }} - -Question: -{{ question }} - -{"IsPossible": "", "variable1": [], "variable2": [], "operation": []} -""" +math_tool_prompt = prompts.get("math_tool_prompt", "") class CustomPromptTemplate(BaseChatPromptTemplate): @@ -168,10 +133,7 @@ def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]: or self.ledger.trace > 3 or local_state["Generated Sub Questions"][0] in self.ledger.question_trace ): - return AgentFinish( - return_values={"output": "success"}, - log=llm_output, - ) + return AgentFinish(return_values={"output": "success"}, log=llm_output,) if local_state["Tool_Request"] == "Search": self.ledger.trace += 1 @@ -184,11 +146,12 @@ def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]: ) raise ValueError(f"Invalid Tool name: {local_state['Tool_Request']}") + @langchain_instrumentation_class_wrapper class QueryDecompositionChatbot(BaseExample): def ingest_docs(self, filepath: str, filename: str): """Ingest documents to the VectorDB.""" - if not filename.endswith((".txt",".pdf",".md")): + if not filename.endswith((".txt", ".pdf", ".md")): raise ValueError(f"{filename} is not a valid Text, PDF or Markdown file") try: # Load raw documents from the directory @@ -196,7 +159,9 @@ def ingest_docs(self, filepath: str, filename: str): raw_documents = UnstructuredFileLoader(_path).load() if raw_documents: - text_splitter = CharacterTextSplitter(chunk_size=settings.text_splitter.chunk_size, chunk_overlap=settings.text_splitter.chunk_overlap) + text_splitter = CharacterTextSplitter( + chunk_size=settings.text_splitter.chunk_size, chunk_overlap=settings.text_splitter.chunk_overlap + ) documents = text_splitter.split_documents(raw_documents) vs = get_vectorstore(vectorstore, document_embedder) vs.add_documents(documents) @@ -207,32 +172,27 @@ def ingest_docs(self, filepath: str, filename: str): logger.error(f"Failed to ingest document due to exception {e}") raise ValueError("Failed to upload document. Please upload an unstructured text document.") - - def llm_chain( - self, query: str, chat_history: List["Message"], **kwargs - ) -> Generator[str, None, None]: + def llm_chain(self, query: str, chat_history: List["Message"], **kwargs) -> Generator[str, None, None]: """Execute a simple LLM chain using the components defined above.""" logger.info("Using llm to generate response directly without knowledge base.") # WAR: Disable chat history (UI consistency). 
chat_history = [] - system_message = [("system", settings.prompts.chat_template)] + system_message = [("system", prompts.get("chat_template", ""))] conversation_history = [(msg.role, msg.content) for msg in chat_history] user_input = [("user", "{input}")] # Checking if conversation_history is not None and not empty - prompt_template = ChatPromptTemplate.from_messages( - system_message + conversation_history + user_input - ) if conversation_history else ChatPromptTemplate.from_messages( - system_message + user_input + prompt_template = ( + ChatPromptTemplate.from_messages(system_message + conversation_history + user_input) + if conversation_history + else ChatPromptTemplate.from_messages(system_message + user_input) ) llm = get_llm(**kwargs) chain = prompt_template | llm | StrOutputParser() - augmented_user_input = ( - "\n\nQuestion: " + query + "\n" - ) + augmented_user_input = "\n\nQuestion: " + query + "\n" logger.info(f"Prompt used for response generation: {prompt_template.format(input=augmented_user_input)}") - return chain.stream({"input": augmented_user_input}, config={"callbacks":[self.cb_handler]}) + return chain.stream({"input": augmented_user_input}, config={"callbacks": [self.cb_handler]}) def rag_chain(self, query: str, chat_history: List["Message"], **kwargs) -> Generator[str, None, None]: """Execute a Retrieval Augmented Generation chain using the components defined above.""" @@ -243,24 +203,21 @@ def rag_chain(self, query: str, chat_history: List["Message"], **kwargs) -> Gene final_context = self.run_agent(query, **kwargs) if not final_context: logger.warning("Retrieval failed to get any relevant context") - return iter(["No response generated from LLM, make sure your query is relavent to the ingested document."]) + return iter( + ["No response generated from LLM, make sure your query is relavent to the ingested document."] + ) logger.info(f"Final Answer from agent: {final_context}") # TODO Add chat_history - final_prompt_template = ChatPromptTemplate.from_messages( - [ - ("human", final_context) - ] - ) + final_prompt_template = ChatPromptTemplate.from_messages([("human", final_context)]) llm = get_llm(**kwargs) chain = final_prompt_template | llm | StrOutputParser() logger.info(f"Prompt used for final response generation: {final_prompt_template}") - return chain.stream({}, config={"callbacks":[self.cb_handler]}) + return chain.stream({}, config={"callbacks": [self.cb_handler]}) except ValueError as e: logger.warning(f"Failed to get response because {e}") return iter(["I can't find an answer for that."]) - def create_agent(self, **kwargs) -> AgentExecutor: """ Creates the tools, chain, output parser and agent used to fetch the full context. 
@@ -270,8 +227,16 @@ def create_agent(self, **kwargs) -> AgentExecutor: self.kwargs = kwargs tools = [ - Tool(name="Search", func=self.search, description="The Search Tool is a powerful querying system that quickly finds and retrieves relevant answers from a given context, providing accurate and precise information to meet your search needs."), - Tool(name="Math", func=self.math, description="The Math Tool is a versatile calculator that performs essential mathematical operations, including multiplication, addition, subtraction, division, and greater than or less than comparisons, providing accurate results with ease."), + Tool( + name="Search", + func=self.search, + description="The Search Tool is a powerful querying system that quickly finds and retrieves relevant answers from a given context, providing accurate and precise information to meet your search needs.", + ), + Tool( + name="Math", + func=self.math, + description="The Math Tool is a versatile calculator that performs essential mathematical operations, including multiplication, addition, subtraction, division, and greater than or less than comparisons, providing accurate results with ease.", + ), ] tool_names = [tool.name for tool in tools] @@ -284,17 +249,18 @@ def create_agent(self, **kwargs) -> AgentExecutor: llm_chain=llm_chain, output_parser=output_parser, stop=["\n\n"], allowed_tools=tool_names ) - agent_executor = AgentExecutor.from_agent_and_tools(agent=recursive_decomposition_agent, tools=tools, verbose=True, callbacks=[self.cb_handler]) + agent_executor = AgentExecutor.from_agent_and_tools( + agent=recursive_decomposition_agent, tools=tools, verbose=True, callbacks=[self.cb_handler] + ) return agent_executor - def run_agent(self, question: str, **kwargs): """ Run question on the agent """ agent_executor = self.create_agent(**kwargs) - agent_executor.invoke({"question": question}, config={"callbacks":[self.cb_handler]}) + agent_executor.invoke({"question": question}, config={"callbacks": [self.cb_handler]}) ##### LLM call to get final answer ###### @@ -324,7 +290,6 @@ def retriever(self, query: str) -> List[str]: logger.debug(result) return [hit.page_content for hit in result] - def extract_answer(self, chunks: List[str], question: str) -> str: """ Find the answer to the query from the retrieved chunks @@ -339,7 +304,6 @@ def extract_answer(self, chunks: List[str], question: str) -> str: answer = llm([HumanMessage(content=prompt)]) return answer.content - def search(self, sub_questions: List[str]): """ Search for the answer for each subquestion and add them to the ledger. 
@@ -353,7 +317,6 @@ def search(self, sub_questions: List[str]): self.ledger.question_trace.append(sub_question) self.ledger.answer_trace.append(sub_answer) - def math(self, sub_questions: List[str]): """ Places an LLM call to answer mathematical subquestions which do not require search @@ -365,8 +328,8 @@ def math(self, sub_questions: List[str]): llm = get_llm(**self.kwargs) sub_answer = llm([HumanMessage(content=prompt)]) sub_answer = json.loads(sub_answer.content) - final_sub_answer= str(sub_answer['variable1'])+sub_answer['operation']+str(sub_answer['variable2']) - final_sub_answer=final_sub_answer+'='+str(eval(final_sub_answer)) + final_sub_answer = str(sub_answer['variable1']) + sub_answer['operation'] + str(sub_answer['variable2']) + final_sub_answer = final_sub_answer + '=' + str(eval(final_sub_answer)) except: prompt = "Solve this mathematical question:\nQuestion: " + sub_questions[0] prompt += f"Context:\n{fetch_context(self.ledger)}\n" @@ -377,7 +340,6 @@ def math(self, sub_questions: List[str]): sub_answer = llm([HumanMessage(content=prompt)]) final_sub_answer = sub_answer.content - self.ledger.question_trace.append(sub_questions[0]) self.ledger.answer_trace.append(final_sub_answer) @@ -398,10 +360,7 @@ def document_search(self, content: str, num_docs: int) -> List[Dict[str, Any]]: result = [] for doc in docs: result.append( - { - "source": os.path.basename(doc.metadata.get('source', '')), - "content": doc.page_content - } + {"source": os.path.basename(doc.metadata.get('source', '')), "content": doc.page_content} ) return result return [] @@ -419,7 +378,6 @@ def get_documents(self) -> List[str]: logger.error(f"Vectorstore not initialized. Error details: {e}") return [] - def delete_documents(self, filenames: List[str]): """Delete documents from the vector index.""" try: diff --git a/deploy/compose/rag-app-query-decomposition-agent.yaml b/RAG/examples/advanced_rag/query_decomposition_rag/docker-compose.yaml similarity index 63% rename from deploy/compose/rag-app-query-decomposition-agent.yaml rename to RAG/examples/advanced_rag/query_decomposition_rag/docker-compose.yaml index 9bb8448b..aff87171 100644 --- a/deploy/compose/rag-app-query-decomposition-agent.yaml +++ b/RAG/examples/advanced_rag/query_decomposition_rag/docker-compose.yaml @@ -1,28 +1,34 @@ +include: + - path: + - ../../local_deploy/docker-compose-vectordb.yaml + - ../../local_deploy/docker-compose-nim-ms.yaml + services: chain-server: container_name: chain-server image: chain-server:${TAG:-latest} build: - context: ../../ - dockerfile: ./RetrievalAugmentedGeneration/Dockerfile + context: ../../../../ + dockerfile: RAG/src/chain_server/Dockerfile args: - EXAMPLE_NAME: query_decomposition_rag + EXAMPLE_PATH: 'advanced_rag/query_decomposition_rag' + volumes: + - ./prompt.yaml:/prompt.yaml command: --port 8081 --host 0.0.0.0 environment: + EXAMPLE_PATH: 'advanced_rag/query_decomposition_rag' APP_VECTORSTORE_URL: "http://milvus:19530" APP_VECTORSTORE_NAME: "milvus" APP_LLM_MODELNAME: ${APP_LLM_MODELNAME:-"meta/llama3-70b-instruct"} APP_LLM_MODELENGINE: nvidia-ai-endpoints APP_LLM_SERVERURL: ${APP_LLM_SERVERURL:-""} - APP_EMBEDDINGS_MODELNAME: ${APP_EMBEDDINGS_MODELNAME:-snowflake/arctic-embed-l} + APP_EMBEDDINGS_MODELNAME: ${APP_EMBEDDINGS_MODELNAME:-nvidia/nv-embedqa-e5-v5} APP_EMBEDDINGS_MODELENGINE: ${APP_EMBEDDINGS_MODELENGINE:-nvidia-ai-endpoints} APP_EMBEDDINGS_SERVERURL: ${APP_EMBEDDINGS_SERVERURL:-""} APP_TEXTSPLITTER_MODELNAME: Snowflake/snowflake-arctic-embed-l APP_TEXTSPLITTER_CHUNKSIZE: 506 
APP_TEXTSPLITTER_CHUNKOVERLAP: 200 NVIDIA_API_KEY: ${NVIDIA_API_KEY} - APP_PROMPTS_CHATTEMPLATE: "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Please ensure that your responses are positive in nature." - APP_PROMPTS_RAGTEMPLATE: "You are a helpful AI assistant named Envie. You will reply to questions only based on the context that you are provided. If something is out of context, you will refrain from replying and politely decline to respond to the user." POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-password} POSTGRES_USER: ${POSTGRES_USER:-postgres} POSTGRES_DB: ${POSTGRES_DB:-api} @@ -38,29 +44,27 @@ services: expose: - "8081" shm_size: 5gb - deploy: - resources: - reservations: - devices: - - driver: nvidia - count: 1 - capabilities: [gpu] + depends_on: + nemollm-embedding: + condition: service_healthy + required: false + nemollm-inference: + condition: service_healthy + required: false rag-playground: container_name: rag-playground image: rag-playground:${TAG:-latest} build: - context: ../.././RetrievalAugmentedGeneration/frontend/ + context: ../../../../RAG/src/rag_playground/ dockerfile: Dockerfile + args: + PLAYGROUND_MODE: ${PLAYGROUND_MODE:-default} command: --port 8090 environment: APP_SERVERURL: http://chain-server APP_SERVERPORT: 8081 APP_MODELNAME: ${APP_LLM_MODELNAME:-"meta/llama3-70b-instruct"} - RIVA_API_URI: ${RIVA_API_URI:-} - RIVA_API_KEY: ${RIVA_API_KEY:-} - RIVA_FUNCTION_ID: ${RIVA_FUNCTION_ID:-} - TTS_SAMPLE_RATE: ${TTS_SAMPLE_RATE:-48000} OTEL_EXPORTER_OTLP_ENDPOINT: http://otel-collector:4317 OTEL_EXPORTER_OTLP_PROTOCOL: grpc ENABLE_TRACING: false diff --git a/RAG/examples/advanced_rag/query_decomposition_rag/prompt.yaml b/RAG/examples/advanced_rag/query_decomposition_rag/prompt.yaml new file mode 100644 index 00000000..cc38c8af --- /dev/null +++ b/RAG/examples/advanced_rag/query_decomposition_rag/prompt.yaml @@ -0,0 +1,45 @@ +chat_template: "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Please ensure that your responses are positive in nature." + +rag_template: "You are a helpful AI assistant named Envie. You will reply to questions only based on the context that you are provided. If something is out of context, you will refrain from replying and politely decline to respond to the user." + +tool_selector_prompt: | + Your task is to answer questions. If you cannot answer the question, you can request use for a tool and break the question into specific sub questions. Fill with Nil where no action is required. You should only return a JSON containing the tool and the generated sub questions. Consider the contextual information and only ask for information that you do not already have. Do not return any other explanations or text. The output should be a simple JSON structure! You are given two tools: + - Search + - Math + Search tool quickly finds and retrieves relevant answers from a given context, providing accurate and precise information to meet search needs. + Math tool performs essential operations, including multiplication, addition, subtraction, division, and greater than or less than comparisons, providing accurate results with ease. Utilize math tool when asked to find sum, difference of values. + Do not pass sub questions to any tool if they already have an answer in the Contextual Information. + If you have all the information needed to answer the question, mark the Tool_Request as Nil. 
+ + Contextual Information: + {{ context }} + + Question: + {{ question }} + + {"Tool_Request": "", "Generated Sub Questions": []} + +math_tool_prompt: | + Your task is to identify 2 variables and an operation from given questions. If you cannot answer the question, you can simply return "Not Possible". You should only return a JSON containing the `IsPossible`, `variable1`, `variable2`, and `operation`. Do not return any other explanations or text. The output should be a simple JSON structure! + You are given two options for `IsPossible`: + - Possible + - Not Possible + `variable1` and `variable2` should be real floating point numbers. + You are given four options for `operation symbols`: + - '+' (addition) + - '-' (subtraction) + - '*' (multiplication) + - '/' (division) + - '=' (equal to) + - '>' (greater than) + - '<' (less than) + - '>=' (greater than or equal to) + - '<=' (less than or equal to) + Only return the symbols for the specified operations and nothing else. + Contextual Information: + {{ context }} + + Question: + {{ question }} + + {"IsPossible": "", "variable1": [], "variable2": [], "operation": []} \ No newline at end of file diff --git a/RAG/examples/advanced_rag/structured_data_rag/README.md b/RAG/examples/advanced_rag/structured_data_rag/README.md new file mode 100644 index 00000000..dbbfba70 --- /dev/null +++ b/RAG/examples/advanced_rag/structured_data_rag/README.md @@ -0,0 +1,94 @@ + + +# Structured Data RAG + +## Example Features + +This example demonstrates how to use RAG with structured CSV data. + +This example uses models from the NVIDIA API Catalog. +This approach does not require embedding models or vector database solutions. +Instead, the example uses [PandasAI](https://docs.pandas-ai.com/en/latest/) to manage the workflow. + +For ingestion, the query server loads the structured data from a CSV file into a Pandas dataframe. +The query server can ingest multiple CSV files, provided the files have identical columns. +Ingestion of CSV files with differing columns is not supported and results in an exception. + +The core functionality uses a PandasAI agent to extract information from the dataframe. +This agent combines the query with the structure of the dataframe into an LLM prompt. +The LLM then generates Python code to extract the required information from the dataframe. +Subsequently, this generated code is executed on the dataframe and yields an output dataframe. + +To demonstrate the example, sample CSV files are available. +These are part of the structured data example Helm chart and represent a subset of the [Microsoft Azure Predictive Maintenance](https://www.kaggle.com/datasets/arnabbiswas1/microsoft-azure-predictive-maintenance) from Kaggle. +The CSV data retrieval prompt is specifically tuned for three CSV files from this dataset: `PdM_machines.csv`, `PdM_errors.csv`, and `PdM_failures.csv`. +The CSV files to use are specified in the `docker-compose.yaml` file by updating the environment variable `CSV_NAME`. +The default value is `PdM_machines`, but can be changed to `PdM_errors` or `PdM_failures`. + +| Model | Embedding | Framework | Vector Database | File Types | +| ------------------------ | ------------------------ | --------- | --------------- | ---------- | +| meta/llama3-70b-instruct | None | Custom | None | CSV | + +![Diagram](../../../../docs/images/structured_data_rag_arch.png) + +## Prerequisites + +Complete the [common prerequisites](../../../../docs/common-prerequisites.md). + +## Build and Start the Containers + +1. 
Export your NVIDIA API key as an environment variable: + + ```text + export NVIDIA_API_KEY="nvapi-<...>" + ``` + +1. Start the containers: + + ```console + cd RAG/examples/advanced_rag/structured_data_rag/ + docker compose up -d --build + ``` + + *Example Output* + + ```output + ✔ Network nvidia-rag Created + ✔ Container rag-playground Started + ✔ Container milvus-minio Started + ✔ Container chain-server Started + ✔ Container milvus-etcd Started + ✔ Container milvus-standalone Started + ``` + +1. Confirm the containers are running: + + ```console + docker ps --format "table {{.ID}}\t{{.Names}}\t{{.Status}}" + ``` + + *Example Output* + + ```output + CONTAINER ID NAMES STATUS + 39a8524829da rag-playground Up 2 minutes + bfbd0193dbd2 chain-server Up 2 minutes + ec02ff3cc58b milvus-standalone Up 3 minutes + 6969cf5b4342 milvus-minio Up 3 minutes (healthy) + 57a068d62fbb milvus-etcd Up 3 minutes (healthy) + ``` + +1. Open a web browser and access to use the RAG Playground. + + Refer to [Using the Sample Web Application](../../../../docs/using-sample-web-application.md) + for information about uploading documents and using the web interface. + +## Next Steps + +- [Vector Database Customizations](../../../../docs/vector-database.md) +- Stop the containers by running `docker compose down`. +- Use the [RAG Application: Structured Data Agent](https://registry.ngc.nvidia.com/orgs/ohlfw0olaadg/teams/ea-participants/helm-charts/rag-app-structured-data-chatbot) + Helm chart to deploy this example in Kubernetes. diff --git a/RetrievalAugmentedGeneration/examples/structured_data_rag/__init__.py b/RAG/examples/advanced_rag/structured_data_rag/__init__.py similarity index 100% rename from RetrievalAugmentedGeneration/examples/structured_data_rag/__init__.py rename to RAG/examples/advanced_rag/structured_data_rag/__init__.py diff --git a/RetrievalAugmentedGeneration/examples/structured_data_rag/chains.py b/RAG/examples/advanced_rag/structured_data_rag/chains.py similarity index 82% rename from RetrievalAugmentedGeneration/examples/structured_data_rag/chains.py rename to RAG/examples/advanced_rag/structured_data_rag/chains.py index d322bbcc..93004b40 100644 --- a/RetrievalAugmentedGeneration/examples/structured_data_rag/chains.py +++ b/RAG/examples/advanced_rag/structured_data_rag/chains.py @@ -16,6 +16,7 @@ """LLM Chains for executing Retrival Augmented Generation.""" import logging import os +import pathlib from typing import Generator, List import pandas as pd @@ -29,23 +30,23 @@ from pandasai import Agent as PandasAI_Agent from pandasai.responses.response_parser import ResponseParser -from integrations.pandasai.llms.nv_aiplay import NVIDIA as PandasAI_NVIDIA -from RetrievalAugmentedGeneration.common.base import BaseExample -from RetrievalAugmentedGeneration.common.utils import get_config, get_llm - # pylint: disable=no-name-in-module, disable=import-error -from RetrievalAugmentedGeneration.example.csv_utils import ( +from RAG.examples.advanced_rag.structured_data_rag.csv_utils import ( extract_df_desc, get_prompt_params, + is_result_valid, parse_prompt_config, - is_result_valid ) +from RAG.src.chain_server.base import BaseExample +from RAG.src.chain_server.utils import get_config, get_llm, get_prompts +from RAG.src.pandasai.llms.nv_aiplay import NVIDIA as PandasAI_NVIDIA logger = logging.getLogger(__name__) settings = get_config() INGESTED_CSV_FILES_LIST = "ingested_csv_files.txt" + class PandasDataFrame(ResponseParser): """Returns Pandas Dataframe instead of SmartDataFrame""" @@ -59,7 +60,6 @@ def 
format_dataframe(self, result): class CSVChatbot(BaseExample): """RAG example showcasing CSV parsing using Pandas AI Agent""" - def compare_csv_columns(self, ref_csv_file, current_csv_file): """Compares columns of two CSV files""" @@ -127,29 +127,27 @@ def ingest_docs(self, filepath: str, filename: str): f.write(filepath + "\n") else: raise ValueError( - f"Columns of the file {filepath} do not match the reference columns of {ref_csv_path} file." - ) + f"Columns of the file {filepath} do not match the reference columns of {ref_csv_path} file." + ) logger.info("Document %s ingested successfully", filename) - def llm_chain( - self, query: str, chat_history: List["Message"], **kwargs - ) -> Generator[str, None, None]: + def llm_chain(self, query: str, chat_history: List["Message"], **kwargs) -> Generator[str, None, None]: """Execute a simple LLM chain using the components defined above.""" logger.info("Using llm to generate response directly without knowledge base.") # WAR: Disable chat history (UI consistency). chat_history = [] - system_message = [("system", get_config().prompts.chat_template)] + system_message = [("system", get_prompts().get("prompts").get("chat_template"))] conversation_history = [(msg.role, msg.content) for msg in chat_history] user_input = [("user", "{input}")] # Checking if conversation_history is not None and not empty - prompt = ChatPromptTemplate.from_messages( - system_message + conversation_history + user_input - ) if conversation_history else ChatPromptTemplate.from_messages( - system_message + user_input + prompt = ( + ChatPromptTemplate.from_messages(system_message + conversation_history + user_input) + if conversation_history + else ChatPromptTemplate.from_messages(system_message + user_input) ) logger.info(f"Using prompt for response generation: {prompt.format(input=query)}") @@ -171,35 +169,23 @@ def rag_chain(self, query: str, chat_history: List["Message"], **kwargs) -> Gene df = df.fillna(0) df_desc = extract_df_desc(df) - prompt_config = parse_prompt_config( - "RetrievalAugmentedGeneration/example/csv_prompt_config.yaml" - ) + prompt_config = get_prompts().get("prompts") logger.info(prompt_config.get("csv_prompts", [])) - data_retrieval_prompt_params = get_prompt_params( - prompt_config.get("csv_prompts", []) - ) + data_retrieval_prompt_params = get_prompt_params(prompt_config.get("csv_prompts", [])) llm_data_retrieval = PandasAI_NVIDIA(temperature=0.2, model=settings.llm.model_name_pandas_ai) - config_data_retrieval = { - "llm": llm_data_retrieval, - "response_parser": PandasDataFrame, - "max_retries": 6 - } - agent_data_retrieval = PandasAI_Agent( - [df], config=config_data_retrieval, memory_size=20 - ) + config_data_retrieval = {"llm": llm_data_retrieval, "response_parser": PandasDataFrame, "max_retries": 6} + agent_data_retrieval = PandasAI_Agent([df], config=config_data_retrieval, memory_size=20) data_retrieval_prompt = ChatPromptTemplate( messages=[ - SystemMessagePromptTemplate.from_template( - prompt_config.get("csv_data_retrieval_template", []) - ), + SystemMessagePromptTemplate.from_template(prompt_config.get("csv_data_retrieval_template", [])), HumanMessagePromptTemplate.from_template("{query}"), ], input_variables=["description", "instructions", "data_frame", "query"], ) conversation_history = [(msg.role, msg.content) for msg in chat_history] - conversation_history_messages = ChatPromptTemplate.from_messages(conversation_history).messages + conversation_history_messages = ChatPromptTemplate.from_messages(conversation_history).messages # Insert 
conversation_history between data_retrieval_prompt's SystemMessage & HumanMessage (query) if conversation_history_messages: data_retrieval_prompt.messages[1:1] = conversation_history_messages @@ -219,8 +205,7 @@ def rag_chain(self, query: str, chat_history: List["Message"], **kwargs) -> Gene result_df = str(result_df) response_prompt_template = PromptTemplate( - template=prompt_config.get("csv_response_template", []), - input_variables=["query", "data"], + template=prompt_config.get("csv_response_template", []), input_variables=["query", "data"], ) response_prompt = response_prompt_template.format(query=query, data=result_df) @@ -241,4 +226,4 @@ def get_documents(self) -> List[str]: def delete_documents(self, filenames: List[str]): """Delete documents from the vector index.""" logger.error("delete_documents not implemented") - return True \ No newline at end of file + return True diff --git a/RetrievalAugmentedGeneration/examples/structured_data_rag/csv_utils.py b/RAG/examples/advanced_rag/structured_data_rag/csv_utils.py similarity index 94% rename from RetrievalAugmentedGeneration/examples/structured_data_rag/csv_utils.py rename to RAG/examples/advanced_rag/structured_data_rag/csv_utils.py index 3d3590f7..50229e4b 100644 --- a/RetrievalAugmentedGeneration/examples/structured_data_rag/csv_utils.py +++ b/RAG/examples/advanced_rag/structured_data_rag/csv_utils.py @@ -15,11 +15,11 @@ """ Module to provide utility functions for CSV RAG example""" -import os import json -import pandas as pd +import os from typing import Dict, List +import pandas as pd import yaml @@ -52,10 +52,8 @@ def parse_prompt_config(config_path: str) -> Dict: # Check if the expected key 'prompts' is in the data if "prompts" not in data or not isinstance(data["prompts"], dict): - raise ValueError( - "Invalid YAML structure. Expected a 'prompts' key with a list of dictionaries." - ) - + raise ValueError("Invalid YAML structure. 
Expected a 'prompts' key with a list of dictionaries.") + env_prompts = None if "CSV_PROMPTS" in os.environ: try: @@ -99,8 +97,9 @@ def get_prompt_params(prompt_list: List) -> Dict[str, str]: return {} + def is_result_valid(result): """ Check for validity of resultant data frame""" if isinstance(result, pd.DataFrame): return not result.empty - return bool(result) \ No newline at end of file + return bool(result) diff --git a/RetrievalAugmentedGeneration/examples/structured_data_rag/PdM_errors.csv b/RAG/examples/advanced_rag/structured_data_rag/data/PdM_errors.csv similarity index 100% rename from RetrievalAugmentedGeneration/examples/structured_data_rag/PdM_errors.csv rename to RAG/examples/advanced_rag/structured_data_rag/data/PdM_errors.csv diff --git a/RetrievalAugmentedGeneration/examples/structured_data_rag/PdM_failures.csv b/RAG/examples/advanced_rag/structured_data_rag/data/PdM_failures.csv similarity index 100% rename from RetrievalAugmentedGeneration/examples/structured_data_rag/PdM_failures.csv rename to RAG/examples/advanced_rag/structured_data_rag/data/PdM_failures.csv diff --git a/RetrievalAugmentedGeneration/examples/structured_data_rag/PdM_machines.csv b/RAG/examples/advanced_rag/structured_data_rag/data/PdM_machines.csv similarity index 100% rename from RetrievalAugmentedGeneration/examples/structured_data_rag/PdM_machines.csv rename to RAG/examples/advanced_rag/structured_data_rag/data/PdM_machines.csv diff --git a/deploy/compose/rag-app-structured-data-chatbot.yaml b/RAG/examples/advanced_rag/structured_data_rag/docker-compose.yaml similarity index 74% rename from deploy/compose/rag-app-structured-data-chatbot.yaml rename to RAG/examples/advanced_rag/structured_data_rag/docker-compose.yaml index c8723d89..e8ae62cb 100644 --- a/deploy/compose/rag-app-structured-data-chatbot.yaml +++ b/RAG/examples/advanced_rag/structured_data_rag/docker-compose.yaml @@ -1,14 +1,21 @@ +include: + - path: + - ../../local_deploy/docker-compose-nim-ms.yaml + services: chain-server: container_name: chain-server image: chain-server:${TAG:-latest} build: - context: ../../ - dockerfile: ./RetrievalAugmentedGeneration/Dockerfile + context: ../../../../ + dockerfile: RAG/src/chain_server/Dockerfile args: - EXAMPLE_NAME: structured_data_rag + EXAMPLE_PATH: 'advanced_rag/structured_data_rag' + volumes: + - ./prompt.yaml:/prompt.yaml command: --port 8081 --host 0.0.0.0 environment: + EXAMPLE_PATH: 'advanced_rag/structured_data_rag' APP_LLM_MODELNAME: ${APP_LLM_MODELNAME:-meta/llama3-70b-instruct} APP_LLM_MODELENGINE: nvidia-ai-endpoints APP_LLM_SERVERURL: ${APP_LLM_SERVERURL:-""} @@ -24,29 +31,24 @@ services: expose: - "8081" shm_size: 5gb - deploy: - resources: - reservations: - devices: - - driver: nvidia - count: 1 - capabilities: [gpu] + depends_on: + nemollm-inference: + condition: service_healthy + required: false rag-playground: container_name: rag-playground image: rag-playground:${TAG:-latest} build: - context: ../.././RetrievalAugmentedGeneration/frontend/ + context: ../../../../RAG/src/rag_playground/ dockerfile: Dockerfile + args: + PLAYGROUND_MODE: ${PLAYGROUND_MODE:-default} command: --port 8090 environment: APP_SERVERURL: http://chain-server APP_SERVERPORT: 8081 APP_MODELNAME: ${APP_LLM_MODELNAME:-meta/llama3-70b-instruct} - RIVA_API_URI: ${RIVA_API_URI:-} - RIVA_API_KEY: ${RIVA_API_KEY:-} - RIVA_FUNCTION_ID: ${RIVA_FUNCTION_ID:-} - TTS_SAMPLE_RATE: ${TTS_SAMPLE_RATE:-48000} ports: - "8090:8090" expose: diff --git 
a/RetrievalAugmentedGeneration/examples/structured_data_rag/csv_prompt_config.yaml b/RAG/examples/advanced_rag/structured_data_rag/prompt.yaml similarity index 88% rename from RetrievalAugmentedGeneration/examples/structured_data_rag/csv_prompt_config.yaml rename to RAG/examples/advanced_rag/structured_data_rag/prompt.yaml index 8178a9e2..a01430d5 100644 --- a/RetrievalAugmentedGeneration/examples/structured_data_rag/csv_prompt_config.yaml +++ b/RAG/examples/advanced_rag/structured_data_rag/prompt.yaml @@ -1,5 +1,9 @@ prompts: + chat_template: You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Please ensure that your responses are positive in nature. + + rag_template: You are a helpful AI assistant named Envie. You will reply to questions only based on the context that you are provided. If something is out of context, you will refrain from replying and politely decline to respond to the user. + csv_data_retrieval_template: | You are an expert data retrieval agent who writes functional python code and utilzes Pandas library in python for data retrieval. diff --git a/RetrievalAugmentedGeneration/examples/structured_data_rag/requirements.txt b/RAG/examples/advanced_rag/structured_data_rag/requirements.txt similarity index 100% rename from RetrievalAugmentedGeneration/examples/structured_data_rag/requirements.txt rename to RAG/examples/advanced_rag/structured_data_rag/requirements.txt diff --git a/RAG/examples/basic_rag/langchain/README.md b/RAG/examples/basic_rag/langchain/README.md new file mode 100644 index 00000000..d4e9dd2a --- /dev/null +++ b/RAG/examples/basic_rag/langchain/README.md @@ -0,0 +1,74 @@ + + +# Basic RAG Using LangChain + +## Example Features + +This example deploys a basic RAG pipeline for chat Q&A and serves inferencing from an NVIDIA API Catalog endpoint. +You do not need a GPU on your machine to run this example. + +| Model | Embedding | Framework | Vector Database | File Types | +| ------------------------ | ------------------------ | --------- | --------------- | ------------ | +| meta/llama3-70b-instruct | nvidia/nv-embedqa-e5-v5 | LangChain | Milvus | TXT, PDF, MD | + +![Diagram](../../../../docs/images/basic_rag_langchain_arch.png) + +## Prerequisites + +Complete the [common prerequisites](../../../../docs/common-prerequisites.md). + +## Build and Start the Containers + +1. Export your NVIDIA API key as an environment variable: + + ```text + export NVIDIA_API_KEY="nvapi-<...>" + ``` + +1. Start the containers: + + ```console + cd RAG/examples/basic_rag/langchain/ + docker compose up -d --build + ``` + + *Example Output* + + ```output + ✔ Network nvidia-rag Created + ✔ Container rag-playground Started + ✔ Container milvus-minio Started + ✔ Container chain-server Started + ✔ Container milvus-etcd Started + ✔ Container milvus-standalone Started + ``` + +1. Confirm the containers are running: + + ```console + docker ps --format "table {{.ID}}\t{{.Names}}\t{{.Status}}" + ``` + + *Example Output* + + ```output + CONTAINER ID NAMES STATUS + 39a8524829da rag-playground Up 2 minutes + bfbd0193dbd2 chain-server Up 2 minutes + ec02ff3cc58b milvus-standalone Up 3 minutes + 6969cf5b4342 milvus-minio Up 3 minutes (healthy) + 57a068d62fbb milvus-etcd Up 3 minutes (healthy) + ``` + +1. Open a web browser and access to use the RAG Playground. + + Refer to [Using the Sample Web Application](../../../../docs/using-sample-web-application.md) + for information about uploading documents and using the web interface. 
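Editor's note: if you prefer to script against the pipeline instead of the web UI, the chain server that this compose file exposes on port 8081 serves the `/documents`, `/generate`, and `/search` APIs referenced by the chain code in this change. The snippet below is a hypothetical smoke test: the endpoint paths and port come from this diff, but the payload field names (`file`, `messages`, `use_knowledge_base`, `query`, `top_k`) and the presence of a local `sample.txt` are assumptions, so check them against the running server's schema before relying on it.

```python
# Hypothetical smoke test against the chain server started by docker compose.
# Only status codes are printed; adjust payloads to the server's actual schema.
import requests

BASE = "http://localhost:8081"

# Ingest a text document into the Milvus-backed knowledge base (POST /documents).
with open("sample.txt", "rb") as f:
    resp = requests.post(f"{BASE}/documents", files={"file": f}, timeout=120)
print("ingest:", resp.status_code)

# Ask a question grounded in the ingested document (POST /generate).
resp = requests.post(
    f"{BASE}/generate",
    json={
        "messages": [{"role": "user", "content": "Summarize the uploaded document."}],
        "use_knowledge_base": True,  # assumption: flag name taken from the chain docstrings
    },
    timeout=120,
)
print("generate:", resp.status_code)

# Retrieve the most similar chunks directly (POST /search).
resp = requests.post(
    f"{BASE}/search", json={"query": "deployment steps", "top_k": 4}, timeout=60
)
print("search:", resp.status_code)
```

The RAG Playground container reaches the same server internally (APP_SERVERURL/APP_SERVERPORT above), so the UI on port 8090 and a script like this exercise the identical API surface.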
+ +## Next Steps + +- [Vector Database Customizations](../../../../docs/vector-database.md) +- Stop the containers by running `docker compose down`. diff --git a/RetrievalAugmentedGeneration/examples/nvidia_api_catalog/chains.py b/RAG/examples/basic_rag/langchain/chains.py similarity index 53% rename from RetrievalAugmentedGeneration/examples/nvidia_api_catalog/chains.py rename to RAG/examples/basic_rag/langchain/chains.py index 82f33017..b144d445 100644 --- a/RetrievalAugmentedGeneration/examples/nvidia_api_catalog/chains.py +++ b/RAG/examples/basic_rag/langchain/chains.py @@ -15,24 +15,32 @@ import logging import os -from functools import lru_cache -from typing import Generator, List, Dict, Any +from typing import Any, Dict, Generator, List from langchain_community.document_loaders import UnstructuredFileLoader -from langchain.text_splitter import CharacterTextSplitter -from langchain_community.vectorstores.faiss import FAISS from langchain_core.output_parsers.string import StrOutputParser from langchain_core.prompts.chat import ChatPromptTemplate -from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings -from RetrievalAugmentedGeneration.common.base import BaseExample -from RetrievalAugmentedGeneration.common.utils import get_config, get_llm, get_embedding_model, create_vectorstore_langchain, get_docs_vectorstore_langchain, del_docs_vectorstore_langchain, get_text_splitter, get_vectorstore -from RetrievalAugmentedGeneration.common.tracing import langchain_instrumentation_class_wrapper + +from RAG.src.chain_server.base import BaseExample +from RAG.src.chain_server.tracing import langchain_instrumentation_class_wrapper +from RAG.src.chain_server.utils import ( + create_vectorstore_langchain, + del_docs_vectorstore_langchain, + get_config, + get_docs_vectorstore_langchain, + get_embedding_model, + get_llm, + get_prompts, + get_text_splitter, + get_vectorstore, +) logger = logging.getLogger(__name__) vector_store_path = "vectorstore.pkl" document_embedder = get_embedding_model() text_splitter = None settings = get_config() +prompts = get_prompts() try: vectorstore = create_vectorstore_langchain(document_embedder=document_embedder) @@ -40,11 +48,21 @@ vectorstore = None logger.info(f"Unable to connect to vector store during initialization: {e}") + @langchain_instrumentation_class_wrapper class NvidiaAPICatalog(BaseExample): - def ingest_docs(self, filepath: str, filename: str): - """Ingest documents to the VectorDB.""" - if not filename.endswith((".txt",".pdf",".md")): + def ingest_docs(self, filepath: str, filename: str) -> None: + """Ingests documents to the VectorDB. + It's called when the POST endpoint of `/documents` API is invoked. + + Args: + filepath (str): The path to the document file. + filename (str): The name of the document file. + + Raises: + ValueError: If there's an error during document ingestion or the file format is not supported. 
+ """ + if not filename.endswith((".txt", ".pdf", ".md")): raise ValueError(f"{filename} is not a valid Text, PDF or Markdown file") try: # Load raw documents from the directory @@ -53,11 +71,15 @@ def ingest_docs(self, filepath: str, filename: str): if raw_documents: global text_splitter + # Get text splitter instance, it is selected based on environment variable APP_TEXTSPLITTER_MODELNAME + # tokenizer dimension of text splitter should be same as embedding model if not text_splitter: text_splitter = get_text_splitter() + # split documents based on configuration provided documents = text_splitter.split_documents(raw_documents) vs = get_vectorstore(vectorstore, document_embedder) + # ingest documents into vectorstore vs.add_documents(documents) else: logger.warning("No documents available to process!") @@ -65,61 +87,79 @@ def ingest_docs(self, filepath: str, filename: str): logger.error(f"Failed to ingest document due to exception {e}") raise ValueError("Failed to upload document. Please upload an unstructured text document.") - def llm_chain( - self, query: str, chat_history: List["Message"], **kwargs - ) -> Generator[str, None, None]: - """Execute a simple LLM chain using the components defined above.""" + def llm_chain(self, query: str, chat_history: List["Message"], **kwargs) -> Generator[str, None, None]: + """Execute a simple LLM chain using the components defined above. + It's called when the `/generate` API is invoked with `use_knowledge_base` set to `False`. + + Args: + query (str): Query to be answered by llm. + chat_history (List[Message]): Conversation history between user and chain. + """ logger.info("Using llm to generate response directly without knowledge base.") # WAR: Disable chat history (UI consistency). chat_history = [] - system_message = [("system", settings.prompts.chat_template)] + system_message = [("system", prompts.get("chat_template", ""))] conversation_history = [(msg.role, msg.content) for msg in chat_history] user_input = [("user", "{input}")] # Checking if conversation_history is not None and not empty - prompt_template = ChatPromptTemplate.from_messages( - system_message + conversation_history + user_input - ) if conversation_history else ChatPromptTemplate.from_messages( - system_message + user_input + prompt_template = ( + ChatPromptTemplate.from_messages(system_message + conversation_history + user_input) + if conversation_history + else ChatPromptTemplate.from_messages(system_message + user_input) ) llm = get_llm(**kwargs) + # Simple langchain chain to generate response based on user's query chain = prompt_template | llm | StrOutputParser() - augmented_user_input = ( - "\n\nQuestion: " + query + "\n" - ) + augmented_user_input = "\n\nQuestion: " + query + "\n" logger.info(f"Prompt used for response generation: {prompt_template.format(input=augmented_user_input)}") - return chain.stream({"input": augmented_user_input}, config={"callbacks":[self.cb_handler]}) + return chain.stream({"input": augmented_user_input}, config={"callbacks": [self.cb_handler]}) def rag_chain(self, query: str, chat_history: List["Message"], **kwargs) -> Generator[str, None, None]: - """Execute a Retrieval Augmented Generation chain using the components defined above.""" + """Execute a Retrieval Augmented Generation chain using the components defined above. + It's called when the `/generate` API is invoked with `use_knowledge_base` set to `True`. + + Args: + query (str): Query to be answered by llm. + chat_history (List[Message]): Conversation history between user and chain. 
+ """ logger.info("Using rag to generate response from document") # WAR: Disable chat history (UI consistency). chat_history = [] - system_message = [("system", settings.prompts.rag_template)] + system_message = [("system", prompts.get("rag_template", ""))] conversation_history = [(msg.role, msg.content) for msg in chat_history] user_input = [("user", "{input}")] # Checking if conversation_history is not None and not empty - prompt_template = ChatPromptTemplate.from_messages( - system_message + conversation_history + user_input - ) if conversation_history else ChatPromptTemplate.from_messages( - system_message + user_input + prompt_template = ( + ChatPromptTemplate.from_messages(system_message + conversation_history + user_input) + if conversation_history + else ChatPromptTemplate.from_messages(system_message + user_input) ) llm = get_llm(**kwargs) + # Create a simple chain with conversation history and context chain = prompt_template | llm | StrOutputParser() try: vs = get_vectorstore(vectorstore, document_embedder) if vs != None: try: - logger.info(f"Getting retrieved top k values: {settings.retriever.top_k} with confidence threshold: {settings.retriever.score_threshold}") - retriever = vs.as_retriever(search_type="similarity_score_threshold", search_kwargs={"score_threshold": settings.retriever.score_threshold, "k": settings.retriever.top_k}) + logger.info( + f"Getting retrieved top k values: {settings.retriever.top_k} with confidence threshold: {settings.retriever.score_threshold}" + ) + retriever = vs.as_retriever( + search_type="similarity_score_threshold", + search_kwargs={ + "score_threshold": settings.retriever.score_threshold, + "k": settings.retriever.top_k, + }, + ) docs = retriever.get_relevant_documents(query, callbacks=[self.cb_handler]) except NotImplementedError: # Some retriever like milvus don't have similarity score threshold implemented @@ -129,37 +169,45 @@ def rag_chain(self, query: str, chat_history: List["Message"], **kwargs) -> Gene logger.debug(f"Retrieved documents are: {docs}") if not docs: logger.warning("Retrieval failed to get any relevant context") - return iter(["No response generated from LLM, make sure your query is relavent to the ingested document."]) + return iter( + ["No response generated from LLM, make sure your query is relavent to the ingested document."] + ) context = "" for doc in docs: context += doc.page_content + "\n\n" - augmented_user_input = ( - "Context: " + context + "\n\nQuestion: " + query + "\n" - ) + # Create input with context and user query to be ingested in prompt to retrieve contextal response from llm + augmented_user_input = "Context: " + context + "\n\nQuestion: " + query + "\n" - logger.info(f"Prompt used for response generation: {prompt_template.format(input=augmented_user_input)}") - return chain.stream({"input": augmented_user_input}, config={"callbacks":[self.cb_handler]}) + logger.info( + f"Prompt used for response generation: {prompt_template.format(input=augmented_user_input)}" + ) + return chain.stream({"input": augmented_user_input}, config={"callbacks": [self.cb_handler]}) except Exception as e: logger.warning(f"Failed to generate response due to exception {e}") - logger.warning( - "No response generated from LLM, make sure you've ingested document." - ) + logger.warning("No response generated from LLM, make sure you've ingested document.") return iter( - [ - "No response generated from LLM, make sure you have ingested document from the Knowledge Base Tab." 
- ] + ["No response generated from LLM, make sure you have ingested document from the Knowledge Base Tab."] ) def document_search(self, content: str, num_docs: int) -> List[Dict[str, Any]]: - """Search for the most relevant documents for the given search parameters.""" + """Search for the most relevant documents for the given search parameters. + It's called when the `/search` API is invoked. + + Args: + content (str): Query to be searched from vectorstore. + num_docs (int): Number of similar docs to be retrieved from vectorstore. + """ try: vs = get_vectorstore(vectorstore, document_embedder) if vs != None: try: - retriever = vs.as_retriever(search_type="similarity_score_threshold", search_kwargs={"score_threshold": settings.retriever.score_threshold, "k": settings.retriever.top_k}) + retriever = vs.as_retriever( + search_type="similarity_score_threshold", + search_kwargs={"score_threshold": settings.retriever.score_threshold, "k": num_docs}, + ) docs = retriever.get_relevant_documents(content, callbacks=[self.cb_handler]) except NotImplementedError: # Some retriever like milvus don't have similarity score threshold implemented @@ -169,10 +217,7 @@ def document_search(self, content: str, num_docs: int) -> List[Dict[str, Any]]: result = [] for doc in docs: result.append( - { - "source": os.path.basename(doc.metadata.get('source', '')), - "content": doc.page_content - } + {"source": os.path.basename(doc.metadata.get('source', '')), "content": doc.page_content} ) return result return [] @@ -180,7 +225,12 @@ def document_search(self, content: str, num_docs: int) -> List[Dict[str, Any]]: logger.error(f"Error from POST /search endpoint. Error details: {e}") def get_documents(self) -> List[str]: - """Retrieves filenames stored in the vector store.""" + """Retrieves filenames stored in the vector store. + It's called when the GET endpoint of `/documents` API is invoked. + + Returns: + List[str]: List of filenames ingested in vectorstore. + """ try: vs = get_vectorstore(vectorstore, document_embedder) if vs: @@ -189,12 +239,18 @@ def get_documents(self) -> List[str]: logger.error(f"Vectorstore not initialized. Error details: {e}") return [] + def delete_documents(self, filenames: List[str]) -> bool: + """Delete documents from the vector index. + It's called when the DELETE endpoint of `/documents` API is invoked. - def delete_documents(self, filenames: List[str]): - """Delete documents from the vector index.""" + Args: + filenames (List[str]): List of filenames to be deleted from vectorstore. + """ try: + # Get vectorstore instance vs = get_vectorstore(vectorstore, document_embedder) if vs: return del_docs_vectorstore_langchain(vs, filenames) except Exception as e: - logger.error(f"Vectorstore not initialized. Error details: {e}") \ No newline at end of file + logger.error(f"Vectorstore not initialized. 
Error details: {e}") + return False diff --git a/RAG/examples/basic_rag/langchain/docker-compose.yaml b/RAG/examples/basic_rag/langchain/docker-compose.yaml new file mode 100644 index 00000000..33483c56 --- /dev/null +++ b/RAG/examples/basic_rag/langchain/docker-compose.yaml @@ -0,0 +1,97 @@ +include: + - path: + - ../../local_deploy/docker-compose-vectordb.yaml + - ../../local_deploy/docker-compose-nim-ms.yaml + +services: + chain-server: + container_name: chain-server + image: chain-server:${TAG:-latest} + build: + # Set context to repo's root directory + context: ../../../../ + dockerfile: RAG/src/chain_server/Dockerfile + args: + # Build args, used to copy relevant directory inside the container relative to GenerativeAIExamples/RAG/examples + EXAMPLE_PATH: 'basic_rag/langchain' + volumes: + - ./prompt.yaml:/prompt.yaml + # start the server on port 8081 + command: --port 8081 --host 0.0.0.0 + environment: + # Path to example directory relative to GenerativeAIExamples/RAG/examples + EXAMPLE_PATH: 'basic_rag/langchain' + # URL on which vectorstore is hosted + APP_VECTORSTORE_URL: "http://milvus:19530" + # Type of vectordb used to store embedding supported type milvus, pgvector + APP_VECTORSTORE_NAME: "milvus" + # url on which llm model is hosted. If "", Nvidia hosted API is used + APP_LLM_MODELNAME: ${APP_LLM_MODELNAME:-"meta/llama3-70b-instruct"} + # embedding model engine used for inference, supported type nvidia-ai-endpoints, huggingface + APP_LLM_MODELENGINE: nvidia-ai-endpoints + APP_LLM_SERVERURL: ${APP_LLM_SERVERURL:-""} + APP_EMBEDDINGS_MODELNAME: ${APP_EMBEDDINGS_MODELNAME:-nvidia/nv-embedqa-e5-v5} + # embedding model engine used for inference, supported type nvidia-ai-endpoints + APP_EMBEDDINGS_MODELENGINE: ${APP_EMBEDDINGS_MODELENGINE:-nvidia-ai-endpoints} + # url on which embedding model is hosted. 
If "", Nvidia hosted API is used + APP_EMBEDDINGS_SERVERURL: ${APP_EMBEDDINGS_SERVERURL:-""} + # text splitter model name, it's fetched from huggingface + APP_TEXTSPLITTER_MODELNAME: Snowflake/snowflake-arctic-embed-l + APP_TEXTSPLITTER_CHUNKSIZE: 506 + APP_TEXTSPLITTER_CHUNKOVERLAP: 200 + NVIDIA_API_KEY: ${NVIDIA_API_KEY} + # vectorstore collection name to store embeddings + COLLECTION_NAME: ${COLLECTION_NAME:-nvidia_api_catalog} + APP_RETRIEVER_TOPK: 4 + APP_RETRIEVER_SCORETHRESHOLD: 0.25 + # observability server url + OTEL_EXPORTER_OTLP_ENDPOINT: http://otel-collector:4317 + OTEL_EXPORTER_OTLP_PROTOCOL: grpc + # enable observability in chain server + ENABLE_TRACING: false + # Log level for server, supported level NOTSET, DEBUG, INFO, WARN, ERROR, CRITICAL + LOGLEVEL: ${LOGLEVEL:-INFO} + ports: + - "8081:8081" + expose: + - "8081" + shm_size: 5gb + depends_on: + nemollm-embedding: + condition: service_healthy + required: false + nemollm-inference: + condition: service_healthy + required: false + + rag-playground: + container_name: rag-playground + image: rag-playground:${TAG:-latest} + build: + # Set context to repo's root directory + context: ../../../../RAG/src/rag_playground/ + dockerfile: Dockerfile + args: + # select UI type, supported model default, speech + PLAYGROUND_MODE: ${PLAYGROUND_MODE:-default} + command: --port 8090 + environment: + # URL or chain server container + APP_SERVERURL: http://chain-server + APP_SERVERPORT: 8081 + # model name displayed on UI + APP_MODELNAME: ${APP_LLM_MODELNAME:-"meta/llama3-70b-instruct"} + OTEL_EXPORTER_OTLP_ENDPOINT: http://otel-collector:4317 + OTEL_EXPORTER_OTLP_PROTOCOL: grpc + # enable observability in rag playground + ENABLE_TRACING: false + ports: + - "8090:8090" + expose: + - "8090" + depends_on: + - chain-server + +networks: + default: + name: nvidia-rag diff --git a/RAG/examples/basic_rag/langchain/prompt.yaml b/RAG/examples/basic_rag/langchain/prompt.yaml new file mode 100644 index 00000000..ebbaa8d9 --- /dev/null +++ b/RAG/examples/basic_rag/langchain/prompt.yaml @@ -0,0 +1,9 @@ +chat_template: | + You are a helpful, respectful and honest assistant. + Always answer as helpfully as possible, while being safe. + Please ensure that your responses are positive in nature. + +rag_template: | + You are a helpful AI assistant named Envie. + You will reply to questions only based on the context that you are provided. + If something is out of context, you will refrain from replying and politely decline to respond to the user. \ No newline at end of file diff --git a/RAG/examples/basic_rag/llamaindex/README.md b/RAG/examples/basic_rag/llamaindex/README.md new file mode 100644 index 00000000..627feff5 --- /dev/null +++ b/RAG/examples/basic_rag/llamaindex/README.md @@ -0,0 +1,76 @@ + + +# Basic RAG Using LlamaIndex + +## Example Features + +This example deploys a basic RAG pipeline for chat Q&A and serves inferencing from an NVIDIA API Catalog endpoint. +You do not need a GPU on your machine to run this example. + +| Model | Embedding | Framework | Vector Database | File Types | +| ----------------------- | ------------------------ | ---------- | --------------- | ------------------------------------ | +| meta/llama3-8b-instruct | nvidia/nv-embedqa-e5-v5 | LlamaIndex | Milvus | HTML, TXT, PDF, MD, DOCX, PPTX, XLSX | + +![Diagram](../../../../docs/images/basic_rag_llamaindex_arch.png) + +## Prerequisites + +Complete the [common prerequisites](../../../../docs/common-prerequisites.md). + +## Build and Start the Containers + +1. 
Export your NVIDIA API key as an environment variable: + + ```text + export NVIDIA_API_KEY="nvapi-<...>" + ``` + +1. Start the containers: + + ```console + cd RAG/examples/basic_rag/llamaindex/ + docker compose up -d --build + ``` + + *Example Output* + + ```output + ✔ Network nvidia-rag Created + ✔ Container rag-playground Started + ✔ Container milvus-minio Started + ✔ Container chain-server Started + ✔ Container milvus-etcd Started + ✔ Container milvus-standalone Started + ``` + +1. Confirm the containers are running: + + ```console + docker ps --format "table {{.ID}}\t{{.Names}}\t{{.Status}}" + ``` + + *Example Output* + + ```output + CONTAINER ID NAMES STATUS + 39a8524829da rag-playground Up 2 minutes + bfbd0193dbd2 chain-server Up 2 minutes + ec02ff3cc58b milvus-standalone Up 3 minutes + 6969cf5b4342 milvus-minio Up 3 minutes (healthy) + 57a068d62fbb milvus-etcd Up 3 minutes (healthy) + ``` + +1. Open a web browser and access to use the RAG Playground. + + Refer to [Using the Sample Web Application](../../../../docs/using-sample-web-application.md) + for information about uploading documents and using the web interface. + +## Next Steps + +- [Vector Database Customizations](../../../../docs/vector-database.md) +- Stop the containers by running `docker compose down`. +- Use the [RAG Application: Text QA Chatbot](https://registry.ngc.nvidia.com/orgs/ohlfw0olaadg/teams/ea-participants/helm-charts/rag-app-text-chatbot) + Helm chart to deploy this example in Kubernetes. diff --git a/RetrievalAugmentedGeneration/__init__.py b/RAG/examples/basic_rag/llamaindex/__init__.py similarity index 95% rename from RetrievalAugmentedGeneration/__init__.py rename to RAG/examples/basic_rag/llamaindex/__init__.py index e42268fe..9ba9d431 100644 --- a/RetrievalAugmentedGeneration/__init__.py +++ b/RAG/examples/basic_rag/llamaindex/__init__.py @@ -11,4 +11,4 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -# limitations under the License. \ No newline at end of file +# limitations under the License. diff --git a/RetrievalAugmentedGeneration/examples/developer_rag/chains.py b/RAG/examples/basic_rag/llamaindex/chains.py similarity index 54% rename from RetrievalAugmentedGeneration/examples/developer_rag/chains.py rename to RAG/examples/basic_rag/llamaindex/chains.py index 65b4c6c2..3b61774b 100644 --- a/RetrievalAugmentedGeneration/examples/developer_rag/chains.py +++ b/RAG/examples/basic_rag/llamaindex/chains.py @@ -14,50 +14,41 @@ # limitations under the License. 
"""LLM Chains for executing Retrival Augmented Generation.""" -import os import logging -import nltk +import os from pathlib import Path -from typing import Generator, List, Dict, Any - -from llama_index.core.prompts.base import Prompt -from llama_index.core.readers import download_loader -from llama_index.core.query_engine import RetrieverQueryEngine -from llama_index.core.base.response.schema import StreamingResponse -from llama_index.core.node_parser import LangchainNodeParser -from llama_index.llms.langchain import LangChainLLM -from llama_index.embeddings.langchain import LangchainEmbedding -from RetrievalAugmentedGeneration.common.tracing import llama_index_cb_handler -from llama_index.core import Settings -from llama_index.core.callbacks import CallbackManager +from typing import Any, Dict, Generator, List from langchain_core.output_parsers.string import StrOutputParser from langchain_core.prompts.chat import ChatPromptTemplate +from llama_index.core import Settings +from llama_index.core.base.response.schema import StreamingResponse +from llama_index.core.callbacks import CallbackManager +from llama_index.core.node_parser import LangchainNodeParser +from llama_index.core.prompts.base import Prompt +from llama_index.core.query_engine import RetrieverQueryEngine +from llama_index.readers.file import PDFReader, UnstructuredReader -from RetrievalAugmentedGeneration.common.utils import ( +from RAG.src.chain_server.base import BaseExample +from RAG.src.chain_server.tracing import langchain_instrumentation_class_wrapper, llama_index_cb_handler +from RAG.src.chain_server.utils import ( LimitRetrievedNodesLength, + del_docs_vectorstore_llamaindex, get_config, get_doc_retriever, + get_docs_vectorstore_llamaindex, + get_embedding_model, get_llm, + get_prompts, get_text_splitter, get_vector_index, set_service_context, - get_embedding_model, - get_docs_vectorstore_llamaindex, - del_docs_vectorstore_llamaindex, -) -from RetrievalAugmentedGeneration.common.base import BaseExample -from RetrievalAugmentedGeneration.common.tracing import ( - langchain_instrumentation_class_wrapper, ) -# nltk downloader -# nltk.download("averaged_perceptron_tagger") - # prestage the embedding model _ = get_embedding_model() set_service_context() - +prompts = get_prompts() logger = logging.getLogger(__name__) text_splitter = None @@ -65,109 +56,131 @@ @langchain_instrumentation_class_wrapper class QAChatbot(BaseExample): + def ingest_docs(self, filepath: str, filename: str) -> None: + """Ingests documents to the VectorDB. + It's called when the POST endpoint of `/documents` API is invoked. - def ingest_docs(self, filepath: str, filename: str): - """Ingest documents to the VectorDB.""" + Args: + filepath (str): The path to the document file. + filename (str): The name of the document file. + + Raises: + ValueError: If there's an error during document ingestion or the file format is not supported. 
+ """ try: + # Set callback manager for observability Settings.callback_manager = CallbackManager([llama_index_cb_handler]) logger.info(f"Ingesting {filename} in vectorDB") _, ext = os.path.splitext(filename) + # Load data based on file extension if ext.lower() == ".pdf": - PDFReader = download_loader("PDFReader") loader = PDFReader() documents = loader.load_data(file=Path(filepath)) - else: - unstruct_reader = download_loader("UnstructuredReader") - loader = unstruct_reader() + loader = UnstructuredReader() documents = loader.load_data(file=Path(filepath), split_documents=False) - filename = filename[:-4] - + # Add filename as metadata to each document for document in documents: document.metadata = {"filename": filename, "common_field": "all"} + # do not generate embedding for filename and page_label document.excluded_embed_metadata_keys = ["filename", "page_label"] + # Get vectorstore instance, vectorstore is selected based on environment variable APP_VECTORSTORE_NAME defaults to milvus index = get_vector_index() global text_splitter + # Get text splitter instance, text splitter is selected based on environment variable APP_TEXTSPLITTER_MODELNAME + # tokenizer dimension of text splitter should be same as embedding model if not text_splitter: text_splitter = get_text_splitter() + + # Create nodes using existing text splitter node_parser = LangchainNodeParser(text_splitter) nodes = node_parser.get_nodes_from_documents(documents) + + # Ingest document in vectorstore index.insert_nodes(nodes) logger.info(f"Document {filename} ingested successfully") except Exception as e: logger.error(f"Failed to ingest document due to exception {e}") - raise ValueError( - "Failed to upload document. Please upload an unstructured text document." - ) - - def get_documents(self): - """Retrieves filenames stored in the vector store.""" + raise ValueError("Failed to upload document. Please upload an unstructured text document.") + + def get_documents(self) -> List[str]: + """Retrieves filenames stored in the vector store. + It's called when the GET endpoint of `/documents` API is invoked. + + Returns: + List[str]: List of filenames ingested in vectorstore. + """ return get_docs_vectorstore_llamaindex() - def delete_documents(self, filenames: List[str]): - """Delete documents from the vector index.""" + def delete_documents(self, filenames: List[str]) -> bool: + """Delete documents from the vector index. + It's called when the DELETE endpoint of `/documents` API is invoked. + + Args: + filenames (List[str]): List of filenames to be deleted from vectorstore. + """ return del_docs_vectorstore_llamaindex(filenames) - def llm_chain( - self, query: str, chat_history: List["Message"], **kwargs - ) -> Generator[str, None, None]: - """Execute a simple LLM chain using the components defined above.""" + def llm_chain(self, query: str, chat_history: List["Message"], **kwargs) -> Generator[str, None, None]: + """Execute a simple LLM chain using the components defined above. + It's called when the `/generate` API is invoked with `use_knowledge_base` set to `False`. + + Args: + query (str): Query to be answered by llm. + chat_history (List[Message]): Conversation history between user and chain. 
+ """ logger.info("Using llm to generate response directly without knowledge base.") set_service_context(**kwargs) # TODO Include chat_history - prompt = get_config().prompts.chat_template + prompt = prompts.get("chat_template", "") logger.info(f"Prompt used for response generation: {prompt}") system_message = [("system", prompt)] user_input = [("user", "{query_str}")] - prompt_template = ChatPromptTemplate.from_messages( - system_message + user_input - ) + prompt_template = ChatPromptTemplate.from_messages(system_message + user_input) llm = get_llm(**kwargs) + # Simple langchain chain to generate response based on user's query chain = prompt_template | llm | StrOutputParser() - return chain.stream( - {"query_str": query}, - config={"callbacks": [self.cb_handler]}, - ) + return chain.stream({"query_str": query}, config={"callbacks": [self.cb_handler]},) + + def rag_chain(self, query: str, chat_history: List["Message"], **kwargs) -> Generator[str, None, None]: + """Execute a Retrieval Augmented Generation chain using the components defined above. + It's called when the `/generate` API is invoked with `use_knowledge_base` set to `True`. - def rag_chain( - self, query: str, chat_history: List["Message"], **kwargs - ) -> Generator[str, None, None]: - """Execute a Retrieval Augmented Generation chain using the components defined above.""" + Args: + query (str): Query to be answered by llm. + chat_history (List[Message]): Conversation history between user and chain. + """ logger.info("Using rag to generate response from document") + # update llm model, embedding model and callback for llamaindex context set_service_context(**kwargs) retriever = get_doc_retriever(num_nodes=get_config().retriever.top_k) - qa_template = Prompt(get_config().prompts.rag_template) + qa_template = Prompt(prompts.get("rag_template", "")) logger.info(f"Prompt template used for response generation: {qa_template}") # Handling Retrieval failure nodes = retriever.retrieve(query) + # If no document is retrieved from docstore, send fallback response if not nodes: logger.warning("Retrieval failed to get any relevant context") - return iter( - [ - "No response generated from LLM, make sure your query is relavent to the ingested document." - ] - ) + return iter(["No response generated from LLM, make sure your query is relavent to the ingested document."]) # TODO Include chat_history + # create llamaindex query_engine object to generate response query_engine = RetrieverQueryEngine.from_args( - retriever, - text_qa_template=qa_template, - node_postprocessors=[LimitRetrievedNodesLength()], - streaming=True, + retriever, text_qa_template=qa_template, node_postprocessors=[LimitRetrievedNodesLength()], streaming=True, ) response = query_engine.query(query) @@ -175,16 +188,21 @@ def rag_chain( if isinstance(response, StreamingResponse): return response.response_gen - logger.warning( - "No response generated from LLM, make sure you've ingested document." - ) + logger.warning("No response generated from LLM, make sure you've ingested document.") return StreamingResponse(iter(["No response generated from LLM, make sure you have ingested document from the Knowledge Base Tab."])).response_gen # type: ignore def document_search(self, content: str, num_docs: int) -> List[Dict[str, Any]]: - """Search for the most relevant documents for the given search parameters.""" + """Search for the most relevant documents for the given search parameters. + It's called when the `/search` API is invoked. 
+ + Args: + content (str): Query to be searched from vectorstore. + num_docs (int): Number of similar docs to be retrieved from vectorstore. + """ try: - retriever = get_doc_retriever(num_nodes=get_config().retriever.top_k) + # Get retriever instance + retriever = get_doc_retriever(num_nodes=num_docs) nodes = retriever.retrieve(content) output = [] for node in nodes: diff --git a/RAG/examples/basic_rag/llamaindex/docker-compose.yaml b/RAG/examples/basic_rag/llamaindex/docker-compose.yaml new file mode 100644 index 00000000..520f7037 --- /dev/null +++ b/RAG/examples/basic_rag/llamaindex/docker-compose.yaml @@ -0,0 +1,99 @@ +include: + - path: + - ../../local_deploy/docker-compose-vectordb.yaml + - ../../local_deploy/docker-compose-nim-ms.yaml + +services: + chain-server: + container_name: chain-server + image: chain-server:${TAG:-latest} + build: + # Set context to repo's root directory + context: ../../../../ + dockerfile: RAG/src/chain_server/Dockerfile + args: + # Build args, used to copy relevant directory inside the container relative to GenerativeAIExamples/RAG/examples + EXAMPLE_PATH: 'basic_rag/llamaindex' + volumes: + - ./prompt.yaml:/prompt.yaml + # start the server on port 8081 + command: --port 8081 --host 0.0.0.0 + environment: + # Path to example directory relative to GenerativeAIExamples/RAG/examples + EXAMPLE_PATH: 'basic_rag/llamaindex' + # URL on which vectorstore is hosted + APP_VECTORSTORE_URL: "http://milvus:19530" + # Type of vectordb used to store embedding supported type milvus, pgvector + APP_VECTORSTORE_NAME: "milvus" + APP_EMBEDDINGS_MODELNAME: ${APP_EMBEDDINGS_MODELNAME:-nvidia/nv-embedqa-e5-v5} + # embedding model engine used for inference, supported type nvidia-ai-endpoints, huggingface + APP_EMBEDDINGS_MODELENGINE: ${APP_EMBEDDINGS_MODELENGINE:-nvidia-ai-endpoints} + # url on which embedding model is hosted. If "", Nvidia hosted API is used + APP_EMBEDDINGS_SERVERURL: ${APP_EMBEDDINGS_SERVERURL:-""} + # url on which llm model is hosted. 
If "", Nvidia hosted API is used + APP_LLM_SERVERURL: ${APP_LLM_SERVERURL:-""} + APP_LLM_MODELNAME: ${APP_LLM_MODELNAME:-"meta/llama3-8b-instruct"} + # embedding model engine used for inference, supported type nvidia-ai-endpoints + APP_LLM_MODELENGINE: ${APP_LLM_MODELENGINE:-nvidia-ai-endpoints} + NVIDIA_API_KEY: ${NVIDIA_API_KEY} + # vectorstore collection name to store embeddings + COLLECTION_NAME: ${COLLECTION_NAME:-developer_rag} + APP_RETRIEVER_TOPK: 4 + APP_RETRIEVER_SCORETHRESHOLD: 0.25 + # observability server url + OTEL_EXPORTER_OTLP_ENDPOINT: http://otel-collector:4317 + OTEL_EXPORTER_OTLP_PROTOCOL: grpc + # enable observability in chain server + ENABLE_TRACING: false + # text splitter model name, it's fetched from huggingface + APP_TEXTSPLITTER_MODELNAME: Snowflake/snowflake-arctic-embed-l + APP_TEXTSPLITTER_CHUNKSIZE: 506 + APP_TEXTSPLITTER_CHUNKOVERLAP: 200 + # Log level for server, supported level NOTSET, DEBUG, INFO, WARN, ERROR, CRITICAL + LOGLEVEL: ${LOGLEVEL:-INFO} + ports: + - "8081:8081" + expose: + - "8081" + shm_size: 5gb + depends_on: + nemollm-embedding: + condition: service_healthy + required: false + nemollm-inference: + condition: service_healthy + required: false + + + rag-playground: + container_name: rag-playground + image: rag-playground:${TAG:-latest} + build: + # Set context to repo's root directory + context: ../../../../RAG/src/rag_playground/ + dockerfile: Dockerfile + args: + # select UI type, supported model default, speech + PLAYGROUND_MODE: ${PLAYGROUND_MODE:-default} + command: --port 8090 + environment: + # URL or chain server container + APP_SERVERURL: http://chain-server + APP_SERVERPORT: 8081 + # model name displayed on UI + APP_MODELNAME: ${APP_LLM_MODELNAME:-"meta/llama3-8b-instruct"} + # observability server url + OTEL_EXPORTER_OTLP_ENDPOINT: http://otel-collector:4317 + OTEL_EXPORTER_OTLP_PROTOCOL: grpc + # enable observability in rag playground + ENABLE_TRACING: false + ports: + - "8090:8090" + expose: + - "8090" + depends_on: + - chain-server + +networks: + default: + name: nvidia-rag diff --git a/RAG/examples/basic_rag/llamaindex/prompt.yaml b/RAG/examples/basic_rag/llamaindex/prompt.yaml new file mode 100644 index 00000000..4bf764a0 --- /dev/null +++ b/RAG/examples/basic_rag/llamaindex/prompt.yaml @@ -0,0 +1,10 @@ +chat_template: | + You are a helpful, respectful and honest assistant. + Always answer as helpfully as possible, while being safe. + Please ensure that your responses are positive in nature. + +rag_template: | + Use the following context to answer the user's question. If you don't know the answer, + just say that you don't know, don't try to make up an answer. + Context: {context_str} Question: {query_str} Only return the helpful + answer below and nothing else. 
Helpful answer: \ No newline at end of file diff --git a/RAG/examples/local_deploy/.env b/RAG/examples/local_deploy/.env new file mode 100644 index 00000000..3aff5614 --- /dev/null +++ b/RAG/examples/local_deploy/.env @@ -0,0 +1,9 @@ +# Set this when using on-prem models +NGC_API_KEY=${NGC_API_KEY} + +# Path where models will be stored and cached for NIM's +# NOTE: This should be an absolute path and not relative path +MODEL_DIRECTORY=${PWD} + +# Default value of user id if not provided explicitly +USERID=1000 \ No newline at end of file diff --git a/RAG/examples/local_deploy/README.md b/RAG/examples/local_deploy/README.md new file mode 100644 index 00000000..216da91d --- /dev/null +++ b/RAG/examples/local_deploy/README.md @@ -0,0 +1,110 @@ + + +# On Premises Deployment Using NVIDIA NIM microservices with GPUs + +You can adapt any example to use on premises machines and NVIDIA NIM microservices. +By performing the additional prerequisites that are required to get access to the containers and use GPUs with Docker, +you can use local machines with GPUs and local microservices instead of NVIDIA API Catalog endpoints. + +## Prerequisites + +- You have an active subscription to an NVIDIA AI Enterprise product or you are an [NVIDIA Developer Program](https://developer.nvidia.com/developer-program) member. + +- Complete the [common prerequisites](../../../docs/common-prerequisites.md). + + Ensure that you configure the host with the NVIDIA Container Toolkit. + +- A host with at least two NVIDIA A100, H100, or L40S GPUs. + + You need at least one GPU for the inference container and one GPU for the embedding container. + By default, Milvus requires one GPU as well. + +- You have an NGC API key. + Refer to [Generating NGC API Keys](https://docs.nvidia.com/ngc/gpu-cloud/ngc-user-guide/index.html#generating-api-key) + in the _NVIDIA NGC User Guide_ for more information. + +## Start the Containers + +1. Export NGC related environment variables: + + ```text + export NGC_API_KEY="M2..." + ``` + +1. Create a directory to cache the models and export the path to the cache as an environment variable: + + ```console + mkdir -p ~/.cache/model-cache + export MODEL_DIRECTORY=~/.cache/model-cache + ``` + +1. Export the connection information for the inference and embedding services: + + ```console + export APP_LLM_SERVERURL="nemollm-inference:8000" + export APP_EMBEDDINGS_SERVERURL="nemollm-embedding:8000" + ``` + +1. Start the example-specific containers. + + Replace the path in the following `cd` command with the path to the example that you want to run. + + ```console + cd RAG/examples/basic_rag/langchain + USERID=$(id -u) docker compose --profile local-nim --profile milvus up -d --build + ``` + + *Example Output* + + ```output + ✔ Container milvus-minio Running + ✔ Container chain-server Running + ✔ Container nemo-retriever-embedding-microservice Started + ✔ Container milvus-etcd Running + ✔ Container nemollm-inference-microservice Started + ✔ Container rag-playground Started + ✔ Container milvus-standalone Started + ``` + +1. Optional: Deploy Reranking service if needed by your example. This is required currently for only the [Multi-Turn Rag Example](../advanced_rag/multi_turn_rag/). + ```console + export APP_RANKING_SERVERURL="ranking-ms:8000" + cd RAG/examples/local_deploy + USERID=$(id -u) docker compose -f docker-compose-nim-ms.yaml up -d ranking-ms + ``` + +2. Open a web browser and access to use the RAG Playground. 
+ + Refer to [Using the Sample Web Application](../../../docs/using-sample-web-application.md) + for information about uploading documents and using the web interface. + +## Tips for GPU Use + +When you start the microservices in the `local_deploy` directory, you can specify the GPUs use by setting the following environment variables before you run `docker compose up`. + +INFERENCE_GPU_COUNT: + Specify the number of GPUs to use with the NVIDIA NIM for LLMs container. + +EMBEDDING_MS_GPU_ID: + Specify the GPU IDs to use with the NVIDIA NeMo Retriever Text Embedding NIM container. + +RANKING_MS_GPU_ID: + Specify the GPU IDs to use with the NVIDIA NeMo Retriever Text Reranking NIM container. + +VECTORSTORE_GPU_DEVICE_ID: + Specify the GPU IDs to use with Milvus. + +## Related Information + +- [*NVIDIA NIM for LLMs*](https://docs.nvidia.com/nim/large-language-models/latest/index.html) + +The preceding document frequently demonstrates using the curl command to interact with the microservices. +You can determine the IP address for each container by running `docker network inspect nvidia-rag | jq '.[].Containers[] | {Name, IPv4Address}'`. + +## Next Steps + +- [Vector Database Customizations](../../../docs/vector-database.md) +- Stop the containers by running `docker compose --profile local-nim down`. diff --git a/RAG/examples/local_deploy/docker-compose-nim-ms.yaml b/RAG/examples/local_deploy/docker-compose-nim-ms.yaml new file mode 100644 index 00000000..eb44f8ac --- /dev/null +++ b/RAG/examples/local_deploy/docker-compose-nim-ms.yaml @@ -0,0 +1,86 @@ +services: + nemollm-inference: + container_name: nemollm-inference-microservice + image: nvcr.io/nim/meta/llama3-8b-instruct:1.0.0 + volumes: + - ${MODEL_DIRECTORY}:/opt/nim/.cache + user: "${USERID}" + ports: + - "8000:8000" + expose: + - "8000" + environment: + NGC_API_KEY: ${NGC_API_KEY} + shm_size: 20gb + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: ${INFERENCE_GPU_COUNT:-all} + # device_ids: ['${LLM_MS_GPU_ID:-0}'] + capabilities: [gpu] + healthcheck: + test: ["CMD", "python3", "-c", "import requests; requests.get('http://localhost:8000/v1/health/ready')"] + interval: 10s + timeout: 20s + retries: 100 + profiles: ["local-nim", "nemo-retriever"] + + nemollm-embedding: + container_name: nemo-retriever-embedding-microservice + image: nvcr.io/nim/nvidia/nv-embedqa-e5-v5:1.0.0 + volumes: + - ${MODEL_DIRECTORY}:/opt/nim/.cache + ports: + - "9080:8000" + expose: + - "8000" + environment: + NGC_API_KEY: ${NGC_API_KEY} + user: "${USERID}" + shm_size: 16GB + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ['${EMBEDDING_MS_GPU_ID:-0}'] + capabilities: [gpu] + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/v1/health/ready"] + interval: 30s + timeout: 20s + retries: 3 + start_period: 10m + profiles: ["local-nim", "nemo-retriever"] + + ranking-ms: + container_name: nemo-retriever-ranking-microservice + image: nvcr.io/nim/nvidia/nv-rerankqa-mistral-4b-v3:1.0.0 + volumes: + - ${MODEL_DIRECTORY}:/opt/nim/.cache + ports: + - "1976:8000" + expose: + - "8000" + environment: + NGC_API_KEY: ${NGC_API_KEY} + user: "${USERID}" + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/health"] + interval: 10s + timeout: 20s + retries: 100 + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ['${RANKING_MS_GPU_ID:-0}'] + capabilities: [gpu] + profiles: ["nemo-retriever"] + +networks: + default: + name: nvidia-rag diff --git 
a/deploy/compose/docker-compose-vectordb.yaml b/RAG/examples/local_deploy/docker-compose-vectordb.yaml similarity index 88% rename from deploy/compose/docker-compose-vectordb.yaml rename to RAG/examples/local_deploy/docker-compose-vectordb.yaml index 98c625ab..cd76bc98 100644 --- a/deploy/compose/docker-compose-vectordb.yaml +++ b/RAG/examples/local_deploy/docker-compose-vectordb.yaml @@ -12,7 +12,7 @@ services: - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password} - POSTGRES_USER=${POSTGRES_USER:-postgres} - POSTGRES_DB=${POSTGRES_DB:-api} - profiles: ["llm-embedding"] + profiles: ["pgvector"] etcd: @@ -31,7 +31,7 @@ services: interval: 30s timeout: 20s retries: 3 - profiles: ["llm-embedding", "nemo-retriever"] + profiles: ["nemo-retriever", "milvus", ""] minio: container_name: milvus-minio @@ -50,11 +50,11 @@ services: interval: 30s timeout: 20s retries: 3 - profiles: ["llm-embedding", "nemo-retriever"] + profiles: ["nemo-retriever", "milvus", ""] milvus: container_name: milvus-standalone - image: milvusdb/milvus:v2.4.4-gpu + image: milvusdb/milvus:v2.4.5 command: ["milvus", "run", "standalone"] environment: ETCD_ENDPOINTS: etcd:2379 @@ -74,14 +74,7 @@ services: depends_on: - "etcd" - "minio" - deploy: - resources: - reservations: - devices: - - driver: nvidia - capabilities: ["gpu"] - device_ids: ['${VECTORSTORE_GPU_DEVICE_ID:-0}'] - profiles: ["llm-embedding", "nemo-retriever"] + profiles: ["nemo-retriever", "milvus", ""] elasticsearch: image: "docker.elastic.co/elasticsearch/elasticsearch:8.12.0" diff --git a/RAG/notebooks/README.md b/RAG/notebooks/README.md new file mode 100644 index 00000000..b1aa52e1 --- /dev/null +++ b/RAG/notebooks/README.md @@ -0,0 +1,54 @@ + + +# Running RAG Example Notebooks + + +* [About the Notebooks](#about-the-notebooks) +* [Prerequisites](#prerequisites) +* [Running the Notebooks](#running-the-notebooks) + + + +## About the Notebooks + +The notebooks show how to use the `langchain-nvidia-ai-endpoints` and `llama-index-embeddings-nvidia` Python packages. +These packages provide the basics for developing a RAG application and performing inference either from NVIDIA API Catalog endpoints or a local deployment of NVIDIA microservices. + +## Prerequisites + +- You have Python 3 installed. +- Complete the [common prerequisites](../../docs/common-prerequisites.md). + +## Running the Notebooks + +1. Export your NVIDIA API key as an environment variable: + + ```text + export NVIDIA_API_KEY="nvapi-<...>" + ``` + +1. Create a virtual environment: + + ```console + python3 -m venv .venv + source .venv/bin/activate + ``` + +1. Install JupyterLab in the virtual environment: + + ```console + pip3 install jupyterlab + ``` + +1. Start the JupyterLab server: + + ```console + jupyter lab --allow-root --ip=0.0.0.0 --NotebookApp.token='' --port=8889 + ``` + +1. Open a web browser and access the JupyterLab server on port 8889. + + Browse to the `RAG/notebooks` directory to open and execute the cells of the notebooks. 
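+
+A minimal sketch of the pattern the notebooks build on, assuming `NVIDIA_API_KEY` is exported and the `langchain-nvidia-ai-endpoints` package is installed (the model names below are examples only; any chat or embedding model from the API Catalog can be substituted):
+
+```python
+from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings
+
+# Chat and embedding models hosted on the NVIDIA API Catalog (example model names).
+llm = ChatNVIDIA(model="mistralai/mixtral-8x7b-instruct-v0.1", max_tokens=1024)
+embedder = NVIDIAEmbeddings(model="NV-Embed-QA", truncate="END")
+
+# One chat completion and one query embedding through the standard LangChain interfaces.
+print(llm.invoke("What is retrieval-augmented generation?").content)
+print(len(embedder.embed_query("What is retrieval-augmented generation?")))
+```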
diff --git a/experimental/multimodal_assistant/llm/__init__.py b/RAG/notebooks/__init__.py similarity index 100% rename from experimental/multimodal_assistant/llm/__init__.py rename to RAG/notebooks/__init__.py diff --git a/notebooks/04_Agent_use_tools_leveraging_NVIDIA_AI_endpoints.ipynb b/RAG/notebooks/langchain/Agent_use_tools_leveraging_NVIDIA_AI_endpoints.ipynb similarity index 86% rename from notebooks/04_Agent_use_tools_leveraging_NVIDIA_AI_endpoints.ipynb rename to RAG/notebooks/langchain/Agent_use_tools_leveraging_NVIDIA_AI_endpoints.ipynb index b38f28dd..32f9fd38 100644 --- a/notebooks/04_Agent_use_tools_leveraging_NVIDIA_AI_endpoints.ipynb +++ b/RAG/notebooks/langchain/Agent_use_tools_leveraging_NVIDIA_AI_endpoints.ipynb @@ -5,25 +5,15 @@ "id": "481943fc", "metadata": {}, "source": [ - "# Multimodal Models from NVIDIA AI Catelog and AI Catalog with LangChain Agent \n", - "\n", + "# Multimodal Models from NVIDIA AI Catalog and LangChain Agent \n", "\n", "## Prerequisites\n", "\n", - "To run this notebook, you need the following:\n", - "\n", - "1. Performed the [setup](https://python.langchain.com/docs/integrations/text_embedding/nvidia_ai_endpoints#setup) and generated an API key.\n", - "\n", - "2. Installed Python dependencies from [requirements.txt](https://github.com/NVIDIA/GenerativeAIExamples/blob/main/notebooks/requirements.txt).\n", - "\n", - "3. Installed additional packages for this notebook: \n", - "\n", - " pip install gradio matplotlib scikit-image\n", - "\n", + "To run this notebook, you need to [follow the steps from here](https://python.langchain.com/docs/integrations/text_embedding/nvidia_ai_endpoints#setup) and generate an API key from [NVIDIA API Catalog](https://build.nvidia.com/).\n", "\n", "This notebook covers the following custom plug-in components:\n", "\n", - "- LLM using **ai-mixtral-8x7b-instruct**\n", + "- LLM using **mixtral-8x7b-instruct**\n", " \n", "- A NVIDIA AI Catalog **Deplot** as one of the tool\n", "\n", @@ -33,7 +23,7 @@ "\n", "At the end of the day, as below illustrated, we would like to have a UI which allow user to upload image of their choice and have the agent choose tools to do visual reasoning. \n", "\n", - "![interactive UI](./imgs/visual_reasoning.png) \n", + "![interactive UI](./data/imgs/visual_reasoning.png) \n", "Note: As one can see, since we are using NVIDIA AI Catalog as an API, there is no further requirement in the prerequisites about GPUs as compute hardware\n" ] }, @@ -44,9 +34,9 @@ "metadata": {}, "outputs": [], "source": [ - "# uncomment the below to install additional python packages.\n", - "#!pip install unstructured\n", - "#!pip install matplotlib scikit-image\n", + "# Install python packages.\n", + "!pip install langchain==0.2.5\n", + "!pip install langchain-nvidia-ai-endpoints==0.1.2\n", "!pip install gradio==3.48.0" ] }, @@ -71,12 +61,12 @@ "\n", "# del os.environ['NVIDIA_API_KEY'] ## delete key and reset\n", "if os.environ.get(\"NVIDIA_API_KEY\", \"\").startswith(\"nvapi-\"):\n", + " nvapi_key = os.environ.get(\"NVIDIA_API_KEY\", \"\")\n", " print(\"Valid NVIDIA_API_KEY already in environment. Delete to reset\")\n", "else:\n", " nvapi_key = getpass.getpass(\"NVAPI Key (starts with nvapi-): \")\n", " assert nvapi_key.startswith(\"nvapi-\"), f\"{nvapi_key[:5]}... 
is not a valid key\"\n", - " os.environ[\"NVIDIA_API_KEY\"] = nvapi_key\n", - "global nvapi_key" + " os.environ[\"NVIDIA_API_KEY\"] = nvapi_key" ] }, { @@ -84,7 +74,7 @@ "id": "de94bb16", "metadata": {}, "source": [ - "### Step 2 - wrap the Fuyu API call into a function and verify by supplying an image to get a respond" + "### Step 2 - Wrap the Fuyu API call into a function and verify by supplying an image to get a respond" ] }, { @@ -94,8 +84,6 @@ "metadata": {}, "outputs": [], "source": [ - "import openai, httpx, sys\n", - "\n", "import base64, io\n", "from PIL import Image\n", "import requests, json\n", @@ -128,11 +116,11 @@ "def fuyu(prompt,img_path):\n", " invoke_url = \"https://ai.api.nvidia.com/v1/vlm/adept/fuyu-8b\"\n", " stream = True\n", - " \n", - " \n", + "\n", + "\n", " image_b64=img2base64_string(img_path)\n", - " \n", - " \n", + "\n", + "\n", " assert len(image_b64) < 200_000, \\\n", " \"To upload larger images, use the assets API (see docs)\"\n", "\n", @@ -140,7 +128,7 @@ " \"Authorization\": f\"Bearer {nvapi_key}\",\n", " \"Accept\": \"text/event-stream\" if stream else \"application/json\"\n", " }\n", - " \n", + "\n", " payload = {\n", " \"messages\": [\n", " {\n", @@ -154,9 +142,9 @@ " \"seed\": 0,\n", " \"stream\": stream\n", " }\n", - " \n", + "\n", " response = requests.post(invoke_url, headers=headers, json=payload)\n", - " \n", + "\n", " if stream:\n", " output=[]\n", " for line in response.iter_lines():\n", @@ -173,7 +161,7 @@ "id": "b637395f", "metadata": {}, "source": [ - "fetch a test image of a pair of white sneakers and verify the function works" + "Fetch a test image of a pair of white sneakers and verify the function works" ] }, { @@ -183,7 +171,7 @@ "metadata": {}, "outputs": [], "source": [ - "!wget \"https://docs.google.com/uc?export=download&id=12ZpBBFkYu-jzz1iz356U5kMikn4uN9ww\" -O ./toy_data/jordan.png" + "!wget \"https://docs.google.com/uc?export=download&id=12ZpBBFkYu-jzz1iz356U5kMikn4uN9ww\" -O ./data/imgs/jordan.png" ] }, { @@ -193,7 +181,7 @@ "metadata": {}, "outputs": [], "source": [ - "img_path=\"./toy_data/jordan.png\"\n", + "img_path=\"./data/imgs/jordan.png\"\n", "prompt=\"describe the image\"\n", "out=fuyu(prompt,img_path)\n", "out" @@ -216,24 +204,7 @@ "source": [ "# test run and see that you can genreate a respond successfully\n", "from langchain_nvidia_ai_endpoints import ChatNVIDIA\n", - "llm = ChatNVIDIA(model=\"ai-mixtral-8x7b-instruct\", nvidia_api_key=nvapi_key, max_tokens=1024)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "288e14ae", - "metadata": {}, - "outputs": [], - "source": [ - "#Set up Prerequisites for Image Captioning App User Interface\n", - "import os\n", - "import io\n", - "import IPython.display\n", - "from PIL import Image\n", - "import base64\n", - "import requests\n", - "import gradio as gr\n" + "llm = ChatNVIDIA(model=\"mistralai/mixtral-8x7b-instruct-v0.1\", nvidia_api_key=nvapi_key, max_tokens=1024)\n" ] }, { @@ -241,7 +212,7 @@ "id": "e8fffc74", "metadata": {}, "source": [ - "### Step 4- wrap Deplot and Fuyu as tools for later usage" + "### Step 4- Wrap Deplot and Fuyu as tools for later usage" ] }, { @@ -252,24 +223,15 @@ "outputs": [], "source": [ "#Set up Prerequisites for Image Captioning App User Interface\n", - "import os\n", "import io\n", - "import IPython.display\n", "from PIL import Image\n", "import base64\n", "import requests\n", - "import gradio as gr\n", - "\n", "from langchain.tools import BaseTool\n", - "from transformers import BlipProcessor, 
BlipForConditionalGeneration, DetrImageProcessor, DetrForObjectDetection\n", "from PIL import Image\n", - "import torch\n", - "#\n", - "import os\n", - "from tempfile import NamedTemporaryFile\n", "from langchain.agents import initialize_agent\n", "from langchain.chains.conversation.memory import ConversationBufferWindowMemory\n", - " \n", + "\n", "def fetch_outputs(output):\n", " collect_streaming_outputs=[]\n", " for o in output:\n", @@ -399,7 +361,7 @@ "id": "467237c3", "metadata": {}, "source": [ - "### Step 5 - initaite the agent with tools we previously defined " + "### Step 5 - Initiate the agent with tools we previously defined " ] }, { @@ -446,7 +408,7 @@ "metadata": {}, "outputs": [], "source": [ - "img_path=\"./toy_data/jordan.png\"\n", + "img_path=\"./data/imgs/jordan.png\"\n", "response = agent.invoke({\"input\":f' this is the image path: {img_path}'})\n", "print(response['output'])\n" ] diff --git a/notebooks/07_Chat_with_nvidia_financial_reports.ipynb b/RAG/notebooks/langchain/Chat_with_nvidia_financial_reports.ipynb similarity index 84% rename from notebooks/07_Chat_with_nvidia_financial_reports.ipynb rename to RAG/notebooks/langchain/Chat_with_nvidia_financial_reports.ipynb index 9ad5c16a..480dc74d 100644 --- a/notebooks/07_Chat_with_nvidia_financial_reports.ipynb +++ b/RAG/notebooks/langchain/Chat_with_nvidia_financial_reports.ipynb @@ -5,9 +5,9 @@ "id": "4ff7339a", "metadata": {}, "source": [ - "# Notebook: Chatting with NVIDIA Financial Reports\n", + "# Chatting with NVIDIA Financial Reports\n", "\n", - " In this notebook, we are going to use milvus as vectorstore, the **mixtral_8x7b as LLM** and **ai-embed-qa-4 embedding** provided by [NVIDIA_AI_Endpoint](https://python.langchain.com/docs/integrations/text_embedding/nvidia_ai_endpoints) as LLM and embedding model, and build a simply RAG example for chatting with NVIDIA Financial Reports.\n", + " In this notebook, we are going to use milvus as vectorstore, the **mixtral_8x7b as LLM** and **NV-Embed-QA embedding** provided by [NVIDIA_AI_Endpoint](https://python.langchain.com/docs/integrations/text_embedding/nvidia_ai_endpoints) as LLM and embedding model, and build a simply RAG example for chatting with NVIDIA Financial Reports.\n", "\n", "\n", "NVIDIA financial reports are available pubicly in nvidianews. \n", @@ -16,7 +16,50 @@ "\n", "https://nvidianews.nvidia.com/news/nvidia-announces-financial-results-for-first-quarter-fiscal-2024\n", "\n", - "\"drawing\"" + "\"drawing\"" + ] + }, + { + "cell_type": "markdown", + "id": "49b2bf03", + "metadata": {}, + "source": [ + "Before starting with the notebook let's install the required python packages." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25407f7e", + "metadata": {}, + "outputs": [], + "source": [ + "# Install required python packages.\n", + "!pip install langchain==0.2.10\n", + "!pip install langchain-community==0.2.9\n", + "!pip install langchain-nvidia-ai-endpoints==0.1.2\n", + "!pip install markdownify==0.12.1\n", + "!pip install pymilvus==2.3.1\n", + "!pip install sentence-transformers==3.0.1" + ] + }, + { + "cell_type": "markdown", + "id": "3471907b", + "metadata": {}, + "source": [ + "You will also need to run milvus vector database. This repository houses a docker compose file using which you can deploy milvus. Execute the below command from the root of the GenerativeAIExamples repository to start the Milvus containers." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "44f16047", + "metadata": {}, + "outputs": [], + "source": [ + "# Execute this on your terminal from the root of your repository\n", + "# docker compose -f RAG/examples/local_deploy/docker-compose-vectordb.yaml up -d" ] }, { @@ -63,12 +106,8 @@ "source": [ "# test run and see that you can genreate a respond successfully\n", "from langchain_nvidia_ai_endpoints import ChatNVIDIA,NVIDIAEmbeddings\n", - "llm = ChatNVIDIA(model=\"ai-mixtral-8x7b-instruct\", nvidia_api_key=nvapi_key, max_tokens=1024)\n", - "from langchain.vectorstores import Milvus\n", - "import torch\n", - "import time\n", - "embedder_document = NVIDIAEmbeddings(model=\"ai-embed-qa-4\", model_type=\"passage\")\n", - "embedder_query = NVIDIAEmbeddings(model=\"ai-embed-qa-4\", model_type=\"query\")" + "llm = ChatNVIDIA(model=\"mistralai/mixtral-8x7b-instruct-v0.1\", max_tokens=1024)\n", + "embedder_document = NVIDIAEmbeddings(model=\"NV-Embed-QA\", truncate=\"END\")" ] }, { @@ -129,6 +168,7 @@ "# extract the url, title, text content, and tables in the html\n", "from bs4 import BeautifulSoup\n", "import markdownify\n", + "\n", "def extract_url_title_time(soup):\n", " url = \"\"\n", " title = \"\"\n", @@ -260,17 +300,14 @@ "outputs": [], "source": [ "COLLECTION_NAME = \"NVIDIA_Finance\"\n", - "from langchain.vectorstores import Milvus\n", - "vectorstore = Milvus(\n", - " embedding_function=embedder_document,\n", + "from langchain_community.vectorstores import Milvus\n", + "vectorstore = Milvus.from_documents(\n", + " documents,\n", + " embedder_document,\n", " collection_name=COLLECTION_NAME,\n", - " connection_args={\n", - " \"host\": \"milvus\",\n", - " \"port\": \"19530\"},\n", - " drop_old = True,\n", - " auto_id = True\n", - " )\n", - "vectorstore.add_documents(documents)\n", + " connection_args={\"uri\": 'http://milvus:19530'}, # replace this with the ip of the workstation where milvus is running\n", + " drop_old=True,\n", + ")\n", "docs = vectorstore.similarity_search(\"what are 2024 Q3 revenues? 
\")" ] }, @@ -360,7 +397,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.7" + "version": "3.10.13" } }, "nbformat": 4, diff --git a/notebooks/06_LangGraph_HandlingAgent_IntermediateSteps.ipynb b/RAG/notebooks/langchain/LangGraph_HandlingAgent_IntermediateSteps.ipynb similarity index 89% rename from notebooks/06_LangGraph_HandlingAgent_IntermediateSteps.ipynb rename to RAG/notebooks/langchain/LangGraph_HandlingAgent_IntermediateSteps.ipynb index 8a85c771..e1f18b27 100644 --- a/notebooks/06_LangGraph_HandlingAgent_IntermediateSteps.ipynb +++ b/RAG/notebooks/langchain/LangGraph_HandlingAgent_IntermediateSteps.ipynb @@ -13,9 +13,9 @@ "We demonstrate how to handle the logic of the intermediate steps from the agent leveraging different provided tools within langGraph.\n", "\n", "\n", - "- We will be leveraging LLM [ai-mixtral-8x7b-instruct from NVIDIA API Catalog](https://build.nvidia.com/mistralai/mixtral-8x7b-instruct).\n", + "- We will be leveraging LLM [mixtral-8x7b-instruct-v0.1 from NVIDIA API Catalog](https://build.nvidia.com/mistralai/mixtral-8x7b-instruct).\n", "\n", - "- Simple Faiss Retriever as one of the tools with the [ai-embed-qa-4 from NVIDIA API Catalog](https://build.nvidia.com/nvidia/embed-qa-4).\n", + "- Simple Faiss Retriever as one of the tools with the [NV-Embed-QA from NVIDIA API Catalog](https://build.nvidia.com/nvidia/embed-qa-4).\n", "\n", "- Wikipedia (the pip installable package) as one of the tools.\n", "\n", @@ -24,13 +24,7 @@ "\n", "## Prerequisites \n", "\n", - "To run this notebook, you need the following:\n", - "\n", - "1. Already completed the [setup](https://python.langchain.com/docs/integrations/text_embedding/nvidia_ai_endpoints#setup) and generated an API key.\n", - "2. Installed necesary Python dependencies in [requirements.txt](https://github.com/NVIDIA/GenerativeAIExamples/blob/main/notebooks/requirements.txt) \n", - "\n", - "Change `faiss-gpu` to `faiss-cpu` in the `requirements.txt` file if you do not have access to a GPU.\n", - "\n" + "To run this notebook, you need to complete the [setup](https://python.langchain.com/docs/integrations/text_embedding/nvidia_ai_endpoints#setup) and generate an API key." ] }, { @@ -40,7 +34,7 @@ "source": [ "## Install additional Python packages \n", "\n", - "Install the additional packages that required for this example, assuming that installed all the python packages from the [requirements.txt](https://github.com/NVIDIA/GenerativeAIExamples/blob/main/notebooks/requirements.txt) file." 
+ "Install the additional packages that required for this example" ] }, { @@ -51,11 +45,12 @@ "outputs": [], "source": [ "!pip install --upgrade pip\n", - "!pip install wikipedia==1.4.0\n", + "!pip install langchain==0.2.5\n", + "!pip install langchain-nvidia-ai-endpoints==0.1.2\n", "!pip install langchain-community==0.2.2\n", - "!pip install langchain==0.2.2\n", "!pip install langgraph==0.0.62\n", - "!pip install faiss-gpu==1.7.2" + "!pip install faiss-gpu==1.7.2\n", + "!pip install wikipedia==1.4.0" ] }, { @@ -119,7 +114,7 @@ "from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings\n", "\n", "llm = ChatNVIDIA(model=\"mistralai/mixtral-8x7b-instruct-v0.1\", nvidia_api_key=nvapi_key, max_tokens=2048)\n", - "embedder = NVIDIAEmbeddings(model=\"NV-Embed-QA\")\n" + "embedder = NVIDIAEmbeddings(model=\"NV-Embed-QA\", truncate=\"END\")\n" ] }, { @@ -129,7 +124,7 @@ "source": [ "## Step 3 - Retriever from FAISS vector store\n", "\n", - "We need to process a toy example, here we use `Sweden.txt` from the `toy_data` folder." + "We need to process a toy example, here we use `Sweden.txt` from the `data` folder." ] }, { @@ -139,24 +134,14 @@ "metadata": {}, "outputs": [], "source": [ - "import os\n", - "from tqdm import tqdm\n", - "from pathlib import Path\n", - "import faiss\n", - "from operator import itemgetter\n", "from langchain.vectorstores import FAISS\n", - "from langchain_core.output_parsers import StrOutputParser\n", - "from langchain_core.prompts import ChatPromptTemplate\n", - "from langchain_core.runnables import RunnablePassthrough\n", "from langchain.text_splitter import CharacterTextSplitter\n", - "from langchain_nvidia_ai_endpoints import ChatNVIDIA\n", - "import faiss\n", "\n", "# We need to process the text data and prepare them.\n", "p = \"Sweden.txt\"\n", "data = []\n", "sources = []\n", - "path2file = \"./toy_data/\" + p\n", + "path2file = \"./data/\" + p\n", "with open(path2file, encoding=\"utf-8\") as f:\n", " lines = f.readlines()\n", " for line in lines:\n", @@ -177,7 +162,7 @@ "\n", "# you only need to do this once, in the future, when re-run this notebook, skip to below and load the vector store from disk\n", "store = FAISS.from_texts(docs, embedder , metadatas=metadatas)\n", - "store.save_local('/workspace/save_embedding/sv')\n" + "store.save_local('./data/save_embedding/sv')\n" ] }, { @@ -188,7 +173,7 @@ "outputs": [], "source": [ "## If you previously preprocessed and saved the vector store to disk, then reload it here\n", - "faissDB = FAISS.load_local(\"/workspace/save_embedding/sv\", embedder, allow_dangerous_deserialization=True)\n", + "faissDB = FAISS.load_local(\"./data/save_embedding/sv\", embedder, allow_dangerous_deserialization=True)\n", "retriever = faissDB.as_retriever()" ] }, @@ -323,13 +308,8 @@ "metadata": {}, "outputs": [], "source": [ - "import os\n", - "from langchain.agents import AgentExecutor\n", - "from langchain.agents import initialize_agent\n", - "from langchain.prompts import MessagesPlaceholder\n", "from langchain.memory import ConversationBufferMemory\n", - "from langchain.agents import AgentType, Agent, ConversationalAgent\n", - "from langchain_core.prompts import ChatPromptTemplate, PromptTemplate\n", + "from langchain_core.prompts import PromptTemplate\n", "\n", "## set up memory\n", "memory = ConversationBufferMemory(memory_key=\"chat_history\", input_key='input', output_key=\"output\")\n", @@ -410,16 +390,9 @@ "metadata": {}, "outputs": [], "source": [ - "from typing import Any, Optional, Sequence\n", - "\n", - "from 
langchain_core._api import deprecated\n", - "from langchain_core.callbacks import BaseCallbackManager\n", - "from langchain_core.language_models import BaseLanguageModel\n", - "from langchain_core.tools import BaseTool\n", - "\n", "from langchain.agents.agent import AgentExecutor\n", "from langchain.agents.agent_types import AgentType\n", - "from langchain.agents.loading import AGENT_TO_CLASS, load_agent\n", + "from langchain.agents.loading import AGENT_TO_CLASS\n", "\n", "agent_cls = AGENT_TO_CLASS[AgentType.CONVERSATIONAL_REACT_DESCRIPTION]\n", "agent_kwargs = {}\n", @@ -462,7 +435,7 @@ "metadata": {}, "outputs": [], "source": [ - "from typing import TypedDict, Annotated, List, Union\n", + "from typing import TypedDict, Annotated, Union\n", "from langchain_core.agents import AgentAction, AgentFinish\n", "from langchain_core.messages import BaseMessage\n", "import operator\n", diff --git a/RAG/notebooks/langchain/NIM_tool_call_HumanInTheLoop_MultiAgents.ipynb b/RAG/notebooks/langchain/NIM_tool_call_HumanInTheLoop_MultiAgents.ipynb new file mode 100644 index 00000000..64922ed5 --- /dev/null +++ b/RAG/notebooks/langchain/NIM_tool_call_HumanInTheLoop_MultiAgents.ipynb @@ -0,0 +1,772 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "2ba963de-02e7-44c5-ada0-b74bb092064e", + "metadata": {}, + "source": [ + "\n", + "# Incorporating human-in-the-loop in agentic logic via LangGraph \n", + "\n", + "## Prerequisites\n", + "\n", + "To run this notebook, you need to [follow the steps from here](https://python.langchain.com/docs/integrations/text_embedding/nvidia_ai_endpoints#setup) and generate an API key from [NVIDIA API Catalog](https://build.nvidia.com/).\n", + "\n", + "Please ensure you have the following dependencies installed :\n", + "\n", + "- langchain\n", + "- jupyterlab==4.0.8\n", + "- langchain-core\n", + "- langchain-nvidia-ai-endpoints==0.2.0\n", + "- markdown\n", + "- colorama\n", + "\n", + "you will also need to install the following -\n", + "\n", + "\n", + "\n", + "This notebook will walk you though how to incoporate **human-in-the-loop** into a **multi-agents** pipeline in a minimalistic examples.\n", + "\n", + "The cognitive agentic architecture will look like the below :\n", + "\n", + "![agent architecture](./data/imgs/HumanInTheLoopLangGraph.png) \n", + "\n", + "\n", + "We will first construct the 2 agents in the middle : \n", + "\n", + "- Using **meta/llama-3.1-405b-instruct** to construct the 2 agents, each will be created with [LCEL expression ](https://python.langchain.com/v0.1/docs/expression_language/)\n", + "\n", + "- then we will give each agent one tool to use to achieve the task\n", + "\n", + "The task at hand is creating promotion assets with text and image for social medial promotion.\n", + "We are aiming for something similar to the below ...\n", + "\n", + "\n", + "![agent architecture](./data/imgs/finish_social_post.png)\n", + "\n", + "\n", + "Just like in real world, a human in charge of the task will delegate tasks to specalist writer to writ the promotion text and assign a digital artist for the artworks.\n", + "\n", + "In this scenario, we will let human assign an agent ( either **ContentCreator** or **DigitalArtist** ) just like the flow depicted above. 
\n", + " \n", + "\n", + "\n", + "Note: As one can see, since we are using NVIDIA AI Catalog as an API, there is no further requirement in the prerequisites about GPUs as compute hardware\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cd38971e-d3c4-4adc-9986-c1d5f3ccabd3", + "metadata": {}, + "outputs": [], + "source": [ + "## install a few python packages we will need\n", + "#!pip install colorama markdown langgraph" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c7285dbc-31ce-4a95-aee9-901e472c5073", + "metadata": {}, + "outputs": [], + "source": [ + "import getpass\n", + "import os\n", + "\n", + "import getpass\n", + "import os\n", + "\n", + "## API Key can be found by going to NVIDIA NGC -> AI Foundation Models -> (some model) -> Get API Code or similar.\n", + "## 10K free queries to any endpoint (which is a lot actually).\n", + "\n", + "# del os.environ['NVIDIA_API_KEY'] ## delete key and reset\n", + "if os.environ.get(\"NVIDIA_API_KEY\", \"\").startswith(\"nvapi-\"):\n", + " print(\"Valid NVIDIA_API_KEY already in environment. Delete to reset\")\n", + "else:\n", + " global nvapi_key\n", + " nvapi_key = getpass.getpass(\"NVAPI Key (starts with nvapi-): \")\n", + " assert nvapi_key.startswith(\"nvapi-\"), f\"{nvapi_key[:5]}... is not a valid key\"\n", + " os.environ[\"NVIDIA_API_KEY\"] = nvapi_key\n", + " " + ] + }, + { + "cell_type": "markdown", + "id": "a190d598-f2bc-4662-aad2-a69edd247c3b", + "metadata": {}, + "source": [ + "## We will prepare the 2 agents , each is made out of [LCEL expression ](https://python.langchain.com/v0.1/docs/expression_language/)\n", + "\n", + "For simplicity , each agent will be given one tool to use.\n", + "\n", + "- a **content_creator** agent which will create promotion message per input **_product_desc_**\n", + "- an **digital_artist** agent what is able to create visually appealing image from the promotion title\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "166362e6-f511-4c3f-9c52-50df49452ce1", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## Step 1 : construct **content_creator** agent \n", + "\n", + "in order to construct the **content_creator** agent we need the following :\n", + "\n", + "- system prompt which anchor the task for the agent\n", + "\n", + "- provide a seeded product desc \n", + "\n", + "- a powerful LLM [llama3.1-405b from NVIDIA NIM](https://build.nvidia.com/meta/llama-3_1-405b-instruct) \n", + "\n", + "- using **with_structured_output** for formatting\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7c20c3e5-b620-4bb6-996b-b402958bef53", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# test run and see that you can genreate a respond successfully \n", + "from langchain_nvidia_ai_endpoints import ChatNVIDIA\n", + "from langchain import prompts, chat_models, hub\n", + "from langchain_core.output_parsers import StrOutputParser\n", + "from langchain_core.prompts import ChatPromptTemplate, PromptTemplate\n", + "from langchain_core.pydantic_v1 import BaseModel, Field, validator\n", + "from typing import Optional, List\n", + "\n", + "## construct the system prompt \n", + "prompt_template = \"\"\"\n", + "### [INST]\n", + "\n", + "You are an expert social media content creator.\n", + "Your task is to create a different promotion message with the following \n", + "Product Description :\n", + "------\n", + "{product_desc}\n", + "------\n", + "\n", + "The output promotion message MUST use the 
following format :\n", + "\n", + "'''\n", + "Title: a powerful, short message that dipict what this product is about \n", + "Message: be creative for the promotion message, but make it short and ready for social media feeds.\n", + "Tags: the hash tag human will nomally use in social media\n", + "'''\n", + "\n", + "Begin!\n", + "\n", + "[/INST]\n", + " \"\"\"\n", + "prompt = PromptTemplate(\n", + "input_variables=['produce_desc'],\n", + "template=prompt_template,\n", + ")\n", + "\n", + "\n", + "\n", + "## provide the product_desc\n", + "product_desc=\"Explore the latest community-built AI models with an API optimized and accelerated by NVIDIA, then deploy anywhere with NVIDIA NIM™ inference microservices.\"\n", + "\n", + "## structural output using LMFE \n", + "class StructureOutput(BaseModel): \n", + " Title: str = Field(description=\"Title of the promotion message\")\n", + " Message : str = Field(description=\"The actual promption message\")\n", + " Tags: List[str] = Field(description=\"Hash tags for social media, usually starts with #\")\n", + "\n", + "llm_with_output_structure=ChatNVIDIA(model=\"meta/llama-3.1-405b-instruct\").with_structured_output(StructureOutput) \n", + "\n", + "## construct the content_creator agent\n", + "content_creator = ( prompt | llm_with_output_structure )\n", + "out=content_creator.invoke({\"product_desc\":product_desc})\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fc1416b3-02d8-4d1a-8362-aa9d6bb1e5aa", + "metadata": {}, + "outputs": [], + "source": [ + "out.Title\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f28a508c-81ec-4f20-9cc2-d762d3d5e3e6", + "metadata": {}, + "outputs": [], + "source": [ + "out.Message" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6208b390-cf0a-44a8-8cc5-ef517e7ef1b7", + "metadata": {}, + "outputs": [], + "source": [ + "out.Tags" + ] + }, + { + "cell_type": "markdown", + "id": "3db74d3c-f6bd-43dd-8f14-9d2f4e35e7f5", + "metadata": {}, + "source": [ + "## Step 2 : we will now create **digital_artist** agent \n", + "\n", + "We will equip the **digital_artist** with the following :\n", + "\n", + "- a text-to-image model [stableXL-turbo from NVIDIA NIM ](https://build.nvidia.com/explore/visual-design?snippet_tab=Python#sdxl-turbo)\n", + "- wrap this tool into llm with llm.bind_tools\n", + "- construct our **digital_artist** agent with LCEL expression" + ] + }, + { + "cell_type": "markdown", + "id": "22bc0564-11e4-4ae9-b952-969cfa624865", + "metadata": {}, + "source": [ + "## a text-to-image model [stableXL-turbo from NVIDIA NIM ](https://build.nvidia.com/explore/visual-design?snippet_tab=Python#sdxl-turbo)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6ca69380-937e-46d2-b4a0-37fd2899045f", + "metadata": {}, + "outputs": [], + "source": [ + "# test run and see that you can genreate a respond successfully \n", + "from langchain_nvidia_ai_endpoints import ChatNVIDIA\n", + "from langchain import prompts, chat_models, hub\n", + "from langchain_core.output_parsers import StrOutputParser\n", + "from langchain_core.prompts import ChatPromptTemplate, PromptTemplate\n", + "\n", + "def llm_rewrite_to_image_prompts(user_query):\n", + " prompt = prompts.ChatPromptTemplate.from_messages(\n", + " [\n", + " (\n", + " \"system\",\n", + " \"Summarize the following user query into a very short, one-sentence theme for image generation, MUST follow this format : A iconic, futuristic image of , no text, no amputation, no face, bright, vibrant\",\n", 
+ " ),\n", + " (\"user\", \"{input}\"),\n", + " ]\n", + " )\n", + " model = ChatNVIDIA(model=\"mistralai/mixtral-8x7b-instruct-v0.1\")\n", + " chain = ( prompt | model | StrOutputParser() )\n", + " out= chain.invoke({\"input\":user_query})\n", + " #print(type(out))\n", + " return out\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "599ebaf2-4c79-4cf4-a7c9-53971c9238e2", + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "import base64, io\n", + "from PIL import Image\n", + "import requests, json\n", + "def generate_image(prompt :str) -> str :\n", + " \"\"\"\n", + " generate image from text\n", + " Args:\n", + " prompt: input text\n", + " \"\"\"\n", + " ## re-writing the input promotion title in to appropriate image_gen prompt \n", + " gen_prompt=llm_rewrite_to_image_prompts(prompt)\n", + " print(\"start generating image with llm re-write prompt:\", gen_prompt)\n", + " invoke_url = \"https://ai.api.nvidia.com/v1/genai/stabilityai/sdxl-turbo\"\n", + " \n", + " headers = {\n", + " \"Authorization\": f\"Bearer {nvapi_key}\",\n", + " \"Accept\": \"application/json\",\n", + " }\n", + " \n", + " payload = {\n", + " \"text_prompts\": [{\"text\": gen_prompt}],\n", + " \"seed\": 0,\n", + " \"sampler\": \"K_EULER_ANCESTRAL\",\n", + " \"steps\": 2\n", + " }\n", + " \n", + " response = requests.post(invoke_url, headers=headers, json=payload)\n", + " \n", + " response.raise_for_status()\n", + " response_body = response.json()\n", + " ## load back to numpy array \n", + " print(response_body['artifacts'][0].keys())\n", + " imgdata = base64.b64decode(response_body[\"artifacts\"][0][\"base64\"])\n", + " filename = 'output.jpg'\n", + " with open(filename, 'wb') as f:\n", + " f.write(imgdata) \n", + " im = Image.open(filename) \n", + " img_location=f\"the output of the generated image will be stored in this path : {filename}\"\n", + " return img_location\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4fed6358-8d59-4b11-ab71-8387f1415526", + "metadata": {}, + "outputs": [], + "source": [ + "out=generate_image(\"NVIDIA NeMo is a powerful SDK for all your GenAI needs\")\n", + "out" + ] + }, + { + "cell_type": "markdown", + "id": "36422d22-1155-4a71-abd8-3078f172d6e0", + "metadata": {}, + "source": [ + "## Wrap the tool into llm with **llm.bind_tools**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4bcdbc6c-00c3-4d01-a55f-86483d42eeb2", + "metadata": {}, + "outputs": [], + "source": [ + "llm=ChatNVIDIA(model=\"meta/llama-3.1-405b-instruct\")\n", + "llm_with_img_gen_tool=llm.bind_tools([generate_image],tool_choice=\"generate_image\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cee33b1a-b02b-4e8a-9260-2c2c815afec2", + "metadata": {}, + "outputs": [], + "source": [ + "out=llm_with_img_gen_tool.invoke(\"NVIDIA power GenAI workflow\")\n", + "out.tool_calls" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e77f5ac2-49b8-468c-95db-740c80acdd0a", + "metadata": {}, + "outputs": [], + "source": [ + "def output_to_invoke_tools(out):\n", + " tool_calls=out.tool_calls\n", + " ## check there are indeed tool_calls in the output\n", + " if len(tool_calls) > 0 :\n", + " ## assert the args attribute exists \n", + " if 'args' in tool_calls[0] : \n", + " \n", + " prompt=tool_calls[0]['args']['prompt']\n", + " output=generate_image(prompt)\n", + " else:\n", + " print(\"### out.tool_calls\", out.tool_calls[0].keys() )\n", + " output=\"cannot find input prompt from llm output, please 
rerun again\"\n", + " else:\n", + " print(\"------------\" , out)\n", + " print(\"### out.tool_calls\", out.tool_calls )\n", + " output=\"agent did not find generate_image tool, please check the tool binding is successful\"\n", + " return output\n", + " " + ] + }, + { + "cell_type": "markdown", + "id": "11458491-bb01-4ee5-9eb0-ceff5c5593b8", + "metadata": {}, + "source": [ + "## creating **digital_artist** using LCEL chain " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "abb3f231-9096-458d-989c-1b8908b6d52f", + "metadata": {}, + "outputs": [], + "source": [ + "digital_artist = (\n", + " llm_with_img_gen_tool\n", + " | output_to_invoke_tools\n", + ")\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1f3e6616-2e4f-4673-a486-2ec42723a262", + "metadata": {}, + "outputs": [], + "source": [ + "digital_artist.invoke(\"NVIDIA power GenAI workflow\")" + ] + }, + { + "cell_type": "markdown", + "id": "d6b8b225-ea9a-4d3f-857a-39fd99afde13", + "metadata": {}, + "source": [ + "---\n", + "## Step 3 - Embed Human-in-the-loop agentic logic with LangGraph\n", + "\n", + "- construct a **get_human_input** function to integrate into the first node of LangGraph putting Human-in-the-loop deciding which tool to use\n", + "- establish **State** to keep track of the internal states\n", + "- create functions as graph nodes for LangGraph \n", + "- compose the agentic cognitive logic in langGraph by connecting the nodes and edges\n" + ] + }, + { + "cell_type": "markdown", + "id": "879af9be-472d-466d-9cb4-cc3a7fa86f2c", + "metadata": {}, + "source": [ + "## construct a **get_human_input** function to integrate into the first node of LangGraph \n", + "\n", + "putting Human-in-the-loop deciding which tool to use" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79cae427-1b34-4f2d-854f-3c765d2ab6a1", + "metadata": {}, + "outputs": [], + "source": [ + "# Or you can directly instantiate the tool\n", + "from langchain_community.tools import HumanInputRun\n", + "from langchain.agents import AgentType, load_tools\n", + "from langchain.agents import AgentType, initialize_agent, load_tools\n", + "\n", + "\n", + "def get_human_input() -> str:\n", + " \"\"\" Put human as decision maker, human will decide which agent is best for the task\"\"\"\n", + " \n", + " print(\"You have been given 2 agents. 
Please select exactly _ONE_ agent to help you with the task, enter 'y' to confirm your choice.\")\n", + " print(\"\"\"Available agents are : \\n\n", + " 1 ContentCreator \\n\n", + " 2 DigitalArtist \\n \n", + " Enter 1 or 2\"\"\")\n", + " contents = []\n", + " while True:\n", + " try: \n", + " line = input()\n", + " if line=='1':\n", + " tool=\"ContentCreator\" \n", + " line=tool\n", + " \n", + " elif line=='2':\n", + " tool=\"DigitalArtist\" \n", + " line=tool\n", + " \n", + " else:\n", + " pass\n", + " \n", + " except EOFError:\n", + " break\n", + " if line == \"y\":\n", + " print(f\"tool selected : {tool} \")\n", + " break\n", + " contents.append(line)\n", + " \n", + " return \"\\n\".join(contents)\n", + "\n", + "\n", + "# You can modify the tool when loading\n", + "\n", + "ask_human = HumanInputRun(input_func=get_human_input)\n" + ] + }, + { + "cell_type": "markdown", + "id": "2528761f-d478-4d04-b22b-d1cc55a49d6c", + "metadata": {}, + "source": [ + "## establish **State** to keep track of the internal states" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c8bb5db-80c9-4f62-a0bb-572e376c3a4d", + "metadata": {}, + "outputs": [], + "source": [ + "## first we define GraphState \n", + "from typing import Dict, TypedDict\n", + "from typing import TypedDict, Annotated, List, Union\n", + "from langchain_core.agents import AgentAction, AgentFinish\n", + "import operator\n", + "\n", + "from langchain_core.messages import BaseMessage\n", + "class State(TypedDict):\n", + " # The input string\n", + " input: str\n", + " input_to_agent : str\n", + " agent_choice : str\n", + " agent_use_tool_respond : str\n" + ] + }, + { + "cell_type": "markdown", + "id": "2ede35dc-e8b2-4bac-94bb-6b7aa283157d", + "metadata": {}, + "source": [ + "## create functions as graph nodes for LangGraph " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "35e6051c-c732-4e3f-8e09-f006cfff352f", + "metadata": {}, + "outputs": [], + "source": [ + "from langgraph.graph import END, StateGraph\n", + "from langgraph.prebuilt import ToolInvocation\n", + "from colorama import Fore,Style\n", + "# Define the functions needed \n", + "def human_assign_to_agent(state):\n", + " # ensure using original prompt \n", + " inputs = state[\"input\"]\n", + " input_to_agent = state[\"input_to_agent\"]\n", + "\n", + " concatenate_str = Fore.BLUE+inputs+ ' : '+Fore.CYAN+input_to_agent + Fore.RESET\n", + " print(concatenate_str)\n", + " print(\"---\"*10)\n", + " \n", + " agent_choice=ask_human.invoke(concatenate_str)\n", + " print(Fore.CYAN+ \"choosen_agent : \" + agent_choice + Fore.RESET)\n", + " return {\"agent_choice\": agent_choice }\n", + "\n", + "def agent_execute_task(state): \n", + " inputs= state[\"input\"]\n", + " input_to_agent = state[\"input_to_agent\"]\n", + " print(Fore.CYAN+input_to_agent + Fore.RESET)\n", + " # choosen agent will execute the task\n", + " choosen_agent = state['agent_choice']\n", + " if choosen_agent=='ContentCreator':\n", + " structured_respond=content_creator.invoke({\"product_desc\":input_to_agent})\n", + " respond='\\n'.join([structured_respond.Title,structured_respond.Message,''.join(structured_respond.Tags)]) \n", + " elif choosen_agent==\"DigitalArtist\":\n", + " respond=digital_artist.invoke(input_to_agent)\n", + " else:\n", + " respond=\"please reselect the agent, there are only 2 agents available: 1.ContentCreator or 2.DigitalArtist\"\n", + " \n", + " \n", + " print(Fore.CYAN+ \"agent_output: \\n\" + respond + Fore.RESET)\n", + "\n", + " return 
{\"agent_use_tool_respond\": respond}\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "9fedc500-a3d0-468d-b1b1-582d3e1145f6", + "metadata": {}, + "source": [ + "## compose the agentic cognitive logic in langGraph by connecting the nodes and edges" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c30754b-6399-47c9-9bb1-bbc8f304d349", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "from langgraph.graph import END, StateGraph\n", + "\n", + "# Define a new graph\n", + "workflow = StateGraph(State)\n", + "\n", + "# Define the two nodes \n", + "workflow.add_node(\"start\", human_assign_to_agent)\n", + "workflow.add_node(\"end\", agent_execute_task)\n", + "\n", + "# This means that this node is the first one called\n", + "workflow.set_entry_point(\"start\")\n", + "workflow.add_edge(\"start\", \"end\")\n", + "workflow.add_edge(\"end\", END)\n", + "\n", + "# Finally, we compile it!\n", + "# This compiles it into a LangChain Runnable,\n", + "# meaning you can use it as you would any other runnable\n", + "app = workflow.compile()" + ] + }, + { + "cell_type": "markdown", + "id": "78f0ac2a-c57d-4e2b-814d-44b040272a01", + "metadata": {}, + "source": [ + "---\n", + "## time to test this out " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2b5fe0c9-c922-4b0d-bb94-ac47c140e188", + "metadata": {}, + "outputs": [], + "source": [ + "my_query=\"create a good promption message for social promotion events using the following inputs\"\n", + "product_desc=\"NVIDIA NIM microservices power GenAI workflow\"\n", + "respond=app.invoke({\"input\":my_query, \"input_to_agent\":product_desc})" + ] + }, + { + "cell_type": "markdown", + "id": "d086169b-e38d-4506-a6dd-5b55d5246db3", + "metadata": {}, + "source": [ + "#### now we will use the output from the **ContentCreator** agent to go for a 2nd round to generate beautiful image for this promotion " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f64885a-0302-4548-be2f-79b81f0fdeeb", + "metadata": {}, + "outputs": [], + "source": [ + "prompt_for_image=respond['agent_use_tool_respond'].split('\\n')[0].split(':')[-1].strip()\n", + "prompt_for_image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "00e2e1b2-c257-46ee-b241-9d56b99f7479", + "metadata": {}, + "outputs": [], + "source": [ + "input_query=\"generate an image for me from the below promotion message\"\n", + "respond2=app.invoke({\"input\":input_query, \"input_to_agent\":prompt_for_image})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f6afdfd1-1e5d-4861-84fd-5f8da527f3e6", + "metadata": {}, + "outputs": [], + "source": [ + "im = Image.open('output.jpg') \n", + "im.show()" + ] + }, + { + "cell_type": "markdown", + "id": "b646514b-9fe4-4cc2-a3b5-24b9fe06961e", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## let's try to print this out using markdown " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4dede634-2dc4-4312-8649-31ded97e6979", + "metadata": {}, + "outputs": [], + "source": [ + "title = respond['agent_use_tool_respond'].split('\\n')[0].split(':')[-1].strip()\n", + "promotion_msg = respond['agent_use_tool_respond'].split('\\n')[1].split(':')[-1].strip()\n", + "hash_tags = ['#'+s for s in respond['agent_use_tool_respond'].split('\\n')[-1].split(':')[-1].split('#') if s!=\"\"]\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f8df416c-6f3c-4a79-ba0a-67f66c4be2a9", + "metadata": {}, + 
"outputs": [], + "source": [ + "hash_tag_in_md=[]\n", + "for hash_tag in hash_tags:\n", + " \n", + " temp=f\"\"\"{hash_tag}\"\"\"\n", + " hash_tag_in_md.append(temp)\n", + "\n", + "hashtags_in_md= '
'+ ''.join(hash_tag_in_md) + '
'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1fee6c74-e2bd-4701-be6f-4bf05bbf7560", + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import Markdown, display\n", + "\n", + "import markdown\n", + "markdown_str = markdown.markdown(f'''\n", + "\n", + "\n", + "\n", + "#### {title}\n", + "\n", + "{promotion_msg}\n", + "\n", + "{hashtags_in_md}\n", + "\n", + "''')\n", + "\n", + "def printmd(markdown_str):\n", + " display(Markdown(markdown_str))\n", + "printmd(markdown_str)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/RAG/notebooks/langchain/NeMo_Guardrails_with_LangChain_RAG/config/config.yml b/RAG/notebooks/langchain/NeMo_Guardrails_with_LangChain_RAG/config/config.yml new file mode 100644 index 00000000..b6981191 --- /dev/null +++ b/RAG/notebooks/langchain/NeMo_Guardrails_with_LangChain_RAG/config/config.yml @@ -0,0 +1,11 @@ +models: + - type: main + engine: nvidia_ai_endpoints + model: meta/llama-3.1-70b-instruct + # parameters: + # base_url: + - type: embeddings + engine: nvidia_ai_endpoints + model: nvidia/nv-embedqa-e5-v5 + # parameters: + # base_url: diff --git a/RAG/notebooks/langchain/NeMo_Guardrails_with_LangChain_RAG/config/flows.co b/RAG/notebooks/langchain/NeMo_Guardrails_with_LangChain_RAG/config/flows.co new file mode 100644 index 00000000..067d9511 --- /dev/null +++ b/RAG/notebooks/langchain/NeMo_Guardrails_with_LangChain_RAG/config/flows.co @@ -0,0 +1,20 @@ +define user greet + "Hello" + +define bot greet + "Hello. I'm the Guardrails Bot. How can I help you?" + +define flow + user greet + bot greet + +define user ask about user sensitive data + "Can you hack into someone's email account?" + "How do I get my friend's photos without permission?" + +define bot refuse to respond about user sensitive data + "Apologies, but the Guardrails Bot can't help with actions that asks about user sensitive data. It's important to respect privacy." + +define flow + user ask about user sensitive data + bot refuse to respond about user sensitive data \ No newline at end of file diff --git a/RAG/notebooks/langchain/NeMo_Guardrails_with_LangChain_RAG/using_nemo_guardrails_with_LangChain_RAG.ipynb b/RAG/notebooks/langchain/NeMo_Guardrails_with_LangChain_RAG/using_nemo_guardrails_with_LangChain_RAG.ipynb new file mode 100644 index 00000000..c4323e09 --- /dev/null +++ b/RAG/notebooks/langchain/NeMo_Guardrails_with_LangChain_RAG/using_nemo_guardrails_with_LangChain_RAG.ipynb @@ -0,0 +1,324 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Using NeMo Guardrails with LangChain RAG and NVIDIA NIMs" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook goes through how to integrate NeMo Guardrails with a basic RAG pipeline in LangChain. This notebook assumes that you already have an intermediary level developer who has a prequisite knowledge about RAG and NeMo Guardrails. 
If not, please visit our [RAG Example Using NVIDIA API Catalog and LangChain notebook](https://github.com/NVIDIA/GenerativeAIExamples/tree/main/RAG/notebooks/langchain/langchain_basic_RAG.ipynb) first and .\n", + "\n", + "## Terminology\n", + "\n", + "RAG (Retrieval-Augmented Generation) is a natural language processing technique that combines retrieval of relevant documents from a large corpus with an LLM to produce more accurate and contextually relevant responses.\n", + "\n", + "[NVIDIA NeMo Guardrails](https://github.com/NVIDIA/NeMo-Guardrails) provides programmable guardrails for ensuring trustworthiness, safety, security, and controlled dialog while protecting against common LLM vulnerabilities. \n", + "\n", + "[NVIDIA NIM microservices](https://developer.nvidia.com/blog/nvidia-nim-offers-optimized-inference-microservices-for-deploying-ai-models-at-scale/) are containerized microservices that simplify the deployment of generative AI models like LLMs and are optimized to run on NVIDIA GPUs. NIM microservices support models across domains like chat, embedding, reranking, and more from both the community and NVIDIA.\n", + "\n", + "[NVIDIA API Catalog](https://build.nvidia.com/explore/discover) is a hosted platform for accessing a wide range of microservices online. You can test models on the catalog and then export them with an NVIDIA AI Enterprise license for on-premises or cloud deployment\n", + "\n", + "Integrating NeMo Guardrails with LangChain RAG and NVIDIA NIMs ensure that the answers from LLMs are both safe and accurate." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Installation and Requirements\n", + "\n", + "Create a Python environment (preferably with Conda) using Python version 3.10.14. \n", + "To install Jupyter Lab, refer to the [installation](https://jupyter.org/install) page." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install langchain==0.2.5\n", + "!pip install nemoguardrails==0.9.1.1\n", + "!pip install langchain-nvidia-ai-endpoints==0.1.2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Getting Started!\n", + "\n", + "To get started you need an `NVIDIA_API_KEY` to use the NVIDIA API Catalog:\n", + "\n", + "1) Create a free account with [NVIDIA](https://build.nvidia.com/explore/discover).\n", + "2) Click on your model of choice.\n", + "3) Under Input select the Python tab, and click **Get API Key** and then click **Generate Key**.\n", + "4) Copy and save the generated key as NVIDIA_API_KEY. From there, you should have access to the endpoints." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import getpass\n", + "import os\n", + "\n", + "if not os.environ.get(\"NVIDIA_API_KEY\", \"\").startswith(\"nvapi-\"):\n", + " nvidia_api_key = getpass.getpass(\"Enter your NVIDIA API key: \")\n", + " assert nvidia_api_key.startswith(\"nvapi-\"), f\"{nvidia_api_key[:5]}... 
is not a valid key\"\n", + " os.environ[\"NVIDIA_API_KEY\"] = nvidia_api_key" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Creating a RAG example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Instantiating NIMs — an LLM NIM and an Embedding NIM\n", + "\n", + "from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings\n", + "\n", + "llm = ChatNVIDIA(model=\"meta/llama-3.1-405b-instruct\")\n", + "embedding_model = NVIDIAEmbeddings(model=\"nvidia/nv-embedqa-e5-v5\", truncate=\"END\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Process the documents into vectorstore\n", + "\n", + "from langchain_community.document_loaders import TextLoader\n", + "from langchain_text_splitters import RecursiveCharacterTextSplitter\n", + "from langchain_chroma import Chroma\n", + "\n", + "loader = TextLoader(\"../data/Sweden.txt\")\n", + "docs = loader.load()\n", + "\n", + "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n", + "splits = text_splitter.split_documents(docs)\n", + "vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model)\n", + "retriever = vectorstore.as_retriever()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Assembling our RAG pipeline\n", + "\n", + "from langchain_core.prompts import ChatPromptTemplate\n", + "from langchain_core.output_parsers import StrOutputParser\n", + "from langchain_core.runnables import RunnablePassthrough\n", + "\n", + "template = \"\"\"Use the following pieces of context to answer the question at the end. \n", + "If you don't know the answer, just say that you don't know, don't try to make up an answer. \n", + "Use three sentences maximum and keep the answer as concise as possible. \n", + "{context}\n", + "Question: {question}\n", + "Helpful Answer:\"\"\"\n", + "\n", + "prompt = ChatPromptTemplate.from_template(template)\n", + "\n", + "rag_chain = (\n", + " {\"context\": retriever, \"question\": RunnablePassthrough()}\n", + " | prompt\n", + " | llm\n", + " | StrOutputParser()\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, we have a RAG example ready to be tested. Let's ask our LLM a question with a non-harmful intent." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rag_chain.invoke(\"Which city in Sweden has the lowest Gini cofficient? What is the value?\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's consider a scenario where a user asks our LLM a question with potentially harmful intent.\n", + "\n", + "Notice that our LLM still responds to the query, even if the user's intent might be malicious." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rag_chain.invoke(\"I want to learn more about the things on the computer of a Swedish government official. How is the Swedish administration divided?\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Integrating NeMo Guardrails\n", + "\n", + "We can integrate safety filtering through the use of NeMo Guardrails. We aim to filter the incoming user messages and route it to a predefined flow if the message intent is malicious." 
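Before wiring the rails into the chain, it can help to exercise them on their own. The sketch below is illustrative only and assumes the same `./config` directory (with the `config.yml` and `flows.co` shown above) and a valid `NVIDIA_API_KEY`; the notebook itself uses `RunnableRails` in the cells that follow.

```python
# Minimal standalone check of the rails defined in ./config (a sketch, not part
# of the main pipeline). nest_asyncio lets the async Guardrails runtime run
# inside a Jupyter event loop.
import nest_asyncio
nest_asyncio.apply()

from nemoguardrails import RailsConfig, LLMRails

rails_config = RailsConfig.from_path("./config")
rails = LLMRails(rails_config)

# A greeting should trigger the predefined "greet" flow...
print(rails.generate(messages=[{"role": "user", "content": "Hello"}])["content"])

# ...while a question about user-sensitive data should trigger the refusal flow.
print(rails.generate(messages=[{"role": "user",
                                "content": "Can you hack into someone's email account?"}])["content"])
```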
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from nemoguardrails.integrations.langchain.runnable_rails import RunnableRails\n", + "from nemoguardrails import RailsConfig\n", + "import nest_asyncio\n", + "nest_asyncio.apply()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's load our configuration files and create our `RunnableRails` instance which allows NeMo Guardrails to be used with [LangChain's Runnables](https://python.langchain.com/v0.1/docs/expression_language/interface/)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "config = RailsConfig.from_path(\"./config\")\n", + "guardrails = RunnableRails(config, input_key=\"question\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's see what's the configuration what we have set in our `./config` folder." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We see that in our YAML file, we have configured NeMo Guardrails to use [Meta's Llama-3.1-70-instruct NIM LLM](https://build.nvidia.com/meta/llama-3_1-70b-instruct) and [NVIDIA's NV-EmbedQA-E5-V5 Embedding NIM](https://build.nvidia.com/nvidia/nv-embedqa-e5-v5)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open('config/config.yml', 'r') as file:\n", + " print(file.read())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We also see that have defined 2 flows: a flow to greet the user and a flow to prevent the LLM from responding to queries about user sensitive data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open('config/flows.co', 'r') as file:\n", + " print(file.read())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can put our guardrails to intercept the incoming message before it goes into the RAG chain." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "guardrailed_rag_chain = guardrails | rag_chain" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, let's ask the LLM the same question with potentially harmful intent. The LLM no longer responds to the query and gives the answer that we have predefined." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "guardrailed_rag_chain.invoke(\"I want to learn more about the things on the computer of a Swedish government official. How is the Swedish administration divided?\")['output']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To learn more advanced techniques of using NeMo Guardrails in LangChain, check out [the documentation](https://docs.nvidia.com/nemo/guardrails/user_guides/langchain/index.html)." 
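Before moving on, a quick contrast check is useful: a benign question from earlier in the notebook should still pass through the input rail and be answered by the RAG chain. The exact wording of the answer will vary with the LLM.

```python
# The benign question is not matched by any refusal flow, so the guardrails
# hand it through to the RAG chain and return its answer under "output".
result = guardrailed_rag_chain.invoke(
    "Which city in Sweden has the lowest Gini coefficient? What is the value?"
)
print(result["output"])
```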
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/08_RAG_Langchain_with_Local_NIM.ipynb b/RAG/notebooks/langchain/RAG_Langchain_with_Local_NIM.ipynb similarity index 82% rename from notebooks/08_RAG_Langchain_with_Local_NIM.ipynb rename to RAG/notebooks/langchain/RAG_Langchain_with_Local_NIM.ipynb index 7f812336..550102dc 100644 --- a/notebooks/08_RAG_Langchain_with_Local_NIM.ipynb +++ b/RAG/notebooks/langchain/RAG_Langchain_with_Local_NIM.ipynb @@ -7,23 +7,33 @@ "source": [ "# Build a RAG using a locally hosted NIM\n", "\n", - "In this notebook we demonstrate how to build a RAG using [NVIDIA Inference Microservices (NIM)](https://build.nvidia.com/explore/discover). We locally host a Llama3-8b-instruct NIM and deploy it using [ NVIDIA AI Endpoints for LangChain](https://python.langchain.com/docs/integrations/chat/nvidia_ai_endpoints/).\n", + "This notebook demonstrates how to build a RAG using NVIDIA NIM microservices. We locally host a Llama3-8b-instruct model using [NVIDIA NIM for LLMs](https://docs.nvidia.com/nim/large-language-models/latest/introduction.html) and connect to it using [LangChain NVIDIA AI Endpoints](https://python.langchain.com/docs/integrations/chat/nvidia_ai_endpoints/) package.\n", "\n", - "We then create a vector store by downloading web pages and generating their embeddings using FAISS. We then showcase two different chat chains for querying the vector store. For this example, we use the NVIDIA Triton documentation website, though the code can be easily modified to use any other source. \n", + "We then create a vector store by downloading web pages and generating their embeddings using FAISS. We then showcase two different chat chains for querying the vector store. For this example, we use the NVIDIA Triton documentation website, though the code can be easily modified to use any other source. For the embedding model, we use [the GPU accelerated NV-Embed-QA model from NVIDIA API Catalog](https://build.nvidia.com/nvidia/embed-qa-4).\n", "\n", "### First stage is to load NVIDIA Triton documentation from the web, chunkify the data, and generate embeddings using FAISS\n", "\n", "To get started:\n", "\n", - "1. Generate a NGC API [here](https://org.ngc.nvidia.com/setup/personal-keys)\n", + "1. Generate an [NGC CLI API key](https://org.ngc.nvidia.com/setup/personal-keys). This key will need to be passed to docker run in the next section as the NGC_API_KEY environment variable to download the appropriate models and resources when starting the NIM.\n", "\n", - "2. Export the API key (export NGC_API_KEY=) This key will need to be passed to docker run in the next section as the NGC_API_KEY environment variable to download the appropriate models and resources when starting the NIM.\n", - "\n", - "3. Download and install the NGC CLI following the [NGC Setup steps](https://docs.ngc.nvidia.com/cli/index.html?_gl=1*22f68y*_gcl_au*MTE2NTMwMTA2NC4xNzE1NzY4NzE4). Follow the steps on that page to set the NGC CLI and docker client configs appropriately.\n", - "\n", - "4. 
To pull the NIM container image from NGC, first authenticate with the NVIDIA Container Registry with the following command\n", - "\n", - "(Note: In order to run this notebook in a virtual environment, you need to launch the NIM Docker container in the background outside of the notebook environment prior to running the LangChain code in the notebook cells. Create a virtual environment and install the dependencies present inside the notebooks/requirements.txt file by pip install -r notebooks/requirements.txt. Run the commands in the first 3 cells from a terminal then begin with the 4th cell (curl inference command) within the notebook environment.)" + "2. Download and install the NGC CLI following the [NGC Setup steps](https://docs.ngc.nvidia.com/cli/index.html?_gl=1*22f68y*_gcl_au*MTE2NTMwMTA2NC4xNzE1NzY4NzE4). Follow the steps on that page to set the NGC CLI and docker client configs appropriately." + ] + }, + { + "cell_type": "markdown", + "id": "225c7185", + "metadata": {}, + "source": [ + "Note: In order to run this notebook, you need to launch the NIM Docker container in the terminal outside of the web browser notebook environment. Run the commands in the first 3 cells from a terminal then begin with the 4th cell (curl inference command) within the notebook environment (web browser)." + ] + }, + { + "cell_type": "markdown", + "id": "39ef96bf", + "metadata": {}, + "source": [ + "To pull the NIM container image from NGC, first authenticate with the NVIDIA Container Registry with the following command from your terminal." ] }, { @@ -33,7 +43,8 @@ "metadata": {}, "outputs": [], "source": [ - "!echo \"$NGC_API_KEY\" | docker login nvcr.io --username '$oauthtoken' --password-stdin" + "!export NGC_API_KEY=\"Provide your api key here\"\n", + "!docker login nvcr.io --username '$oauthtoken' --password \"${NGC_API_KEY}\"" ] }, { @@ -41,7 +52,7 @@ "id": "9c2403b8", "metadata": {}, "source": [ - "Set up location for caching the model artifacts" + "Set up location for caching the model artifacts. Export the following env variables from your terminal." ] }, { @@ -61,7 +72,7 @@ "id": "19a7e489", "metadata": {}, "source": [ - "Launch the NIM microservice" + "Launch the NIM LLM microservice by executing this command from the terminal where you have exported all the environment variables." ] }, { @@ -79,7 +90,7 @@ "id": "37e2fcda", "metadata": {}, "source": [ - "Before we continue and connect the NIM to LangChain, let's test it using a simple OpenAI completion request" + "Before we continue and connect the NIM to LangChain, let's test it using a simple OpenAI completion request. You can execute this command and all the subsequent one after this from your web browser." ] }, { @@ -111,9 +122,10 @@ "metadata": {}, "outputs": [], "source": [ - "!pip install langchain\n", - "!pip install langchain_nvidia_ai_endpoints\n", - "!pip install faiss-cpu" + "!pip install --upgrade pip\n", + "!pip install langchain==0.2.5\n", + "!pip install langchain-nvidia-ai-endpoints==0.1.2\n", + "!pip install faiss-gpu==1.7.2 # replace with faiss-cpu if you don't have a gpu" ] }, { @@ -121,7 +133,7 @@ "id": "74b0c989", "metadata": {}, "source": [ - "Set up API key, which you can get from the [API Catalog](https://build.nvidia.com/)" + "Set up NVIDIA API key, which you can get from the [API Catalog](https://build.nvidia.com/). This key will be used to communicate with GPU accelerated cloud hosted embedding model." 
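One way to provide the key from inside the notebook, mirroring the pattern used by the other notebooks in this directory, is a small `getpass` prompt; this is just a sketch, and exporting `NVIDIA_API_KEY` in the terminal before launching Jupyter works equally well.

```python
import getpass
import os

# Prompt for the key only if it is not already present in the environment.
if not os.environ.get("NVIDIA_API_KEY", "").startswith("nvapi-"):
    nvidia_api_key = getpass.getpass("Enter your NVIDIA API key: ")
    assert nvidia_api_key.startswith("nvapi-"), "NVIDIA API keys start with 'nvapi-'"
    os.environ["NVIDIA_API_KEY"] = nvidia_api_key
```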
] }, { @@ -145,7 +157,7 @@ "id": "5584e3b1", "metadata": {}, "source": [ - "We can now deploy the NIM in LangChain by specifying the base URL" + "We can now connect with the deployed NIM LLM model in LangChain by specifying the base URL" ] }, { @@ -163,6 +175,14 @@ "print(result.content)" ] }, + { + "cell_type": "markdown", + "id": "f95b2753", + "metadata": {}, + "source": [ + "Import all the required libraries for building the langchain agent." + ] + }, { "cell_type": "code", "execution_count": null, @@ -263,9 +283,9 @@ "metadata": {}, "outputs": [], "source": [ - "def create_embeddings(embedding_path: str = \"./embed\"):\n", + "def create_embeddings(embedding_path: str = \"./data/nv_embedding\"):\n", "\n", - " embedding_path = \"./embed\"\n", + " embedding_path = \"./data/nv_embedding\"\n", " print(f\"Storing embeddings to {embedding_path}\")\n", "\n", " # List of web pages containing NVIDIA Triton technical documentation\n", @@ -322,7 +342,7 @@ " Returns:\n", " None\n", " \"\"\"\n", - " embeddings = NVIDIAEmbeddings(model=\"ai-embed-qa-4\", truncate=\"END\")\n", + " embeddings = NVIDIAEmbeddings(model=\"NV-Embed-QA\", truncate=\"END\")\n", "\n", " for document in documents:\n", " texts = splitter.split_text(document.page_content)\n", @@ -332,7 +352,7 @@ "\n", " # create embeddings and add to vector store\n", " if os.path.exists(dest_embed_dir):\n", - " update = FAISS.load_local(folder_path=dest_embed_dir, embeddings=embeddings)\n", + " update = FAISS.load_local(folder_path=dest_embed_dir, embeddings=embeddings, allow_dangerous_deserialization=True)\n", " update.add_texts(texts, metadatas=metadatas)\n", " update.save_local(folder_path=dest_embed_dir)\n", " else:\n", @@ -362,7 +382,7 @@ "\n", "create_embeddings()\n", "\n", - "embedding_model = NVIDIAEmbeddings(model=\"ai-embed-qa-4\", truncate=\"END\")\n" + "embedding_model = NVIDIAEmbeddings(model=\"NV-Embed-QA\", truncate=\"END\")\n" ] }, { @@ -381,8 +401,8 @@ "outputs": [], "source": [ "# Embed documents\n", - "embedding_path = \"embed/\"\n", - "docsearch = FAISS.load_local(folder_path=embedding_path, embeddings=embedding_model)" + "embedding_path = \"./data/nv_embedding\"\n", + "docsearch = FAISS.load_local(folder_path=embedding_path, embeddings=embedding_model, allow_dangerous_deserialization=True)" ] }, { @@ -494,7 +514,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.10.13" } }, "nbformat": 4, diff --git a/notebooks/05_RAG_for_HTML_docs_with_Langchain_NVIDIA_AI_Endpoints.ipynb b/RAG/notebooks/langchain/RAG_for_HTML_docs_with_Langchain_NVIDIA_AI_Endpoints.ipynb similarity index 91% rename from notebooks/05_RAG_for_HTML_docs_with_Langchain_NVIDIA_AI_Endpoints.ipynb rename to RAG/notebooks/langchain/RAG_for_HTML_docs_with_Langchain_NVIDIA_AI_Endpoints.ipynb index 2f945afb..0abeca23 100644 --- a/notebooks/05_RAG_for_HTML_docs_with_Langchain_NVIDIA_AI_Endpoints.ipynb +++ b/RAG/notebooks/langchain/RAG_for_HTML_docs_with_Langchain_NVIDIA_AI_Endpoints.ipynb @@ -5,21 +5,13 @@ "id": "aaf8ff51", "metadata": {}, "source": [ - "# Build a RAG chain by generating embeddings for NVIDIA Triton documentation\n", + "# Build a RAG chain for NVIDIA Triton documentation website\n", "\n", "In this notebook we demonstrate how to build a RAG using [NVIDIA AI Endpoints for LangChain](https://python.langchain.com/docs/integrations/text_embedding/nvidia_ai_endpoints). We create a vector store by downloading web pages and generating their embeddings using FAISS. 
We then showcase two different chat chains for querying the vector store. For this example, we use the NVIDIA Triton documentation website, though the code can be easily modified to use any other source. \n", "\n", "### First stage is to load NVIDIA Triton documentation from the web, chunkify the data, and generate embeddings using FAISS\n", "\n", - "To get started:\n", - "\n", - "1. Create a free account with the NVIDIA NGC service, which hosts AI solution catalogs, containers, models, etc.\n", - "\n", - "2. Navigate to Catalog > AI Foundation Models > (Model with API endpoint).\n", - "\n", - "3. Select the API option and click Generate Key.\n", - "\n", - "4. Save the generated key as NVIDIA_API_KEY. From there, you should have access to the endpoints." + "To run this notebook, you need to complete the [setup](https://python.langchain.com/docs/integrations/text_embedding/nvidia_ai_endpoints#setup) and generate an API key." ] }, { @@ -27,7 +19,7 @@ "id": "e55d2d53", "metadata": {}, "source": [ - "First install prerequisite libraries" + "Let's install the prerequisite libraries and import the necessary packages to run this notebook." ] }, { @@ -37,9 +29,10 @@ "metadata": {}, "outputs": [], "source": [ - "!pip install langchain\n", - "!pip install langchain_nvidia_ai_endpoints\n", - "!pip install faiss-cpu" + "!pip install --upgrade pip\n", + "!pip install langchain==0.2.5\n", + "!pip install langchain-nvidia-ai-endpoints==0.1.2\n", + "!pip install faiss-gpu==1.7.2 # replace with faiss-cpu if you don't have a gpu" ] }, { @@ -65,7 +58,7 @@ "id": "025de714", "metadata": {}, "source": [ - "Set up API key" + "Provide the API key by running the cell below." ] }, { @@ -171,9 +164,9 @@ "metadata": {}, "outputs": [], "source": [ - "def create_embeddings(embedding_path: str = \"./embed\"):\n", + "def create_embeddings(embedding_path: str = \"./data/nv_embedding\"):\n", "\n", - " embedding_path = \"./embed\"\n", + " embedding_path = \"./data/nv_embedding\"\n", " print(f\"Storing embeddings to {embedding_path}\")\n", "\n", " # List of web pages containing NVIDIA Triton technical documentation\n", @@ -206,7 +199,7 @@ "id": "942934e8", "metadata": {}, "source": [ - "Generate embeddings using NVIDIA AI Endpoints for LangChain and save embeddings to offline vector store in the /embed directory for future re-use" + "Generate embeddings using NVIDIA AI Endpoints for LangChain and save embeddings to offline vector store in the ./data/nv_embedding directory for future re-use" ] }, { @@ -229,7 +222,7 @@ " Returns:\n", " None\n", " \"\"\"\n", - " embeddings = NVIDIAEmbeddings(model=\"ai-embed-qa-4\")\n", + " embeddings = NVIDIAEmbeddings(model=\"NV-Embed-QA\", truncate=\"END\")\n", "\n", " for document in documents:\n", " texts = splitter.split_text(document.page_content)\n", @@ -239,7 +232,7 @@ "\n", " # create embeddings and add to vector store\n", " if os.path.exists(dest_embed_dir):\n", - " update = FAISS.load_local(folder_path=dest_embed_dir, embeddings=embeddings)\n", + " update = FAISS.load_local(folder_path=dest_embed_dir, embeddings=embeddings, allow_dangerous_deserialization=True)\n", " update.add_texts(texts, metadatas=metadatas)\n", " update.save_local(folder_path=dest_embed_dir)\n", " else:\n", @@ -262,21 +255,10 @@ "execution_count": null, "id": "f56cadd0", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Storing embeddings to ./embed\n" - ] - } - ], + "outputs": [], "source": [ - "\n", - "\n", "create_embeddings()\n", - "\n", - "embedding_model = 
NVIDIAEmbeddings(model=\"ai-embed-qa-4\")\n" + "embedding_model = NVIDIAEmbeddings(model=\"NV-Embed-QA\", truncate=\"END\")" ] }, { @@ -295,8 +277,8 @@ "outputs": [], "source": [ "# Embed documents\n", - "embedding_path = \"embed/\"\n", - "docsearch = FAISS.load_local(folder_path=embedding_path, embeddings=embedding_model)" + "embedding_path = \"./data/nv_embedding\"\n", + "docsearch = FAISS.load_local(folder_path=embedding_path, embeddings=embedding_model, allow_dangerous_deserialization=True)" ] }, { @@ -320,7 +302,7 @@ "\n", "question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)\n", "\n", - "chat = ChatNVIDIA(model=\"ai-mixtral-8x7b-instruct\", temperature=0.1, max_tokens=1000, top_p=1.0)\n", + "chat = ChatNVIDIA(model=\"mistralai/mixtral-8x7b-instruct-v0.1\", temperature=0.1, max_tokens=1000, top_p=1.0)\n", "\n", "doc_chain = load_qa_chain(chat , chain_type=\"stuff\", prompt=QA_PROMPT)\n", "\n", diff --git a/RAG/notebooks/langchain/Using_NVIDIA_NIMs_with_NeMo_Guardrails/Using_NVIDIA_NIMs_with_NeMo_Guardrails/config/config.yml b/RAG/notebooks/langchain/Using_NVIDIA_NIMs_with_NeMo_Guardrails/Using_NVIDIA_NIMs_with_NeMo_Guardrails/config/config.yml new file mode 100644 index 00000000..b6981191 --- /dev/null +++ b/RAG/notebooks/langchain/Using_NVIDIA_NIMs_with_NeMo_Guardrails/Using_NVIDIA_NIMs_with_NeMo_Guardrails/config/config.yml @@ -0,0 +1,11 @@ +models: + - type: main + engine: nvidia_ai_endpoints + model: meta/llama-3.1-70b-instruct + # parameters: + # base_url: + - type: embeddings + engine: nvidia_ai_endpoints + model: nvidia/nv-embedqa-e5-v5 + # parameters: + # base_url: diff --git a/RAG/notebooks/langchain/Using_NVIDIA_NIMs_with_NeMo_Guardrails/Using_NVIDIA_NIMs_with_NeMo_Guardrails/config/flows.co b/RAG/notebooks/langchain/Using_NVIDIA_NIMs_with_NeMo_Guardrails/Using_NVIDIA_NIMs_with_NeMo_Guardrails/config/flows.co new file mode 100644 index 00000000..067d9511 --- /dev/null +++ b/RAG/notebooks/langchain/Using_NVIDIA_NIMs_with_NeMo_Guardrails/Using_NVIDIA_NIMs_with_NeMo_Guardrails/config/flows.co @@ -0,0 +1,20 @@ +define user greet + "Hello" + +define bot greet + "Hello. I'm the Guardrails Bot. How can I help you?" + +define flow + user greet + bot greet + +define user ask about user sensitive data + "Can you hack into someone's email account?" + "How do I get my friend's photos without permission?" + +define bot refuse to respond about user sensitive data + "Apologies, but the Guardrails Bot can't help with actions that asks about user sensitive data. It's important to respect privacy." 
+ +define flow + user ask about user sensitive data + bot refuse to respond about user sensitive data \ No newline at end of file diff --git a/RAG/notebooks/langchain/Using_NVIDIA_NIMs_with_NeMo_Guardrails/Using_NVIDIA_NIMs_with_NeMo_Guardrails/guardrails-nim-architecture.png b/RAG/notebooks/langchain/Using_NVIDIA_NIMs_with_NeMo_Guardrails/Using_NVIDIA_NIMs_with_NeMo_Guardrails/guardrails-nim-architecture.png new file mode 100644 index 00000000..b14108c7 Binary files /dev/null and b/RAG/notebooks/langchain/Using_NVIDIA_NIMs_with_NeMo_Guardrails/Using_NVIDIA_NIMs_with_NeMo_Guardrails/guardrails-nim-architecture.png differ diff --git a/RAG/notebooks/langchain/Using_NVIDIA_NIMs_with_NeMo_Guardrails/Using_NVIDIA_NIMs_with_NeMo_Guardrails/using_nims_with_guardrails.ipynb b/RAG/notebooks/langchain/Using_NVIDIA_NIMs_with_NeMo_Guardrails/Using_NVIDIA_NIMs_with_NeMo_Guardrails/using_nims_with_guardrails.ipynb new file mode 100644 index 00000000..3577f65d --- /dev/null +++ b/RAG/notebooks/langchain/Using_NVIDIA_NIMs_with_NeMo_Guardrails/Using_NVIDIA_NIMs_with_NeMo_Guardrails/using_nims_with_guardrails.ipynb @@ -0,0 +1,434 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Securing Generative AI Deployments with NVIDIA NIM Microservices and NVIDIA NeMo Guardrails" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Integrating NVIDIA NIMs with NeMo Guardrails\n", + "\n", + "This tutorial contains all of the code snippets presented in the technical blog [Securing Generative AI Deployments with NVIDIA NIM and NVIDIA NeMo Guardrails](https://developer.nvidia.com/blog/securing-generative-ai-deployments-with-nvidia-nim-and-nvidia-nemo-guardrails/) in a complete notebook. Please feel free to read the blog for full context.\n", + "\n", + "As a reference for how to deploy NIM on your chosen infrastructure, check out this [simple guide to deploying a NIM container and testing an inference request](https://developer.nvidia.com/blog/a-simple-guide-to-deploying-generative-ai-with-nvidia-nim/). \n", + "\n", + "In this tutorial, we deploy two NIM microservices — a NeMo Retriever Embedding NIM and an LLM NIM. We then integrate both with NeMo Guardrails to prevent malicious use in the form of user account hacking attempted through queries that pertain to personal data. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For the LLM NIM, we use Meta’s new [Llama-3.1-70B-Instruct](https://build.nvidia.com/meta/llama-3_1-70b-instruct) model. For the embedding NIM, we use NVIDIA’s new [EmbedQA-E5-V5](https://build.nvidia.com/nvidia/nv-embedqa-e5-v5). The NeMo Retriever Embedding NIM assists the guardrails by converting each input query into an embedding vector. This enables efficient comparison with guardrails policies, ensuring that the query does not match with any prohibited or out-of-scope policies, thereby preventing the LLM NIM from giving unauthorized outputs. \n", + "\n", + "By integrating these NIM with NeMo Guardrails, we accelerate the performance of safety filtering and dialog management." 
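To make the embedding NIM's role concrete, the sketch below calls the same embedding model named in `config.yml` directly and prints the size of the resulting query vector. It is illustrative only (Guardrails issues these embedding calls internally) and assumes `NVIDIA_API_KEY` is set for the hosted endpoint.

```python
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings

# The same embedding model referenced in config.yml.
embedder = NVIDIAEmbeddings(model="nvidia/nv-embedqa-e5-v5", truncate="END")

# Each incoming utterance is converted into a vector like this one; Guardrails
# compares such vectors against the example utterances defined in flows.co.
vector = embedder.embed_query("Can you hack into someone's email account?")
print(len(vector))
```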
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will cover: \n", + "* Defining the use case\n", + "* Setting up a guardrailing system with NIM\n", + "* Testing the integration\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Defining the use case" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this example, we demonstrate how to intercept any incoming user questions that pertain to personal data using topical rails. These rails ensure the LLM response adheres to topics which do not share any sensitive information. They also help to keep the LLM outputs on track by fact-checking before answering the user's questions. The integration pattern of these rails with the NIMs can be seen in the figure below:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![An architectural diagram showing how Guardrails runtime works with the application code and the NIMs](guardrails-nim-architecture.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setting up a guardrailing system with NIM" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Before we begin, let’s make sure that our NeMo Guardrails library is up to date with the latest version. The version that would work with this tutorial is 0.9.1.1 or later.\n", + "\n", + "We can check the version of the NeMo Guardrails library by running the following command in the terminal:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!nemoguardrails --version" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you do not have [NeMo Guardrails](https://pypi.org/project/nemoguardrails/) installed, run the following command:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# !pip install nemoguardrails" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you have versions that are older than 0.9.1.1, upgrade to the latest version by running the following command:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# !pip install nemoguardrails --upgrade" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The next step is defining the configuration of the guardrails. To learn more, see the [configuration guide](https://docs.nvidia.com/nemo/guardrails/user_guides/configuration-guide.html). 
We start by creating the config directory as follows:\n", + "\n", + "\n", + "```\n", + "├── config\n", + "│ ├── config.yml\n", + "│ ├── flows.co\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!mkdir -p config\n", + "!touch config/config.yml\n", + "!touch config/flows.co" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the `config.yml` file, we configure the NIM as follows:\n", + "* if you'd like to use [NVIDIA-hosted NIMs](https://build.nvidia.com/), comment out the parameters and base_url lines\n", + "* if you'd like to use your self-hosted NIMs, replace and with your own base URL for the NIMs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "config_yml_content = '''models:\n", + " - type: main\n", + " engine: nvidia_ai_endpoints\n", + " model: meta/llama-3.1-70b-instruct\n", + " # parameters:\n", + " # base_url: \n", + " - type: embeddings\n", + " engine: nvidia_ai_endpoints\n", + " model: nvidia/nv-embedqa-e5-v5\n", + " # parameters:\n", + " # base_url: \n", + "'''\n", + "\n", + "with open('config/config.yml', 'w') as file:\n", + " file.write(config_yml_content)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you are testing with NVIDIA-hosted NIMs, remove `parameters` and `base_url` from the `config.yml` file." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Check if the `config.yml` has the correct content" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open('config/config.yml', 'r') as file:\n", + " contents = file.read()\n", + " print(contents)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once the configuration is added with the respective NIM, we can add rails in the flows.co file. An example of adding dialog rails is as follows:\n", + "\n", + "```\n", + "define user greet\n", + " \"Hello\"\n", + "\n", + "define bot greet\n", + " \"Hello. I'm the Guardrails Bot. How can I help you?\"\n", + "\n", + "define flow\n", + " user greet\n", + " bot greet\n", + "```\n", + "\n", + "The script above shows a simple dialog rail that explicitly greets the user in a specific manner." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can add more dialog rails in the flows.co file as follows:\n", + "\n", + "```\n", + "define user ask about user sensitive data\n", + " \"Can you hack into someone's email account?\"\n", + " \"How do I get my friend's photos without permission?\"\n", + "\n", + "define bot refuse to respond about user sensitive data\n", + " \"Apologies, but the Guardrails Bot can't help with actions that asks about user sensitive data. It's important to respect privacy.\"\n", + "\n", + "define flow\n", + " user ask about user sensitive data\n", + " bot refuse to respond about user sensitive data\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "flows_co_content = '''define user greet\n", + " \"Hello\"\n", + "\n", + "define bot greet\n", + " \"Hello. I'm the Guardrails Bot. 
How can I help you?\"\n", + "\n", + "define flow\n", + " user greet\n", + " bot greet\n", + "\n", + "define user ask about user sensitive data\n", + " \"Can you hack into someone's email account?\"\n", + " \"How do I get my friend's photos without permission?\"\n", + "\n", + "define bot refuse to respond about user sensitive data\n", + " \"Apologies, but the Guardrails Bot can't help with actions that asks about user sensitive data. It's important to respect privacy.\"\n", + "\n", + "define flow\n", + " user ask about user sensitive data\n", + " bot refuse to respond about user sensitive data'''\n", + "\n", + "with open('config/flows.co', 'w') as file:\n", + " file.write(flows_co_content)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Check if the `flows.co` has the correct content" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open('config/flows.co', 'r') as file:\n", + " contents = file.read()\n", + " print(contents)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "With the Colang and YAML files in the config folder, we should be ready to set up our guardrails. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can import the related libraries and import the config folder to instantiate our guardrails." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import nest_asyncio\n", + "nest_asyncio.apply()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from nemoguardrails import RailsConfig, LLMRails\n", + "\n", + "config = RailsConfig.from_path('config')\n", + "rails = LLMRails(config)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We are ready to test out our guardrails." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Testing the integration" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, we greet our LLM NIM through our guardrails and see if the guardrails pick up one of the predefined dialog rails." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "response = rails.generate(messages=[{\n", + " \"role\": \"user\",\n", + " \"content\": \"Hi!\"\n", + "}])\n", + "print(response['content'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here, our query to the LLM NIM is intercepted by the guardrails that we have set up because our query matches with one of the predefined dialog rails. The NeMo Retriever Embedding NIM assists our guardrails in turning our query into an embedding vector. Our guardrails then perform a semantic search to return the most similar results of the utterances that we provide as part of `flows.co`.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we ask the LLM NIM to provide us with a way to hack into a phone. This query falls into the category of topics pertaining to personal data. This is expected to be blocked by the guardrails based on the configuration. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "response = rails.generate(messages=[{\n", + " \"role\": \"user\",\n", + " \"content\": \"How can I hack into my partner's phone?\"\n", + "}])\n", + "print(response['content'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As seen, our guardrails are able to intercept the message and block the LLM NIM from responding to the query since we have defined dialog rails to prevent further discussion of this topic.\n", + "\n", + "The tutorial above is for users to only get started with a simple use case. To create a more robust guardrailing system, users are encouraged to set up [various types of rails](https://docs.nvidia.com/nemo/guardrails/user_guides/guardrails-library.html) allowing for further customization of their use cases." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Conclusion" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this post, we detailed the steps for integrating NVIDIA NIMs with NeMo Guardrails. In this instance, we were able to stop our application from responding to questions pertaining to personal data. With the integration of NVIDIA NIMs and NeMo Guardrails, developers are able to deploy AI models to production quickly and safely. " + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/RAG/notebooks/langchain/Using_NVIDIA_NIMs_with_NeMo_Guardrails/config/config.yml b/RAG/notebooks/langchain/Using_NVIDIA_NIMs_with_NeMo_Guardrails/config/config.yml new file mode 100644 index 00000000..b6981191 --- /dev/null +++ b/RAG/notebooks/langchain/Using_NVIDIA_NIMs_with_NeMo_Guardrails/config/config.yml @@ -0,0 +1,11 @@ +models: + - type: main + engine: nvidia_ai_endpoints + model: meta/llama-3.1-70b-instruct + # parameters: + # base_url: + - type: embeddings + engine: nvidia_ai_endpoints + model: nvidia/nv-embedqa-e5-v5 + # parameters: + # base_url: diff --git a/RAG/notebooks/langchain/Using_NVIDIA_NIMs_with_NeMo_Guardrails/config/flows.co b/RAG/notebooks/langchain/Using_NVIDIA_NIMs_with_NeMo_Guardrails/config/flows.co new file mode 100644 index 00000000..067d9511 --- /dev/null +++ b/RAG/notebooks/langchain/Using_NVIDIA_NIMs_with_NeMo_Guardrails/config/flows.co @@ -0,0 +1,20 @@ +define user greet + "Hello" + +define bot greet + "Hello. I'm the Guardrails Bot. How can I help you?" + +define flow + user greet + bot greet + +define user ask about user sensitive data + "Can you hack into someone's email account?" + "How do I get my friend's photos without permission?" + +define bot refuse to respond about user sensitive data + "Apologies, but the Guardrails Bot can't help with actions that asks about user sensitive data. It's important to respect privacy." 
+ +define flow + user ask about user sensitive data + bot refuse to respond about user sensitive data \ No newline at end of file diff --git a/RAG/notebooks/langchain/Using_NVIDIA_NIMs_with_NeMo_Guardrails/guardrails-nim-architecture.png b/RAG/notebooks/langchain/Using_NVIDIA_NIMs_with_NeMo_Guardrails/guardrails-nim-architecture.png new file mode 100644 index 00000000..b14108c7 Binary files /dev/null and b/RAG/notebooks/langchain/Using_NVIDIA_NIMs_with_NeMo_Guardrails/guardrails-nim-architecture.png differ diff --git a/RAG/notebooks/langchain/Using_NVIDIA_NIMs_with_NeMo_Guardrails/using_nims_with_guardrails.ipynb b/RAG/notebooks/langchain/Using_NVIDIA_NIMs_with_NeMo_Guardrails/using_nims_with_guardrails.ipynb new file mode 100644 index 00000000..3577f65d --- /dev/null +++ b/RAG/notebooks/langchain/Using_NVIDIA_NIMs_with_NeMo_Guardrails/using_nims_with_guardrails.ipynb @@ -0,0 +1,434 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Securing Generative AI Deployments with NVIDIA NIM Microservices and NVIDIA NeMo Guardrails" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Integrating NVIDIA NIMs with NeMo Guardrails\n", + "\n", + "This tutorial contains all of the code snippets presented in the technical blog [Securing Generative AI Deployments with NVIDIA NIM and NVIDIA NeMo Guardrails](https://developer.nvidia.com/blog/securing-generative-ai-deployments-with-nvidia-nim-and-nvidia-nemo-guardrails/) in a complete notebook. Please feel free to read the blog for full context.\n", + "\n", + "As a reference for how to deploy NIM on your chosen infrastructure, check out this [simple guide to deploying a NIM container and testing an inference request](https://developer.nvidia.com/blog/a-simple-guide-to-deploying-generative-ai-with-nvidia-nim/). \n", + "\n", + "In this tutorial, we deploy two NIM microservices — a NeMo Retriever Embedding NIM and an LLM NIM. We then integrate both with NeMo Guardrails to prevent malicious use in the form of user account hacking attempted through queries that pertain to personal data. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For the LLM NIM, we use Meta’s new [Llama-3.1-70B-Instruct](https://build.nvidia.com/meta/llama-3_1-70b-instruct) model. For the embedding NIM, we use NVIDIA’s new [EmbedQA-E5-V5](https://build.nvidia.com/nvidia/nv-embedqa-e5-v5). The NeMo Retriever Embedding NIM assists the guardrails by converting each input query into an embedding vector. This enables efficient comparison with guardrails policies, ensuring that the query does not match with any prohibited or out-of-scope policies, thereby preventing the LLM NIM from giving unauthorized outputs. \n", + "\n", + "By integrating these NIM with NeMo Guardrails, we accelerate the performance of safety filtering and dialog management." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will cover: \n", + "* Defining the use case\n", + "* Setting up a guardrailing system with NIM\n", + "* Testing the integration\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Defining the use case" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this example, we demonstrate how to intercept any incoming user questions that pertain to personal data using topical rails. These rails ensure the LLM response adheres to topics which do not share any sensitive information. 
They also help to keep the LLM outputs on track by fact-checking before answering the user's questions. The integration pattern of these rails with the NIMs can be seen in the figure below:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![An architectural diagram showing how Guardrails runtime works with the application code and the NIMs](guardrails-nim-architecture.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setting up a guardrailing system with NIM" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Before we begin, let’s make sure that our NeMo Guardrails library is up to date with the latest version. The version that would work with this tutorial is 0.9.1.1 or later.\n", + "\n", + "We can check the version of the NeMo Guardrails library by running the following command in the terminal:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!nemoguardrails --version" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you do not have [NeMo Guardrails](https://pypi.org/project/nemoguardrails/) installed, run the following command:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# !pip install nemoguardrails" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you have versions that are older than 0.9.1.1, upgrade to the latest version by running the following command:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# !pip install nemoguardrails --upgrade" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The next step is defining the configuration of the guardrails. To learn more, see the [configuration guide](https://docs.nvidia.com/nemo/guardrails/user_guides/configuration-guide.html). We start by creating the config directory as follows:\n", + "\n", + "\n", + "```\n", + "├── config\n", + "│ ├── config.yml\n", + "│ ├── flows.co\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!mkdir -p config\n", + "!touch config/config.yml\n", + "!touch config/flows.co" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the `config.yml` file, we configure the NIM as follows:\n", + "* if you'd like to use [NVIDIA-hosted NIMs](https://build.nvidia.com/), comment out the parameters and base_url lines\n", + "* if you'd like to use your self-hosted NIMs, replace and with your own base URL for the NIMs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "config_yml_content = '''models:\n", + " - type: main\n", + " engine: nvidia_ai_endpoints\n", + " model: meta/llama-3.1-70b-instruct\n", + " # parameters:\n", + " # base_url: \n", + " - type: embeddings\n", + " engine: nvidia_ai_endpoints\n", + " model: nvidia/nv-embedqa-e5-v5\n", + " # parameters:\n", + " # base_url: \n", + "'''\n", + "\n", + "with open('config/config.yml', 'w') as file:\n", + " file.write(config_yml_content)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you are testing with NVIDIA-hosted NIMs, remove `parameters` and `base_url` from the `config.yml` file." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Check if the `config.yml` has the correct content" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open('config/config.yml', 'r') as file:\n", + " contents = file.read()\n", + " print(contents)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once the configuration is added with the respective NIM, we can add rails in the flows.co file. An example of adding dialog rails is as follows:\n", + "\n", + "```\n", + "define user greet\n", + " \"Hello\"\n", + "\n", + "define bot greet\n", + " \"Hello. I'm the Guardrails Bot. How can I help you?\"\n", + "\n", + "define flow\n", + " user greet\n", + " bot greet\n", + "```\n", + "\n", + "The script above shows a simple dialog rail that explicitly greets the user in a specific manner." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can add more dialog rails in the flows.co file as follows:\n", + "\n", + "```\n", + "define user ask about user sensitive data\n", + " \"Can you hack into someone's email account?\"\n", + " \"How do I get my friend's photos without permission?\"\n", + "\n", + "define bot refuse to respond about user sensitive data\n", + " \"Apologies, but the Guardrails Bot can't help with actions that asks about user sensitive data. It's important to respect privacy.\"\n", + "\n", + "define flow\n", + " user ask about user sensitive data\n", + " bot refuse to respond about user sensitive data\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "flows_co_content = '''define user greet\n", + " \"Hello\"\n", + "\n", + "define bot greet\n", + " \"Hello. I'm the Guardrails Bot. How can I help you?\"\n", + "\n", + "define flow\n", + " user greet\n", + " bot greet\n", + "\n", + "define user ask about user sensitive data\n", + " \"Can you hack into someone's email account?\"\n", + " \"How do I get my friend's photos without permission?\"\n", + "\n", + "define bot refuse to respond about user sensitive data\n", + " \"Apologies, but the Guardrails Bot can't help with actions that asks about user sensitive data. It's important to respect privacy.\"\n", + "\n", + "define flow\n", + " user ask about user sensitive data\n", + " bot refuse to respond about user sensitive data'''\n", + "\n", + "with open('config/flows.co', 'w') as file:\n", + " file.write(flows_co_content)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Check if the `flows.co` has the correct content" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open('config/flows.co', 'r') as file:\n", + " contents = file.read()\n", + " print(contents)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "With the Colang and YAML files in the config folder, we should be ready to set up our guardrails. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can import the related libraries and import the config folder to instantiate our guardrails." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import nest_asyncio\n", + "nest_asyncio.apply()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from nemoguardrails import RailsConfig, LLMRails\n", + "\n", + "config = RailsConfig.from_path('config')\n", + "rails = LLMRails(config)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We are ready to test out our guardrails." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Testing the integration" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, we greet our LLM NIM through our guardrails and see if the guardrails pick up one of the predefined dialog rails." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "response = rails.generate(messages=[{\n", + " \"role\": \"user\",\n", + " \"content\": \"Hi!\"\n", + "}])\n", + "print(response['content'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here, our query to the LLM NIM is intercepted by the guardrails that we have set up because our query matches with one of the predefined dialog rails. The NeMo Retriever Embedding NIM assists our guardrails in turning our query into an embedding vector. Our guardrails then perform a semantic search to return the most similar results of the utterances that we provide as part of `flows.co`.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we ask the LLM NIM to provide us with a way to hack into a phone. This query falls into the category of topics pertaining to personal data. This is expected to be blocked by the guardrails based on the configuration. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "response = rails.generate(messages=[{\n", + " \"role\": \"user\",\n", + " \"content\": \"How can I hack into my partner's phone?\"\n", + "}])\n", + "print(response['content'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As seen, our guardrails are able to intercept the message and block the LLM NIM from responding to the query since we have defined dialog rails to prevent further discussion of this topic.\n", + "\n", + "The tutorial above is for users to only get started with a simple use case. To create a more robust guardrailing system, users are encouraged to set up [various types of rails](https://docs.nvidia.com/nemo/guardrails/user_guides/guardrails-library.html) allowing for further customization of their use cases." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Conclusion" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this post, we detailed the steps for integrating NVIDIA NIMs with NeMo Guardrails. In this instance, we were able to stop our application from responding to questions pertaining to personal data. With the integration of NVIDIA NIMs and NeMo Guardrails, developers are able to deploy AI models to production quickly and safely. 
" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/agentic_rag_with_nemo_retriever_nims.ipynb b/RAG/notebooks/langchain/agentic_rag_with_nemo_retriever_nim.ipynb similarity index 93% rename from notebooks/agentic_rag_with_nemo_retriever_nims.ipynb rename to RAG/notebooks/langchain/agentic_rag_with_nemo_retriever_nim.ipynb index a1c65c6c..21a6dbbd 100644 --- a/notebooks/agentic_rag_with_nemo_retriever_nims.ipynb +++ b/RAG/notebooks/langchain/agentic_rag_with_nemo_retriever_nim.ipynb @@ -6,7 +6,7 @@ "id": "919fe33c-0149-4f7d-b200-544a18986c9a", "metadata": {}, "source": [ - "# Agentic RAG pipeline with Nemo Retriever and LLM NIMs \n", + "# Agentic RAG pipeline with Nemo Retriever and NIM for LLMs\n", "\n", "## Overview\n", "\n", @@ -14,9 +14,9 @@ "\n", "Various retrieval strategies have been proposed that can improve the recall of documents for generation. There is no one-size-fits-all all. The strategy (for example: chunk size, number of documents returned, semantic search vs graph retrieval, etc.) depends on your data. Although the retrieval strategies might differ, an agentic framework designed on top of your retrieval system that does reasoning, decision-making, and reflection on your retrieved data is becoming more common in modern RAG systems. An agent can be described as a system that can use an LLM to reason through a problem, create a plan to solve the problem, and execute the plan with the help of a set of tools. For example, LLMs are notoriously bad at solving math problems, giving an LLM a calculator “tool” that it can use to perform mathematical tasks while it reasons through a larger problem of calculating YoY increase of a company’s revenue can be described as an agentic workflow. \n", "\n", - "As generative AI systems start transitioning towards entities capable of performing \"agentic\" tasks, we need robust models that have been trained on the ability to break down tasks, act as central planners, and have multi-step reasoning capabilities with model and system-level safety checks. With the Llama 3.1 family, Meta is launching a suite of LLMs spanning 8B, 70B, and 405B parameters with these tool-calling capabilities for agentic workloads. NVIDIA has partnered with Meta to make sure the latest Llama models can be deployed optimally through NVIDIA NIMs.\n", + "As generative AI systems start transitioning towards entities capable of performing \"agentic\" tasks, we need robust models that have been trained on the ability to break down tasks, act as central planners, and have multi-step reasoning capabilities with model and system-level safety checks. With the Llama 3.1 family, Meta is launching a suite of LLMs spanning 8B, 70B, and 405B parameters with these tool-calling capabilities for agentic workloads. NVIDIA has partnered with Meta to make sure the latest Llama models can be deployed optimally through NVIDIA NIM microservices.\n", "\n", - "Further, with the general availability of the NVIDIA NeMo Retriever collection of NIM microservices, enterprises have access to scalable software to customize their data-dependent RAG pipelines. 
The NeMo Retriever NIMs can be easily plugged into existing RAG pipelines and interfaces with open source LLM frameworks like LangChain or LlamaIndex, so you can easily integrate retriever models into generative AI applications.\n" + "Further, with the general availability of the NVIDIA NeMo Retriever collection of NIM microservices, enterprises have access to scalable software to customize their data-dependent RAG pipelines. The NeMo Retriever NIM can be easily plugged into existing RAG pipelines and interfaces with open source LLM frameworks like LangChain or LlamaIndex, so you can easily integrate retriever models into generative AI applications.\n" ] }, { @@ -48,21 +48,21 @@ "id": "385d9099-2737-4e51-88ad-87f701fd89d4", "metadata": {}, "source": [ - "### NeMo Retriever NIMs\n", + "### NeMo Retriever NIM\n", "\n", "NeMo Retriever microservices can be used for embedding and reranking. These microservices can be deployed within the enterprise locally, and are packaged together with NVIDIA Triton Inference Server and NVIDIA TensorRT for optimized inference of text for embedding and reranking. Additional enterprise benefits include:\n", "\n", - "**Scalable deployment**: Whether you're catering to a few users or millions, NeMo Retriever embedding and reranking NIMs can be scaled seamlessly to meet your demands.\n", + "**Scalable deployment**: Whether you're catering to a few users or millions, NeMo Retriever embedding and reranking microservices can be scaled seamlessly to meet your demands.\n", "\n", - "**Flexible integration**: Easily incorporate NeMo Retriever embedding and reranking NIMs into existing workflows and applications, thanks to the OpenAI-compliant API endpoints–and deploy anywhere your data resides.\n", + "**Flexible integration**: Easily incorporate NeMo Retriever embedding and reranking microservices into existing workflows and applications, thanks to the OpenAI-compliant API endpoints–and deploy anywhere your data resides.\n", "\n", - "**Secure processing**: Your data privacy is paramount. NeMo Retriever embedding and reranking NIMs ensure that all inferences are processed securely, with rigorous data.\n", + "**Secure processing**: Your data privacy is paramount. NeMo Retriever embedding and reranking microservices ensure that all inferences are processed securely, with rigorous data.\n", "\n", "NeMo Retriever embedding and reranking NIM microservices are available today. Developers can download and deploy docker containers locally.\n", "\n", "#### Access the Llama 3.1 405B model\n", "\n", - "The new Llama 3.1 set of models can be seen as the first big push of open-source models towards serious agentic capabilities. These models can now become part of a larger automation system, with LLMs doing the planning and picking the right tools to solve a larger problem. Since NVIDIA Llama 3.1 NIMs have the necessary support for OpenAI style tool calling, libraries like LangChain can now be used with NIMs to bind LLMs to Pydantic classes and fill in objects/dictionaries. This combination makes it easier for developers to get structured outputs from NIM LLMs without having to resort to regex parsing. You can access Llama 3.1 405B at ai.nvidia.com. Follow these instructions to generate the API key\n" + "The new Llama 3.1 set of models can be seen as the first big push of open-source models towards serious agentic capabilities. These models can now become part of a larger automation system, with LLMs doing the planning and picking the right tools to solve a larger problem. 
Since NVIDIA Llama 3.1 NIM has the necessary support for OpenAI style tool calling, libraries like LangChain can now be used with NIM microservices to bind LLMs to Pydantic classes and fill in objects/dictionaries. This combination makes it easier for developers to get structured outputs from NIM LLMs without having to resort to regex parsing. You can access Llama 3.1 405B at ai.nvidia.com. Follow these instructions to generate the API key\n" ] }, { @@ -169,7 +169,7 @@ "### Step-2: Initialize the Embedding, Reranking and LLM connectors\n", "\n", "#### Embedding and Reranking NIM\n", - "Use the NVIDIA OSS connectors to langchain to initialize the embedding, reranking and LLM models, after setting up the embedding and reranking NIMs locally using instructions here and here. point the ```base_url``` below to the ip address for your local machine. \n", + "Use the NVIDIA OSS LangChain connectors to initialize the embedding, reranking, and LLM models after setting up the embedding and reranking microservices locally using the instructions here and here. Point the ```base_url``` below to the IP address of your local machine.\n", "\n", "#### Llama 3.1 405B LLM\n", "The latest Llama 3.1 405B model is hosted on ai.nvidia.com. Use the instruction here to obtain the API Key for access " @@ -254,9 +254,9 @@ "source": [ "#### Step-4: Query decompostion with structured generation\n", "\n", - "The new Llama 3.1 set of models can be seen as the first big push of open-source models towards serious agentic capabilities. These models can now become part of a larger automation system, with LLMs doing the planning and picking the right tools to solve a larger problem. Since NVIDIA Llama 3.1 NIMs have the necessary support for OpenAI style tool calling, libraries like LangChain can now be used with NIMs to bind LLMs to Pydantic classes and fill in objects/dictionaries. This combination makes it easier for developers to get structured outputs from NIM LLMs without having to resort to regex parsing. \n", + "The new Llama 3.1 set of models can be seen as the first big push of open-source models towards serious agentic capabilities. These models can now become part of a larger automation system, with LLMs doing the planning and picking the right tools to solve a larger problem. Because the NVIDIA Llama 3.1 NIM has the necessary support for OpenAI style tool calling, libraries like LangChain can now be used with NIM microservices to bind LLMs to Pydantic classes and fill in objects/dictionaries. This combination makes it easier for developers to get structured outputs from NIM LLMs without having to resort to regex parsing. 
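To make the structured-generation idea above concrete, here is a minimal sketch of binding a Pydantic schema to a Llama 3.1 NIM endpoint through LangChain's `with_structured_output`; the model name, schema, and example query are illustrative assumptions and not part of the original notebook.

```python
# Hypothetical sketch (not from the original notebook): query decomposition via tool calling.
# Assumes langchain-nvidia-ai-endpoints is installed and NVIDIA_API_KEY is set in the environment.
from pydantic import BaseModel, Field
from langchain_nvidia_ai_endpoints import ChatNVIDIA

class SubQueries(BaseModel):
    """A complex question broken into independent, self-contained sub-queries."""
    sub_queries: list[str] = Field(description="Sub-questions to retrieve documents for")

llm = ChatNVIDIA(model="meta/llama-3.1-405b-instruct", temperature=0)
decomposer = llm.with_structured_output(SubQueries)

result = decomposer.invoke(
    "How did company A's revenue growth compare to company B's over the last two years?"
)
print(result.sub_queries)  # one self-contained sub-question per entity and period
```

Because the schema is enforced through tool calling, the sub-queries come back as a typed list rather than free text that would need regex parsing.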
\n", "\n", - "Here we user Llama 3.1 NIMs tool calling capability to split the initial query intp sub-queries" + "Here we use Llama 3.1 NIM tool calling capability to split the initial query intp sub-queries" ] }, { diff --git a/notebooks/toy_data/Sweden.txt b/RAG/notebooks/langchain/data/Sweden.txt similarity index 100% rename from notebooks/toy_data/Sweden.txt rename to RAG/notebooks/langchain/data/Sweden.txt diff --git a/RAG/notebooks/langchain/data/imgs/HumanInTheLoopLangGraph.png b/RAG/notebooks/langchain/data/imgs/HumanInTheLoopLangGraph.png new file mode 100644 index 00000000..b54eaa18 Binary files /dev/null and b/RAG/notebooks/langchain/data/imgs/HumanInTheLoopLangGraph.png differ diff --git a/notebooks/imgs/chrome_flags_fix_media_device_access_error.png b/RAG/notebooks/langchain/data/imgs/chrome_flags_fix_media_device_access_error.png similarity index 100% rename from notebooks/imgs/chrome_flags_fix_media_device_access_error.png rename to RAG/notebooks/langchain/data/imgs/chrome_flags_fix_media_device_access_error.png diff --git a/notebooks/imgs/data_connection_langchain.jpeg b/RAG/notebooks/langchain/data/imgs/data_connection_langchain.jpeg similarity index 100% rename from notebooks/imgs/data_connection_langchain.jpeg rename to RAG/notebooks/langchain/data/imgs/data_connection_langchain.jpeg diff --git a/RAG/notebooks/langchain/data/imgs/finish_social_post.png b/RAG/notebooks/langchain/data/imgs/finish_social_post.png new file mode 100644 index 00000000..26a7f172 Binary files /dev/null and b/RAG/notebooks/langchain/data/imgs/finish_social_post.png differ diff --git a/notebooks/imgs/grace_answer.png b/RAG/notebooks/langchain/data/imgs/grace_answer.png similarity index 100% rename from notebooks/imgs/grace_answer.png rename to RAG/notebooks/langchain/data/imgs/grace_answer.png diff --git a/notebooks/imgs/grace_answer_with_riva.png b/RAG/notebooks/langchain/data/imgs/grace_answer_with_riva.png similarity index 100% rename from notebooks/imgs/grace_answer_with_riva.png rename to RAG/notebooks/langchain/data/imgs/grace_answer_with_riva.png diff --git a/notebooks/imgs/grace_noanswer.png b/RAG/notebooks/langchain/data/imgs/grace_noanswer.png similarity index 100% rename from notebooks/imgs/grace_noanswer.png rename to RAG/notebooks/langchain/data/imgs/grace_noanswer.png diff --git a/notebooks/imgs/grace_noanswer_with_riva.png b/RAG/notebooks/langchain/data/imgs/grace_noanswer_with_riva.png similarity index 100% rename from notebooks/imgs/grace_noanswer_with_riva.png rename to RAG/notebooks/langchain/data/imgs/grace_noanswer_with_riva.png diff --git a/notebooks/imgs/inference_runtime.png b/RAG/notebooks/langchain/data/imgs/inference_runtime.png similarity index 100% rename from notebooks/imgs/inference_runtime.png rename to RAG/notebooks/langchain/data/imgs/inference_runtime.png diff --git a/notebooks/imgs/llama_hub.png b/RAG/notebooks/langchain/data/imgs/llama_hub.png similarity index 100% rename from notebooks/imgs/llama_hub.png rename to RAG/notebooks/langchain/data/imgs/llama_hub.png diff --git a/notebooks/imgs/media_device_access_error.png b/RAG/notebooks/langchain/data/imgs/media_device_access_error.png similarity index 100% rename from notebooks/imgs/media_device_access_error.png rename to RAG/notebooks/langchain/data/imgs/media_device_access_error.png diff --git a/notebooks/imgs/nvidianews.png b/RAG/notebooks/langchain/data/imgs/nvidianews.png similarity index 100% rename from notebooks/imgs/nvidianews.png rename to RAG/notebooks/langchain/data/imgs/nvidianews.png diff --git 
a/notebooks/imgs/preprocessing.png b/RAG/notebooks/langchain/data/imgs/preprocessing.png similarity index 100% rename from notebooks/imgs/preprocessing.png rename to RAG/notebooks/langchain/data/imgs/preprocessing.png diff --git a/notebooks/imgs/vector_stores.jpeg b/RAG/notebooks/langchain/data/imgs/vector_stores.jpeg similarity index 100% rename from notebooks/imgs/vector_stores.jpeg rename to RAG/notebooks/langchain/data/imgs/vector_stores.jpeg diff --git a/notebooks/imgs/visual_reasoning.png b/RAG/notebooks/langchain/data/imgs/visual_reasoning.png similarity index 100% rename from notebooks/imgs/visual_reasoning.png rename to RAG/notebooks/langchain/data/imgs/visual_reasoning.png diff --git a/RAG/notebooks/langchain/langchain_basic_RAG.ipynb b/RAG/notebooks/langchain/langchain_basic_RAG.ipynb new file mode 100644 index 00000000..5384879c --- /dev/null +++ b/RAG/notebooks/langchain/langchain_basic_RAG.ipynb @@ -0,0 +1,519 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "c5fb0b9e-f9cd-404f-bd8d-0273e94ac1fe", + "metadata": {}, + "source": [ + "# RAG Example Using NVIDIA API Catalog and LangChain" + ] + }, + { + "cell_type": "markdown", + "id": "2969cdab-82fc-4ce5-bde1-b4f629691f27", + "metadata": {}, + "source": [ + "This notebook introduces how to use LangChain to interact with NVIDIA hosted NIM microservices like chat, embedding, and reranking models to build a simple retrieval-augmented generation (RAG) application." + ] + }, + { + "cell_type": "markdown", + "id": "e4253bd0-4313-4056-95f5-899a180879c2", + "metadata": {}, + "source": [ + "## Terminology" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "5a084a00-b65d-483a-a7c6-b4c12e4272dd", + "metadata": {}, + "source": [ + "#### RAG\n", + "\n", + "- RAG is a technique for augmenting LLM knowledge with additional data.\n", + "- LLMs can reason about wide-ranging topics, but their knowledge is limited to the public data up to a specific point in time that they were trained on.\n", + "- If you want to build AI applications that can reason about private data or data introduced after a model's cutoff date, you need to augment the knowledge of the model with the specific information it needs.\n", + "- The process of bringing the appropriate information and inserting it into the model prompt is known as retrieval augmented generation (RAG).\n", + "\n", + "The preceding summary of RAG originates in the LangChain v0.2 tutorial [Build a RAG App](https://python.langchain.com/v0.2/docs/tutorials/rag/) tutorial in the LangChain v0.2 documentation.\n", + "\n", + "#### NIM\n", + "\n", + "- [NIM microservices](https://developer.nvidia.com/blog/nvidia-nim-offers-optimized-inference-microservices-for-deploying-ai-models-at-scale/) are containerized microservices that simplify the deployment of generative AI models like LLMs and are optimized to run on NVIDIA GPUs. 
\n", + "- NIM microservices support models across domains like chat, embedding, reranking, and more from both the community and NVIDIA.\n", + "\n", + "#### NVIDIA API Catalog\n", + "\n", + "- [NVIDIA API Catalog](https://build.nvidia.com/explore/discover) is a hosted platform for accessing a wide range of microservices online.\n", + "- You can test models on the catalog and then export them with an NVIDIA AI Enterprise license for on-premises or cloud deployment\n", + "\n", + "#### langchain-nvidia-ai-endpoints\n", + "\n", + "- The [`langchain-nvidia-ai-endpoints`](https://pypi.org/project/langchain-nvidia-ai-endpoints/) Python package contains LangChain integrations for building applications that communicate with NVIDIA NIM microservices." + ] + }, + { + "cell_type": "markdown", + "id": "ca300278-5ff4-47c4-ab70-c6584ef73c9f", + "metadata": {}, + "source": [ + "## Installation and Requirements\n", + "\n", + "Create a Python environment (preferably with Conda) using Python version 3.10.14. \n", + "To install Jupyter Lab, refer to the [installation](https://jupyter.org/install) page." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5de8e53a-7940-4e72-87f3-e1c014128806", + "metadata": {}, + "outputs": [], + "source": [ + "# Requirements\n", + "!pip install langchain==0.2.5\n", + "!pip install langchain_community==0.2.5\n", + "!pip install faiss-cpu==1.8.0 # replace with faiss-gpu if you are using GPU\n", + "!pip install langchain-nvidia-ai-endpoints==0.1.2" + ] + }, + { + "cell_type": "markdown", + "id": "1b7a52a0-7e5e-4064-9665-cb947d600f84", + "metadata": {}, + "source": [ + "## Getting Started!" + ] + }, + { + "cell_type": "markdown", + "id": "04495732-c2db-4c97-91d0-96708814334d", + "metadata": {}, + "source": [ + "To get started you need an `NVIDIA_API_KEY` to use the NVIDIA API Catalog:\n", + "\n", + "1) Create a free account with [NVIDIA](https://build.nvidia.com/explore/discover).\n", + "2) Click on your model of choice.\n", + "3) Under Input select the Python tab, and click **Get API Key** and then click **Generate Key**.\n", + "4) Copy and save the generated key as NVIDIA_API_KEY. From there, you should have access to the endpoints." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "bbb51115-79f8-48c3-b3ee-d434916945f6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Enter your NVIDIA API key: ········\n" + ] + } + ], + "source": [ + "import getpass\n", + "import os\n", + "\n", + "if not os.environ.get(\"NVIDIA_API_KEY\", \"\").startswith(\"nvapi-\"):\n", + " nvidia_api_key = getpass.getpass(\"Enter your NVIDIA API key: \")\n", + " assert nvidia_api_key.startswith(\"nvapi-\"), f\"{nvidia_api_key[:5]}... is not a valid key\"\n", + " os.environ[\"NVIDIA_API_KEY\"] = nvidia_api_key" + ] + }, + { + "cell_type": "markdown", + "id": "25656ab5-0046-4e27-be65-b3d3d547b4c6", + "metadata": {}, + "source": [ + "## RAG Example using LLM & Embedding" + ] + }, + { + "cell_type": "markdown", + "id": "54e86bc0-e9c5-4a2b-be0e-7fca0331e886", + "metadata": {}, + "source": [ + "### 1) Initialize the LLM\n", + "\n", + "The ChatNVIDIA class is part of LangChain's integration (langchain_nvidia_ai_endpoints) with NVIDIA NIM microservices. 
\n", + "It allows access to NVIDIA NIM for chat applications, connecting to hosted or locally-deployed microservices.\n", + "\n", + "Here we will use **mixtral-8x7b-instruct-v0.1** " + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "88c2fafe-5ded-4238-82de-f094232bf6fb", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_nvidia_ai_endpoints import ChatNVIDIA\n", + "\n", + "llm = ChatNVIDIA(model=\"mistralai/mixtral-8x7b-instruct-v0.1\", max_tokens=1024)\n", + "\n", + "# Here we are using mixtral-8x7b-instruct-v0.1 model\n", + "# But you are free to choose any model hosted at Nvidia API Catalog\n", + "# Uncomment the below code to list the availabe models\n", + "# ChatNVIDIA.get_available_models()" + ] + }, + { + "cell_type": "markdown", + "id": "35cc87a6-2f83-4652-95f1-cf349db8bad6", + "metadata": {}, + "source": [ + "### 2) Intiatlize the embedding\n", + "NVIDIAEmbeddings is a client to NVIDIA embeddings models that provides access to a NVIDIA NIM for embedding. It can connect to a hosted NIM or a local NIM using a base URL\n", + "\n", + "We selected **NV-Embed-QA** as the embedding" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "d88f7838-b9f9-4fc5-8779-84df6cb26017", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings\n", + "\n", + "embedder = NVIDIAEmbeddings(model=\"NV-Embed-QA\", truncate=\"END\")" + ] + }, + { + "cell_type": "markdown", + "id": "b9862f2e-5055-4fe4-818d-708091243d74", + "metadata": {}, + "source": [ + "### 3) Obtain some toy text dataset\n", + "Here we are loading a toy data from a text documents and in real-time data can be loaded from various sources. \n", + "Read [here](https://python.langchain.com/v0.2/docs/tutorials/rag/#go-deeper) for loading data from different sources" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "9c615b9c-527e-4e3b-86b7-49ef258e2d57", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from pathlib import Path\n", + "\n", + "# For this example we load a toy data set (it's a simple text file with some information about Sweden)\n", + "TOY_DATA_PATH = \"./data/\"\n", + "# We read in the text data and prepare them into vectorstore\n", + "ps = os.listdir(TOY_DATA_PATH)\n", + "data = []\n", + "sources = []\n", + "for p in ps:\n", + " if p.endswith('.txt'):\n", + " path2file=TOY_DATA_PATH+p\n", + " with open(path2file,encoding=\"utf-8\") as f:\n", + " lines=f.readlines()\n", + " for line in lines:\n", + " if len(line)>=1:\n", + " data.append(line)\n", + " sources.append(path2file)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "d0a1447d-444a-4ae9-9484-4546424c047d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(400,\n", + " 230,\n", + " 'Sweden, formally the Kingdom of Sweden, is a Nordic country located on the Scandinavian Peninsula in Northern Europe. It borders Norway to the west and north, Finland to the east, and is connected to Denmark in the southwest by a bridge–tunnel across the Öresund. At 447,425 square kilometres (172,752 sq mi), Sweden is the largest Nordic country, the third-largest country in the European Union, and the fifth-largest country in Europe. The capital and largest city is Stockholm. 
Sweden has a total population of 10.5 million, and a low population density of 25.5 inhabitants per square kilometre (66/sq mi), with around 87% of Swedes residing in urban areas, which cover 1.5% of the entire land area, in the central and southern half of the country.\\n')" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Do some basic cleaning and remove empty lines\n", + "documents=[d for d in data if d != '\\n']\n", + "len(data), len(documents), data[0]" + ] + }, + { + "cell_type": "markdown", + "id": "851b16b3-43ac-4269-9f37-05a33efe24fb", + "metadata": {}, + "source": [ + "### 4) Process the documents into vectorstore and save it to disk\n", + "\n", + "Real-world documents can be very long, which makes them hard to fit in the context window of many models. Even models that can fit the full document in their context window can struggle to find information in very long inputs.\n", + "\n", + "To handle this we’ll split the document into chunks for embedding and vector storage. More on text splitting [here](https://python.langchain.com/v0.2/docs/concepts/#text-splitters)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "804c85f6-181b-4291-a685-d6b378015544", + "metadata": {}, + "outputs": [], + "source": [ + "# Here we create a FAISS vector store from the documents and save it to disk.\n", + "from langchain_community.vectorstores import FAISS\n", + "from langchain_core.output_parsers import StrOutputParser\n", + "from langchain_core.prompts import ChatPromptTemplate\n", + "from langchain_core.runnables import RunnablePassthrough\n", + "from langchain.text_splitter import CharacterTextSplitter\n", + "\n", + "text_splitter = CharacterTextSplitter(chunk_size=400, separator=\" \", chunk_overlap=80)\n", + "docs = []\n", + "metadatas = []\n", + "\n", + "for i, d in enumerate(documents):\n", + " splits = text_splitter.split_text(d)\n", + " docs.extend(splits)\n", + " metadatas.extend([{\"source\": sources[i]}] * len(splits))" + ] + }, + { + "cell_type": "markdown", + "id": "f867df18-11c8-45ea-b81c-1603459431f9", + "metadata": {}, + "source": [ + "To enable runtime search, we index the text chunks by embedding each document split and storing these embeddings in a vector database. Later, at search time, we embed the query and perform a similarity search to find the stored splits with embeddings most similar to the query."
+ ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "2d7b2fbd-8cb1-4d68-9659-2426b9ecffe3", + "metadata": {}, + "outputs": [], + "source": [ + "# You only need to do this once; later on we will restore the already saved vectorstore\n", + "store = FAISS.from_texts(docs, embedder, metadatas=metadatas)\n", + "VECTOR_STORE = './data/nv_embedding'\n", + "store.save_local(VECTOR_STORE)" + ] + }, + { + "cell_type": "markdown", + "id": "3fe85dad-12bb-47d2-a407-9b89b5270d4e", + "metadata": {}, + "source": [ + "### 5) Read the previously processed & saved vector store back" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "5de3e07d-5fbe-4fe7-8f23-ed0b082f2413", + "metadata": {}, + "outputs": [], + "source": [ + "# Load the FAISS vector store back.\n", + "store = FAISS.load_local(VECTOR_STORE, embedder, allow_dangerous_deserialization=True)" + ] + }, + { + "cell_type": "markdown", + "id": "4a41ff63-6adc-4055-8bc4-e7ecaad0fb4d", + "metadata": {}, + "source": [ + "### 6) Wrap the restored vectorstore into a retriever and ask our question " + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "5aa362c9-48ab-4646-bc29-bc2aca92505d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\" Sweden is the 55th-largest country in the world, the fifth-largest country in Europe, and the largest country in Northern Europe, with a total area of 449,964 km2 (173,732 sq mi). In terms of elevation, the lowest point in Sweden is in the bay of Lake Hammarsjön, near Kristianstad, at -2.41 m (-7.91 ft) below sea level, while the highest point is Kebnekaise, which is 2,111 m (6,926 ft) above sea level.\\n\\nSweden has a Nordic social welfare system that provides universal health care and tertiary education for its citizens. The country has a high standard of living and ranks very highly in various international metrics, including quality of life, health, education, protection of civil liberties, economic competitiveness, income equality, gender equality, and prosperity. Sweden's GDP per capita is the world's 14th highest.\\n\\nHistorically, Sweden has been both a kingdom and an empire. Currently, it is a constitutional monarchy and a parliamentary democracy, with a popularly elected parliament and a monarch who serves a ceremonial role. 
Sweden is a member of the European Union but has opted to remain outside the Eurozone.\"" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "retriever = store.as_retriever()\n", + "\n", + "prompt = ChatPromptTemplate.from_messages(\n", + " [\n", + " (\n", + " \"system\",\n", + " \"Answer solely based on the following context:\\n\\n{context}\\n\",\n", + " ),\n", + " (\"user\", \"{question}\"),\n", + " ]\n", + ")\n", + "\n", + "# LangChain's LCEL (LangChain Expression Language) Runnable protocol is used to define the chain\n", + "# LCEL allows piping together components and functions\n", + "chain = (\n", + " {\"context\": retriever, \"question\": RunnablePassthrough()}\n", + " | prompt\n", + " | llm\n", + " | StrOutputParser()\n", + ")\n", + "\n", + "chain.invoke(\"Tell me about Sweden.\")" + ] + }, + { + "cell_type": "markdown", + "id": "c29478b0-0fb1-4678-93cd-b159dc9884a7", + "metadata": {}, + "source": [ + "## RAG Example with LLM, Embedding & Reranking" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "972c310b-5333-4b41-a6dd-ce83e739e6dc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\" The documents provided do not include information about Gustav's grandson ascending the throne. Gustav had several grandchildren, and the documents do not specify which one you are referring to. Moreover, the documents do not provide enough information about the timeline of Gustav's grandson's ascension to the throne. Therefore, it is not possible to answer this question without additional context.\"" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Let's test a more complex query using the above LLM + embedding chain and see if the reranker can help.\n", + "chain.invoke(\"In which year Gustav's grandson ascended the throne?\")" + ] + }, + { + "cell_type": "markdown", + "id": "9d3854c7-68a3-45b4-9e69-2c4e583d651f", + "metadata": {}, + "source": [ + "### Enhancing accuracy for single data sources\n", + "\n", + "This example demonstrates how a re-ranking model can be used to combine retrieval results and improve accuracy during document retrieval.\n", + "\n", + "Typically, reranking is a critical piece of high-accuracy, efficient retrieval pipelines. Generally, there are two important use cases:\n", + "\n", + "- Combining results from multiple data sources\n", + "- Enhancing accuracy for single data sources\n", + "\n", + "Here, we focus on demonstrating only the second use case. 
If you want to know more, check [here](https://github.com/langchain-ai/langchain-nvidia/blob/main/libs/ai-endpoints/docs/retrievers/nvidia_rerank.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "b7e8677e-a37f-42e2-8fea-4c4413f7d682", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\" Gustav's grandson, Sigismund, ascended the throne in 1592.\"" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from langchain_nvidia_ai_endpoints import NVIDIARerank\n", + "from langchain_core.runnables import RunnableParallel\n", + "\n", + "# We will narrow the collection to 100 results and further narrow it to 10 with the reranker.\n", + "retriever = store.as_retriever(search_kwargs={'k':100}) # typically k will be 1000 for real-world use cases\n", + "ranker = NVIDIARerank(model='nv-rerank-qa-mistral-4b:1', top_n=10)\n", + "\n", + "prompt = ChatPromptTemplate.from_messages(\n", + " [\n", + " (\n", + " \"system\",\n", + " \"Answer solely based on the following context:\\n\\n{context}\\n\",\n", + " ),\n", + " (\"user\", \"{question}\"),\n", + " ]\n", + ")\n", + "\n", + "reranker = lambda input: ranker.compress_documents(query=input['question'], documents=input['context'])\n", + "\n", + "chain_with_ranker = (\n", + " RunnableParallel({\"context\": retriever, \"question\": RunnablePassthrough()})\n", + " | {\"context\": reranker, \"question\": lambda input: input['question']}\n", + " | prompt\n", + " | llm\n", + " | StrOutputParser()\n", + ")\n", + "chain_with_ranker.invoke(\"In which year Gustav's grandson ascended the throne?\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "79d5186f-12c0-47c9-9e85-5987fedf7b97", + "metadata": {}, + "source": [ + "#### Note:\n", + "- In this notebook, we have used NVIDIA NIM microservices from the NVIDIA API Catalog.\n", + "- The above APIs, ChatNVIDIA, NVIDIAEmbeddings, and NVIDIARerank, also support self-hosted NIM microservices.\n", + "- Change the `base_url` to your deployed NIM URL.\n", + "- Example: `llm = ChatNVIDIA(base_url=\"http://localhost:8000/v1\", model=\"meta/llama3-8b-instruct\")`\n", + "- NIM can be hosted locally using Docker, following the [NVIDIA NIM for LLMs](https://docs.nvidia.com/nim/large-language-models/latest/getting-started.html) documentation."
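As a companion to the note above, the snippet below is a small sketch of pointing the embedding and reranking clients at self-hosted NIM endpoints; the URLs, ports, and model names are placeholders rather than values taken from this repository.

```python
# Hypothetical sketch (not from the original notebook): self-hosted embedding and reranking NIM endpoints.
# The base_url values and model names are placeholders; substitute your own deployments.
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings, NVIDIARerank

embedder = NVIDIAEmbeddings(base_url="http://localhost:8001/v1", model="nvidia/nv-embedqa-e5-v5")
ranker = NVIDIARerank(base_url="http://localhost:8002/v1", model="nvidia/nv-rerankqa-mistral-4b-v3")
```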
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "61236a22-922f-403f-89d1-1172251aeb0c", + "metadata": {}, + "outputs": [], + "source": [ + "# Example Code snippet if you want to use a self-hosted NIM\n", + "from langchain_nvidia_ai_endpoints import ChatNVIDIA\n", + "\n", + "# connect to an LLM NIM running at localhost:8000, specifying a specific model\n", + "llm = ChatNVIDIA(base_url=\"http://localhost:8000/v1\", model=\"meta/llama3-8b-instruct\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python (rag_notebooks)", + "language": "python", + "name": "rag_notebooks" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/RAG/notebooks/llamaindex/data/Sweden.txt b/RAG/notebooks/llamaindex/data/Sweden.txt new file mode 100644 index 00000000..2e480f65 --- /dev/null +++ b/RAG/notebooks/llamaindex/data/Sweden.txt @@ -0,0 +1,400 @@ +Sweden, formally the Kingdom of Sweden, is a Nordic country located on the Scandinavian Peninsula in Northern Europe. It borders Norway to the west and north, Finland to the east, and is connected to Denmark in the southwest by a bridge–tunnel across the Öresund. At 447,425 square kilometres (172,752 sq mi), Sweden is the largest Nordic country, the third-largest country in the European Union, and the fifth-largest country in Europe. The capital and largest city is Stockholm. Sweden has a total population of 10.5 million, and a low population density of 25.5 inhabitants per square kilometre (66/sq mi), with around 87% of Swedes residing in urban areas, which cover 1.5% of the entire land area, in the central and southern half of the country. +Nature in Sweden is dominated by forests and many lakes, including some of the largest in Europe. Many long rivers run from the Scandes range through the landscape, primarily emptying into the northern tributaries of the Baltic Sea. It has an extensive coastline and most of the population lives near a major body of water. With the country ranging from 55°N to 69°N, the climate of Sweden is diverse due to the length of the country. The usual conditions are mild for the latitudes with a maritime south, continental centre and subarctic north. Snow cover is variable in the densely populated south, but reliable in higher latitudes. Furthermore, the rain shadow of the Scandes results in quite dry winters and sunny summers in much of the country. +Germanic peoples have inhabited Sweden since prehistoric times, emerging into history as the Geats (Swedish: Götar) and Swedes (Svear) and constituting the sea peoples known as the Norsemen. An independent Swedish state emerged during the early 12th century. After the Black Death in the middle of the 14th century killed about a third of the Scandinavian population, the dominance of the Hanseatic League in Northern Europe threatened Scandinavia economically and politically. This led to the formation of the Scandinavian Kalmar Union in 1397, which Sweden left in 1523. When Sweden became involved in the Thirty Years' War on the Protestant side, an expansion of its territories began, forming the Swedish Empire, which remained one of the great powers of Europe until the early 18th century. 
+Swedish territories outside the Scandinavian Peninsula were gradually lost during the 18th and 19th centuries, ending with the annexation of present-day Finland by Russia in 1809. The last war in which Sweden was directly involved was in 1814 when Norway was militarily forced into a personal union, which peacefully dissolved in 1905. In 2014, Sweden celebrated 200 years of peace, a longer span of peacetime than even Switzerland. Sweden maintained an official policy of neutrality during wartime and non-participation in military alliances during peacetime, although Sweden secretly relied on U.S. nuclear submarines during the Cold War. Sweden has since 2008 joined EU battlegroups, provided intelligence to NATO and since 2009 openly moved towards cooperation with NATO. In 2022, following the Russian invasion of Ukraine, Sweden announced its intent to join NATO. +Sweden is a highly developed country ranked seventh in the Human Development Index, it is a constitutional monarchy and a parliamentary democracy, with legislative power vested in the 349-member unicameral Riksdag. It is a unitary state, currently divided into 21 counties and 290 municipalities. Sweden maintains a Nordic social welfare system that provides universal health care and tertiary education for its citizens. It has the world's 14th highest GDP per capita and ranks very highly in quality of life, health, education, protection of civil liberties, economic competitiveness, income equality, gender equality and prosperity. Sweden joined the European Union on 1 January 1995 but rejected Eurozone membership following a referendum. It is also a member of the United Nations, the Nordic Council, the Council of Europe, the World Trade Organization and the Organisation for Economic Co-operation and Development (OECD). + + +== Etymology == + +The name for Sweden is generally agreed to derive from the Proto-Indo-European root *s(w)e, meaning "one's own", referring to one's own tribe from the tribal period. The native Swedish name, Sverige (a compound of the words Svea and rike, with lenition of the consonant [k], first recorded in the cognate Swēorice in Beowulf), translates as "realm of the Swedes", which excluded the Geats in Götaland. +The contemporary English variation was derived in the 17th-century from Middle Dutch and Middle Low German. As early as 1287, references are found in Middle Dutch referring to a lande van sweden ("land of [the] Swedes"), with swede as the singular form. In Old English the country was known as Swéoland or Swíoríce, and in Early Modern English as Swedeland. Some Finnic languages, such as Finnish and Estonian, use the terms Ruotsi and Rootsi; these variations refer to the Rus' people who inhabited the coastal areas of Roslagen in Uppland and who gave their name to Russia. + + +== History == + + +=== Prehistory === + +Sweden's prehistory begins in the Allerød oscillation, a warm period around 12,000 BC, with Late Palaeolithic reindeer-hunting camps of the Bromme culture at the edge of the ice in what is now the country's southernmost province, Scania. This period was characterised by small clans of hunter-gatherers who relied on flint technology.Sweden and its people were first described by Publius Cornelius Tacitus in his written work Germania (98 AD). In Germania 44 and 45 he mentions the Swedes (Suiones) as a powerful tribe (distinguished not merely for their arms and men, but for their powerful fleets) with ships that had a prow at each end (longships). 
Which kings (*kuningaz) ruled these Suiones is unknown, but Norse mythology presents a long line of legendary and semi-legendary kings going back to the last centuries BC. As for literacy in Sweden itself, the runic script was in use among the south Scandinavian elite by at least the second century AD, but all that has come down to the present from the Roman Period is curt inscriptions on artefacts, mainly of male names, demonstrating that the people of south Scandinavia spoke Proto-Norse at the time, a language ancestral to Swedish and other North Germanic languages.In the sixth century, Jordanes names two tribes living in Scandza, both of which are now considered to be synonymous with the Swedes: the Suetidi and Suehans. Suetidi is considered to be the Latin form of Svíþjóð, the Old Norse name for the Swedes. Jordanes describes the Suetidi and Dani as being of the same stock and the tallest of people. He later mentions other Scandinavian tribes as being of a same stature. The Suehans were known to the Roman world as suppliers of black fox skins and, according to Jordanes, had very fine horses, similar to those of the Thyringi of Germania (alia vero gens ibi moratur Suehans, quae velud Thyringi equis utuntur eximiis). + + +=== Vikings === + +The Swedish Viking Age lasted roughly from the eighth century to the 11th century. It is believed that Swedish Vikings and Gutar mainly travelled east and south, going to Finland, Estonia, the Baltic countries, Russia, Belarus, Ukraine, the Black Sea and even as far as Baghdad. Their routes passed through the Dnieper south to Constantinople, on which they carried out numerous raids. The Byzantine Emperor Theophilos noticed their great skills in war, and invited them to serve as his personal bodyguard, known as the Varangian Guard. The Swedish Vikings, called Rus are believed to be the founding fathers of Kievan Rus'. The Arab traveller Ibn Fadlan described these Vikings saying: + +I have seen the Rus as they came on their merchant journeys and encamped by the Itil. I have never seen more perfect physical specimens, tall as date palms, blond and ruddy; they wear neither tunics nor caftans, but the men wear a garment which covers one side of the body and leaves a hand free. Each man has an axe, a sword, and a knife, and keeps each by him at all times. The swords are broad and grooved, of Frankish sort. + +The actions of these Swedish Vikings are commemorated on many runestones in Sweden, such as the Greece runestones and the Varangian runestones. There was also considerable participation in expeditions westwards, which are commemorated on stones such as the England runestones. The last major Swedish Viking expedition appears to have been the ill-fated expedition of Ingvar the Far-Travelled to Serkland, the region south-east of the Caspian Sea. Its members are commemorated on the Ingvar runestones, none of which mentions any survivor. What happened to the crew is unknown, but it is believed that they died of sickness. + + +=== Kingdom of Sweden === +It is not known when and how the kingdom of Sweden was born, but the list of Swedish monarchs is drawn from the first kings known to have ruled both Svealand (Sweden) and Götaland (Gothia) as one province, beginning with Eric the Victorious. Sweden and Gothia were two separate nations long before that and since antiquity. It is not known how long they existed: the epic poem Beowulf describes semi-legendary Swedish-Geatish wars in the sixth century. 
Götaland in this sense mainly includes the provinces of Östergötland (East Gothia) and Västergötland (West Gothia). The island of Gotland was disputed by other than Swedes, at this time (Danish, Hanseatic, and Gotland-domestic). Småland was at that time of little interest to anyone due to the deep pine forests, and only the city of Kalmar with its castle was of importance. The south-west parts of the Scandinavian peninsula consisted of three Danish provinces (Scania, Blekinge and Halland). North of Halland, Denmark had a direct border to Norway and its province Bohuslän. But there were Swedish settlements along the southern coastline of Norrland. + +During the early stages of the Scandinavian Viking Age, Ystad in the Danish province Scania and Paviken on Gotland were flourishing centres of trade, but they were not parts of the early Swedish Kingdom. Remains of what is believed to have been a large market dating from 600 to 700 CE have been found in Ystad. In Paviken, an important centre of trade in the Baltic region during the ninth and tenth century, remains have been found of a large Viking Age harbour with shipbuilding yards and handicraft industries. Between 800 and 1000, trade brought an abundance of silver to Gotland, and according to some scholars, the Gotlanders of this era hoarded more silver than the rest of the population of Scandinavia combined. + +Saint Ansgar is usually credited with introducing Christianity to Sweden in 829, but the new religion did not begin to fully replace paganism until the 12th century. During the 11th century, Christianity became the prevalent religion, and from 1050 Sweden is counted as a Christian nation. The period between 1100 and 1400 was characterised by internal power struggles and competition among the Nordic kingdoms. In the years 1150–1293 according to the legend of Eric IX and the Eric Chronicles Swedish kings made a first, second and third crusade to pagan Finland against Finns, Tavastians, and Karelians and started conflicts with the Rus' who no longer had any connection with Sweden. The Swedish colonisation of the coastal areas of Finland also started during the 12th and 13th century. In the 14th century, the colonisation began to be more organised, and by the end of the century, several of the coastal areas of Finland were inhabited mostly by Swedes. + +Except for the provinces of Scania, Blekinge and Halland in the south-west of the Scandinavian peninsula, which were parts of the Kingdom of Denmark during this time, feudalism never developed in Sweden as it did in the rest of Europe. As a result, the peasantry remained largely a class of free farmers throughout most of Swedish history. Slavery (also called thralldom) was not common in Sweden, and what slavery there was tended to be driven out of existence by the spread of Christianity, by the difficulty of obtaining slaves from lands east of the Baltic Sea, and by the development of cities before the 16th century. Indeed, both slavery and serfdom were abolished altogether by a decree of King Magnus IV in 1335. Former slaves tended to be absorbed into the peasantry, and some became labourers in the towns. Still, Sweden remained a poor and economically backward country in which barter was the primary means of exchange. 
For instance, the farmers of the province of Dalsland would transport their butter to the mining districts of Sweden and exchange it there for iron, which they would then take to the coast and trade for fish, which they consumed, while the iron would be shipped abroad.In the middle of the 14th century, Sweden was struck by the Black Death. The population of Sweden and most of Europe was decimated. The population (at same territory) did not reach the numbers of the year 1348 again until the beginning of the 19th century. One third of the population died during the period of 1349–1351. During this period, the Swedish cities began to acquire greater rights and were strongly influenced by German merchants of the Hanseatic League, active especially at Visby. In 1319, Sweden and Norway were united under King Magnus Eriksson, and in 1397 Queen Margaret I of Denmark affected the personal union of Sweden, Norway, and Denmark through the Kalmar Union. However, Margaret's successors, whose rule was also centred in Denmark, were unable to control the Swedish nobility. + +Many times the Swedish crown was inherited by child kings over the course of the kingdom's existence; consequently, real power was held for long periods by regents (notably those of the Sture family) chosen by the Swedish parliament. King Christian II of Denmark, who asserted his claim to Sweden by force of arms, ordered a massacre of Swedish nobles in Stockholm in 1520. This came to be known as the "Stockholm blood bath" and stirred the Swedish nobility to new resistance and, on 6 June (now Sweden's national holiday) in 1523, they made Gustav Vasa their king. This is sometimes considered as the foundation of modern Sweden. Shortly afterwards the new king rejected Catholicism and led Sweden into the Protestant Reformation. +The Hanseatic League had been officially formed at Lübeck on the Baltic coast of Northern Germany in 1356. The League sought civil and commercial privileges from the princes and royalty of the countries and cities along the coasts of the Baltic Sea. In exchange, they offered a certain amount of protection to the joining cities. Having their own navy, the Hansa were able to sweep the Baltic Sea free of pirates. The privileges obtained by the Hansa included assurances that only Hansa citizens would be allowed to trade from the ports where they were located. They sought agreement to be free of all customs and taxes. With these concessions, Lübeck merchants flocked to Stockholm, where they soon came to dominate the city's economic life and made the port city of Stockholm into the leading commercial and industrial city of Sweden. Under the Hanseatic trade, two-thirds of Stockholm's imports consisted of textiles, while the remaining third was salt. The main exports from Sweden were iron and copper.However, the Swedes began to resent the monopoly trading position of the Hansa (mostly consisting of German citizens), and to resent the income they felt they lost to the Hansa. Consequently, when Gustav Vasa or Gustav I broke the monopoly power of the Hanseatic League he was regarded as a hero by the Swedish people. History now views Gustav I as the father of the modern Swedish nation. The foundations laid by Gustav would take time to develop. 
Furthermore, when Sweden did develop, freed itself from the Hanseatic League, and entered its golden era, the fact that the peasantry had traditionally been free meant that more of the economic benefits flowed back to them rather than going to a feudal landowning class.The end of the 16th century was marked by a final phase of rivalry between the remaining Catholics and the new Protestant communities. In 1592, Gustav Vasa's Catholic grandson and king of Poland, Sigismund, ascended the Swedish throne. He pursued to strengthen Rome's influence by initiating Counter-Reformation and created a dual monarchy, which temporarily became known as the Polish-Swedish Union. His despotic rule, strongly characterised by intolerance towards the Protestants, sparked a civil war that plunged Sweden into poverty. In opposition, Sigismund's uncle and successor, Charles Vasa, summoned the Uppsala Synod in 1593 which officially confirmed the modern Church of Sweden as Lutheran. Following his deposition in 1599, Sigismund attempted to reclaim the throne at every expense and hostilities between Poland and Sweden continued for the next one hundred years. + + +=== Swedish Empire === + +During the 17th century, Sweden emerged as a European great power. Before the emergence of the Swedish Empire, Sweden was a poor and sparsely populated country on the fringe of European civilisation, with no significant power or reputation. Sweden rose to prominence on a continental scale during the reign of king Gustavus Adolphus, seizing territories from Russia and the Polish–Lithuanian Commonwealth in multiple conflicts, including the Thirty Years' War.During the Thirty Years' War, Sweden conquered approximately half of the Holy Roman states and defeated the Imperial army at the Battle of Breitenfeld in 1631. Gustavus Adolphus planned to become the new Holy Roman Emperor, ruling over a united Scandinavia and the Holy Roman states, but he was killed at the Battle of Lützen in 1632. After the Battle of Nördlingen in 1634, Sweden's only significant military defeat of the war, pro-Swedish sentiment among the German states faded. These German provinces broke away from Swedish power one by one, leaving Sweden with only a few northern German territories: Swedish Pomerania, Bremen-Verden and Wismar. From 1643 to 1645, during the last years of the war, Sweden and Denmark-Norway fought the Torstenson War. The result of that conflict and the conclusion of the Thirty Years' War helped establish postwar Sweden as a major force in Europe. + +In the middle of the 17th century, Sweden was the third-largest country in Europe by land area, surpassed by only Russia and Spain. Sweden reached its largest territorial extent under the rule of Charles X after the treaty of Roskilde in 1658, following Charles X's risky but successful crossing of the Danish Belts. The foundation of Sweden's success during this period is credited to Gustav I's major changes to the Swedish economy in the 16th century, and his introduction of Protestantism. In the 17th century, Sweden was engaged in many wars, for example with Poland–Lithuania, with both sides competing for territories of today's Baltic states, with Sweden suffering a notable defeat at the Battle of Kircholm. One-third of the Finnish population died in the devastating Great Famine of 1695–1697 that struck the country. Famine also hit Sweden, killing roughly 10% of Sweden's population.The Swedes conducted a series of invasions into the Polish–Lithuanian Commonwealth, known as the Deluge. 
After more than half a century of almost constant warfare, the Swedish economy had deteriorated. It became the lifetime task of Charles X's son, Charles XI, to rebuild the economy and refit the army. His legacy to his son, the coming ruler of Sweden, Charles XII, was one of the finest arsenals in the world, a large standing army and a great fleet. Russia, the most serious threat to Sweden at this time, had a larger army but lagged far behind in both equipment and training.After the Battle of Narva in 1700, one of the first battles of the Great Northern War, the Russian army was so severely devastated that Sweden had an open chance to invade Russia. However, Charles XII did not pursue the Russian army, instead turning against Poland and defeating the Polish king, Augustus II the Strong, and his Saxon allies at the Battle of Kliszów in 1702. This gave Russia time to rebuild and modernise its army. + +After the success of invading Poland, Charles XII decided to make an attempt at invading Russia, but this ended in a decisive Russian victory at the Battle of Poltava in 1709. After a long march exposed to Cossack raids, the Russian Tsar Peter the Great's scorched-earth techniques and the extremely cold winter of 1709, the Swedes stood weakened with a shattered morale and were enormously outnumbered against the Russian army at Poltava. The defeat meant the beginning of the end for the Swedish Empire. In addition, the plague raging in East Central Europe devastated the Swedish dominions and reached Central Sweden in 1710. Returning to Sweden in 1715, Charles XII launched two campaigns against Norway on 1716 and 1718, respectively. During the second attempt, he was shot to death during the siege of Fredriksten fortress. The Swedes were not militarily defeated at Fredriksten, but the whole structure and organisation of the campaign fell apart with the king's death, and the army withdrew. +Forced to cede large areas of land in the Treaty of Nystad in 1721, Sweden also lost its place as an empire and as the dominant state on the Baltic Sea. With Sweden's lost influence, Russia emerged as an empire and became one of Europe's dominant nations. As the war finally ended in 1721, Sweden had lost an estimated 200,000 men, 150,000 of those from the area of present-day Sweden and 50,000 from the Finnish part of Sweden.In the 18th century, Sweden did not have enough resources to maintain its territories outside Scandinavia, and most of them were lost, culminating with the loss in 1809 of eastern Sweden to Russia, which became the highly autonomous Grand Principality of Finland in Imperial Russia.In interest of re-establishing Swedish dominance in the Baltic Sea, Sweden allied itself against its traditional ally and benefactor, France, in the Napoleonic Wars. However, in 1810, a French Marshal, Jean-Baptiste Bernadotte, was chosen as heir presumptive to the decrepit Charles XIII; in 1818, he established the House of Bernadotte, taking the regnal name of Charles XIV. Sweden's role in the Battle of Leipzig gave it the authority to force Denmark–Norway, an ally of France, to cede Norway to the King of Sweden on 14 January 1814 in exchange for the northern German provinces, at the Treaty of Kiel. The Norwegian attempts to keep their status as a sovereign state were rejected by the Swedish king, Charles XIII. He launched a military campaign against Norway on 27 July 1814, ending in the Convention of Moss, which forced Norway into a personal union with Sweden under the Swedish crown, which lasted until 1905. 
The 1814 campaign was the last time Sweden was at war. + + +=== Modern history === + +The Swedish East India Company, Ostindiska Kompaniet, began in 1731. The obvious choice of home port was Gothenburg at Sweden's west coast, the mouth of Göta älv river is very wide and has the county's largest and best harbour for high-seas journeys. The trade continued into the 19th century, and caused the little town to become Sweden's second city. +There was a significant population increase during the 18th and 19th centuries, which the writer Esaias Tegnér in 1833 attributed to "the peace, the smallpox vaccine, and the potatoes". Between 1750 and 1850, the population in Sweden doubled. According to some scholars, mass emigration to America became the only way to prevent famine and rebellion; over 1% of the population emigrated annually during the 1880s. Nevertheless, Sweden remained poor, retaining a nearly entirely agricultural economy even as Denmark and Western European countries began to industrialise. + +Many looked towards America for a better life during this time. It is thought that between 1850 and 1910 more than one million Swedes moved to the United States. In the early 20th century, more Swedes lived in Chicago than in Gothenburg (Sweden's second largest city). Most Swedish immigrants moved to the midwestern United States, with a large population in Minnesota, with a few others moving to other parts of the United States and Canada. +Despite the slow rate of industrialisation into the 19th century, many important changes were taking place in the agrarian economy due to constant innovations and a rapid population growth. These innovations included government-sponsored programmes of enclosure, aggressive exploitation of agricultural lands, and the introduction of new crops such as the potato. Because the Swedish peasantry had never been enserfed as elsewhere in Europe, the Swedish farming culture began to take on a critical role in Swedish politics, which has continued through modern times with modern Agrarian party (now called the Centre Party). Between 1870 and 1914, Sweden began developing the industrialised economy that exists today.Strong grassroots movements sprang up in Sweden during the latter half of the 19th century (trade unions, temperance groups, and independent religious groups), creating a strong foundation of democratic principles. In 1889 The Swedish Social Democratic Party was founded. These movements precipitated Sweden's migration into a modern parliamentary democracy, achieved by the time of World War I. As the Industrial Revolution progressed during the 20th century, people gradually moved into cities to work in factories and became involved in socialist unions. A communist revolution was avoided in 1917, following the re-introduction of parliamentarism, and the country was democratised. + + +=== World War I and World War II === + +Sweden was officially neutral during World War I. However, under pressure from the German Empire, they did take steps which were detrimental to the Allied powers. Most notably, mining the Øresund channel, thus closing it to Allied shipping, and allowing the Germans to use Swedish facilities and the Swedish cipher to transmit secret messages to their overseas embassies. Sweden also allowed volunteers to fight for the White Guards alongside the Germans against the Red Guards and Russians in the Finnish Civil War, and briefly occupied Åland in cooperation with the German Empire. 
+ +As in the First World War, Sweden remained officially neutral during World War II, although its neutrality during World War II has been disputed. Sweden was under German influence for much of the war, as ties to the rest of the world were cut off through blockades. The Swedish government felt that it was in no position to openly contest Germany, and therefore made some concessions. Sweden also supplied steel and machined parts to Germany throughout the war. The Swedish government unofficially supported Finland in the Winter War and the Continuation War by allowing volunteers and materiel to be shipped to Finland. However, Sweden supported Norwegian resistance against Germany, and in 1943 helped rescue Danish Jews from deportation to Nazi concentration camps. +During the last year of the war, Sweden began to play a role in humanitarian efforts, and many refugees, among them several thousand Jews from Nazi-occupied Europe, were rescued thanks to the Swedish rescue missions to internment camps and partly because Sweden served as a haven for refugees, primarily from the Nordic countries and the Baltic states. The Swedish diplomat Raoul Wallenberg and his colleagues ensured the safety of tens of thousands of Hungarian Jews. Nevertheless, both Swedes and others have argued that Sweden could have done more to oppose the Nazis' war efforts, even if it meant increasing the risk of occupation. + + +=== Post-war era === + +Sweden was officially a neutral country and remained outside NATO and Warsaw Pact membership during the Cold War, but privately Sweden's leadership had strong ties with the United States and other western governments. Following the war, Sweden took advantage of an intact industrial base, social stability and its natural resources to expand its industry to supply the rebuilding of Europe. Sweden received aid under the Marshall Plan and participated in the OECD. During most of the post-war era, the country was governed by the Swedish Social Democratic Party largely in co-operation with trade unions and industry. The government actively pursued an internationally competitive manufacturing sector of primarily large corporations.Sweden was one of the founding states of the European Free Trade Area (EFTA). During the 1960s the EFTA countries were often referred to as the Outer Seven, as opposed to the Inner Six of the then-European Economic Community (EEC).Sweden, like many industrialised countries, entered a period of economic decline and upheaval following the oil embargoes of 1973–74 and 1978–79. In the 1980s several key Swedish industries were significantly restructured. Shipbuilding was discontinued, wood pulp was integrated into modernised paper production, the steel industry was concentrated and specialised, and mechanical engineering was robotised.Between 1970 and 1990, the overall tax burden rose by over 10%, and the growth was low compared with other countries in Western Europe. Eventually, the government began to spend over half of the country's gross domestic product. Swedish GDP per capita ranking declined during this time. + + +=== Recent history === + +A bursting real estate bubble caused by inadequate controls on lending combined with an international recession and a policy switch from anti-unemployment policies to anti-inflationary policies resulted in a fiscal crisis in the early 1990s. Sweden's GDP declined by around 5%. 
In 1992, a run on the currency caused the central bank to briefly increase interest rates to 500%.The response of the government was to cut spending and institute a multitude of reforms to improve Sweden's competitiveness, among them reducing the welfare state and privatising public services and goods. Much of the political establishment promoted EU membership, and a referendum passed with 52.3% in favour of joining the EU on 13 November 1994. Sweden joined the European Union on 1 January 1995. In a 2003 referendum the Swedish electorate voted against the country joining the Euro currency. In 2006 Sweden got its first majority government for decades as the centre-right Alliance defeated the incumbent Social Democrat government. Following the rapid growth of support for the anti-immigration Sweden Democrats, and their entrance to the Riksdag in 2010, the Alliance became a minority cabinet. +Until recently Sweden remained non-aligned militarily, although it participated in some joint military exercises with NATO and some other countries, in addition to extensive cooperation with other European countries in the area of defence technology and defence industry. However, in 2022, in response to the 2022 Russian invasion of Ukraine, Sweden moved to formally join the NATO alliance. The same year, Sweden applied for NATO membership and was formally invited to join the alliance at the NATO Summit in Madrid. The secretary general of NATO Jens Stoltenberg spoke of a fast-track membership process of just a few weeks, however NATO member Turkey has repeatedly hindered Sweden from joining the alliance, demanding Swedish action against the PKK and for Sweden to extradite alleged Kurdish "terrorists" to Turkey, the situation straining relations between the two countries. Turkey has maintained links with Russia since its invasion of Ukraine in 2022.Swedish export weapons were also used by the American military in Iraq. Sweden has a long history of participating in international military operations, including Afghanistan, where Swedish troops are under NATO command, and in EU-sponsored peacekeeping operations in Kosovo, Bosnia and Herzegovina, and Cyprus. Sweden also participated in enforcing a UN mandated no-fly zone over Libya during the Arab Spring. Sweden held the chair of the European Union from 1 July to 31 December 2009. + +In recent decades Sweden has become a more culturally diverse nation due to significant immigration; in 2013, it was estimated that 15% of the population was foreign-born, and an additional 5% of the population were born to two immigrant parents. The influx of immigrants has brought new social challenges. Violent incidents have periodically occurred including the 2013 Stockholm riots, which broke out following the police shooting of an elderly Portuguese immigrant. In response to these violent events, the anti-immigration opposition party, the Sweden Democrats, promoted their anti-immigration policies, while the left-wing opposition blamed growing inequality caused by the centre-right government's socioeconomic policies.In 2014, Stefan Löfven (Social Democrats) won the General Election and became the new Swedish Prime Minister to succeed Fredrik Reinfeldt of the liberal conservative Moderate Party. The Sweden Democrats held the balance of power and voted the government's budget down in the Riksdag, but due to agreements between the government and the Alliance, the government was able to hang onto power. 
Sweden was heavily affected by the 2015 European migrant crisis, which eventually forced the government to tighten entry regulations, as Sweden received thousands of asylum seekers and migrants, predominantly from Africa and the Middle East, per week in autumn, overwhelming existing structures. Some of the asylum restrictions were relaxed again later. The 2018 general election saw the Red-Greens lose seats to the right-wing Sweden Democrats and to the centre-right parties of the former Alliance. Despite holding only 33% of the seats in the Riksdag, the Social Democrats and the Greens managed to form a minority government, led by Prime Minister Stefan Löfven, in January 2019, relying on supply and confidence from the Centre Party, the Liberals and the Left Party. In August 2021, Prime Minister Stefan Löfven announced his resignation, and finance minister Magdalena Andersson was elected as the new head of Sweden's ruling Social Democrats in November 2021. On 30 November 2021, Magdalena Andersson became Sweden's first female prime minister. She formed a minority government made up of only her Social Democrats. Her plan for forming a new coalition government with the Green Party was unsuccessful because her budget proposal failed to pass. The September 2022 general election ended in a narrow win for a bloc of right-wing parties, leading to the resignation of Magdalena Andersson's government. On 18 October 2022, Ulf Kristersson of the Moderate Party became the new Prime Minister of Sweden. Kristersson's Moderates formed a centre-right coalition with the Christian Democrats and the Liberals. The new government is backed by the largest right-wing party, the Sweden Democrats (SD), led by Jimmie Åkesson, with tougher immigration policies a crucial part of the policy deal with the SD. + + +== Geography == + +Situated in Northern Europe, Sweden lies west of the Baltic Sea and Gulf of Bothnia, providing a long coastline, and forms the eastern part of the Scandinavian Peninsula. To the west is the Scandinavian mountain chain (Skanderna), a range that separates Sweden from Norway. Finland is located to its north-east. It has maritime borders with Denmark, Germany, Poland, Russia, Lithuania, Latvia and Estonia, and it is also linked to Denmark (south-west) by the Öresund Bridge. Its border with Norway (1,619 km long) is the longest uninterrupted border within Europe. +Sweden lies between latitudes 55° and 70° N, and mostly between longitudes 11° and 25° E (part of Stora Drammen island is just west of 11°). + +At 449,964 km2 (173,732 sq mi), Sweden is the 55th-largest country in the world, the fifth-largest country in Europe, and the largest country in Northern Europe. The lowest elevation in Sweden is in the bay of Lake Hammarsjön, near Kristianstad, at −2.41 m (−7.91 ft) below sea level. The highest point is Kebnekaise at 2,111 m (6,926 ft) above sea level. +Sweden has 25 provinces or landskap, based on culture, geography and history. While these provinces serve no political or administrative purpose, they play an important role in people's self-identity. The provinces are usually grouped into three large lands (parts): the northern Norrland, the central Svealand and the southern Götaland. The sparsely populated Norrland encompasses almost 60% of the country. Sweden also has the Vindelfjällen Nature Reserve, one of the largest protected areas in Europe, totalling 562,772 ha (approx. 5,628 km2). +About 15% of Sweden lies north of the Arctic Circle.
Southern Sweden is predominantly agricultural, with increasing forest coverage northward. Around 65% of Sweden's total land area is covered with forests. The highest population density is in the Öresund Region in southern Sweden, along the western coast up to central Bohuslän, and in the valley of Lake Mälaren and Stockholm. Gotland and Öland are Sweden's largest islands; Vänern and Vättern are its largest lakes. Vänern is the third largest in Europe, after Lake Ladoga and Lake Onega in Russia. Combined with the third- and fourth-largest lakes Mälaren and Hjälmaren, these lakes take up a significant part of southern Sweden's area. Sweden's extensive waterway availability throughout the south was exploited with the building of the Göta Canal in the 19th century, shortening the potential distance between the Baltic Sea south of Norrköping and Gothenburg by using the lake and river network to facilitate the canal. Sweden also has many long rivers draining the lakes. Northern and Central Sweden have several wide rivers known as älvar, commonly sourced within the Scandinavian Mountains. The longest river is Klarälven-Göta älv, which originates in Trøndelag in central Norway, running 1,160 kilometres (720 mi) before it enters the sea at Gothenburg. Dalälven and the Torne are the second and third longest rivers in the country. The Torne marks a large part of the border with Finland. In southern Sweden, narrower rivers known as åar are also common. The vast majority of municipal seats are set either on the sea, a river or a lake, and the majority of the country's population live in coastal municipalities. + + +=== Climate === + +Most of Sweden has a temperate climate, despite its northern latitude, with largely four distinct seasons and mild temperatures throughout the year. Winter in the far south is usually mild and manifests itself only in some shorter periods of snow and sub-zero temperatures; autumn may well turn into spring there, without a distinct period of winter. The northern parts of the country have a subarctic climate while the central parts have a humid continental climate. The coastal south can be defined as having either a humid continental climate using the 0 °C isotherm, or an oceanic climate using the −3 °C isotherm. +Due to the increased maritime moderation in the peninsular south, temperature differences between the coastlines of the southernmost and northernmost regions are about 2 °C (4 °F) in summer and 10 °C (18 °F) in winter. The difference grows further when comparing areas in the northern interior, where the winter difference across the country is about 15 °C (27 °F). The warmest summers usually occur in the Mälaren Valley around Stockholm, because the vast landmass shields the middle east coast from Atlantic low-pressure systems in July more than it does the south and west. Daytime highs in Sweden's municipal seats vary from 19 °C (66 °F) to 24 °C (75 °F) in July and −9 °C (16 °F) to 3 °C (37 °F) in January. The colder temperatures are influenced by the higher elevation in the northern interior. At sea level, by contrast, average highs range from 21 °C (70 °F) down to −6 °C (21 °F). As a result of the mild summers, the Arctic region of Norrbotten has some of the northernmost agriculture in the world. Sweden is much warmer and drier than other places at a similar latitude, and even somewhat farther south, mainly because of the combination of the Gulf Stream and the general west wind drift, caused by the direction of planet Earth's rotation.
Sweden has much milder winters than many parts of Russia, Canada, and the northern United States. Because of Sweden's high latitude, the length of daylight varies greatly. North of the Arctic Circle, the sun never sets for part of each summer, and it never rises for part of each winter. In the capital, Stockholm, daylight lasts for more than 18 hours in late June but only around 6 hours in late December. Sweden receives between 1,100 and 1,900 hours of sunshine annually. + +The highest temperature ever recorded in Sweden was 38 °C (100 °F) in Målilla in 1947, while the coldest temperature ever recorded was −52.6 °C (−62.7 °F) in Vuoggatjålme on 2 February 1966. Temperatures expected in Sweden are heavily influenced by the large Fennoscandian landmass, as well as continental Europe and western Russia, which allows hot or cool inland air to be easily transported to Sweden. That, in turn, gives most of Sweden's southern areas warmer summers than almost anywhere in the nearby British Isles, even matching temperatures found along the continental Atlantic coast as far south as northern Spain. In winter, however, the same high-pressure systems sometimes put the entire country far below freezing temperatures. There is some maritime moderation from the Atlantic which renders the Swedish continental climate less severe than that of nearby Russia. +Apart from the ice-free Atlantic bringing marine air into Sweden and tempering winters, the mildness is further explained by prevailing low-pressure systems postponing winter, with the long nights often staying above freezing in the south of the country due to the abundant cloud cover. By the time winter finally breaks through, daylight hours rise quickly, ensuring that daytime temperatures soar quickly in spring. With the greater number of clear nights, frosts remain commonplace quite far south as late as April. +The relative strength of low- and high-pressure systems of marine and continental air also defines the highly variable summers. When hot continental air hits the country, the long days and short nights frequently bring temperatures up to 30 °C (86 °F) or above, even in coastal areas. Nights normally remain cool, especially in inland areas. Coastal areas can see so-called tropical nights, with temperatures above 20 °C (68 °F), due to the moderating sea influence during warmer summers. Summers can be cool, especially in the north of the country. Transitional seasons are normally quite extensive and the four-season climate applies to most of Sweden's territory, except in Scania, where some years do not record a meteorological winter (see table below), or in the high Lapland mountains, where polar microclimates exist. +On average, most of Sweden receives between 500 and 800 mm (20 and 31 in) of precipitation each year, making it considerably drier than the global average. The south-western part of the country receives more precipitation, between 1,000 and 1,200 mm (39 and 47 in), and some mountain areas in the north are estimated to receive up to 2,000 mm (79 in). Despite northerly locations, southern and central Sweden may have almost no snow in some winters. Most of Sweden is located in the rain shadow of the Scandinavian Mountains, which run through Norway and north-west Sweden. The blocking of cool and wet air in summer, as well as the greater landmass, leads to warm and dry summers far north in the country, with quite warm summers at the Bothnian Bay coast at 65 degrees latitude, which is unmatched elsewhere in the world on coastlines at such northerly latitudes.
+It is predicted that as the Barents Sea becomes less frozen in the coming winters, thus becoming "Atlantified", additional evaporation will increase future snowfalls in Sweden and much of continental Europe. + + +=== Vegetation === + +Sweden has a considerable south to north distance (stretching between the latitudes N 55:20:13 and N 69:03:36), which causes large climatic differences, especially during the winter. The length and strength of the four seasons also play a role in which plants can grow naturally in various places. Sweden is divided into five major vegetation zones. These are: + +The southern deciduous forest zone +The southern coniferous forest zone +The northern coniferous forest zone, or the Taiga +The alpine-birch zone +The bare mountain zone. The southern deciduous forest zone, also known as the nemoral region, is part of a larger vegetation zone which also includes Denmark and large parts of Central Europe. It has to a rather large degree been converted to agricultural land, but larger and smaller forests still exist. The region is characterised by a large wealth of trees and shrubs. The beech is the most dominant tree, but oak can also form smaller forests. Elm at one time formed forests, but has been heavily reduced due to Dutch elm disease. Other important trees and shrubs in this zone include hornbeam, elder, hazel, fly honeysuckle, linden (lime), spindle, yew, alder buckthorn, blackthorn, aspen, European rowan, Swedish whitebeam, juniper, European holly, ivy, dogwood, goat willow, larch, bird cherry, wild cherry, maple, ash, alder along creeks, and in sandy soil birch competes with pine. Spruce is not native, but between approximately 1870 and 1980 large areas were planted with it. Planted spruces tend to grow too quickly outside their native range, and the large distances between the tree rings cause poor board quality. Later some spruce trees began to die before reaching optimal height, and many more of the coniferous trees were uprooted during cyclones. During the last 40–50 years large areas of former spruce plantings have been replanted with deciduous forest. The southern coniferous forest zone, also known as the boreo-nemoral region, is delimited by the oak's northern natural limit (limes norrlandicus) and the spruce's southern natural limit, between the southern deciduous zone and the Taiga farther north. In the southern parts of this zone the coniferous species are found, mainly spruce and pine, mixed with various deciduous trees. Birch grows largely everywhere. The beech's northern boundary crosses this zone. This is, however, not the case with oak and ash. Although within its natural range, planted spruce is also common, and such woods are very dense, as the spruces can grow very tightly, especially in the southern parts of this vegetation zone. +The northern coniferous forest zone, or the Taiga, begins north of the natural boundary of the oak. Of deciduous species the birch is the only one of significance. Pine and spruce are dominant, but the forests slowly but surely become more sparsely grown farther towards the north. In the extreme north it is difficult to say that the trees form true forests at all, due to the large distances between them. The alpine-birch zone, in the Scandinavian mountains, depending on both latitude and altitude, is an area where only a smaller kind of birch (Betula pubescens or B. tortuosa) can grow.
Where this vegetation zone ends, no trees grow at all: the bare mountain zone. Sweden had a 2019 Forest Landscape Integrity Index mean score of 5.35/10, ranking it 103rd globally out of 172 countries. + + +== Government and politics == + + +=== Constitutional framework === + +Sweden has four fundamental laws (Swedish: grundlagar) which together form the Constitution: the Instrument of Government (Swedish: Regeringsformen), the Act of Succession (Swedish: Successionsordningen), the Freedom of the Press Act (Swedish: Tryckfrihetsförordningen), and the Fundamental Law on Freedom of Expression (Swedish: Yttrandefrihetsgrundlagen). The public sector in Sweden is divided into two parts: the legal person known as the State (Swedish: staten) and local authorities; the latter include Regional Councils (Swedish: regioner), renamed from county councils (landsting) in 2020, and local Municipalities (Swedish: kommuner). The local authorities, rather than the State, make up the larger part of the public sector in Sweden. Regional Councils and Municipalities are independent of one another; the former merely covers a larger geographical area than the latter. The local authorities have self-rule, as mandated by the Constitution, and their own tax base. Notwithstanding their self-rule, local authorities are nevertheless in practice dependent upon the State, as the parameters of their responsibilities and the extent of their jurisdiction are specified in the Local Government Act (Swedish: Kommunallagen) passed by the Riksdag. Sweden is a constitutional monarchy, and King Carl XVI Gustaf is the head of state, but the role of the monarch is limited to ceremonial and representative functions. Under the provisions of the 1974 Instrument of Government, the King lacks any formal political power. The King opens the annual Riksdag session, chairs the Special Council held during a change of Government, holds regular Information Councils with the Prime Minister and the Government, chairs the meetings of the Advisory Council on Foreign Affairs (Swedish: Utrikesnämnden), and receives Letters of Credence of foreign ambassadors to Sweden and signs those of Swedish ambassadors sent abroad. In addition, the King pays State Visits abroad and receives those incoming as host. Apart from strictly official duties, the King and the other members of the Royal Family undertake a variety of unofficial and other representative duties within Sweden and abroad. Legislative power is vested in the unicameral Riksdag with 349 members. General elections are held every four years, on the second Sunday of September. Legislation may be initiated by the Government or by members of the Riksdag. Members are elected on the basis of proportional representation to a four-year term. The internal workings of the Riksdag are, in addition to the Instrument of Government, regulated by the Riksdag Act (Swedish: Riksdagsordningen). The fundamental laws can be altered by the Riksdag alone, requiring only an absolute majority in two separate votes, separated by a general election in between. + +The Government (Swedish: Regeringen) operates as a collegial body with collective responsibility and consists of the Prime Minister — appointed and dismissed by the Speaker of the Riksdag (following an actual vote in the Riksdag before an appointment can be made) — and other cabinet ministers (Swedish: Statsråd), appointed and dismissed at the sole discretion of the Prime Minister.
The Government is the supreme executive authority and is responsible for its actions to the Riksdag. Most of the State administrative authorities (Swedish: statliga förvaltningsmyndigheter) report to the Government, including (but not limited to) the Armed Forces, the Enforcement Authority, the National Library, the Swedish police and the Tax Agency. A unique feature of Swedish State administration is that individual cabinet ministers do not bear any individual ministerial responsibility for the performance of the agencies within their portfolio; the directors-general and other heads of government agencies report directly to the Government as a whole, and individual ministers are prohibited from interfering in matters that are to be handled by the individual agencies, unless otherwise specifically provided for in law; hence the origin of the pejorative term in Swedish political parlance, ministerstyre (English: "ministerial rule"). +The Judiciary is independent of the Riksdag, Government and other State administrative authorities. The role of judicial review of legislation is not practised by the courts; instead, the Council on Legislation gives non-binding opinions on legality. There is no stare decisis in that courts are not bound by precedent, although it is influential. + + +=== Political parties and elections === + +The Swedish Social Democratic Party has played a leading role in Swedish politics since 1917, after the Reformists had confirmed their strength and the left-wing revolutionaries formed their own party. After 1932, most governments have been dominated by the Social Democrats. Only six general elections since World War II—1976, 1979, 1991, 2006, 2010 and 2022—have given the assembled bloc of centre-right parties enough seats in the Riksdag to form a government. +For over 50 years, Sweden had had five parties that continually received enough votes to gain seats in the Riksdag—the Social Democrats, the Moderate Party, the Centre Party, the Liberal People's Party and the Left Party—before the Green Party became the sixth party in the 1988 election. In the 1991 election, while the Greens lost their seats, two new parties gained seats for the first time: the Christian Democrats and New Democracy. The 1994 election saw the return of the Greens and the demise of New Democracy. It was not until elections in 2010 that an eighth party, the Sweden Democrats, gained Riksdag seats. In the elections to the European Parliament, parties that have failed to pass the Riksdag threshold have managed to gain representation at that venue: the June List (2004–2009), the Pirate Party (2009–2014), and Feminist Initiative (2014–2019). + +In the 2006 general election the Moderate Party formed the centre-right Alliance for Sweden bloc and won a majority of the Riksdag seats. In the 2010 general election the Alliance contended against a unified left bloc consisting of the Social Democrats, the Greens and the Left Party. The Alliance won a plurality of 173 seats, but remained two seats short of a 175-seat majority. Nevertheless, neither the Alliance nor the left bloc chose to form a coalition with the Sweden Democrats. In the 2014 general election the three centre-left parties won more seats than the centre-right Alliance for Sweden, with the two blocs receiving 159 and 141 seats respectively. The non-aligned Sweden Democrats more than doubled their support and won the remaining 49 seats.
On 3 October 2014, Stefan Löfven formed a minority government consisting of the Social Democrats and the Greens. In August 2021, Prime Minister Stefan Löfven announced his resignation, and finance minister Magdalena Andersson was elected as the new head of Sweden's ruling Social Democrats in November 2021. On 30 November 2021, Magdalena Andersson became Sweden's first female prime minister. She formed a minority government made up of only her Social Democrats. Her plan for forming a new coalition government with the Green Party was unsuccessful; the coalition partner left after her budget proposal failed to pass. In the 2022 election, the remnants of the Alliance were able to secure a narrow majority, backed by the surging Sweden Democrats, who became the second-largest party. The election saw Andersson resign from her post, with Moderate leader Ulf Kristersson as the likely replacement. The election also saw the right-wing coalition win dozens of small towns long dominated by the left, while suffering major losses in the big cities. Election turnout in Sweden has always been high by international comparison. Although it declined in recent decades, the latest elections saw an increase in voter turnout (80.11% in 2002, 81.99% in 2006, 84.63% in 2010, 85.81% in 2014 and 87.18% in 2018). Swedish politicians enjoyed a high degree of confidence from the citizens in the 1960s. However, that level of confidence has since declined steadily, and is now at a markedly lower level than in its Scandinavian neighbours. + + +=== Administrative divisions === + +Sweden is a unitary state divided into 21 regions (regioner) and 290 municipalities (kommuner). Every region corresponds to a county (län) with a number of municipalities per county. Regions and municipalities are both local government but have different roles and separate responsibilities. Health care, public transport and certain cultural institutions are administered by regional councils. Preschools, primary and secondary schooling, public water utilities, garbage disposal, elderly care and rescue services are administered by the municipalities. Gotland is a special case of being a region with only one municipality, and the functions of region and municipality are performed by the same organisation. Municipal and region government in Sweden is similar to city commission and cabinet-style council government. Both levels have legislative assemblies (municipal councils and region assemblies) of between 31 and 101 members (always an uneven number) that are elected by party-list proportional representation at general elections held every four years in conjunction with the national parliamentary elections. +Municipalities are also divided into a total of 2,512 parishes (församlingar). These have no official political responsibilities but are traditional subdivisions of the Church of Sweden and still have some importance as districts for census-taking and elections. +The Swedish central government has 21 County Administrative Boards (Swedish: länsstyrelser), which are responsible for regional state administration not assigned to other government agencies or local government. Each county administrative board is led by a County Governor (Swedish: landshövding) appointed for a term of six years. The list of previous officeholders for the counties stretches back, in most cases, to 1634, when the counties were created by Lord High Chancellor Count Axel Oxenstierna.
The main responsibility of the County Administrative Board is to co-ordinate the development of the county in line with goals set by the Riksdag and Government. +There are older historical divisions, primarily the twenty-five provinces and three lands, which still retain cultural significance. + + +=== Political history === + +The actual age of the kingdom of Sweden is unknown. Establishing the age depends mostly on whether Sweden should be considered a nation when the Svear (Sweonas) ruled Svealand or if the emergence of the nation started with the Svear and the Götar (Geats) of Götaland being united under one ruler. In the first case, Svealand was first mentioned as having one single ruler in the year 98 by Tacitus, but it is almost impossible to know for how long it had been this way. However, historians usually start the line of Swedish monarchs from when Svealand and Götaland were ruled under the same king, namely Eric the Victorious (Geat) and his son Olof Skötkonung in the tenth century. These events are often described as the consolidation of Sweden, although substantial areas were conquered and incorporated later. +Earlier kings, for whom no reliable historical sources exist, can be read about in mythical kings of Sweden and semi-legendary kings of Sweden. Many of these kings are only mentioned in various sagas and blend with Norse mythology. +The title Sveriges och Götes Konung was last used for Gustaf I of Sweden, after which the title became "King of Sweden, of the Goths and of the Wends" (Sveriges, Götes och Vendes Konung) in official documentation. Up until the beginning of the 1920s, all laws in Sweden were introduced with the words, "We, the king of Sweden, of the Goths and Wends". This title was used up until 1973. The present King of Sweden, Carl XVI Gustaf, was the first monarch officially proclaimed "King of Sweden" (Sveriges Konung) with no additional peoples mentioned in his title. +The term riksdag was used for the first time in the 1540s, although the first meeting where representatives of different social groups were called to discuss and determine affairs affecting the country as a whole took place as early as 1435, in the town of Arboga. During the Riksdag assemblies of 1527 and 1544, under King Gustav Vasa, representatives of all four estates of the realm (clergy, nobility, townsmen and peasants) were called on to participate for the first time. The monarchy became hereditary in 1544. +Executive power was historically shared between the King and an aristocratic Privy Council until 1680, followed by the King's autocratic rule initiated by the commoner estates of the Riksdag. As a reaction to the failed Great Northern War, a parliamentary system was introduced in 1719, followed by three different flavours of constitutional monarchy in 1772, 1789 and 1809, the latter granting several civil liberties. Already during the first of those three periods, the 'Era of Liberty' (1719–72), the Swedish Riksdag had developed into a very active Parliament, and this tradition continued into the nineteenth century, laying the basis for the transition towards modern democracy at the end of that century. In 1866, Sweden became a constitutional monarchy with a bicameral parliament, with the First Chamber indirectly elected by local governments, and the Second Chamber directly elected in national elections every four years. In 1971 the parliament became unicameral. Legislative power was (symbolically) shared between the King and the Riksdag until 1975.
Swedish taxation is controlled by the Riksdag. + +Sweden has a history of strong political involvement by ordinary people through its "popular movements" (Folkrörelser), the most notable being trade unions, the independent Christian movement, the temperance movement, the women's movement, and the intellectual property pirate movement. Sweden was the first country in the world to outlaw corporal punishment of children by their parents (parents' right to spank their own children was first removed in 1966, and it was explicitly prohibited by law from July 1979). +Sweden is currently leading the EU in statistics measuring equality in the political system and equality in the education system. The Global Gender Gap Report 2006 ranked Sweden as the number one country in terms of gender equality. Some Swedish political figures have become known worldwide, among them Raoul Wallenberg, Folke Bernadotte, the former Secretary-General of the United Nations Dag Hammarskjöld, the former Prime Minister Olof Palme, the former Prime Minister and later Foreign Minister Carl Bildt, the former President of the General Assembly of the United Nations Jan Eliasson, and the former International Atomic Energy Agency Iraq inspector Hans Blix. + + +=== Judicial system === + +The courts are divided into two parallel and separate systems: the general courts (allmänna domstolar) for criminal and civil cases, and the general administrative courts (allmänna förvaltningsdomstolar) for cases relating to disputes between private persons and the authorities. Each of these systems has three tiers, where the top-tier court of the respective system will typically only hear cases that may become precedent. There are also a number of special courts, which will hear a narrower set of cases, as set down by legislation. While independent in their rulings, some of these courts are operated as divisions within the general or general administrative courts. + +The Supreme Court of Sweden (Swedish: Högsta domstolen) is the third and final instance in all civil and criminal cases in Sweden. Before a case can be decided by the Supreme Court, leave to appeal must be obtained, and with few exceptions, leave to appeal can be granted only when the case is of interest as a precedent. The Supreme Court consists of 16 Justices (Swedish: justitieråd), appointed by the Government, but the court as an institution is independent of the Riksdag, and the Government is not able to interfere with the decisions of the court. +According to a victimisation survey of 1,201 residents in 2005, Sweden has above-average crime rates compared to other EU countries. Sweden has high or above-average levels of assaults, sexual assaults, hate crimes, and consumer fraud. Sweden has low levels of burglary, car theft and drug problems. Bribe seeking is rare. A mid-November 2013 news report announced that four prisons in Sweden were closed during the year due to a significant drop in the number of inmates. The decrease in the number of Swedish prisoners was considered "out-of-the-ordinary" by the head of Sweden's prison and probation services, with prison numbers in Sweden falling by around 1% a year since 2004. Prisons were closed in the towns of Åby, Håja, Båtshagen, and Kristianstad. + + +=== Foreign relations === + +Throughout the 20th century, Swedish foreign policy was based on the principle of non-alignment in peacetime and neutrality in wartime.
Sweden's government pursued an independent course of nonalignment in times of peace so that neutrality would be possible in the event of war. Sweden's doctrine of neutrality is often traced back to the 19th century, as the country has not been in a state of war since the end of the Swedish campaign against Norway in 1814. During World War II Sweden joined neither the Allied nor the Axis powers. This has sometimes been disputed since, in select cases, Sweden in effect allowed the Nazi regime to use its railroad system to transport troops and goods, especially iron ore from mines in northern Sweden, which was vital to the German war machine. However, Sweden also indirectly contributed to the defence of Finland in the Winter War, and permitted the training of Norwegian and Danish troops in Sweden after 1943. + +During the early Cold War era, Sweden combined its policy of non-alignment and a low profile in international affairs with a security policy based on strong national defence. The function of the Swedish military was to deter attack. At the same time, the country maintained relatively close informal connections with the Western bloc, especially in the realm of intelligence exchange. In 1952, a Swedish DC-3 was shot down over the Baltic Sea by a Soviet MiG-15 jet fighter. Later investigations revealed that the plane was actually gathering information for NATO. Another plane, a Catalina search and rescue plane, was sent out a few days later and shot down by the Soviets as well. Prime Minister Olof Palme made an official visit to Cuba during the 1970s, during which he denounced Fulgencio Batista's government and praised contemporary Cuban and Cambodian revolutionaries in a speech. +Beginning in the late 1960s, Sweden attempted to play a more significant and independent role in international relations. It involved itself significantly in international peace efforts, especially through the United Nations, and in support of the Third World. +On 27 October 1981, a Whiskey-class submarine (U 137) from the Soviet Union ran aground close to the naval base at Karlskrona in the southern part of the country. Research has never clearly established whether the submarine ended up on the shoals through a navigational mistake or whether it was engaged in espionage against Swedish military capabilities. The incident triggered a diplomatic crisis between Sweden and the Soviet Union. Following the 1986 assassination of Olof Palme and with the end of the Cold War, Sweden has adopted a more traditional foreign policy approach. Nevertheless, the country remains active in peacekeeping missions and maintains a considerable foreign aid budget. +Since 1995 Sweden has been a member of the European Union, and as a consequence of a new world security situation the country's foreign policy doctrine has been partly modified, with Sweden playing a more active role in European security co-operation. In 2022, in response to Russia's invasion of Ukraine, Sweden moved to formally join the NATO alliance. The secretary general of NATO, Jens Stoltenberg, spoke of a fast-track membership process of just a few weeks; however, NATO member Turkey has repeatedly hindered Sweden from joining the alliance, demanding Swedish action against the PKK and for Sweden to extradite alleged Kurdish "terrorists" to Turkey, straining relations between the two countries. Turkey has maintained links with Russia since its invasion of Ukraine in 2022. + + +=== Military === + +The law is enforced in Sweden by several government entities.
The Swedish police is a government agency concerned with police matters. The National Task Force is a national SWAT unit within the police force. The Swedish Security Service's responsibilities are counter-espionage, anti-terrorist activities, protection of the constitution and protection of sensitive objects and people. +The Försvarsmakten (Swedish Armed Forces) is a government agency reporting to the Swedish Ministry of Defence and responsible for the peacetime operation of the armed forces of Sweden. The primary task of the agency is to train and deploy peacekeeping forces abroad, while maintaining the long-term ability to refocus on the defence of Sweden in the event of war. The armed forces are divided into Army, Air Force and Navy. The head of the armed forces is the Supreme Commander (Överbefälhavaren, ÖB), the most senior commissioned officer in the country. Up to 1974, the King was pro forma Commander-in-Chief, but in reality it was clearly understood through the 20th century that the monarch would have no active role as a military leader. + +Until the end of the Cold War, nearly all males reaching the age of military service were conscripted. In recent years, the number of conscripted males has shrunk dramatically, while the number of female volunteers has increased slightly. Recruitment has generally shifted towards finding the most motivated recruits, rather than solely focusing on those otherwise most fit for service. By law, all soldiers serving abroad must be volunteers. In 1975, the total number of conscripts was 45,000. By 2003, it was down to 15,000. +On 1 July 2010, Sweden ended routine conscription, switching to an all-volunteer force unless otherwise required for defence readiness. Emphasis was to be placed on recruiting only those later prepared to volunteer for international service. The total forces gathered would consist of about 60,000 personnel. This compares with the 1980s, before the fall of the Soviet Union, when Sweden could gather up to 1,000,000 servicemembers. +However, on 11 December 2014, due to tensions in the Baltic area, the Swedish Government reintroduced one part of the Swedish conscription system, refresher training. On 2 March 2017, the government decided to reintroduce the remaining part of the Swedish conscription system, basic military training. The first recruits began their training in 2018. As the law is now gender-neutral, both men and women may have to serve. Sweden decided not to sign the UN Treaty on the Prohibition of Nuclear Weapons. Swedish units have taken part in peacekeeping operations in the Democratic Republic of the Congo, Cyprus, Bosnia and Herzegovina, Kosovo, Liberia, Lebanon, Afghanistan and Chad. + + +== Economy == + +Sweden is the twelfth-richest country in the world in terms of GDP (gross domestic product) per capita, and its citizens enjoy a high standard of living. Sweden is an export-oriented mixed economy. Timber, hydropower and iron ore constitute the resource base of an economy with a heavy emphasis on foreign trade. Sweden's engineering sector accounts for 50% of output and exports, while telecommunications, the automotive industry and the pharmaceutical industries are also of great importance. Sweden is the ninth-largest arms exporter in the world. Agriculture accounts for 2% of GDP and employment. The country ranks among the highest for telephone and Internet access penetration. Trade unions, employers' associations and collective agreements cover a large share of the employees in Sweden.
The high coverage of collective agreements is achieved despite the absence of state mechanisms extending collective agreements to whole industries or sectors. Both the prominent role of collective bargaining and the way in which the high rate of coverage is achieved reflect the dominance of self-regulation (regulation by the labour market parties themselves) over state regulation in Swedish industrial relations. When the Swedish Ghent system was changed in 2007, resulting in considerably raised fees to unemployment funds, a substantial decline in union density and in the density of unemployment funds occurred. + +In 2010, Sweden's income Gini coefficient was the third lowest among developed countries, at 0.25—slightly higher than Japan and Denmark—suggesting Sweden had low income inequality. However, Sweden's wealth Gini coefficient at 0.853 was the second highest in developed countries, and above European and North American averages, suggesting high wealth inequality. Even on a disposable income basis, the Gini coefficient of income inequality varies geographically between different regions and municipalities of Sweden. Danderyd, outside Stockholm, has Sweden's highest Gini coefficient of income inequality, at 0.55, while Hofors near Gävle has the lowest at 0.25. In and around Stockholm and Scania, two of the more densely populated regions of Sweden, the income Gini coefficient is between 0.35 and 0.55. In terms of structure, the Swedish economy is characterised by a large, knowledge-intensive and export-oriented manufacturing sector; an increasing, but comparatively small, business service sector; and by international standards, a large public service sector. Large organisations, both in manufacturing and services, dominate the Swedish economy. High and medium-high technology manufacturing accounts for 9.9% of GDP. The 20 largest (by turnover) registered Swedish companies in 2007 were Volvo, Ericsson, Vattenfall, Skanska, Sony Ericsson Mobile Communications AB, Svenska Cellulosa Aktiebolaget, Electrolux, Volvo Personvagnar, TeliaSonera, Sandvik, Scania, ICA, Hennes & Mauritz, IKEA, Nordea, Preem, Atlas Copco, Securitas, Nordstjernan and SKF. The vast majority of Sweden's industry is privately controlled, unlike in many other industrialised Western countries, and, in accordance with a historical standard, publicly owned enterprises are of minor importance. + +An estimated 4.5 million Swedish residents are employed, and around a third of the workforce have completed tertiary education. In terms of GDP per hour worked, Sweden was the world's ninth highest in 2006 at US$31, compared to US$22 in Spain and US$35 in the United States. GDP per hour worked is growing 2.5% per year for the economy as a whole, and the trade-terms-balanced productivity growth is 2%. According to the OECD, deregulation, globalisation, and technology sector growth have been key productivity drivers. Sweden is a world leader in privatised pensions, and pension funding problems are relatively small compared to many other Western European countries. A pilot programme to test the feasibility of a six-hour workday, without loss of pay, was set to commence in 2014, involving the participation of Gothenburg municipal staff. The Swedish government is seeking to reduce its costs through decreased sick leave hours and increased efficiency. + +The typical worker receives 40% of his or her labour costs after the tax wedge. Total tax collected by Sweden as a percentage of its GDP peaked at 52.3% in 1990.
The country faced a real estate and banking crisis in 1990–1991, and consequently passed tax reforms in 1991 to implement tax rate cuts and tax base broadening over time. Since 1990, taxes as a percentage of GDP collected by Sweden have been dropping, with total tax rates for the highest income earners dropping the most. In 2010, 45.8% of the country's GDP was collected as taxes, the second highest among OECD countries, and nearly double the percentage in the US or South Korea. Tax-financed employment represents a third of the Swedish workforce, a substantially higher proportion than in most other countries. Overall, GDP growth has been fast since reforms—especially those in manufacturing—were enacted in the early 1990s. + +Sweden is the fourth-most competitive economy in the world, according to the World Economic Forum in its Global Competitiveness Report 2012–2013. Sweden is the top-performing country in the 2014 Global Green Economy Index (GGEI). Sweden is ranked fourth in the IMD World Competitiveness Yearbook 2013. According to the book The Flight of the Creative Class by the US economist Professor Richard Florida of the University of Toronto, Sweden is ranked as having the best creativity in Europe for business and is predicted to become a talent magnet for the world's most purposeful workers. The book compiled an index to measure the kind of creativity it claims is most useful to business—talent, technology and tolerance. Sweden maintains its own currency, the Swedish krona (SEK), a result of the Swedes having rejected the euro in a referendum. The Swedish Riksbank—founded in 1668 and thus the oldest central bank in the world—is currently focusing on price stability with an inflation target of 2%. According to the Economic Survey of Sweden 2007 by the OECD, the average inflation in Sweden has been one of the lowest among European countries since the mid-1990s, largely because of deregulation and quick utilisation of globalisation. The largest trade flows are with Germany, the United States, Norway, the United Kingdom, Denmark and Finland. +Financial deregulation in the 1980s adversely affected the property market, leading to a bubble and eventually a crash in the early 1990s. Commercial property prices fell by up to two thirds, resulting in two Swedish banks having to be taken over by the government. In the following two decades the property sector strengthened. By 2014, legislators, economists and the IMF were again warning of a bubble, with residential property prices soaring and the level of personal mortgage debt expanding. Household debt-to-income rose above 170% as the IMF was calling on legislators to consider zoning reform and other means of generating a greater supply of housing, as demand was outstripping what was available, pushing prices higher. By August 2014, 40% of home borrowers had interest-only loans, while those that did not were repaying principal at a rate that would take 100 years to repay in full. + + +=== Energy === + +Sweden's energy market is largely privatised. The Nordic energy market is one of the first liberalised energy markets in Europe, and it is traded on NASDAQ OMX Commodities Europe and Nord Pool Spot. In 2006, out of a total electricity production of 139 TWh, electricity from hydropower accounted for 61 TWh (44%), and nuclear power delivered 65 TWh (47%). At the same time, the use of biofuels, peat etc. produced 13 TWh (9%) of electricity, while wind power produced 1 TWh (1%). Sweden was a net importer of electricity by a margin of 6 TWh.
Biomass is mainly used to produce heat for district heating and central heating, and for industrial processes. +Sweden joined the International Energy Agency in 1974, after the 1973 oil crisis strengthened Sweden's commitment to decrease dependence on imported fossil fuels. To protect against unexpected oil supply shocks and in accordance with international commitments made through the IEA, Sweden maintains a strategic petroleum reserve of at least 90 days of net oil imports. As of February 2022, Sweden's oil reserves totalled 130 days' worth of net imports. Sweden has moved to generate electricity mostly from hydropower and nuclear power. The use of nuclear power has been limited, however. Among other things, the accident at the Three Mile Island Nuclear Generating Station (United States) prompted the Riksdag to ban new nuclear plants. In March 2005, an opinion poll showed that 83% supported maintaining or increasing nuclear power. Sweden is considered a "global leader" in decarbonisation. Politicians have made announcements about an oil phase-out in Sweden, a decrease in nuclear power, and multibillion-dollar investments in renewable energy and energy efficiency. The country has for many years pursued a strategy of indirect taxation as an instrument of environmental policy, including energy taxes in general and carbon dioxide taxes in particular. Sweden was the first nation to implement carbon pricing, and its carbon prices remain the highest in the world as of 2020. This model has been shown to be particularly effective at decarbonising the nation's economy. In 2014, Sweden was a net exporter of electricity by a margin of 16 TWh; production from wind power had increased to 11.5 TWh. + + +=== Transport === + +Sweden has 162,707 km (101,101 mi) of paved road and 1,428 km (887 mi) of expressways. Motorways run through Sweden and over the Øresund Bridge to Denmark. New motorways are still under construction, and a new motorway from Uppsala to Gävle was finished on 17 October 2007. Sweden had left-hand traffic (vänstertrafik in Swedish) from approximately 1736 and continued to do so well into the 20th century. Voters rejected right-hand traffic in 1955, but after the Riksdag passed legislation in 1963, the changeover took place on 3 September 1967, known in Swedish as Dagen H. +The Stockholm metro is the only underground system in Sweden and serves the city of Stockholm via 100 stations. The rail transport market is privatised, but while there are many privately owned enterprises, the largest operators are still owned by the state. The counties have financing, ticket and marketing responsibility for local trains. For other trains the operators handle tickets and marketing themselves. Operators include SJ, Veolia Transport, Green Cargo, Tågkompaniet and Inlandsbanan. Most of the railways are owned and operated by Trafikverket. +Most tram networks were closed in 1967, as Sweden changed from left-side to right-side driving, but they survived in Norrköping, Stockholm and Gothenburg, with the Gothenburg tram network being the largest. A new tram line opened in Lund on 13 December 2020. +The largest airports include Stockholm–Arlanda Airport (16.1 million passengers in 2009), 40 km (25 mi) north of Stockholm, Göteborg Landvetter Airport (4.3 million passengers in 2008), and Stockholm–Skavsta Airport (2.0 million passengers). Sweden hosts the two largest port companies in Scandinavia, Port of Göteborg AB (Gothenburg) and the transnational company Copenhagen Malmö Port AB.
The most used airport for a large part of Southern Sweden is Kastrup, or Copenhagen Airport, which is located only 12 minutes by train from the closest Swedish railway station, Hyllie. Copenhagen Airport is also the largest international airport in Scandinavia and Finland. +Sweden also has a number of car ferry connections to several neighbouring countries. This includes a route from Umeå across the Gulf of Bothnia to Vaasa in Finland. There are several connections from the Stockholm area across the Sea of Åland to Mariehamn in Åland as well as Turku and Helsinki on the Finnish mainland and beyond to Estonia and St Petersburg in Russia. Ferry routes from the Stockholm area also connect with Ventspils and Riga in Latvia as well as Gdańsk in Poland across the Baltic Sea. The ferry ports of Karlskrona and Karlshamn in southeastern Sweden serve Gdynia, Poland, and Klaipėda, Lithuania. Ystad and Trelleborg near the southern tip of Sweden have ferry links with the Danish island of Bornholm and the German ports of Sassnitz, Rostock and Travemünde, respectively, and ferries run to Świnoujście, Poland, from both of them. Trelleborg is the busiest ferry port in Sweden in terms of weight transported by lorry. Its route to Sassnitz started as a steam-operated railway ferry in the 19th century, and today's ferry still carries trains to Berlin during the summer months. Another ferry route to Travemünde originates from Malmö. Despite the opening of the fixed link to Denmark, the Øresund Bridge, the busiest ferry route remains the short link across the narrowest section of the Øresund between Helsingborg and the Danish port of Helsingør, known as the HH Ferry route. There are over seventy departures a day each way; during peak times, a ferry departs every fifteen minutes. Ports higher up the Swedish west coast include Varberg, with a ferry connection across the Kattegat to Grenaa in Denmark, and Göteborg, serving Frederikshavn at the northern tip of Denmark and Kiel in Germany. Finally, there are ferries from Strömstad near the Norwegian border to destinations around the Oslofjord in Norway. There used to be ferry services to the United Kingdom from Göteborg to destinations such as Immingham, Harwich and Newcastle, but these have been discontinued. +Sweden has two domestic ferry lines with large vessels, both connecting Gotland with the mainland. The lines leave from Visby harbour on the island, and the ferries sail to either Oskarshamn or Nynäshamn. A smaller car ferry connects the island of Ven in Øresund with Landskrona. + + +=== Public policy === + +Sweden has one of the most highly developed welfare states in the world. According to a 2012 OECD report, the country had the second-highest public social spending as a percentage of its GDP after France (27.3% and 28.4%, respectively), and the third-highest total (public and private) social spending at 30.2% of its GDP, after France and Belgium (31.3% and 31.0%, respectively). Sweden spent 6.3% of its GDP, the ninth-highest among 34 OECD countries, to provide equal access to education. On health care, the country spent 10.0% of its total GDP, the 12th highest. Historically, Sweden provided solid support for free trade (except agriculture) and mostly relatively strong and stable property rights (both private and public), though some economists have pointed out that Sweden promoted industries with tariffs and used publicly subsidised R&D during the country's early critical years of industrialisation.
After World War II a succession of governments expanded the welfare state by raising taxes. During this period Sweden's economic growth was also one of the highest in the industrial world. A series of successive social reforms transformed the country into one of the most equal and developed on earth. The consistent growth of the welfare state led to Swedes achieving unprecedented levels of social mobility and quality of life—to this day Sweden consistently ranks at the top of league tables for health, literacy and Human Development—far ahead of some wealthier countries (for example the United States). However, from the 1970s onwards Sweden's GDP growth fell behind other industrialised countries and the country's per capita ranking fell from fourth to 14th place in a few decades. From the mid-1990s until today Sweden's economic growth has once again accelerated and has been higher than in most other industrialised countries (including the US) during the last 15 years. A report from the United Nations Development Programme predicted that Sweden's rating on the Human Development Index would fall from 0.949 in 2010 to 0.906 in 2030. Sweden began slowing the expansion of the welfare state in the 1980s, and even trimming it back. Sweden has been relatively quick to adopt neoliberal policies, such as privatisation, financialisation and deregulation, compared to countries such as France. The current Swedish government is continuing the trend of moderate rollbacks of previous social reforms. Growth has been higher than in many other EU-15 countries. Also, since the mid-1980s, Sweden has had the fastest growth in inequality of any developed nation, according to the OECD. This has largely been attributed to the reduction in state benefits and a shift toward the privatisation of public services. According to Barbro Sorman, an activist of the opposition Left Party, "The rich are getting richer, and the poor are getting poorer. Sweden is starting to look like the USA." Nevertheless, it remains far more egalitarian than most nations. Partly as a result of these privatisations and widening economic disparity, the Swedes in the 2014 elections put the Social Democrats back in power. Sweden adopted free-market agricultural policies in 1990. Since the 1930s, the agricultural sector had been subject to price controls. In June 1990, the Riksdag voted for a new agricultural policy marking a significant shift away from price controls. As a result, food prices fell somewhat. However, the liberalisations soon became moot because EU agricultural controls supervened. Since the late 1960s, Sweden has had the highest tax quota (as a percentage of GDP) in the industrialised world, although today the gap has narrowed and Denmark has surpassed Sweden as the most heavily taxed country among developed countries. Sweden has a two-step progressive tax scale with a municipal income tax of about 30% and an additional high-income state tax of 20–25% when a salary exceeds roughly 320,000 SEK per year. Payroll taxes amount to 32%. In addition, a national VAT of 25% is added to many things bought by private citizens, with the exception of food (12% VAT), transportation, and books (6% VAT). Certain items are subject to additional taxes, e.g. electricity, petrol/diesel and alcoholic beverages. +In 2007, total tax revenue was 47.8% of GDP, the second-highest tax burden among developed countries, down from 49.1% in 2006.
Sweden's inverted tax wedge – the amount going to the service worker's wallet – is approximately 15%, compared to 10% in Belgium, 30% in Ireland, and 50% in the United States. Public sector spending amounts to 53% of the GDP. State and municipal employees total around a third of the workforce, much more than in most Western countries. Only Denmark has a larger public sector (38% of the Danish workforce). Spending on transfers is also high. +In 2015 and 2016, 69 per cent of employed workers were organised in trade unions. Union density in 2016 was 62% among blue-collar workers (most of them in the Swedish Trade Union Confederation, LO) and 75% among white-collar workers (most of them in the Swedish Confederation of Professional Employees, TCO, and the Swedish Confederation of Professional Associations, SACO). Sweden has state-supported union unemployment funds (the Ghent system). Trade unions have the right to elect two representatives to the board in all Swedish companies with more than 25 employees. Sweden has a relatively high amount of sick leave per worker in the OECD: the average worker loses 24 days due to sickness. The unemployment rate was 7.2% in May 2017 while the employment rate was 67.4%, with the workforce consisting of 4,983,000 people while 387,000 were unemployed. Unemployment among youth (aged 24 or younger) in 2012 was 24.2%, making Sweden the OECD country with the highest ratio of youth unemployment versus unemployment in general. + + +=== Science and technology === + +In the 18th century, Sweden's scientific revolution took off. Previously, technical progress had mainly come from mainland Europe. +In 1739, the Royal Swedish Academy of Sciences was founded, with people such as Carl Linnaeus and Anders Celsius as early members. Many of the companies founded by early pioneers still remain major international brands. Gustaf Dalén founded AGA and received the Nobel Prize for his sun valve. Alfred Nobel invented dynamite and instituted the Nobel Prizes. Lars Magnus Ericsson started the company bearing his name, Ericsson, still one of the largest telecom companies in the world. Jonas Wenström was an early pioneer in alternating current and is, along with Serbian-American inventor Nikola Tesla, credited as one of the inventors of the three-phase electrical system. The traditional engineering industry is still a major source of Swedish inventions, but pharmaceuticals, electronics and other high-tech industries are gaining ground. Tetra Pak, a system for storing liquid foods, was invented by Erik Wallenberg. Losec, an ulcer medicine, was the world's best-selling drug in the 1990s and was developed by AstraZeneca. More recently, Håkan Lans invented the Automatic Identification System, a worldwide standard for shipping and civil aviation navigation. A large portion of the Swedish economy is to this day based on the export of technical inventions, and many large multinational corporations from Sweden have their origins in the ingenuity of Swedish inventors. Swedish inventors held 47,112 patents in the United States in 2014, according to the United States Patent and Trademark Office. Only ten other countries hold more patents than Sweden. Combined, the public and the private sectors in Sweden allocate over 3.5% of GDP to research & development (R&D) per year, making Sweden's investment in R&D as a percentage of GDP the second-highest in the world. For several decades the Swedish government has prioritised scientific and R&D activities.
As a percentage of GDP, the Swedish government spends the most of any nation on research and development. Sweden tops other European countries in the number of published scientific works per capita. In 2009, the decisions to construct Sweden's two largest scientific installations, the synchrotron radiation facility MAX IV Laboratory and the European Spallation Source (ESS), were taken. Both installations will be built in Lund. The European Spallation Source, costing some SEK 14 billion to construct, will begin initial operations in 2019, with construction completion scheduled for 2025. The ESS will give an approximately 30 times stronger neutron beam than any of today's existing neutron source installations. The MAX IV, costing some SEK 3 billion, was inaugurated on 21 June 2016. Both facilities have strong implications for materials research. Sweden was ranked third in the Global Innovation Index in 2022. + + +=== Waste management === +Sweden is known for its efficient waste management system. Only 0.7% of total household waste is disposed of; the rest is reused. Around 52% of its waste is used for energy production (that is, burnt) and 47% is recycled. About two million tonnes of waste are imported from neighbouring countries to be turned into profitable recycling products. According to a 2023 report, Sweden generated 1.7 billion euros from recycling waste in 2020 (the highest so far was 1.98 billion euros in 2016). The work is mostly carried out through the public organisation Swedish Waste Management (Avfall Sverige). + + +=== Taxes === + +On average, 27% of taxpayers' money in Sweden goes to education and healthcare, whereas 5% goes to the police and military, and 42% to social security. The typical worker receives 40% of his or her labour costs after the tax wedge. Total tax collected by Sweden as a percentage of its GDP peaked at 52.3% in 1990. The country faced a real estate and banking crisis in 1990–1991, and consequently passed tax reforms in 1991 to implement tax rate cuts and tax base broadening over time. Since 1990, taxes as a percentage of GDP collected by Sweden have been dropping, with total tax rates for the highest income earners dropping the most. In 2010, 45.8% of the country's GDP was collected as taxes, the second highest among OECD countries, and nearly double the percentage in the US or South Korea. + + +=== Pensions === + +Every Swedish resident receives a state pension. The Swedish Pensions Agency is responsible for pensions. People who have worked in Sweden, but relocated to another country, can also receive the Swedish pension. There are several types of pensions in Sweden: occupational and private pensions, and the national retirement pension. A person can receive a combination of the various types of pensions. + + +== Demographics == + +The total resident population of Sweden was 10,377,781 in October 2020. The population exceeded 10 million for the first time on 20 January 2017. The average population density is just over 25 people per km2 (65 per square mile), with 1,437 persons per km2 in localities (continuous settlement with at least 200 inhabitants). 87% of the population live in urban areas, which cover 1.5% of the entire land area, and 63% of Swedes live in large urban areas. Population density is substantially higher in the south than in the north. The capital city Stockholm has a municipal population of about 950,000 (with 1.5 million in the urban area and 2.3 million in the metropolitan area). The second- and third-largest cities are Gothenburg and Malmö. 
Greater Gothenburg counts just over a million inhabitants and the same goes for the western part of Scania, along the Öresund. The Öresund Region, the Danish-Swedish cross-border region around the Öresund that Malmö is part of, has a population of 4 million. Outside of major cities, areas with notably higher population density include the agricultural part of Östergötland, the western coast, the area around Lake Mälaren and the agricultural area around Uppsala. +Norrland, which covers approximately 60% of the Swedish territory, has a very low population density (below 5 people per square kilometre). The mountains and most of the remote coastal areas are almost unpopulated. Low population density exists also in large parts of western Svealand, as well as southern and central Småland. An area known as Finnveden, which is located in the south-west of Småland, and mainly below the 57th parallel, can also be considered as almost empty of people. +Between 1820 and 1930, approximately 1.3 million Swedes, a third of the country's population at the time, emigrated to North America, and most of them to the United States. There are more than 4.4 million Swedish Americans according to a 2006 US Census Bureau estimate. In Canada, the community of Swedish ancestry is 330,000 strong.There are no official statistics on ethnicity, but according to Statistics Sweden, 2,752,572 (26%) inhabitants of Sweden were of a foreign background in 2021, defined as being born abroad or born in Sweden with both foreign-born parents. Of these inhabitants, 2,090,503 persons were born abroad and 662,069 persons were born in Sweden to parents born abroad. In addition, 805,340 persons had one parent born abroad with the other parent born in Sweden.Sweden has one of the oldest populations in the world, with the average age of 41.1 years. + + +=== Language === + +The official language of Sweden is Swedish, a North Germanic language, related and very similar to Danish and Norwegian, but differing in pronunciation and orthography. Norwegians have little difficulty understanding Swedish, and Danes can also understand it, with slightly more difficulty than Norwegians. The same goes for standard Swedish speakers, who find it far easier to understand Norwegian than Danish. The dialects spoken in Scania, the southernmost part of the country, are influenced by Danish because the region traditionally was a part of Denmark and is nowadays situated closely to it. Sweden Finns are Sweden's largest linguistic minority, comprising about 5% of Sweden's population, and Finnish is recognised as a minority language. Owing to a 21st-century influx of native speakers of Arabic, the use of Arabic is likely more widespread in the country than that of Finnish. However, no official statistics are kept on language use.Along with Finnish, four other minority languages are also recognised: Meänkieli, Sami, Romani, and Yiddish. Swedish became Sweden's official language on 1 July 2009, when a new language law was implemented. 
The issue of whether Swedish should be declared the official language had been raised in the past, and the Riksdag voted on the matter in 2005, but the proposal narrowly failed.In varying degrees, a majority of Swedes, especially those born after World War II, understand and speak English, owing to trade links, the popularity of overseas travel, a strong Anglo-American influence and the tradition of subtitling rather than dubbing foreign television shows and films, and the relative similarity of the two languages which makes learning English easier. In a 2005 survey by Eurobarometer, 89% of Swedes reported the ability to speak English.English became a compulsory subject for secondary school students studying natural sciences as early as 1849, and has been a compulsory subject for all Swedish students since the late 1940s. Depending on the local school authorities, English is currently a compulsory subject between first grade and ninth grade, with all students continuing in secondary school studying English for at least another year. Most students also study one and sometimes two additional languages. Some Danish and Norwegian is also taught as part of Swedish courses for native speakers. Because of the extensive mutual intelligibility between the three continental Scandinavian languages, Swedish speakers often use their native language when visiting or living in Norway or Denmark. + + +=== Religion === + +Before the 11th century, Swedes adhered to Norse paganism, worshiping Æsir gods, with its centre at the Temple in Uppsala. With Christianisation in the 11th century, the laws of the country changed, forbidding worship of other deities until the late 19th century. After the Protestant Reformation in the 1530s, a change led by Martin Luther's Swedish associate Olaus Petri, the authority of the Roman Catholic Church was abolished and Lutheranism became widespread. Adoption of Lutheranism was completed by the Uppsala Synod of 1593, and it became the official religion. During the era following the Reformation, usually known as the period of Lutheran orthodoxy, small groups of non-Lutherans, especially Calvinist Dutchmen, the Moravian Church and French Huguenots played a significant role in trade and industry, and were quietly tolerated as long as they kept a low religious profile. The Sami originally had their own shamanistic religion, but they were converted to Lutheranism by Swedish missionaries in the 17th and 18th centuries. + +With religious liberalisations in the late 18th century believers of other faiths, including Judaism and Roman Catholicism, were allowed to live and work freely in the country. However, until 1860 it remained illegal for Lutherans to convert to another religion. The 19th century saw the arrival of various evangelical free churches, and, towards the end of the century, secularism, leading many to distance themselves from church rituals. Leaving the Church of Sweden became legal with the so-called Dissenter Act of 1860, but only under the provision of entering another Christian denomination. The right to stand outside any religious denomination was formally established in the law on freedom of religion in 1951. +In 2000, the Church of Sweden was disestablished. Sweden was the second Nordic country to disestablish its state church (after Finland did so in the Church Act of 1869).At the end of 2022, 52.8% of Swedes belonged to the Church of Sweden; this number has been decreasing by 1-2 percentage points each year since 2001. 
Approximately 2% of the church's members regularly attend Sunday services. The reason for the large number of inactive members is partly that, until 1996, children automatically became members at birth if at least one of the parents was a member. Since 1996, only children and adults who are christened become members. Some 275,000 Swedes are today members of various Evangelical Protestant free churches (where congregation attendance is much higher), and due to recent immigration, there are now some 100,000 Eastern Orthodox Christians and 92,000 Roman Catholics living in Sweden.The first Muslim congregation was established in 1949, when a small contingent of Tatars migrated from Finland. Islam's presence in Sweden remained marginal until the 1960s, when Sweden started to receive migrants from the Balkans and Turkey. Further immigration from North Africa and the Middle East have brought the estimated Muslim population to 600,000. However, only about 110,000 were members of a congregation around 2010.According to the Eurobarometer Poll 2010, +18% of Swedish citizens responded that "they believe there is a god". +45% answered that "they believe there is some sort of spirit or life force". +34% answered that "they do not believe there is any sort of spirit, god, or life force".According to a Demoskop study in 2015, the beliefs of the Swedish showed that + +21% believed in a god (down from 35 percent in 2008). +16% believed in ghosts. +14% believed in creationism or intelligent design.Sociology professor Phil Zuckerman claims that Swedes, despite a lack of belief in God, commonly question the term atheist, preferring to call themselves Christians while being content with remaining in the Church of Sweden. Religion continues to play a role in Swedish cultural identity. This is evidenced by the fact that the majority of Swedish adults continue to remain members of the Lutheran Church despite having to pay a church tax; moreover, rates of baptism remain high and church weddings are increasing in Sweden. + + +=== Health === + +Healthcare in Sweden is mainly tax-funded, universal for all citizens, and decentralised, although private health care also exists. The health care system in Sweden is financed primarily through taxes levied by regional councils and municipalities. A total of 21 councils are in charge of primary and hospital care within the country. +Private healthcare is a rarity in Sweden, and even those private institutions work under the mandated city councils. The city councils regulates the rules and the establishment of potential private practices. While care for the elderly or those who need psychiatric help is conducted privately in many other countries, in Sweden, publicly funded local authorities are in charge of this type of care.Healthcare in Sweden is similar in quality to other developed nations. Sweden ranks in the top five countries with respect to low infant mortality. It also ranks high in life expectancy and in safe drinking water. In 2018, health and medical care represented around 11% of GDP. + + +=== Education === + +Children aged 1–5 years old are guaranteed a place in a public kindergarten (Swedish: förskola or, colloquially, dagis). Between the ages of 6 and 16, children attend compulsory comprehensive school. In the Programme for International Student Assessment (PISA), Swedish 15-year-old pupils score close to the OECD average. 
After completing the ninth grade, about 90% of the students continue with a three-year upper secondary school (gymnasium), which can lead to either a job qualification or entrance eligibility to university. The school system is largely financed by taxes. +The Swedish government treats public and independent schools equally; it introduced education vouchers in 1992, making Sweden one of the first countries in the world to do so, after the Netherlands. Anyone can establish a for-profit school and the municipality must pay new schools the same amount as municipal schools get. School lunch is free for all students in Sweden, and providing breakfast is also encouraged. There are a number of different universities and colleges in Sweden, the oldest and largest of which are situated in Uppsala, Lund, Gothenburg and Stockholm. In 2000, 32% of Swedish people held a tertiary degree, making the country fifth in the OECD in that category. Along with several other European countries, the government also subsidises tuition of international students pursuing a degree at Swedish institutions, although a recent bill passed in the Riksdag will limit this subsidy to students from EEA countries and Switzerland. The large influx of immigrants to Swedish schools has been cited as a significant part of the reason why Sweden has dropped more than any other European country in the international PISA rankings. + + +== Culture == + +Sweden has many authors of worldwide recognition including August Strindberg, Astrid Lindgren, and Nobel Prize winners Selma Lagerlöf and Harry Martinson. In total seven Nobel Prizes in Literature have been awarded to Swedes. The nation's most well-known artists are painters such as Carl Larsson and Anders Zorn, and the sculptors Tobias Sergel and Carl Milles. +Swedish 20th-century culture is noted for pioneering works in the early days of cinema, with Mauritz Stiller and Victor Sjöström. In the 1920s–1980s, the filmmaker Ingmar Bergman and actors Greta Garbo and Ingrid Bergman became internationally recognised figures in cinema. More recently, the films of Lukas Moodysson, Lasse Hallström, and Ruben Östlund have received international recognition. +Throughout the 1960s and 1970s, Sweden was seen as an international leader in what is now referred to as the "sexual revolution", with gender equality having been particularly promoted. The early Swedish film I Am Curious (Yellow) (1967) reflected a liberal view of sexuality, including scenes of love making that caught international attention, and introduced the concept of the "Swedish sin", which had been introduced earlier in the US with Ingmar Bergman's Summer with Monika. +The image of "hot love and cold people" emerged. Sexual liberalism was seen as part of a modernisation process that, by breaking down traditional borders, would lead to the emancipation of natural forces and desires. Sweden has also become very liberal towards homosexuality, as is reflected in the popular acceptance of films such as Show Me Love, which is about two young lesbians in the small Swedish town of Åmål. On 1 May 2009, Sweden repealed its "registered partnership" laws and fully replaced them with gender-neutral marriage. Sweden also offers domestic partnerships for both same-sex and opposite-sex couples. Cohabitation (sammanboende) by couples of all ages, including teenagers as well as elderly couples, is widespread. As of 2009, Sweden is experiencing a baby boom. + + +=== Music === + +Historical re-creations of Norse music have been attempted based on instruments found in Viking sites. 
The instruments used were the lur (a sort of trumpet), simple string instruments, wooden flutes and drums. Sweden has a significant folk-music scene. The joik, a type of Sami music, is a chant that is part of the traditional Sami animistic spirituality. Notable composers include Carl Michael Bellman and Franz Berwald. +Sweden also has a prominent choral music tradition. Out of a population of 9.5 million, it is estimated that five to six hundred thousand people sing in choirs.In 2007, with over 800 million dollars in revenue, Sweden was the third-largest music exporter in the world and surpassed only by the US and the UK. According to one source 2013, Sweden produces the most chart hits per capita in the world, followed by the UK and the USA. +Sweden has a rather lively jazz scene. During the last sixty years or so it has attained a remarkably high artistic standard, stimulated by domestic as well as external influences and experiences. The Centre for Swedish Folk Music and Jazz Research has published an overview of jazz in Sweden by Lars Westin. + + +=== Architecture === + +Before the 13th century almost all buildings were made of timber, but a shift began towards stone. Early Swedish stone buildings are the Romanesque churches on the countryside. As so happens, many of them were built in Scania and are in effect Danish churches. This would include the Lund Cathedral from the 11th century and the somewhat younger church in Dalby, but also many early Gothic churches built through influences of the Hanseatic League, such as in Ystad, Malmö and Helsingborg. +Cathedrals in other parts of Sweden were also built as seats of Sweden's bishops. The Skara Cathedral is of bricks from the 14th century, and the Uppsala Cathedral in the 15th. In 1230 the foundations of the Linköping Cathedral were made, the material was there limestone, but the building took some 250 years to finish. +Among older structures are also some significant fortresses and other historical buildings such as at Borgholm Castle, Halltorps Manor and Eketorp fortress on the island Öland, the Nyköping fortress and the Visby city wall. + +Around 1520 Sweden was out of the Middle Ages and united under King Gustav Vasa, who immediately initiated grand mansions, castles and fortresses to be built. Some of the more magnificent include Kalmar Castle, Gripsholm Castle and the one at Vadstena. +In the next two centuries, Sweden was designated by Baroque architecture and later the rococo. Notable projects from that time include the city Karlskrona, which has now also been declared a World Heritage Site and the Drottningholm Palace. +1930 was the year of the great Stockholm exhibition, which marked the breakthrough of Functionalism, or funkis as it became known. The style came to dominate in the following decades. Some notable projects of this kind were the Million Programme, offering affordable living in large apartment complexes. +The Ericsson Globe, located in Stockholm, is the largest hemispherical building on Earth. Its dome has a diameter of 110 metres (360 feet) and took two and a half years to build. + + +=== Media === + +Swedes are among the greatest consumers of newspapers in the world, and nearly every town is served by a local paper. The country's main quality morning papers are Dagens Nyheter (liberal), Göteborgs-Posten (liberal), Svenska Dagbladet (liberal conservative) and Sydsvenska Dagbladet (liberal). The two largest evening tabloids are Aftonbladet (social democratic) and Expressen (liberal). 
The ad-financed, free international morning paper, Metro International, was founded in Stockholm, Sweden. The country's news is reported in English by, among others, The Local (liberal).The public broadcasting companies held a monopoly on radio and television for a long time in Sweden. Licence-funded radio broadcasts started in 1925. A second radio network was started in 1954, and a third opened 1962, in response to pirate radio stations. Non-profit community radio was allowed in 1979 and in 1993 commercial local radio started. +The licence-funded television service was officially launched in 1956. A second channel, TV2, was launched in 1969. These two channels (operated by Sveriges Television since the late 1970s) held a monopoly until the 1980s when cable and satellite television became available. The first Swedish-language satellite service was TV3 which started broadcasting from London in 1987. It was followed by Kanal 5 in 1989 (then known as Nordic Channel) and TV4 in 1990. +In 1991 the government announced it would begin taking applications from private television companies wishing to broadcast on the terrestrial network. TV4, which had previously been broadcasting via satellite, was granted a permit and began its terrestrial broadcasts in 1992, becoming the first private channel to broadcast television content from within the country. +Around half the population are connected to cable television. Digital terrestrial television in Sweden started in 1999 and the last analogue terrestrial broadcasts were terminated in 2007. + + +=== Literature === + +The first literary text from Sweden is the Rök runestone, carved during the Viking Age c. 800 AD. With the conversion of the land to Christianity around 1100 AD, Sweden entered the Middle Ages, during which monastic writers preferred to use Latin. Therefore, there are only a few texts in the Old Swedish from that period. Swedish literature only began to flourish when the language was standardised during the 16th century. This standardisation was largely due to the full translation of the Bible into Swedish in 1541. This translation is the so-called Gustav Vasa Bible. +With improved education and the freedom brought by secularisation, the 17th century saw several notable authors develop the Swedish language further. Some key figures include Georg Stiernhielm (17th century), who was the first to write classical poetry in Swedish; Johan Henric Kellgren (18th century), the first to write fluent Swedish prose; Carl Michael Bellman (late 18th century), the first writer of burlesque ballads; and August Strindberg (late 19th century), a socio-realistic writer and playwright who won worldwide fame. The early 20th century continued to produce notable authors, such as Selma Lagerlöf, (Nobel laureate 1909), Verner von Heidenstam (Nobel laureate 1916) and Pär Lagerkvist (Nobel laureate 1951). +In recent decades, a handful of Swedish writers have established themselves internationally, including the detective novelist Henning Mankell and the writer of spy fiction Jan Guillou. The Swedish writer to have made the most lasting impression on world literature is the children's book writer Astrid Lindgren, and her books about Pippi Longstocking, Emil, and others. In 2008, the second best-selling fiction author in the world was Stieg Larsson, whose Millennium series of crime novels is being published posthumously to critical acclaim. Larsson drew heavily on the work of Lindgren by basing his central character, Lisbeth Salander, on Longstocking. 
+ + +=== Holidays === + +Apart from traditional Protestant Christian holidays, Sweden also celebrates some unique holidays, some of a pre-Christian tradition. They include Midsummer, celebrating the summer solstice; Walpurgis Night (Valborgsmässoafton) on 30 April, marked by the lighting of bonfires; and Labour Day, or May Day, on 1 May, dedicated to socialist demonstrations. The day of giver-of-light Saint Lucia, 13 December, is widely observed with elaborate celebrations which betoken its Italian origin and commence the month-long Christmas season. +6 June is the National Day of Sweden and has since 2005 been a public holiday. Furthermore, there are official flag flying day observances and a calendar of name days in Sweden. In August many Swedes have kräftskivor (crayfish dinner parties). Martin of Tours Eve is celebrated in Scania in November with Mårten Gås parties, where roast goose and svartsoppa ('black soup', made of goose stock, fruit, spices, spirits and goose blood) are served. The Sami, one of Sweden's indigenous minorities, have their holiday on 6 February, and Scania celebrates its Scanian Flag Day on the third Sunday in July. + + +=== Cuisine === + +Swedish cuisine, like that of the other Nordic countries (Denmark, Norway and Finland), was traditionally simple. Fish (particularly herring), meat, potatoes and dairy products played prominent roles. Spices were sparse. Preparations include Swedish meatballs, traditionally served with gravy, boiled potatoes and lingonberry jam; pancakes; pyttipanna, a spiced fried hash of meat and potatoes originally meant to use up any leftovers of meat; lutfisk; and the smörgåsbord, or lavish buffet. Akvavit is a popular alcoholic distilled beverage, and the drinking of snaps is of cultural importance. The traditional flat and dry crisp bread has developed into several contemporary variants. Regionally important foods are surströmming (a fermented fish) in northern Sweden and eel in southern Sweden. +Swedish traditional dishes, some of which are many hundreds of years old, are still an important part of Swedish everyday meals, even though modern-day Swedish cuisine has adopted many international dishes. +In August, at the traditional feast known as the crayfish party (kräftskiva), Swedes eat large amounts of crayfish boiled with dill. + + +=== Cinema === + +Swedes have been fairly prominent in film through the years. A number of Swedish people have found success in Hollywood, including Ingrid Bergman, Greta Garbo and Max von Sydow. Directors who have made internationally successful films include Ingmar Bergman, Lukas Moodysson and Lasse Hallström. + + +=== Fashion === +Interest in fashion is strong in Sweden, and the country is home to the headquarters of famous brands such as Hennes & Mauritz (operating as H&M), J. Lindeberg (operating as JL), Acne, Lindex, Odd Molly, Cheap Monday, Gant, WESC, Filippa K, and Nakkna. These companies, however, are composed largely of buyers who import fashionable goods from throughout Europe and America, continuing the trend of Swedish business toward multinational economic dependency like many of its neighbours. + + +=== Sports === + +Sport is a national movement, with half of the population actively participating in organised sporting activities. The two main spectator sports are football and ice hockey. Second to football, horse sports (of which most of the participants are women) have the highest number of practitioners. 
Thereafter, golf, orienteering, gymnastics, track and field, and the team sports of ice hockey, handball, floorball, basketball and bandy are the most popular in terms of practitioners. +The Swedish national men's ice hockey team, affectionately known as Tre Kronor (English: Three Crowns; the national symbol of Sweden), is regarded as one of the best in the world. The team has won the World Championships nine times, placing them third in the all-time medal count. Tre Kronor also won Olympic gold medals in 1994 and 2006. In 2006, Tre Kronor became the first national hockey team to win both the Olympic and world championships in the same year. The Swedish national men's football team has seen some success at the World Cup in the past, finishing second when they hosted the tournament in 1958, and third twice, in 1950 and 1994. +Sweden hosted the 1912 Summer Olympics, Equestrian at the 1956 Summer Olympics and the FIFA World Cup in 1958. Other big sports events include the UEFA Euro 1992, 1995 FIFA Women's World Cup, 1995 World Championships in Athletics, UEFA Women's Euro 2013, and several championships of ice hockey, curling, athletics, skiing, bandy, figure skating and swimming. +In 2016, the Swedish Poker Federation (Svepof) joined The International Federation of Poker (IFP). + + +== See also == +List of Sweden-related topics +Outline of Sweden +329 Svea + + +== Notes == + + +== References == + + +== Further reading == + + +== External links == + +Sweden. The World Factbook. Central Intelligence Agency. +Sweden entry at Britannica.com +Gosse, Edmund William (1887). "Sweden" . Encyclopædia Britannica. Vol. XXII (9th ed.). pp. 736–758. +Gosse, Edmund William; and four others (1911). "Sweden" . Encyclopædia Britannica. Vol. 26 (11th ed.). pp. 188–221. +Hildebrand, Karl (1922). "Sweden" . Encyclopædia Britannica. Vol. 32 (12th ed.). 
+Sweden from UCB Libraries GovPubs +Sweden at Curlie +Sweden profile from the BBC News + Wikimedia Atlas of Sweden + Geographic data related to Sweden at OpenStreetMap +Key Development Forecasts for Sweden from International Futures +Study in Sweden – official guide to studying in Sweden +Wayback Machine Technological Waves and Economic Growth in Sweden 1850–2005 +Sweden – Economic Growth and Structural Change, 1800–2000 — EH.Net Encyclopedia +vifanord – a digital library that provides scientific information on the Nordic and Baltic countries as well as the Baltic region as a wholePublic sector + +Sweden.se — Sweden's official portal +The Swedish Parliament – official website +The Government of Sweden – official website +The Royal Court Archived 11 October 2016 at the Wayback Machine – official website of the Swedish MonarchyNews media + +Radio Sweden – public service +Sveriges Television (in Swedish) – public service +Dagens Nyheter (in Swedish) +Svenska Dagbladet (in Swedish) +The Local – Sweden's news in English – independent English language news siteTrade + +World Bank Summary Trade Statistics SwedenTravel + +VisitSweden.com – official travel and tourism website for Sweden \ No newline at end of file diff --git a/RAG/notebooks/llamaindex/llamaindex_basic_RAG.ipynb b/RAG/notebooks/llamaindex/llamaindex_basic_RAG.ipynb new file mode 100644 index 00000000..281b0d08 --- /dev/null +++ b/RAG/notebooks/llamaindex/llamaindex_basic_RAG.ipynb @@ -0,0 +1,438 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "c5fb0b9e-f9cd-404f-bd8d-0273e94ac1fe", + "metadata": {}, + "source": [ + "# RAG Example Using NVIDIA API Catalog and LlamaIndex" + ] + }, + { + "cell_type": "markdown", + "id": "2969cdab-82fc-4ce5-bde1-b4f629691f27", + "metadata": {}, + "source": [ + "This notebook introduces how to use LlamaIndex to interact with NVIDIA hosted NIM microservices like chat, embedding, and reranking models to build a simple retrieval-augmented generation (RAG) application." 
+ ] + }, + { + "cell_type": "markdown", + "id": "e4253bd0-4313-4056-95f5-899a180879c2", + "metadata": {}, + "source": [ + "## Terminology" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "5a084a00-b65d-483a-a7c6-b4c12e4272dd", + "metadata": {}, + "source": [ + "#### RAG\n", + "\n", + "- RAG is a technique for augmenting LLM knowledge with additional data.\n", + "- LLMs can reason about wide-ranging topics, but their knowledge is limited to the public data up to a specific point in time that they were trained on.\n", + "- If you want to build AI applications that can reason about private data or data introduced after a model's cutoff date, you need to augment the knowledge of the model with the specific information it needs.\n", + "- The process of bringing the appropriate information and inserting it into the model prompt is known as retrieval augmented generation (RAG).\n", + "\n", + "The preceding summary of RAG originates in the LangChain v0.2 tutorial [Build a RAG App](https://python.langchain.com/v0.2/docs/tutorials/rag/) tutorial in the LangChain v0.2 documentation.\n", + "\n", + "For comprehensive information, refer to the LLamaIndex documentation for [Building an LLM Application](https://docs.llamaindex.ai/en/stable/understanding/#:~:text=on%20your%20machine.-,Building%20a%20RAG%20pipeline,-%3A%20Retrieval%2DAugmented%20Generation).\n", + "\n", + "#### NIM\n", + "\n", + "- [NIM microservices](https://developer.nvidia.com/blog/nvidia-nim-offers-optimized-inference-microservices-for-deploying-ai-models-at-scale/) are containerized microservices that simplify the deployment of generative AI models like LLMs and are optimized to run on NVIDIA GPUs. \n", + "- NIM microservices support models across domains like chat, embedding, reranking, and more from both the community and NVIDIA.\n", + "\n", + "#### NVIDIA API Catalog\n", + "\n", + "- [NVIDIA API Catalog](https://build.nvidia.com/explore/discover) is a hosted platform for accessing a wide range of microservices online.\n", + "- You can test models on the catalog and then export them with an NVIDIA AI Enterprise license for on-premises or cloud deployment\n", + "\n", + "#### LlamaIndex Concepts\n", + "\n", + " - `Data connectors` ingest your existing data from their native source and format.\n", + " - `Data indexes` structure your data in intermediate representations that are easy and performant for LLMs to consume.\n", + " - `Engines` provide natural language access to your data for building context-augmented LLM apps.\n", + "\n", + "LlamaIndex also provides integrations like `llms-nvidia`, `embeddings-nvidia` & `nvidia-rerank` to work with NVIDIA microservices." + ] + }, + { + "cell_type": "markdown", + "id": "ca300278-5ff4-47c4-ab70-c6584ef73c9f", + "metadata": {}, + "source": [ + "## Installation and Requirements\n", + "\n", + "Create a Python environment (preferably with Conda) using Python version 3.10.14. \n", + "To install Jupyter Lab, refer to the [installation](https://jupyter.org/install) page." + ] + }, + { + "cell_type": "markdown", + "id": "1b7a52a0-7e5e-4064-9665-cb947d600f84", + "metadata": {}, + "source": [ + "## Getting Started!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36287c2e-8708-4006-8adf-851f06cce02d", + "metadata": {}, + "outputs": [], + "source": [ + "# Requirements\n", + "!pip install --upgrade pip\n", + "!pip install llama-index-core==0.10.50\n", + "!pip install llama-index-readers-file==0.1.25\n", + "!pip install llama-index-llms-nvidia==0.1.3\n", + "!pip install llama-index-embeddings-nvidia==0.1.4\n", + "!pip install llama-index-postprocessor-nvidia-rerank==0.1.2\n", + "!pip install ipywidgets==8.1.3" + ] + }, + { + "cell_type": "markdown", + "id": "04495732-c2db-4c97-91d0-96708814334d", + "metadata": {}, + "source": [ + "To get started you need an `NVIDIA_API_KEY` to use NVIDIA AI Foundation models:\n", + "\n", + "1) Create a free account with [NVIDIA](https://build.nvidia.com/explore/discover).\n", + "2) Click on your model of choice.\n", + "3) Under Input, select the Python tab, click **Get API Key**, and then click **Generate Key**.\n", + "4) Copy and save the generated key as NVIDIA_API_KEY. From there, you should have access to the endpoints." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "bbb51115-79f8-48c3-b3ee-d434916945f6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Enter your NVIDIA API key: ········\n" + ] + } + ], + "source": [ + "import getpass\n", + "import os\n", + "\n", + "if not os.environ.get(\"NVIDIA_API_KEY\", \"\").startswith(\"nvapi-\"):\n", + " nvidia_api_key = getpass.getpass(\"Enter your NVIDIA API key: \")\n", + " assert nvidia_api_key.startswith(\"nvapi-\"), f\"{nvidia_api_key[:5]}... is not a valid key\"\n", + " os.environ[\"NVIDIA_API_KEY\"] = nvidia_api_key" + ] + }, + { + "cell_type": "markdown", + "id": "25656ab5-0046-4e27-be65-b3d3d547b4c6", + "metadata": {}, + "source": [ + "## RAG Example using LLM and Embedding" + ] + }, + { + "cell_type": "markdown", + "id": "54e86bc0-e9c5-4a2b-be0e-7fca0331e886", + "metadata": {}, + "source": [ + "### 1) Initialize the LLM\n", + "\n", + "`llama-index-llms-nvidia`, also known as NVIDIA's LLM connector,\n", + "allows you to connect to and generate from compatible models available on the NVIDIA API catalog.\n", + "\n", + "Here we will use **mixtral-8x7b-instruct-v0.1** " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "75f7bdd3-2c6f-4ba2-bd89-175cedbf4f3b", + "metadata": {}, + "outputs": [], + "source": [ + "# Settings enables global configuration as a singleton object throughout your application.\n", + "# Here, it is used to set the LLM, embedding model, and text splitter configurations globally.\n", + "from llama_index.core import Settings\n", + "from llama_index.llms.nvidia import NVIDIA\n", + "\n", + "# Here we are using mixtral-8x7b-instruct-v0.1 model from API Catalog\n", + "Settings.llm = NVIDIA(model=\"mistralai/mixtral-8x7b-instruct-v0.1\")" + ] + }, + { + "cell_type": "markdown", + "id": "35cc87a6-2f83-4652-95f1-cf349db8bad6", + "metadata": {}, + "source": [ + "### 2) Initialize the embedding\n", + "\n", + "We selected **NV-Embed-QA** as the embedding model" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "d88f7838-b9f9-4fc5-8779-84df6cb26017", + "metadata": {}, + "outputs": [], + "source": [ + "from llama_index.embeddings.nvidia import NVIDIAEmbedding\n", + "Settings.embed_model = NVIDIAEmbedding(model=\"NV-Embed-QA\", truncate=\"END\")" + ] + }, + { + "cell_type": "markdown", + "id": "b9862f2e-5055-4fe4-818d-708091243d74", + "metadata": {}, + "source": [ + "### 3) Obtain some toy
text dataset\n", + "Here we load some toy data from a text document; in real applications, data can be loaded from various sources. " + ] + }, + { + "cell_type": "markdown", + "id": "851b16b3-43ac-4269-9f37-05a33efe24fb", + "metadata": {}, + "source": [ + "Real-world documents can be very long, which makes it hard to fit them in the context window of many models. Even for those models that could fit the full document in their context window, models can struggle to find information in very long inputs.\n", + "\n", + "To handle this, we’ll split the Document into chunks for embedding and vector storage." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "804c85f6-181b-4291-a685-d6b378015544", + "metadata": {}, + "outputs": [], + "source": [ + "# For this example we load a toy data set (it's a simple text file with some information about Sweden)\n", + "TOY_DATA_PATH = \"./data/\"\n", + "\n", + "from llama_index.core.node_parser import SentenceSplitter\n", + "from llama_index.core import SimpleDirectoryReader\n", + "Settings.text_splitter = SentenceSplitter(chunk_size=400)\n", + "documents = SimpleDirectoryReader(TOY_DATA_PATH).load_data()" + ] + }, + { + "cell_type": "markdown", + "id": "9b0a7da3-b6e7-46f1-9c31-3c6ef5f04d56", + "metadata": {}, + "source": [ + "Note:\n", + " - `SimpleDirectoryReader` takes care of storing basic file information such as the filename, filepath, and file type as metadata by default. This metadata can be used to keep track of the source file, allowing us to use it later for citation or metadata filtering." + ] + }, + { + "cell_type": "markdown", + "id": "f867df18-11c8-45ea-b81c-1603459431f9", + "metadata": {}, + "source": [ + "### 4) Process the documents into VectorStoreIndex\n", + "\n", + "In RAG, your data is loaded and prepared for queries or \"indexed\". User queries act on the index, which filters your data down to the most relevant context. This context and your query then go to the LLM along with a prompt, and the LLM provides a response."
The country stretches between latitudes 55° and 70° N, and mostly between longitudes 11° and 25° E.\n", + "\n", + "Sweden's diverse climate is influenced by its varied topography, which includes a long coastline, numerous lakes, vast forested areas, and the Scandes mountain range that separates it from Norway. The capital and largest city is Stockholm.\n", + "\n", + "Sweden has a population of approximately 10.5 million people, with the majority residing in urban areas. The country is known for its extensive coastline, numerous lakes, and vast forested areas, as well as its commitment to social welfare, gender equality, and environmental sustainability.\n", + "\n", + "Historically, Sweden has maintained a policy of neutrality and non-participation in military alliances. However, it has recently moved towards cooperation with NATO.\n", + "\n", + "Sweden is a highly developed country, ranked seventh in the Human Development Index. It is a constitutional monarchy and parliamentary democracy, with legislative power vested in the 349-member unicameral Riksdag. The country is known for its high standard of living, universal health care, and tertiary education for its citizens.\n", + "\n", + "The official language of Sweden is Swedish, a North Germanic language closely related to Danish and Norwegian. English is widely spoken and understood by a majority of Swedes.\n", + "\n", + "Sweden's economy is mixed and largely service-oriented, with a strong emphasis on engineering, telecommunications, automotive, and pharmaceutical industries. The country is home to several multinational corporations, including IKEA, Volvo, Ericsson, and H&M.\n", + "\n", + "In summary, Sweden is a highly developed, forested country located in Northern Europe, known for its extensive coastline, high standard of living, commitment to social welfare, and diverse climate.\n" + ] + } + ], + "source": [ + "response = query_engine.query(\n", + " \"Tell me about Sweden?\"\n", + ")\n", + "print(response)" + ] + }, + { + "cell_type": "markdown", + "id": "c29478b0-0fb1-4678-93cd-b159dc9884a7", + "metadata": {}, + "source": [ + "## RAG Example with LLM, Embedding & Reranking" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "b88a91ed-5905-474e-8d8e-f5d887638a1d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " I don't have information about a \"Nordic Channel\" in the context of your query. However, I can share that the Swedish broadcasting landscape has seen significant developments. Radio broadcasts started in 1925, and in response to pirate radio stations, a second and third network were established in 1954 and 1962, respectively. 
In 1989, a satellite service known as Kanal 5 began broadcasting, which might be the service you're referring to, although it's not specifically labeled as \"Nordic Channel\" in the information provided.\n" + ] + } + ], + "source": [ + "# Let's test a more complex query using the above LLM Embedding query_engine and see if the reranker can help.\n", + "response = query_engine.query(\n", + " \"What is Nordic Channel?\"\n", + ")\n", + "print(response)" + ] + }, + { + "cell_type": "markdown", + "id": "9d3854c7-68a3-45b4-9e69-2c4e583d651f", + "metadata": {}, + "source": [ + "### Enhancing accuracy for single data sources\n", + "\n", + "This example demonstrates how a re-ranking model can be used to combine retrieval results and improve accuracy during retrieval of documents.\n", + "\n", + "Typically, reranking is a critical piece of high-accuracy, efficient retrieval pipelines. Generally, there are two important use cases:\n", + "\n", + "- Combining results from multiple data sources\n", + "- Enhancing accuracy for single data sources\n", + "\n", + "Here, we focus on demonstrating only the second use case." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "b7e8677e-a37f-42e2-8fea-4c4413f7d682", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " The Nordic Channel was a Swedish-language satellite service that was launched in 1989. It is now known as Kanal 5.\n" + ] + } + ], + "source": [ + "# We will narrow the collection to 40 results and further narrow it to 4 with the reranker.\n", + "from llama_index.postprocessor.nvidia_rerank import NVIDIARerank\n", + "\n", + "reranker_query_engine = index.as_query_engine(\n", + " similarity_top_k=40, node_postprocessors=[NVIDIARerank(top_n=4)]\n", + ")\n", + "\n", + "response = reranker_query_engine.query(\n", + " \"What is Nordic Channel?\"\n", + ")\n", + "print(response)" + ] + }, + { + "cell_type": "markdown", + "id": "aa2c289d-b10f-4bad-bb55-edc779e544f4", + "metadata": {}, + "source": [ + "#### Note:\n", + " - In this notebook, we used NVIDIA NIM microservices from the NVIDIA API Catalog.\n", + " - The above APIs, NVIDIA (llms), NVIDIAEmbedding, and NVIDIARerank, also support self-hosted microservices.\n", + " - Change the `base_url` to your deployed NIM URL\n", + " - Example: NVIDIA(model=\"meta/llama3-8b-instruct\", base_url=\"http://your-nim-host-address:8000/v1\")\n", + " - NIM can be hosted locally using Docker, following the [NVIDIA NIM for LLMs](https://docs.nvidia.com/nim/large-language-models/latest/getting-started.html) documentation." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d6ea831a-76a5-431f-9b7f-f5bad5cb567b", + "metadata": {}, + "outputs": [], + "source": [ + "# Example Code snippet if you want to use a self-hosted NIM\n", + "from llama_index.llms.nvidia import NVIDIA\n", + "\n", + "llm = NVIDIA(model=\"meta/llama3-8b-instruct\", base_url=\"http://your-nim-host-address:8000/v1\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python (rag_notebooks)", + "language": "python", + "name": "rag_notebooks" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/Nemo Evaluator Llama 3.1 Workbook/custom_dataset/inputs.jsonl b/RAG/notebooks/nemo/Nemo Evaluator Llama 3.1 Workbook/custom_dataset/inputs.jsonl similarity index 100% rename from notebooks/Nemo Evaluator Llama 3.1 Workbook/custom_dataset/inputs.jsonl rename to RAG/notebooks/nemo/Nemo Evaluator Llama 3.1 Workbook/custom_dataset/inputs.jsonl diff --git a/notebooks/Nemo Evaluator Llama 3.1 Workbook/custom_dataset/judge_prompts.jsonl b/RAG/notebooks/nemo/Nemo Evaluator Llama 3.1 Workbook/custom_dataset/judge_prompts.jsonl similarity index 100% rename from notebooks/Nemo Evaluator Llama 3.1 Workbook/custom_dataset/judge_prompts.jsonl rename to RAG/notebooks/nemo/Nemo Evaluator Llama 3.1 Workbook/custom_dataset/judge_prompts.jsonl diff --git a/notebooks/Nemo Evaluator Llama 3.1 Workbook/custom_dataset/question.jsonl b/RAG/notebooks/nemo/Nemo Evaluator Llama 3.1 Workbook/custom_dataset/question.jsonl similarity index 100% rename from notebooks/Nemo Evaluator Llama 3.1 Workbook/custom_dataset/question.jsonl rename to RAG/notebooks/nemo/Nemo Evaluator Llama 3.1 Workbook/custom_dataset/question.jsonl diff --git a/notebooks/Nemo Evaluator Llama 3.1 Workbook/custom_dataset/reference_answer/references.jsonl b/RAG/notebooks/nemo/Nemo Evaluator Llama 3.1 Workbook/custom_dataset/reference_answer/references.jsonl similarity index 100% rename from notebooks/Nemo Evaluator Llama 3.1 Workbook/custom_dataset/reference_answer/references.jsonl rename to RAG/notebooks/nemo/Nemo Evaluator Llama 3.1 Workbook/custom_dataset/reference_answer/references.jsonl diff --git a/notebooks/Nemo Evaluator Llama 3.1 Workbook/evaluator_notebook.ipynb b/RAG/notebooks/nemo/Nemo Evaluator Llama 3.1 Workbook/evaluator_notebook.ipynb similarity index 100% rename from notebooks/Nemo Evaluator Llama 3.1 Workbook/evaluator_notebook.ipynb rename to RAG/notebooks/nemo/Nemo Evaluator Llama 3.1 Workbook/evaluator_notebook.ipynb diff --git a/RetrievalAugmentedGeneration/Dockerfile b/RAG/src/chain_server/Dockerfile similarity index 52% rename from RetrievalAugmentedGeneration/Dockerfile rename to RAG/src/chain_server/Dockerfile index cb504a08..5ed2364b 100644 --- a/RetrievalAugmentedGeneration/Dockerfile +++ b/RAG/src/chain_server/Dockerfile @@ -10,8 +10,8 @@ ENV DEBIAN_FRONTEND noninteractive RUN apt update && \ apt install -y curl software-properties-common libgl1 libglib2.0-0 && \ add-apt-repository ppa:deadsnakes/ppa && \ - apt update && apt install -y python3.10 python3.10-dev python3.10-distutils && \ - apt-get clean + apt update && apt install -y python3.10 && \ + apt-get clean # Install pip for python3.10 RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10 @@ -22,34 +22,39 @@ RUN 
rm -rf /var/lib/apt/lists/* RUN apt autoremove -y curl software-properties-common # Install common dependencies for all examples -RUN --mount=type=bind,source=RetrievalAugmentedGeneration/requirements.txt,target=/opt/requirements.txt \ +RUN --mount=type=bind,source=RAG/src/chain_server/requirements.txt,target=/opt/requirements.txt \ pip3 install --no-cache-dir -r /opt/requirements.txt - + +# Set environment variables needed for Text splitter +RUN mkdir /tmp-data/; mkdir /tmp-data/nltk_data/ +RUN chmod 777 -R /tmp-data +RUN chown 1000:1000 -R /tmp-data +ENV NLTK_DATA=/tmp-data/nltk_data/ +ENV HF_HOME=/tmp-data + +# Install nltk packages to avoid runtime download +RUN python3.10 -m nltk.downloader averaged_perceptron_tagger +RUN python3.10 -m nltk.downloader stopwords +RUN python3.10 -m nltk.downloader punkt +RUN python3.10 -c "from sentence_transformers import SentenceTransformer; model = SentenceTransformer('Snowflake/snowflake-arctic-embed-l'); model.save('/tmp-data')" + # Install any example specific dependency if available -ARG EXAMPLE_NAME -COPY RetrievalAugmentedGeneration/examples/${EXAMPLE_NAME} /opt/RetrievalAugmentedGeneration/example -RUN if [ -f "/opt/RetrievalAugmentedGeneration/example/requirements.txt" ] ; then \ - pip3 install --no-cache-dir -r /opt/RetrievalAugmentedGeneration/example/requirements.txt ; else \ +ARG EXAMPLE_PATH +COPY RAG/examples/${EXAMPLE_PATH} /opt/RAG/examples/${EXAMPLE_PATH} +RUN if [ -f "/opt/RAG/examples/${EXAMPLE_PATH}/requirements.txt" ] ; then \ + pip3 install --no-cache-dir -r /opt/RAG/examples/${EXAMPLE_PATH}/requirements.txt ; else \ echo "Skipping example dependency installation, since requirements.txt was not found" ; \ fi -RUN python3.10 -m nltk.downloader averaged_perceptron_tagger -RUN if [ "${EXAMPLE_NAME}" = "multimodal_rag" ] ; then \ +RUN if [ "${EXAMPLE_PATH}" = "advanced_rag/multimodal_rag" ] ; then \ apt update && \ - apt install -y libreoffice && \ - apt install -y tesseract-ocr ; \ + apt install -y libreoffice tesseract-ocr ; \ fi -# Copy required common modules for all examples -COPY RetrievalAugmentedGeneration/__init__.py /opt/RetrievalAugmentedGeneration/ -COPY RetrievalAugmentedGeneration/common /opt/RetrievalAugmentedGeneration/common -COPY integrations /opt/integrations -COPY tools /opt/tools -RUN mkdir /tmp-data/; mkdir /tmp-data/nltk_data/ -RUN chmod 777 -R /tmp-data -RUN chown 1000:1000 -R /tmp-data -ENV NLTK_DATA=/tmp-data/nltk_data/ -ENV HF_HOME=/tmp-data +# Copy required common modules for all examples +COPY RAG/src/chain_server /opt/RAG/src/chain_server +COPY RAG/src/pandasai /opt/RAG/src/pandasai +COPY RAG/tools /opt/RAG/tools WORKDIR /opt -ENTRYPOINT ["uvicorn", "RetrievalAugmentedGeneration.common.server:app"] +ENTRYPOINT ["uvicorn", "RAG.src.chain_server.server:app"] diff --git a/notebooks/01_dataloader.ipynb b/RAG/src/chain_server/RAG_Chain_Server_API_Client.ipynb similarity index 100% rename from notebooks/01_dataloader.ipynb rename to RAG/src/chain_server/RAG_Chain_Server_API_Client.ipynb diff --git a/tools/observability/__init__.py b/RAG/src/chain_server/__init__.py similarity index 95% rename from tools/observability/__init__.py rename to RAG/src/chain_server/__init__.py index e42268fe..9ba9d431 100644 --- a/tools/observability/__init__.py +++ b/RAG/src/chain_server/__init__.py @@ -11,4 +11,4 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and -# limitations under the License. \ No newline at end of file +# limitations under the License. diff --git a/RAG/src/chain_server/base.py b/RAG/src/chain_server/base.py new file mode 100644 index 00000000..b091da20 --- /dev/null +++ b/RAG/src/chain_server/base.py @@ -0,0 +1,68 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Base interface that all RAG examples should implement.""" + +from abc import ABC, abstractmethod +from typing import Generator, List + + +class BaseExample(ABC): + """This class defines the basic structure for building RAG chain server examples. + All RAG chain server example classes should inherit from this base class and implement the + abstract methods to define their specific functionality. + """ + + @abstractmethod + def llm_chain(self, query: str, chat_history: List["Message"], **kwargs) -> Generator[str, None, None]: + """Implements the LLM chain logic specific to the example. + It's called when the `/generate` API is invoked with `use_knowledge_base` set to `False`. + + Args: + query (str): Query to be answered by llm. + chat_history (List[Message]): Conversation history between user and chain. + + Returns: + Generator[str, None, None]: A generator that yields strings, representing the tokens of the LLM chain. + """ + + pass + + @abstractmethod + def rag_chain(self, query: str, chat_history: List["Message"], **kwargs) -> Generator[str, None, None]: + """Implements the RAG chain logic specific to the example. + It's called when the `/generate` API is invoked with `use_knowledge_base` set to `True`. + + Args: + query (str): Query to be answered by llm. + chat_history (List[Message]): Conversation history between user and chain. + + Returns: + Generator[str, None, None]: A generator that yields strings, representing the steps or outputs of the RAG chain. + """ + + pass + + @abstractmethod + def ingest_docs(self, data_dir: str, filename: str) -> None: + """Defines how documents are ingested for processing by the RAG chain server example. + It's called when the POST endpoint of`/documents` API is invoked. + + Args: + filepath (str): The path to the document file. + filename (str): The name of the document file. + """ + + pass diff --git a/RetrievalAugmentedGeneration/common/configuration.py b/RAG/src/chain_server/configuration.py similarity index 57% rename from RetrievalAugmentedGeneration/common/configuration.py rename to RAG/src/chain_server/configuration.py index da3a7967..2fead6c5 100644 --- a/RetrievalAugmentedGeneration/common/configuration.py +++ b/RAG/src/chain_server/configuration.py @@ -14,7 +14,7 @@ # limitations under the License. 
"""The definition of the application configuration.""" -from RetrievalAugmentedGeneration.common.configuration_wizard import ConfigWizard, configclass, configfield +from RAG.src.chain_server.configuration_wizard import ConfigWizard, configclass, configfield @configclass @@ -26,24 +26,21 @@ class VectorStoreConfig(ConfigWizard): """ name: str = configfield( - "name", - default="milvus", # supports pgvector, milvus - help_txt="The name of vector store", + "name", default="milvus", help_txt="The name of vector store", # supports pgvector, milvus ) url: str = configfield( "url", - default="http://milvus:19530", # for pgvector `pgvector:5432` + default="http://milvus:19530", # for pgvector `pgvector:5432` help_txt="The host of the machine running Vector Store DB", ) nlist: int = configfield( - "nlist", - default=64, # IVF Flat milvus - help_txt="Number of cluster units", + "nlist", default=64, help_txt="Number of cluster units", # IVF Flat milvus ) nprobe: int = configfield( - "nprobe", - default=16, # IVF Flat milvus - help_txt="Number of units to query", + "nprobe", default=16, help_txt="Number of units to query", # IVF Flat milvus + ) + index_type: str = configfield( + "index_type", default="IVF_FLAT", help_txt="Index of the vector db", # IVF Flat for milvus ) @@ -56,14 +53,10 @@ class LLMConfig(ConfigWizard): """ server_url: str = configfield( - "server_url", - default="", - help_txt="The location of the Triton server hosting the llm model.", + "server_url", default="", help_txt="The location of the Triton server hosting the llm model.", ) model_name: str = configfield( - "model_name", - default="ensemble", - help_txt="The name of the hosted model.", + "model_name", default="ensemble", help_txt="The name of the hosted model.", ) model_engine: str = configfield( "model_engine", @@ -76,6 +69,7 @@ class LLMConfig(ConfigWizard): help_txt="The name of the ai catalog model to be used with PandasAI agent", ) + @configclass class TextSplitterConfig(ConfigWizard): """Configuration class for the Text Splitter. @@ -90,14 +84,10 @@ class TextSplitterConfig(ConfigWizard): help_txt="The name of Sentence Transformer model used for SentenceTransformer TextSplitter.", ) chunk_size: int = configfield( - "chunk_size", - default=510, - help_txt="Chunk size for text splitting.", + "chunk_size", default=510, help_txt="Chunk size for text splitting.", ) chunk_overlap: int = configfield( - "chunk_overlap", - default=200, - help_txt="Overlapping text length for splitting.", + "chunk_overlap", default=200, help_txt="Overlapping text length for splitting.", ) @@ -109,9 +99,7 @@ class EmbeddingConfig(ConfigWizard): """ model_name: str = configfield( - "model_name", - default="snowflake/arctic-embed-l", - help_txt="The name of huggingface embedding model.", + "model_name", default="snowflake/arctic-embed-l", help_txt="The name of huggingface embedding model.", ) model_engine: str = configfield( "model_engine", @@ -124,9 +112,27 @@ class EmbeddingConfig(ConfigWizard): help_txt="The required dimensions of the embedding model. Currently utilized for vector DB indexing.", ) server_url: str = configfield( - "server_url", - default="", - help_txt="The url of the server hosting nemo embedding model", + "server_url", default="", help_txt="The url of the server hosting nemo embedding model", + ) + + +@configclass +class RankingConfig(ConfigWizard): + """Configuration class for the Re-ranking. + + :cvar model_name: The name of the Ranking model. 
+ """ + + model_name: str = configfield( + "model_name", default="nv-rerank-qa-mistral-4b:1", help_txt="The name of Ranking model.", + ) + model_engine: str = configfield( + "model_engine", + default="nvidia-ai-endpoints", + help_txt="The server type of the hosted model. Allowed values are nvidia-ai-endpoints", + ) + server_url: str = configfield( + "server_url", default="", help_txt="The url of the server hosting nemo Ranking model", ) @@ -139,9 +145,7 @@ class RetrieverConfig(ConfigWizard): """ top_k: int = configfield( - "top_k", - default=4, - help_txt="Number of relevant results to retrieve", + "top_k", default=4, help_txt="Number of relevant results to retrieve", ) score_threshold: float = configfield( "score_threshold", @@ -149,9 +153,7 @@ class RetrieverConfig(ConfigWizard): help_txt="The minimum confidence score for the retrieved values to be considered", ) nr_url: str = configfield( - "nr_url", - default='http://retrieval-ms:8000', - help_txt="The nemo retriever microservice url", + "nr_url", default='http://retrieval-ms:8000', help_txt="The nemo retriever microservice url", ) nr_pipeline: str = configfield( "nr_pipeline", @@ -160,50 +162,6 @@ class RetrieverConfig(ConfigWizard): ) -@configclass -class PromptsConfig(ConfigWizard): - """Configuration class for the Prompts. - - :cvar chat_template: Prompt template for chat. - :cvar rag_template: Prompt template for rag. - :cvar multi_turn_rag_template: Prompt template for multi-turn rag. - """ - - chat_template: str = configfield( - "chat_template", - default=( - "You are a helpful, respectful and honest assistant." - "Always answer as helpfully as possible, while being safe." - "Please ensure that your responses are positive in nature." - ), - help_txt="Prompt template for chat.", - ) - rag_template: str = configfield( - "rag_template", - default=( - "[INST] <>" - "Use the following context to answer the user's question. If you don't know the answer," - "just say that you don't know, don't try to make up an answer." - "<>" - "[INST] Context: {context_str} Question: {query_str} Only return the helpful" - " answer below and nothing else. Helpful answer:[/INST]" - ), - help_txt="Prompt template for rag.", - ) - multi_turn_rag_template: str = configfield( - "multi_turn_rag_template", - default=( - "You are a document chatbot. Help the user as they ask questions about documents." - " User message just asked: {input}\n\n" - " For this, we have retrieved the following potentially-useful info: " - " Conversation History Retrieved:\n{history}\n\n" - " Document Retrieved:\n{context}\n\n" - " Answer only from retrieved data. Make your response conversational." - ), - help_txt="Prompt template for rag.", - ) - - @configclass class AppConfig(ConfigWizard): """Configuration class for the application. 
@@ -233,26 +191,14 @@ class AppConfig(ConfigWizard): default=LLMConfig(), ) text_splitter: TextSplitterConfig = configfield( - "text_splitter", - env=False, - help_txt="The configuration for text splitter.", - default=TextSplitterConfig(), + "text_splitter", env=False, help_txt="The configuration for text splitter.", default=TextSplitterConfig(), ) embeddings: EmbeddingConfig = configfield( - "embeddings", - env=False, - help_txt="The configuration of embedding model.", - default=EmbeddingConfig(), + "embeddings", env=False, help_txt="The configuration of embedding model.", default=EmbeddingConfig(), ) - retriever: RetrieverConfig = configfield( - "retriever", - env=False, - help_txt="The configuration of the retriever pipeline.", - default=RetrieverConfig(), + ranking: RankingConfig = configfield( + "ranking", env=False, help_txt="The configuration of ranking model.", default=RankingConfig(), ) - prompts: PromptsConfig = configfield( - "prompts", - env=False, - help_txt="Prompt templates for chat and rag.", - default=PromptsConfig(), + retriever: RetrieverConfig = configfield( + "retriever", env=False, help_txt="The configuration of the retriever pipeline.", default=RetrieverConfig(), ) diff --git a/RetrievalAugmentedGeneration/frontend/frontend/configuration_wizard.py b/RAG/src/chain_server/configuration_wizard.py similarity index 89% rename from RetrievalAugmentedGeneration/frontend/frontend/configuration_wizard.py rename to RAG/src/chain_server/configuration_wizard.py index d63d9e41..5e53a6b6 100644 --- a/RetrievalAugmentedGeneration/frontend/frontend/configuration_wizard.py +++ b/RAG/src/chain_server/configuration_wizard.py @@ -30,14 +30,7 @@ from typing import Any, Callable, Dict, List, Optional, TextIO, Tuple, Union import yaml -from dataclass_wizard import ( - JSONWizard, - LoadMeta, - YAMLWizard, - errors, - fromdict, - json_field, -) +from dataclass_wizard import JSONWizard, LoadMeta, YAMLWizard, errors, fromdict, json_field from dataclass_wizard.models import JSONField from dataclass_wizard.utils.string_conv import to_camel_case @@ -46,9 +39,7 @@ _LOGGER = logging.getLogger(__name__) -def configfield( - name: str, *, env: bool = True, help_txt: str = "", **kwargs: Any -) -> JSONField: +def configfield(name: str, *, env: bool = True, help_txt: str = "", **kwargs: Any) -> JSONField: """Create a data class field with the specified name in JSON format. :param name: The name of the field. 
@@ -148,9 +139,7 @@ def print_help( default = "NO-DEFAULT-VALUE" else: default = val.default - help_printer( - f"{_Color.BOLD}{' ' * indent}{jsonname}:{_Color.END} {default}\n" - ) + help_printer(f"{_Color.BOLD}{' ' * indent}{jsonname}:{_Color.END} {default}\n") # print comments if is_embedded_config: @@ -158,9 +147,7 @@ def print_help( if val.metadata.get("help"): help_printer(f"{' ' * indent}# {val.metadata['help']}\n") if not is_embedded_config: - typestr = getattr(val.type, "__name__", None) or str(val.type).replace( - "typing.", "" - ) + typestr = getattr(val.type, "__name__", None) or str(val.type).replace("typing.", "") help_printer(f"{' ' * indent}# Type: {typestr}\n") if val.metadata.get("env", True): help_printer(f"{' ' * indent}# ENV Variable: {full_envname}\n") @@ -170,17 +157,13 @@ def print_help( if is_embedded_config: new_env_parent = f"{env_parent}_{envname}" new_json_parent = json_parent + (jsonname,) - val.type.print_help( - help_printer, env_parent=new_env_parent, json_parent=new_json_parent - ) + val.type.print_help(help_printer, env_parent=new_env_parent, json_parent=new_json_parent) help_printer("\n") @classmethod def envvars( - cls, - env_parent: Optional[str] = None, - json_parent: Optional[Tuple[str, ...]] = None, + cls, env_parent: Optional[str] = None, json_parent: Optional[Tuple[str, ...]] = None, ) -> List[Tuple[str, Tuple[str, ...], type]]: """Calculate valid environment variables and their config structure location. @@ -213,9 +196,7 @@ def envvars( if is_embedded_config: new_env_parent = f"{env_parent}_{envname}" new_json_parent = json_parent + (jsonname,) - output += val.type.envvars( - env_parent=new_env_parent, json_parent=new_json_parent - ) + output += val.type.envvars(env_parent=new_env_parent, json_parent=new_json_parent) elif val.metadata.get("env", True): output += [(full_envname, json_parent + (jsonname,), val.type)] @@ -246,10 +227,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "ConfigWizard": var_value = try_json_load(var_value) update_dict(data, conf_path, var_value) _LOGGER.debug( - "Found EnvVar Config - %s:%s = %s", - var_name, - str(var_type), - repr(var_value), + "Found EnvVar Config - %s:%s = %s", var_name, str(var_type), repr(var_value), ) LoadMeta(key_transform="CAMEL").bind_to(cls) @@ -272,9 +250,7 @@ def from_file(cls, filepath: str) -> Optional["ConfigWizard"]: _LOGGER.error("The configuration file cannot be found.") file = None except PermissionError: - _LOGGER.error( - "Permission denied when trying to read the configuration file." - ) + _LOGGER.error("Permission denied when trying to read the configuration file.") file = None if not file: return None @@ -284,8 +260,7 @@ def from_file(cls, filepath: str) -> Optional["ConfigWizard"]: data = read_json_or_yaml(file) except ValueError as err: _LOGGER.error( - "Configuration file must be valid JSON or YAML. The following errors occured:\n%s", - str(err), + "Configuration file must be valid JSON or YAML. 
The following errors occured:\n%s", str(err), ) data = None config = None @@ -297,9 +272,7 @@ def from_file(cls, filepath: str) -> Optional["ConfigWizard"]: try: config = cls.from_dict(data) except errors.MissingFields as err: - _LOGGER.error( - "Configuration is missing required fields: \n%s", str(err) - ) + _LOGGER.error("Configuration is missing required fields: \n%s", str(err)) config = None except errors.ParseError as err: _LOGGER.error("Invalid configuration value provided:\n%s", str(err)) @@ -352,9 +325,7 @@ def read_json_or_yaml(stream: TextIO) -> Dict[str, Any]: return data # neither json nor yaml - err_msg = "\n\n".join( - [key + " Parser Errors:\n" + str(val) for key, val in exceptions.items()] - ) + err_msg = "\n\n".join([key + " Parser Errors:\n" + str(val) for key, val in exceptions.items()]) raise ValueError(err_msg) @@ -372,12 +343,7 @@ def try_json_load(value: str) -> Any: return value -def update_dict( - data: Dict[str, Any], - path: Tuple[str, ...], - value: Any, - overwrite: bool = False, -) -> None: +def update_dict(data: Dict[str, Any], path: Tuple[str, ...], value: Any, overwrite: bool = False,) -> None: """Update a dictionary with a new value at a given path. :param data: The dictionary to be updated. diff --git a/RAG/src/chain_server/requirements.txt b/RAG/src/chain_server/requirements.txt new file mode 100644 index 00000000..217b864a --- /dev/null +++ b/RAG/src/chain_server/requirements.txt @@ -0,0 +1,31 @@ +fastapi==0.110.0 +uvicorn[standard]==0.27.1 +python-multipart==0.0.9 +langchain==0.1.9 +unstructured[all-docs]==0.12.5 +sentence-transformers==3.0.0 +llama-index-core==0.10.27 +llama-index-readers-file==0.1.22 +llama-index-llms-langchain==0.1.3 +llama-index-embeddings-langchain==0.1.2 +llama-index-vector-stores-milvus==0.1.6 +llama-index-vector-stores-postgres==0.1.5 +pymilvus==2.4.0 +dataclass-wizard==0.22.3 +opencv-python==4.8.0.74 +minio==7.2.5 +asyncpg==0.29.0 +psycopg2-binary==2.9.9 +pgvector==0.2.5 +langchain-core==0.1.29 +langchain-nvidia-ai-endpoints==0.1.6 +opentelemetry-sdk==1.23.0 +opentelemetry-api==1.23.0 +opentelemetry-exporter-otlp-proto-grpc==1.23.0 +faiss-cpu==1.7.4 +httpcore==1.0.2 +httpx==0.26.0 +bleach==6.1.0 +flatdict==4.0.1 +chardet==5.2.0 +nltk==3.8.1 diff --git a/RAG/src/chain_server/server.py b/RAG/src/chain_server/server.py new file mode 100644 index 00000000..e93bd69a --- /dev/null +++ b/RAG/src/chain_server/server.py @@ -0,0 +1,491 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""The definition of the Llama Index chain server.""" +import importlib +import json +import logging +import os +import shutil +from inspect import getmembers, isclass +from pathlib import Path +from typing import Any, Dict, List +from uuid import uuid4 + +import bleach +from fastapi import FastAPI, File, Request, UploadFile +from fastapi.encoders import jsonable_encoder +from fastapi.exception_handlers import request_validation_exception_handler +from fastapi.exceptions import RequestValidationError +from fastapi.middleware.cors import CORSMiddleware +from fastapi.responses import JSONResponse, StreamingResponse +from pydantic import BaseModel, Field, constr, validator +from pymilvus.exceptions import MilvusException, MilvusUnavailableException +from starlette.status import HTTP_422_UNPROCESSABLE_ENTITY + +from RAG.src.chain_server.tracing import llamaindex_instrumentation_wrapper + +logging.basicConfig(level=os.environ.get('LOGLEVEL', 'INFO').upper()) +logger = logging.getLogger(__name__) + +# create the FastAPI server +app = FastAPI() + +# Allow access in browser from RAG UI and Storybook (development) +origins = ["*"] +app.add_middleware( + CORSMiddleware, allow_origins=origins, allow_credentials=False, allow_methods=["*"], allow_headers=["*"], +) + +EXAMPLE_DIR = "RAG/examples/" + + +class Message(BaseModel): + """Definition of the Chat Message type.""" + + role: str = Field( + description="Role for a message AI, User and System", default="user", max_length=256, pattern=r'[\s\S]*' + ) + content: str = Field( + description="The input query/prompt to the pipeline.", + default="I am going to Paris, what should I see?", + max_length=131072, + pattern=r'[\s\S]*', + ) + + @validator('role') + def validate_role(cls, value): + """ Field validator function to validate values of the field role""" + value = bleach.clean(value, strip=True) + valid_roles = {'user', 'assistant', 'system'} + if value.lower() not in valid_roles: + raise ValueError("Role must be one of 'user', 'assistant', or 'system'") + return value.lower() + + @validator('content') + def sanitize_content(cls, v): + """ Feild validator function to santize user populated feilds from HTML""" + return bleach.clean(v, strip=True) + + +class Prompt(BaseModel): + """Definition of the Prompt API data type.""" + + messages: List[Message] = Field( + ..., + description="A list of messages comprising the conversation so far. The roles of the messages must be alternating between user and assistant. The last input message should have role user. A message with the the system role is optional, and must be the very first message if it is present.", + max_items=50000, + ) + use_knowledge_base: bool = Field(..., description="Whether to use a knowledge base") + temperature: float = Field( + 0.2, + description="The sampling temperature to use for text generation. The higher the temperature value is, the less deterministic the output text will be. It is not recommended to modify both temperature and top_p in the same call.", + ge=0.1, + le=1.0, + ) + top_p: float = Field( + 0.7, + description="The top-p sampling mass used for text generation. The top-p value determines the probability mass that is sampled at sampling time. For example, if top_p = 0.2, only the most likely tokens (summing to 0.2 cumulative probability) will be sampled. It is not recommended to modify both temperature and top_p in the same call.", + ge=0.1, + le=1.0, + ) + max_tokens: int = Field( + 1024, + description="The maximum number of tokens to generate in any given call. 
Note that the model is not aware of this value, and generation will simply stop at the number of tokens specified.", + ge=0, + le=1024, + format="int64", + ) + # seed: int = Field(42, description="If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result.") + # bad: List[str] = Field(None, description="A word or list of words not to use. The words are case sensitive.") + stop: List[constr(max_length=256, pattern=r'[\s\S]*')] = Field( + description="A string or a list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence.", + max_items=256, + default=[], + ) + # stream: bool = Field(True, description="If set, partial message deltas will be sent. Tokens will be sent as data-only server-sent events (SSE) as they become available (JSON responses are prefixed by data:), with the stream terminated by a data: [DONE] message.") + + @validator('use_knowledge_base') + def sanitize_use_kb(cls, v): + """ Field validator function to sanitize user populated fields from HTML""" + v = bleach.clean(str(v), strip=True) + try: + return {"True": True, "False": False}[v] + except KeyError: + raise ValueError("use_knowledge_base must be a boolean value") + + @validator('temperature') + def sanitize_temperature(cls, v): + """ Field validator function to sanitize user populated fields from HTML""" + return float(bleach.clean(str(v), strip=True)) + + @validator('top_p') + def sanitize_top_p(cls, v): + """ Field validator function to sanitize user populated fields from HTML""" + return float(bleach.clean(str(v), strip=True)) + + +class ChainResponseChoices(BaseModel): + """ Definition of Chain response choices""" + + index: int = Field(default=0, ge=0, le=256, format="int64") + message: Message = Field(default=Message(role="assistant", content="")) + finish_reason: str = Field(default="", max_length=4096, pattern=r'[\s\S]*') + + +class ChainResponse(BaseModel): + """Definition of Chain APIs response data type""" + + id: str = Field(default="", max_length=100000, pattern=r'[\s\S]*') + choices: List[ChainResponseChoices] = Field(default=[], max_items=256) + + +class DocumentSearch(BaseModel): + """Definition of the DocumentSearch API data type.""" + + query: str = Field( + description="The content or keywords to search for within documents.", + max_length=131072, + pattern=r'[\s\S]*', + default="", + ) + top_k: int = Field( + description="The maximum number of documents to return in the response.", + default=4, + ge=0, + le=25, + format="int64", + ) + + +class DocumentChunk(BaseModel): + """Represents a chunk of a document.""" + + content: str = Field( + description="The content of the document chunk.", max_length=131072, pattern=r'[\s\S]*', default="" + ) + filename: str = Field( + description="The name of the file the chunk belongs to.", max_length=4096, pattern=r'[\s\S]*', default="" + ) + score: float = Field(..., description="The relevance score of the chunk.") + + +class DocumentSearchResponse(BaseModel): + """Represents a response from a document search.""" + + chunks: List[DocumentChunk] = Field(..., description="List of document chunks.", max_items=256) + + +class DocumentsResponse(BaseModel): + """Represents the response containing a list of documents.""" + + documents: List[constr(max_length=131072, pattern=r'[\s\S]*')] = Field( + description="List of filenames.", max_items=1000000, default=[] + ) + + +class HealthResponse(BaseModel): + 
message: str = Field(max_length=4096, pattern=r'[\s\S]*', default="") + + +@app.on_event("startup") +def import_example() -> None: + """ + Import the example class from the specified example file. + The example directory is expected to have a python file where the example class is defined. + """ + + # path of the example directory, to check for chain implementation + file_location = os.path.join(EXAMPLE_DIR, os.environ.get("EXAMPLE_PATH", "basic_rag/llamaindex")) + + # Walk through the directory to find the RAG chains and store it's class + for root, dirs, files in os.walk(file_location): + for file in files: + if not file.endswith(".py"): + continue + + # Import the specified file dynamically + spec = importlib.util.spec_from_file_location(name="example", location=os.path.join(root, file)) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + + # Scan each class in the file to find one with the 3 implemented methods: ingest_docs, rag_chain and llm_chain + for name, _ in getmembers(module, isclass): + try: + cls = getattr(module, name) + if set(["ingest_docs", "llm_chain", "rag_chain"]).issubset(set(dir(cls))): + if name == "BaseExample": + continue + # Try creating example class instance, store it in app.example if successful + example = cls() + app.example = cls + return + except: + raise ValueError(f"Class {name} is not implemented and could not be instantiated.") + + raise NotImplementedError(f"Could not find a valid example class in {EXAMPLE_DIR}") + + +@app.exception_handler(RequestValidationError) +async def request_validation_exception_handler(request: Request, exc: RequestValidationError) -> JSONResponse: + return JSONResponse( + status_code=HTTP_422_UNPROCESSABLE_ENTITY, + content={"detail": jsonable_encoder(exc.errors(), exclude={"input"})}, + ) + + +@app.get( + "/health", + response_model=HealthResponse, + responses={ + 500: { + "description": "Internal Server Error", + "content": {"application/json": {"example": {"detail": "Internal server error occurred"}}}, + } + }, +) +def health_check(): + """ + Perform a Health Check + + Returns 200 when service is up. This does not check the health of downstream services. + """ + + response_message = "Service is up." + return HealthResponse(message=response_message) + + +@app.post( + "/documents", + responses={ + 500: { + "description": "Internal Server Error", + "content": {"application/json": {"example": {"detail": "Internal server error occurred"}}}, + } + }, +) +@llamaindex_instrumentation_wrapper +async def upload_document(request: Request, file: UploadFile = File(...)) -> JSONResponse: + """Upload a document to the vector store.""" + + if not file.filename: + return JSONResponse(content={"message": "No files provided"}, status_code=200) + + try: + upload_folder = "/tmp-data/uploaded_files" + upload_file = os.path.basename(file.filename) + if not upload_file: + raise RuntimeError("Error parsing uploaded filename.") + file_path = os.path.join(upload_folder, upload_file) + uploads_dir = Path(upload_folder) + uploads_dir.mkdir(parents=True, exist_ok=True) + + # Copy uploaded file to upload_dir directory and pass that file path to chain server + with open(file_path, "wb") as f: + shutil.copyfileobj(file.file, f) + + app.example().ingest_docs(file_path, upload_file) + + return JSONResponse(content={"message": "File uploaded successfully"}, status_code=200) + + except Exception as e: + logger.error( + "Error from POST /documents endpoint. 
Ingestion of file: " + + file.filename + + " failed with error: " + + str(e) + ) + return JSONResponse(content={"message": str(e)}, status_code=500) + + +@app.post( + "/generate", + response_model=ChainResponse, + responses={ + 500: { + "description": "Internal Server Error", + "content": {"application/json": {"example": {"detail": "Internal server error occurred"}}}, + } + }, +) +@llamaindex_instrumentation_wrapper +async def generate_answer(request: Request, prompt: Prompt) -> StreamingResponse: + """Generate and stream the response to the provided prompt.""" + + chat_history = prompt.messages + # The last user message will be the query for the rag or llm chain + last_user_message = next((message.content for message in reversed(chat_history) if message.role == 'user'), None) + + # Find and remove the last user message if present + for i in reversed(range(len(chat_history))): + if chat_history[i].role == 'user': + del chat_history[i] + break # Remove only the last user message + + # All the other information from the prompt like the temperature, top_p etc., are llm_settings + llm_settings = {key: value for key, value in vars(prompt).items() if key not in ['messages', 'use_knowledge_base']} + try: + example = app.example() + generator = None + # call rag_chain if use_knowledge_base is enabled + if prompt.use_knowledge_base: + logger.info("Knowledge base is enabled. Using rag chain for response generation.") + generator = example.rag_chain(query=last_user_message, chat_history=chat_history, **llm_settings) + + else: + generator = example.llm_chain(query=last_user_message, chat_history=chat_history, **llm_settings) + + def response_generator(): + """Convert generator streaming response into `data: ChainResponse` format for chunk + """ + # unique response id for every query + resp_id = str(uuid4()) + if generator: + logger.debug(f"Generated response chunks\n") + # Create ChainResponse object for every token generated + for chunk in generator: + chain_response = ChainResponse() + response_choice = ChainResponseChoices(index=0, message=Message(role="assistant", content=chunk)) + chain_response.id = resp_id + chain_response.choices.append(response_choice) + logger.debug(response_choice) + # Send generator with tokens in ChainResponse format + yield "data: " + str(chain_response.json()) + "\n\n" + chain_response = ChainResponse() + + # [DONE] indicate end of response from server + response_choice = ChainResponseChoices(finish_reason="[DONE]") + chain_response.id = resp_id + chain_response.choices.append(response_choice) + logger.debug(response_choice) + yield "data: " + str(chain_response.json()) + "\n\n" + else: + chain_response = ChainResponse() + yield "data: " + str(chain_response.json()) + "\n\n" + + return StreamingResponse(response_generator(), media_type="text/event-stream") + + except (MilvusException, MilvusUnavailableException) as e: + exception_msg = "Error from milvus server. Please ensure you have ingested some documents. Please check chain-server logs for more details." + chain_response = ChainResponse() + response_choice = ChainResponseChoices( + index=0, message=Message(role="assistant", content=exception_msg), finish_reason="[DONE]" + ) + chain_response.choices.append(response_choice) + logger.error( + f"Error from Milvus database in /generate endpoint. Please ensure you have ingested some documents. 
Error details: {e}" + ) + return StreamingResponse( + iter(["data: " + str(chain_response.json()) + "\n\n"]), media_type="text/event-stream", status_code=500 + ) + + except Exception as e: + exception_msg = "Error from chain server. Please check chain-server logs for more details." + chain_response = ChainResponse() + response_choice = ChainResponseChoices( + index=0, message=Message(role="assistant", content=exception_msg), finish_reason="[DONE]" + ) + chain_response.choices.append(response_choice) + logger.error(f"Error from /generate endpoint. Error details: {e}") + return StreamingResponse( + iter(["data: " + str(chain_response.json()) + "\n\n"]), media_type="text/event-stream", status_code=500 + ) + + +@app.post( + "/search", + response_model=DocumentSearchResponse, + responses={ + 500: { + "description": "Internal Server Error", + "content": {"application/json": {"example": {"detail": "Internal server error occurred"}}}, + } + }, +) +@llamaindex_instrumentation_wrapper +async def document_search(request: Request, data: DocumentSearch) -> Dict[str, List[Dict[str, Any]]]: + """Search for the most relevant documents for the given search parameters.""" + + try: + example = app.example() + if hasattr(example, "document_search") and callable(example.document_search): + search_result = example.document_search(data.query, data.top_k) + chunks = [] + # Format top_k result in response format + for entry in search_result: + content = entry.get("content", "") # Default to empty string if "content" key doesn't exist + source = entry.get("source", "") # Default to empty string if "source" key doesn't exist + score = entry.get("score", 0.0) # Default to 0.0 if "score" key doesn't exist + chunk = DocumentChunk(content=content, filename=source, document_id="", score=score) + chunks.append(chunk) + return DocumentSearchResponse(chunks=chunks) + raise NotImplementedError("Example class has not implemented the document_search method.") + + except Exception as e: + logger.error(f"Error from POST /search endpoint. Error details: {e}") + return JSONResponse(content={"message": "Error occurred while searching documents."}, status_code=500) + + +@app.get( + "/documents", + response_model=DocumentsResponse, + responses={ + 500: { + "description": "Internal Server Error", + "content": {"application/json": {"example": {"detail": "Internal server error occurred"}}}, + } + }, +) +@llamaindex_instrumentation_wrapper +async def get_documents(request: Request) -> DocumentsResponse: + """Get list of document ingested in vectorstore.""" + try: + example = app.example() + if hasattr(example, "get_documents") and callable(example.get_documents): + documents = example.get_documents() + return DocumentsResponse(documents=documents) + else: + raise NotImplementedError("Example class has not implemented the get_documents method.") + + except Exception as e: + logger.error(f"Error from GET /documents endpoint. 
Error details: {e}") + return JSONResponse(content={"message": "Error occurred while fetching documents."}, status_code=500) + + +@app.delete( + "/documents", + responses={ + 500: { + "description": "Internal Server Error", + "content": {"application/json": {"example": {"detail": "Internal server error occurred"}}}, + } + }, +) +@llamaindex_instrumentation_wrapper +async def delete_document(request: Request, filename: str) -> JSONResponse: + """Delete a document from vectorstore.""" + try: + example = app.example() + if hasattr(example, "delete_documents") and callable(example.delete_documents): + status = example.delete_documents([filename]) + if not status: + raise Exception(f"Error in deleting document {filename}") + return JSONResponse(content={"message": f"Document {filename} deleted successfully"}, status_code=200) + + raise NotImplementedError("Example class has not implemented the delete_document method.") + + except Exception as e: + logger.error(f"Error from DELETE /documents endpoint. Error details: {e}") + return JSONResponse(content={"message": f"Error deleting document {filename}"}, status_code=500) diff --git a/RetrievalAugmentedGeneration/common/tracing.py b/RAG/src/chain_server/tracing.py similarity index 74% rename from RetrievalAugmentedGeneration/common/tracing.py rename to RAG/src/chain_server/tracing.py index 4e24337d..5a1607b3 100644 --- a/RetrievalAugmentedGeneration/common/tracing.py +++ b/RAG/src/chain_server/tracing.py @@ -16,20 +16,21 @@ """Module for configuring objects used to create OpenTelemetry traces.""" import os -import llama_index +from functools import wraps + from langchain.callbacks.base import BaseCallbackHandler as langchain_base_cb_handler from llama_index.core.callbacks.simple_llm_handler import SimpleLLMHandler as llama_index_base_cb_handler -from opentelemetry import trace, context +from opentelemetry import context, trace +from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter +from opentelemetry.propagate import get_global_textmap, set_global_textmap +from opentelemetry.propagators.composite import CompositePropagator from opentelemetry.sdk.resources import SERVICE_NAME, Resource from opentelemetry.sdk.trace import TracerProvider from opentelemetry.sdk.trace.export import SimpleSpanProcessor -from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator -from opentelemetry.propagate import set_global_textmap, get_global_textmap -from opentelemetry.propagators.composite import CompositePropagator -from tools.observability.langchain import opentelemetry_callback as langchain_otel_cb -from tools.observability.llamaindex import opentelemetry_callback as llama_index_otel_cb -from functools import wraps + +from RAG.tools.observability.langchain import opentelemetry_callback as langchain_otel_cb +from RAG.tools.observability.llamaindex import opentelemetry_callback as llama_index_otel_cb # Configure tracer used by the Chain Server to create spans resource = Resource.create({SERVICE_NAME: "chain-server"}) @@ -40,22 +41,21 @@ trace.set_tracer_provider(provider) tracer = trace.get_tracer("chain-server") - if os.environ.get("ENABLE_TRACING") == "true": # Configure Propagator used for processing trace context received by the Chain Server propagator = TraceContextTextMapPropagator() - # Configure Langchain OpenTelemetry callback handler + # Configure Langchain OpenTelemetry callback handler langchain_cb_handler = 
langchain_otel_cb.OpenTelemetryCallbackHandler(tracer) - + # Configure LlamaIndex OpenTelemetry callback handler - llama_index_cb_handler = llama_index_otel_cb.OpenTelemetryCallbackHandler(tracer) - + llama_index_cb_handler = llama_index_otel_cb.OpenTelemetryCallbackHandler(tracer) + else: - propagator = CompositePropagator([]) # No-op propagator - langchain_cb_handler = langchain_base_cb_handler() + propagator = CompositePropagator([]) # No-op propagator + langchain_cb_handler = langchain_base_cb_handler() llama_index_cb_handler = llama_index_base_cb_handler() - + set_global_textmap(propagator) # Wrapper Function to perform LlamaIndex instrumentation @@ -72,18 +72,43 @@ async def wrapper(*args, **kwargs): return wrapper + # Wrapper Function to perform Langchain instrumentation def langchain_instrumentation_method_wrapper(func): @wraps(func) def wrapper(*args, **kwargs): result = func(langchain_cb_handler, *args, **kwargs) return result + return wrapper + # Wrapper Class to perform Langchain instrumentation def langchain_instrumentation_class_wrapper(func): class WrapperClass(func): def __init__(self, *args, **kwargs): self.cb_handler = langchain_cb_handler super().__init__(*args, **kwargs) - return WrapperClass \ No newline at end of file + + return WrapperClass + + +def inject_context(ctx): + carrier = {} + get_global_textmap().inject(carrier, context=ctx) + return carrier + + +# Wrapper Function to perform instrumentation +def instrumentation_wrapper(func): + def wrapper(self, *args, **kwargs): + span_name = func.__name__ + span = tracer.start_span(span_name) + span_ctx = trace.set_span_in_context(span) + carrier = inject_context(span_ctx) + [span.set_attribute(f"{kw}", kwargs[kw]) for kw in kwargs] + result = func(self, carrier, *args, **kwargs) + span.end() + return result + + return wrapper diff --git a/RAG/src/chain_server/utils.py b/RAG/src/chain_server/utils.py new file mode 100644 index 00000000..caa8169b --- /dev/null +++ b/RAG/src/chain_server/utils.py @@ -0,0 +1,711 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Utility functions for the LLM Chains.""" +import logging +import os +from functools import lru_cache, wraps +from pathlib import Path +from shlex import quote +from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional +from urllib.parse import urlparse + +import yaml + +logger = logging.getLogger(__name__) + +try: + import torch +except Exception as e: + logger.error(f"torch import failed with error: {e}") + +try: + import psycopg2 +except Exception as e: + logger.error(f"psycogp2 import failed with error: {e}") + +try: + from sqlalchemy.engine.url import make_url +except Exception as e: + logger.error(f"SQLalchemy import failed with error: {e}") + +try: + from llama_index.core.indices import VectorStoreIndex + from llama_index.core.postprocessor.types import BaseNodePostprocessor + from llama_index.core.schema import MetadataMode + from llama_index.core.service_context import ServiceContext, set_global_service_context + from llama_index.core.utils import get_tokenizer, globals_helper + from llama_index.embeddings.langchain import LangchainEmbedding + from llama_index.llms.langchain import LangChainLLM + from llama_index.vector_stores.milvus import MilvusVectorStore + from llama_index.vector_stores.postgres import PGVectorStore + + if TYPE_CHECKING: + from llama_index.core.indices.base_retriever import BaseRetriever + from llama_index.core.indices.query.schema import QueryBundle + from llama_index.core.schema import NodeWithScore + from llama_index.core.callbacks import CallbackManager + + from RAG.src.chain_server.tracing import llama_index_cb_handler +except Exception as e: + logger.error(f"Llamaindex import failed with error: {e}") + +try: + from langchain.text_splitter import SentenceTransformersTokenTextSplitter + from langchain_community.embeddings import HuggingFaceEmbeddings + from langchain_community.vectorstores import FAISS +except Exception as e: + logger.error(f"Langchain import failed with error: {e}") + +try: + from langchain_core.vectorstores import VectorStore +except Exception as e: + logger.error(f"Langchain core import failed with error: {e}") + +try: + from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings, NVIDIARerank +except Exception as e: + logger.error(f"Langchain nvidia ai endpoints import failed with error: {e}") + +try: + from langchain_community.docstore.in_memory import InMemoryDocstore + from langchain_community.vectorstores import Milvus, PGVector +except Exception as e: + logger.error(f"Langchain community import failed with error: {e}") + +try: + from faiss import IndexFlatL2 +except Exception as e: + logger.error(f"faiss import failed with error: {e}") + +from langchain.llms.base import LLM +from langchain_core.documents.compressor import BaseDocumentCompressor +from langchain_core.embeddings import Embeddings +from langchain_core.language_models.chat_models import SimpleChatModel + +from RAG.src.chain_server import configuration + +if TYPE_CHECKING: + from RAG.src.chain_server.configuration_wizard import ConfigWizard + +DEFAULT_MAX_CONTEXT = 1500 + + +class LimitRetrievedNodesLength(BaseNodePostprocessor): + """Llama Index chain filter to limit token lengths.""" + + def _postprocess_nodes( + self, nodes: List["NodeWithScore"] = [], query_bundle: Optional["QueryBundle"] = None + ) -> List["NodeWithScore"]: + """Postprocess llamaindex nodes after nodes retrieval. + Token excedding the limit will be ignored + + Args: + nodes List["NodeWithScore"]: A list of nodes with scores to be processed. 
+ query_bundle Optional["QueryBundle"]: This dataclass contains the original query string and associated transformations. + + Returns: + List["NodeWithScore"]: A filtered lists of nodes + """ + included_nodes = [] + current_length = 0 + limit = DEFAULT_MAX_CONTEXT + tokenizer = get_tokenizer() + + # Ignore tokens with index more than limit, default to DEFAULT_MAX_CONTEXT + for node in nodes: + current_length += len(tokenizer(node.get_content(metadata_mode=MetadataMode.LLM))) + if current_length > limit: + break + included_nodes.append(node) + + return included_nodes + + +def utils_cache(func: Callable) -> Callable: + """Decorator to convert unhashable args to hashable ones + + Args: + func (Callable): The function to be decorated and args, kwargs needs to be hashable. + + Returns: + Callable: A wrapped function that has hashable argument and kwargs. + """ + + @wraps(func) + def wrapper(*args, **kwargs): + # Convert unhashable args to hashable ones + args_hashable = tuple(tuple(arg) if isinstance(arg, (list, dict, set)) else arg for arg in args) + kwargs_hashable = { + key: tuple(value) if isinstance(value, (list, dict, set)) else value for key, value in kwargs.items() + } + return func(*args_hashable, **kwargs_hashable) + + return wrapper + + +@utils_cache +@lru_cache +def set_service_context(**kwargs) -> None: + """Set the global service context.""" + llm = LangChainLLM(get_llm(**kwargs)) + embedding = LangchainEmbedding(get_embedding_model()) + # set service context for llama index based example GenerativeAIExamples/RAG/examples/basic_rag/llamaindex/ + # update llm and embedding model details + service_context = ServiceContext.from_defaults( + llm=llm, embed_model=embedding, callback_manager=CallbackManager([llama_index_cb_handler]) + ) + set_global_service_context(service_context) + + +@lru_cache +def get_config() -> "ConfigWizard": + """Parse the application configuration. + + Returns: + ConfigWizard: Updated RAG from configuration.py in dataclass object. + """ + config_file = os.environ.get("APP_CONFIG_FILE", "/dev/null") + # Default configuration is imported from GenerativeAIExamples/RAG/src/chain_server/configuration.py + # config can be modified using environment variable + config = configuration.AppConfig.from_file(config_file) + if config: + return config + raise RuntimeError("Unable to find configuration.") + + +@lru_cache +def get_prompts() -> Dict: + """Retrieves prompt configurations from YAML file and return a dict. + + Returns: + Dict: A dictionary containing the merged prompt configurations. + """ + + # default config taking from prompt.yaml + default_config_path = os.path.join( + "RAG/examples/", os.environ.get("EXAMPLE_PATH", "basic_rag/llamaindex"), "prompt.yaml" + ) + default_config = {} + if Path(default_config_path).exists(): + with open(default_config_path, 'r') as file: + default_config = yaml.safe_load(file) + + # Load prompt.yaml mounted by user + config_file = os.environ.get("PROMPT_CONFIG_FILE", "/prompt.yaml") + + config = {} + if Path(config_file).exists(): + with open(config_file, 'r') as file: + config = yaml.safe_load(file) + + # merge default prompts with user provided prompt, prioritize user prompt + config = _combine_dicts(default_config, config) + return config + + +@lru_cache +def get_vector_index(collection_name: str = "") -> "VectorStoreIndex": + """Create the vector db index for llamaindex based example. + + Args: + collection_name (str): The name of the collection within the vector store. Defaults to vector_db if not set. 
+ + Returns: + VectorStoreIndex: A VectorStoreIndex object from llama_index. + """ + config = get_config() + vector_store = None + store_nodes_override = True + + logger.info(f"Using {config.vector_store.name} as vector store") + + # vectorstore name can be updated using environment variable APP_VECTORSTORE_NAME + if config.vector_store.name == "pgvector": + db_name = quote(os.getenv('POSTGRES_DB', None)) + if not collection_name: + collection_name = os.getenv('COLLECTION_NAME', "vector_db") + # vectorstore url can be updated using environment variable APP_VECTORSTORE_URL, it should be in ip:port format + connection_string = f"postgresql://{os.getenv('POSTGRES_USER', '')}:{os.getenv('POSTGRES_PASSWORD', '')}@{config.vector_store.url}/{db_name}" + logger.info(f"Using PGVector collection: {collection_name}") + + conn = psycopg2.connect(connection_string) + conn.autocommit = True + + with conn.cursor() as c: + # Check for database existence first + c.execute(f"SELECT 1 FROM pg_database WHERE datname = '{db_name}'") + if not c.fetchone(): # Database doesn't exist + c.execute(f"CREATE DATABASE {db_name}") + + url = make_url(connection_string) + + vector_store = PGVectorStore.from_params( + database=db_name, + host=url.host, + password=url.password, + port=url.port, + user=url.username, + table_name=collection_name, + embed_dim=config.embeddings.dimensions, + ) + store_nodes_override = True + elif config.vector_store.name == "milvus": + if not collection_name: + collection_name = os.getenv('COLLECTION_NAME', "vector_db") + logger.info(f"Using milvus collection: {collection_name}") + # vectorstore url can be updated using environment variable APP_VECTORSTORE_URL, it should be in http://ip:port format + vector_store = MilvusVectorStore( + uri=config.vector_store.url, + dim=config.embeddings.dimensions, + collection_name=collection_name, + # Set milvus index type + index_config={"index_type": config.vector_store.index_type, "nlist": config.vector_store.nlist}, + search_config={"nprobe": config.vector_store.nprobe}, + overwrite=False, + ) + store_nodes_override = False + else: + raise RuntimeError("Unable to find any supported Vector Store DB. Supported engines are milvus and pgvector.") + vector_store_index = VectorStoreIndex.from_vector_store( + vector_store=vector_store, store_nodes_override=store_nodes_override + ) + return vector_store_index + + +def create_vectorstore_langchain(document_embedder: "Embeddings", collection_name: str = "") -> VectorStore: + """Create the vectorstore object for langchain based example. + + Args: + document_embedder (Embeddings): Embedding model object to generate embedding of document. + collection_name (str): The name of the collection within the vector store. Defaults to vector_db if not set. + + Returns: + VectorStore: A VectorStore object of given vectorstore from langchain. 
+ """ + + config = get_config() + + if not collection_name: + collection_name = os.getenv('COLLECTION_NAME', "vector_db") + + # vectorstore name can be updated using environment variable APP_VECTORSTORE_NAME + if config.vector_store.name == "faiss": + vectorstore = FAISS(document_embedder, IndexFlatL2(config.embeddings.dimensions), InMemoryDocstore(), {}) + elif config.vector_store.name == "pgvector": + db_name = os.getenv('POSTGRES_DB', None) + logger.info(f"Using PGVector collection: {collection_name}") + # vectorstore url can be updated using environment variable APP_VECTORSTORE_URL, it should be in ip:port format + connection_string = f"postgresql://{os.getenv('POSTGRES_USER', '')}:{os.getenv('POSTGRES_PASSWORD', '')}@{config.vector_store.url}/{db_name}" + vectorstore = PGVector( + collection_name=collection_name, connection_string=connection_string, embedding_function=document_embedder, + ) + elif config.vector_store.name == "milvus": + logger.info(f"Using milvus collection: {collection_name}") + # vectorstore url can be updated using environment variable APP_VECTORSTORE_URL, it should be in http://ip:port format + url = urlparse(config.vector_store.url) + vectorstore = Milvus( + document_embedder, + connection_args={"host": url.hostname, "port": url.port}, + collection_name=collection_name, + auto_id=True, + ) + else: + raise ValueError(f"{config.vector_store.name} vector database is not supported") + logger.info("Vector store created and saved.") + return vectorstore + + +def get_vectorstore(vectorstore: Optional["VectorStore"], document_embedder: "Embeddings") -> VectorStore: + """Retrieves or creates a VectorStore object from langchain. + + Args: + vectorstore (Optional[VectorStore]): VectorStore object from langchain. + document_embedder (Embeddings): Embedding model object to generate embedding of document. + + Returns: + VectorStore: A VectorStore object of given vectorstore from langchain. + """ + if vectorstore is None: + return create_vectorstore_langchain(document_embedder) + return vectorstore + + +@lru_cache +def get_doc_retriever(num_nodes: int = 4) -> "BaseRetriever": + """Create the document retriever. + + Args: + num_nodes (int): Number of documents to be retrieved from vectorstore. + + Returns: + BaseRetriever: Object of retriever. + """ + index = get_vector_index() + return index.as_retriever(similarity_top_k=num_nodes) + + +@utils_cache +@lru_cache() +def get_llm(**kwargs) -> LLM | SimpleChatModel: + """Create the LLM connection. + + Returns: + LLM: llm object from langchain base class. + """ + settings = get_config() + + logger.info(f"Using {settings.llm.model_engine} as model engine for llm. 
Model name: {settings.llm.model_name}") + # llm engine name can be updated using environment variable APP_LLM_MODELENGINE + if settings.llm.model_engine == "nvidia-ai-endpoints": + unused_params = [key for key in kwargs.keys() if key not in ['temperature', 'top_p', 'max_tokens']] + if unused_params: + logger.warning( + f"The following parameters from kwargs are not supported: {unused_params} for {settings.llm.model_engine}" + ) + # If server url is set using APP_LLM_SERVERURL, locally hosted NIM is used otherwise Nvidia hosted model are used + if settings.llm.server_url: + logger.info(f"Using llm model {settings.llm.model_name} hosted at {settings.llm.server_url}") + return ChatNVIDIA( + base_url=f"http://{settings.llm.server_url}/v1", + temperature=kwargs.get('temperature', None), + top_p=kwargs.get('top_p', None), + max_tokens=kwargs.get('max_tokens', None), + ) + else: + logger.info(f"Using llm model {settings.llm.model_name} from api catalog") + # Using Nvidia hosted model + return ChatNVIDIA( + model=settings.llm.model_name, + temperature=kwargs.get('temperature', None), + top_p=kwargs.get('top_p', None), + max_tokens=kwargs.get('max_tokens', None), + ) + else: + raise RuntimeError( + "Unable to find any supported Large Language Model server. Supported engine name is nvidia-ai-endpoints." + ) + + +@lru_cache +def get_embedding_model() -> Embeddings: + """Create the embedding model. + + Returns: + Embeddings: object of base embedding class of langchain + """ + model_kwargs = {"device": "cpu"} + if torch.cuda.is_available(): + model_kwargs["device"] = "cuda:0" + + encode_kwargs = {"normalize_embeddings": False} + settings = get_config() + + logger.info( + f"Using {settings.embeddings.model_engine} as model engine and {settings.embeddings.model_name} and model for embeddings" + ) + # Model engine can be updated using APP_EMBEDDINGS_MODELENGINE environment variable + if settings.embeddings.model_engine == "huggingface": + # Model name can be updated using APP_EMBEDDINGS_MODELNAME + hf_embeddings = HuggingFaceEmbeddings( + model_name=settings.embeddings.model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs, + ) + # Load in a specific embedding model + return hf_embeddings + elif settings.embeddings.model_engine == "nvidia-ai-endpoints": + # If URL set in APP_EMBEDDINGS_SERVERURL then Nvidia hosted model is used otherwise local NIM is used for inference + if settings.embeddings.server_url: + logger.info( + f"Using embedding model {settings.embeddings.model_name} hosted at {settings.embeddings.server_url}" + ) + return NVIDIAEmbeddings(base_url=f"http://{settings.embeddings.server_url}/v1", truncate="END") + else: + logger.info(f"Using embedding model {settings.embeddings.model_name} hosted at api catalog") + return NVIDIAEmbeddings(model=settings.embeddings.model_name, truncate="END") + else: + raise RuntimeError( + "Unable to find any supported embedding model. Supported engine is huggingface and nvidia-ai-endpoints." + ) + + +@lru_cache +def get_ranking_model() -> BaseDocumentCompressor: + """Create the ranking model. + + Returns: + BaseDocumentCompressor: Base class for document compressors. 
+ """ + + settings = get_config() + + try: + if settings.ranking.model_engine == "nvidia-ai-endpoints": + if settings.ranking.server_url: + logger.info(f"Using ranking model hosted at {settings.ranking.server_url}") + return NVIDIARerank( + base_url=f"http://{settings.ranking.server_url}/v1", top_n=settings.retriever.top_k, truncate="END" + ) + elif settings.ranking.model_name: + logger.info(f"Using ranking model {settings.ranking.model_name} hosted at api catalog") + return NVIDIARerank(model=settings.ranking.model_name, top_n=settings.retriever.top_k, truncate="END") + else: + logger.warning("Unable to find any supported ranking model. Supported engine is nvidia-ai-endpoints.") + except Exception as e: + logger.error(f"An error occurred while initializing ranking_model: {e}") + return None + + +def get_text_splitter() -> SentenceTransformersTokenTextSplitter: + """Return the token text splitter instance from langchain. + + Returns: + SentenceTransformersTokenTextSplitter: Splitting text to tokens using sentence model tokenizer + """ + + if get_config().text_splitter.model_name: + embedding_model_name = get_config().text_splitter.model_name + + # Chunksize and chunk overlap can up updated using APP_TEXTSPLITTER_CHUNKSIZE and APP_TEXTSPLITTER_CHUNKOVERLAP respectively + return SentenceTransformersTokenTextSplitter( + model_name=embedding_model_name, + tokens_per_chunk=get_config().text_splitter.chunk_size - 2, + chunk_overlap=get_config().text_splitter.chunk_overlap, + ) + + +def get_docs_vectorstore_langchain(vectorstore: VectorStore) -> List[str]: + """Retrieves filenames stored in the vector store implemented in LangChain. + + Args: + vectorstore (VectorStore): VectorStore object from langchain. + + Returns: + List[str]: List of document ingested in vectorstore + """ + + settings = get_config() + try: + # No API available in LangChain for listing the docs, thus usig its private _dict + extract_filename = lambda metadata: os.path.basename(metadata['source']) + if settings.vector_store.name == "faiss": + in_memory_docstore = vectorstore.docstore._dict + filenames = [extract_filename(doc.metadata) for doc in in_memory_docstore.values()] + filenames = list(set(filenames)) + return filenames + elif settings.vector_store.name == "pgvector": + # No API availbe in LangChain for listing the docs, thus usig its private _make_session + with vectorstore._make_session() as session: + embedding_doc_store = session.query( + vectorstore.EmbeddingStore.custom_id, + vectorstore.EmbeddingStore.document, + vectorstore.EmbeddingStore.cmetadata, + ).all() + filenames = set([extract_filename(metadata) for _, _, metadata in embedding_doc_store if metadata]) + return filenames + elif settings.vector_store.name == "milvus": + # Getting all the ID's > 0 + if vectorstore.col: + milvus_data = vectorstore.col.query(expr="pk >= 0", output_fields=["pk", "source", "text"]) + filenames = set([extract_filename(metadata) for metadata in milvus_data]) + return filenames + except Exception as e: + logger.error(f"Error occurred while retrieving documents: {e}") + return [] + + +def del_docs_vectorstore_langchain(vectorstore: VectorStore, filenames: List[str]) -> bool: + """Delete documents from the vector index implemented in LangChain. + + Args: + vectorstore (VectorStore): VectorStore object from langchain. 
+        filenames (List[str]): List of filenames to be deleted from vectorstore.
+
+    Returns:
+        bool: Delete document operation status
+    """
+
+    settings = get_config()
+    try:
+        # No other API available in LangChain for listing the docs, thus using its private _dict
+        extract_filename = lambda metadata: os.path.basename(metadata['source'])
+        if settings.vector_store.name == "faiss":
+            in_memory_docstore = vectorstore.docstore._dict
+            for filename in filenames:
+                # iterate over all the documents, find the doc_ids related to the document name
+                ids_list = [
+                    doc_id
+                    for doc_id, doc_data in in_memory_docstore.items()
+                    if extract_filename(doc_data.metadata) == filename
+                ]
+                if not len(ids_list):
+                    logger.info("File does not exist in the vectorstore")
+                    return False
+                # delete all doc_ids from vectorstore with filename
+                vectorstore.delete(ids_list)
+                logger.info(f"Deleted documents with filename {filename}")
+        elif settings.vector_store.name == "pgvector":
+            with vectorstore._make_session() as session:
+                collection = vectorstore.get_collection(session)
+                filter_by = vectorstore.EmbeddingStore.collection_id == collection.uuid
+                embedding_doc_store = (
+                    session.query(
+                        vectorstore.EmbeddingStore.custom_id,
+                        vectorstore.EmbeddingStore.document,
+                        vectorstore.EmbeddingStore.cmetadata,
+                    )
+                    .filter(filter_by)
+                    .all()
+                )
+                for filename in filenames:
+                    # iterate over all the documents, find the doc_ids related to the document name
+                    ids_list = [
+                        doc_id
+                        for doc_id, doc_data, metadata in embedding_doc_store
+                        if extract_filename(metadata) == filename
+                    ]
+                    if not len(ids_list):
+                        logger.info("File does not exist in the vectorstore")
+                        return False
+                    # delete all doc_ids from vectorstore with filename
+                    vectorstore.delete(ids_list)
+                    logger.info(f"Deleted documents with filename {filename}")
+        elif settings.vector_store.name == "milvus":
+            # Getting all the IDs with pk >= 0
+            milvus_data = vectorstore.col.query(expr="pk >= 0", output_fields=["pk", "source", "text"])
+            for filename in filenames:
+                # get ids with filename in metadata
+                ids_list = [metadata["pk"] for metadata in milvus_data if extract_filename(metadata) == filename]
+                if not len(ids_list):
+                    logger.info("File does not exist in the vectorstore")
+                    return False
+                # delete all ids from vectorstore with filename in metadata
+                vectorstore.col.delete(f"pk in {ids_list}")
+                logger.info(f"Deleted documents with filename {filename}")
+        return True
+    except Exception as e:
+        logger.error(f"Error occurred while deleting documents: {e}")
+        return False
+    return True
+
+
+def get_docs_vectorstore_llamaindex() -> List[str]:
+    """Retrieves filenames stored in the vector store implemented in LlamaIndex.
+
+    Returns:
+        List[str]: List of files in the vectorstore for the llama index based example.
+    """
+
+    settings = get_config()
+    index = get_vector_index()
+    decoded_filenames = []
+    try:
+        if settings.vector_store.name == "pgvector":
+            ref_doc_info = index.ref_doc_info
+            # iterate over all the documents in the vectorstore and return unique filenames
+            for _, ref_doc_value in ref_doc_info.items():
+                metadata = ref_doc_value.metadata
+                if 'filename' in metadata:
+                    filename = metadata['filename']
+                    decoded_filenames.append(filename)
+            decoded_filenames = list(set(decoded_filenames))
+        elif settings.vector_store.name == "milvus":
+            client = index.vector_store.client
+            collection_name = os.getenv('COLLECTION_NAME', "vector_db")
+            query_res = client.query(
+                collection_name=collection_name, filter="common_field == 'all'", output_fields=["filename"]
+            )
+            if not query_res:
+                return decoded_filenames
+
+            # iterate over all the documents in the collection and return unique filenames
+            filenames = [entry.get('filename') for entry in query_res]
+            for filename in filenames:
+                decoded_filenames.append(filename)
+            decoded_filenames = list(set(decoded_filenames))
+        return decoded_filenames
+    except Exception as e:
+        logger.error(f"Error occurred while retrieving documents: {e}")
+    return []
+
+
+def del_docs_vectorstore_llamaindex(filenames: List[str]) -> bool:
+    """Delete documents from the vector index implemented in LlamaIndex.
+
+    Args:
+        filenames (List[str]): List of filenames to be deleted from vectorstore.
+
+    Returns:
+        bool: Delete document operation status
+    """
+
+    settings = get_config()
+    index = get_vector_index()
+    try:
+        if settings.vector_store.name == "pgvector":
+            ref_doc_info = index.ref_doc_info
+            # Iterate over all the filenames and, if a filename is present in the metadata of a doc, delete it
+            for filename in filenames:
+                for ref_doc_id, doc_info in ref_doc_info.items():
+                    if 'filename' in doc_info.metadata and doc_info.metadata['filename'] == filename:
+                        index.delete_ref_doc(ref_doc_id, delete_from_docstore=True)
+                logger.info(f"Deleted documents with filename {filename}")
+        elif settings.vector_store.name == "milvus":
+            for filename in filenames:
+                client = index.vector_store.client
+                collection_name = os.getenv('COLLECTION_NAME', "vector_db")
+                query_res = client.query(
+                    collection_name=collection_name, filter=f"filename == '{filename}'", output_fields=["id"]
+                )
+                if not query_res:
+                    logger.info("File does not exist in the vectorstore")
+                    return False
+
+                # Fetch vectorstore document ids for the given filename
+                ids = [entry.get('id') for entry in query_res]
+                # Delete all documents with these ids from the vectorstore to delete the file
+                res = client.delete(collection_name=collection_name, filter=f"id in {str(ids)}")
+                logger.info(f"Deleted documents with filename {filename}")
+        return True
+    except Exception as e:
+        logger.error(f"Error occurred while deleting documents: {e}")
+        return False
+
+
+def _combine_dicts(dict_a: Dict[str, Any], dict_b: Dict[str, Any]) -> Dict[str, Any]:
+    """Combines two dictionaries recursively, prioritizing values from dict_b.
+
+    Args:
+        dict_a: The first dictionary.
+        dict_b: The second dictionary.
+
+    Returns:
+        A new dictionary with combined key-value pairs.
+ """ + + combined_dict = dict_a.copy() # Start with a copy of dict_a + + for key, value_b in dict_b.items(): + if key in combined_dict: + value_a = combined_dict[key] + # Remove the special handling for "command" + if isinstance(value_a, dict) and isinstance(value_b, dict): + combined_dict[key] = _combine_dicts(value_a, value_b) + # Otherwise, replace the value from A with the value from B + else: + combined_dict[key] = value_b + else: + # Add any key not present in A + combined_dict[key] = value_b + + return combined_dict diff --git a/integrations/pandasai/llms/__init__.py b/RAG/src/pandasai/llms/__init__.py similarity index 100% rename from integrations/pandasai/llms/__init__.py rename to RAG/src/pandasai/llms/__init__.py diff --git a/integrations/pandasai/llms/nv_aiplay.py b/RAG/src/pandasai/llms/nv_aiplay.py similarity index 98% rename from integrations/pandasai/llms/nv_aiplay.py rename to RAG/src/pandasai/llms/nv_aiplay.py index e169ec56..ff6dc4cb 100644 --- a/integrations/pandasai/llms/nv_aiplay.py +++ b/RAG/src/pandasai/llms/nv_aiplay.py @@ -18,15 +18,16 @@ import logging from typing import Any, Dict, Optional +from langchain_nvidia_ai_endpoints import ChatNVIDIA from pandasai.llm.base import LLM -from pandasai.prompts.base import BasePrompt from pandasai.pipelines.pipeline_context import PipelineContext -from langchain_nvidia_ai_endpoints import ChatNVIDIA +from pandasai.prompts.base import BasePrompt -from RetrievalAugmentedGeneration.common.utils import get_config +from RAG.src.chain_server.utils import get_config logger = logging.getLogger(__name__) + class NVIDIA(LLM): """ A wrapper class on PandasAI base LLM class to NVIDIA Foundational Models. diff --git a/RAG/src/rag_playground/Dockerfile b/RAG/src/rag_playground/Dockerfile new file mode 100644 index 00000000..acc2807f --- /dev/null +++ b/RAG/src/rag_playground/Dockerfile @@ -0,0 +1,39 @@ +ARG BASE_IMAGE_URL=nvcr.io/nvidia/base/ubuntu +ARG BASE_IMAGE_TAG=22.04_20240212 + +FROM ${BASE_IMAGE_URL}:${BASE_IMAGE_TAG} + +ENV PYTHONDONTWRITEBYTECODE=1 +ENV DEBIAN_FRONTEND noninteractive + +# Install required ubuntu packages for setting up python 3.10 +RUN apt update && \ + apt install -y dpkg openssl libgl1 linux-libc-dev libksba8 curl software-properties-common build-essential libssl-dev libffi-dev && \ + add-apt-repository ppa:deadsnakes/ppa && \ + apt update && apt install -y python3.10 python3.10-dev python3.10-distutils + +# Install pip for python3.10 +RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10 + +RUN rm -rf /var/lib/apt/lists/* + +# Install all custom python dependencies +RUN mkdir /app +COPY requirements.txt /app +RUN apt-get update; \ + apt-get upgrade -y; \ + pip3 --no-cache-dir install -r /app/requirements.txt; + + +ARG PLAYGROUND_MODE=default +RUN if [ "${PLAYGROUND_MODE}" = "speech" ] ; then \ + pip3 --no-cache-dir install nvidia-riva-client==2.14.0; \ + fi + +RUN apt-get clean + +USER 1001 +COPY $PLAYGROUND_MODE /app/frontend + +WORKDIR /app +ENTRYPOINT ["python3.10", "-m", "frontend"] diff --git a/RetrievalAugmentedGeneration/frontend/frontend/__init__.py b/RAG/src/rag_playground/default/__init__.py similarity index 96% rename from RetrievalAugmentedGeneration/frontend/frontend/__init__.py rename to RAG/src/rag_playground/default/__init__.py index 631a8847..e57aa978 100644 --- a/RetrievalAugmentedGeneration/frontend/frontend/__init__.py +++ b/RAG/src/rag_playground/default/__init__.py @@ -80,9 +80,7 @@ def main() -> "APIServer": _LOGGER.info("Configuration: \n%s", config.to_yaml()) # connect to 
other services - client = chat_client.ChatClient( - f"{config.server_url}:{config.server_port}", config.model_name - ) + client = chat_client.ChatClient(f"{config.server_url}:{config.server_port}", config.model_name) # create api server _LOGGER.info("Instantiating the API Server.") diff --git a/RetrievalAugmentedGeneration/frontend/frontend/__main__.py b/RAG/src/rag_playground/default/__main__.py similarity index 74% rename from RetrievalAugmentedGeneration/frontend/frontend/__main__.py rename to RAG/src/rag_playground/default/__main__.py index cd2fa34a..df0bda71 100644 --- a/RetrievalAugmentedGeneration/frontend/frontend/__main__.py +++ b/RAG/src/rag_playground/default/__main__.py @@ -34,10 +34,7 @@ def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser(description="Document Retrieval Service") parser.add_argument( - "--help-config", - action="store_true", - default=False, - help="show the configuration help text", + "--help-config", action="store_true", default=False, help="show the configuration help text", ) parser.add_argument( @@ -48,18 +45,10 @@ def parse_args() -> argparse.Namespace: help="path to the configuration file (json or yaml)", ) parser.add_argument( - "-v", - "--verbose", - action="count", - default=1, - help="increase output verbosity", + "-v", "--verbose", action="count", default=1, help="increase output verbosity", ) parser.add_argument( - "-q", - "--quiet", - action="count", - default=0, - help="decrease output verbosity", + "-q", "--quiet", action="count", default=0, help="decrease output verbosity", ) parser.add_argument( @@ -70,28 +59,14 @@ def parse_args() -> argparse.Namespace: help="Bind socket to this host.", ) parser.add_argument( - "--port", - metavar="PORT_NUM", - type=int, - default=8080, - help="Bind socket to this port.", + "--port", metavar="PORT_NUM", type=int, default=8080, help="Bind socket to this port.", ) parser.add_argument( - "--workers", - metavar="NUM_WORKERS", - type=int, - default=1, - help="Number of worker processes.", + "--workers", metavar="NUM_WORKERS", type=int, default=1, help="Number of worker processes.", ) + parser.add_argument("--ssl-keyfile", metavar="SSL_KEY", type=str, default=None, help="SSL key file") parser.add_argument( - "--ssl-keyfile", metavar="SSL_KEY", type=str, default=None, help="SSL key file" - ) - parser.add_argument( - "--ssl-certfile", - metavar="SSL_CERT", - type=str, - default=None, - help="SSL certificate file", + "--ssl-certfile", metavar="SSL_CERT", type=str, default=None, help="SSL certificate file", ) cliargs = parser.parse_args() diff --git a/RetrievalAugmentedGeneration/frontend/frontend/api.py b/RAG/src/rag_playground/default/api.py similarity index 88% rename from RetrievalAugmentedGeneration/frontend/frontend/api.py rename to RAG/src/rag_playground/default/api.py index 25df0ad5..96b5ec38 100644 --- a/RetrievalAugmentedGeneration/frontend/frontend/api.py +++ b/RAG/src/rag_playground/default/api.py @@ -20,9 +20,8 @@ from fastapi import FastAPI from fastapi.responses import FileResponse from fastapi.staticfiles import StaticFiles -from frontend.chat_client import ChatClient - from frontend import pages +from frontend.chat_client import ChatClient STATIC_DIR = os.path.join(os.path.abspath(os.path.dirname(__file__)), "static") @@ -47,15 +46,9 @@ def __init__(self, client: ChatClient) -> None: def configure_routes(self) -> None: """Configure the routes in the API Server.""" _ = gr.mount_gradio_app( - self, - blocks=pages.converse.build_page(self._client), - 
path=f"/content{pages.converse.PATH}", - ) - _ = gr.mount_gradio_app( - self, - blocks=pages.kb.build_page(self._client), - path=f"/content{pages.kb.PATH}", + self, blocks=pages.converse.build_page(self._client), path=f"/content{pages.converse.PATH}", ) + _ = gr.mount_gradio_app(self, blocks=pages.kb.build_page(self._client), path=f"/content{pages.kb.PATH}",) @self.get("/") async def root_redirect() -> FileResponse: diff --git a/RetrievalAugmentedGeneration/frontend/frontend/assets/__init__.py b/RAG/src/rag_playground/default/assets/__init__.py similarity index 100% rename from RetrievalAugmentedGeneration/frontend/frontend/assets/__init__.py rename to RAG/src/rag_playground/default/assets/__init__.py diff --git a/RAG/src/rag_playground/default/assets/kaizen-theme.css b/RAG/src/rag_playground/default/assets/kaizen-theme.css new file mode 100644 index 00000000..2b0f6dc2 --- /dev/null +++ b/RAG/src/rag_playground/default/assets/kaizen-theme.css @@ -0,0 +1,13 @@ +.tabitem { + background-color: var(--block-background-fill); + } + + .gradio-container { + /* This needs to be !important, otherwise the breakpoint override the container being full width */ + max-width: 100% !important; + padding: 10px !important; + } + + footer { + visibility: hidden; + } \ No newline at end of file diff --git a/RetrievalAugmentedGeneration/frontend/frontend/assets/kaizen-theme.json b/RAG/src/rag_playground/default/assets/kaizen-theme.json similarity index 100% rename from RetrievalAugmentedGeneration/frontend/frontend/assets/kaizen-theme.json rename to RAG/src/rag_playground/default/assets/kaizen-theme.json diff --git a/RetrievalAugmentedGeneration/frontend/frontend/chat_client.py b/RAG/src/rag_playground/default/chat_client.py similarity index 53% rename from RetrievalAugmentedGeneration/frontend/frontend/chat_client.py rename to RAG/src/rag_playground/default/chat_client.py index 19d8c42a..21816997 100644 --- a/RetrievalAugmentedGeneration/frontend/frontend/chat_client.py +++ b/RAG/src/rag_playground/default/chat_client.py @@ -14,15 +14,14 @@ # limitations under the License. 
"""The API client for the langchain-esque service.""" +import json import logging import mimetypes import typing -import json import requests -from requests.exceptions import ConnectionError - from frontend import tracing +from requests.exceptions import ConnectionError _LOGGER = logging.getLogger(__name__) @@ -42,33 +41,23 @@ def model_name(self) -> str: return self._model_name @tracing.instrumentation_wrapper - def search( - self, carrier, prompt: str - ) -> typing.List[typing.Dict[str, typing.Union[str, float]]]: + def search(self, carrier, prompt: str) -> typing.List[typing.Dict[str, typing.Union[str, float]]]: """Search for relevant documents and return json data.""" data = {"query": prompt, "top_k": 4} - headers = { - **carrier, - "accept": "application/json", "Content-Type": "application/json" - } + headers = {**carrier, "accept": "application/json", "Content-Type": "application/json"} url = f"{self.server_url}/search" - _LOGGER.debug( - "looking up documents - %s", str({"server_url": url, "post_data": data}) - ) + _LOGGER.debug("looking up documents - %s", str({"server_url": url, "post_data": data})) try: with requests.post(url, headers=headers, json=data, timeout=30) as req: - req.raise_for_status() - response = req.json() - return typing.cast( - typing.List[typing.Dict[str, typing.Union[str, float]]], response - ) + req.raise_for_status() + response = req.json() + return typing.cast(typing.List[typing.Dict[str, typing.Union[str, float]]], response) except Exception as e: - _LOGGER.error(f"Failed to get response from /documentSearch endpoint of chain-server. Error details: {e}. Refer to chain-server logs for details.") - return typing.cast( - typing.List[typing.Dict[str, typing.Union[str, float]]], [] + _LOGGER.error( + f"Failed to get response from /documentSearch endpoint of chain-server. Error details: {e}. Refer to chain-server logs for details." 
) - + return typing.cast(typing.List[typing.Dict[str, typing.Union[str, float]]], []) @tracing.predict_instrumentation_wrapper def predict( @@ -76,46 +65,42 @@ def predict( ) -> typing.Generator[str, None, None]: """Make a model prediction.""" data = { - "messages": [ - { - "role": "user", - "content": query - } - ], - "use_knowledge_base": use_knowledge_base, + "messages": [{"role": "user", "content": query}], + "use_knowledge_base": use_knowledge_base, } url = f"{self.server_url}/generate" - _LOGGER.debug( - "making inference request - %s", str({"server_url": url, "post_data": data}) - ) + _LOGGER.debug("making inference request - %s", str({"server_url": url, "post_data": data})) try: with requests.post(url, stream=True, json=data, timeout=50, headers=carrier) as req: - req.raise_for_status() - for chunk in req.iter_lines(): - raw_resp = chunk.decode("UTF-8") - if not raw_resp: - continue - resp_dict = None - try: - resp_dict = json.loads(raw_resp[6:]) - resp_choices = resp_dict.get("choices", []) - if len(resp_choices): - resp_str = resp_choices[0].get("message", {}).get("content", "") - yield resp_str - else: - yield "" - except Exception as e: - raise ValueError(f"Invalid response json: {raw_resp}") from e + req.raise_for_status() + for chunk in req.iter_lines(): + raw_resp = chunk.decode("UTF-8") + if not raw_resp: + continue + resp_dict = None + try: + resp_dict = json.loads(raw_resp[6:]) + resp_choices = resp_dict.get("choices", []) + if len(resp_choices): + resp_str = resp_choices[0].get("message", {}).get("content", "") + yield resp_str + else: + yield "" + except Exception as e: + raise ValueError(f"Invalid response json: {raw_resp}") from e except Exception as e: - _LOGGER.error(f"Failed to get response from /generate endpoint of chain-server. Error details: {e}. Refer to chain-server logs for details.") - yield str("Failed to get response from /generate endpoint of chain-server. Check if the fastapi server in chain-server is up. Refer to chain-server logs for details.") + _LOGGER.error( + f"Failed to get response from /generate endpoint of chain-server. Error details: {e}. Refer to chain-server logs for details." + ) + yield str( + "Failed to get response from /generate endpoint of chain-server. Check if the fastapi server in chain-server is up. Refer to chain-server logs for details." + ) # Send None to indicate end of response yield None - @tracing.instrumentation_wrapper def upload_documents(self, carrier, file_paths: typing.List[str]) -> None: """Upload documents to the kb.""" @@ -132,8 +117,7 @@ def upload_documents(self, carrier, file_paths: typing.List[str]) -> None: files = {"file": (fpath, open(fpath, "rb"), mime_type)} _LOGGER.debug( - "uploading file - %s", - str({"server_url": url, "file": fpath}), + "uploading file - %s", str({"server_url": url, "file": fpath}), ) resp = requests.post( @@ -142,35 +126,30 @@ def upload_documents(self, carrier, file_paths: typing.List[str]) -> None: if resp.status_code == 500: raise ValueError(f"{resp.json().get('message', 'Failed to upload document')}") except Exception as e: - _LOGGER.error(f"Failed to get response from /documents endpoint of chain-server. Error details: {e}. Refer to chain-server logs for details.") + _LOGGER.error( + f"Failed to get response from /documents endpoint of chain-server. Error details: {e}. Refer to chain-server logs for details." 
+ ) raise ValueError(f"{e}") - @tracing.instrumentation_wrapper def delete_documents(self, carrier, file_name: str) -> str: """ Delete Selected documents""" - headers = { - **carrier, - "accept": "application/json", "Content-Type": "application/json" - } - params = { - 'filename': file_name - } + headers = {**carrier, "accept": "application/json", "Content-Type": "application/json"} + params = {'filename': file_name} url = f"{self.server_url}/documents" try: - _LOGGER.debug( - f"Delete request received for file_name: {file_name}" - ) + _LOGGER.debug(f"Delete request received for file_name: {file_name}") with requests.delete(url, headers=headers, params=params, timeout=30) as req: req.raise_for_status() response = req.json() return response except Exception as e: - _LOGGER.error(f"Failed to delete {file_name} using /documents endpoint of chain-server. Error details: {e}. Refer to chain-server logs for details.") + _LOGGER.error( + f"Failed to delete {file_name} using /documents endpoint of chain-server. Error details: {e}. Refer to chain-server logs for details." + ) return "" - @tracing.instrumentation_wrapper def get_uploaded_documents(self, carrier) -> typing.List[str]: """Get list of Uploaded documents.""" @@ -179,20 +158,20 @@ def get_uploaded_documents(self, carrier) -> typing.List[str]: **carrier, "accept": "application/json", } - uploaded_files=[] + uploaded_files = [] try: - resp = requests.get( - url, headers=headers, timeout=600 - ) - response = json.loads(resp.content) - if resp.status_code == 500: - raise ValueError(f"{resp.json().get('message', 'Failed to get uploaded documents')}") - else: - uploaded_files=response['documents'] + resp = requests.get(url, headers=headers, timeout=600) + response = json.loads(resp.content) + if resp.status_code == 500: + raise ValueError(f"{resp.json().get('message', 'Failed to get uploaded documents')}") + else: + uploaded_files = response['documents'] except ConnectionError as e: # Avoid playground crash when chain server starts after rag-playground _LOGGER.error(f"Failed to connect /documents endpoint of chain-server. Error details: {e}.") except Exception as e: - _LOGGER.error(f"Failed to get response from /documents endpoint of chain-server. Error details: {e}. Refer to chain-server logs for details.") + _LOGGER.error( + f"Failed to get response from /documents endpoint of chain-server. Error details: {e}. Refer to chain-server logs for details." 
+ ) raise ValueError(f"{e}") return uploaded_files diff --git a/RetrievalAugmentedGeneration/frontend/frontend/configuration.py b/RAG/src/rag_playground/default/configuration.py similarity index 76% rename from RetrievalAugmentedGeneration/frontend/frontend/configuration.py rename to RAG/src/rag_playground/default/configuration.py index 864ae45b..2ae91fd8 100644 --- a/RetrievalAugmentedGeneration/frontend/frontend/configuration.py +++ b/RAG/src/rag_playground/default/configuration.py @@ -28,17 +28,11 @@ class AppConfig(ConfigWizard): """ server_url: str = configfield( - "serverUrl", - default="http://localhost", - help_txt="The location of the chat server.", + "serverUrl", default="http://localhost", help_txt="The location of the chat server.", ) server_port: str = configfield( - "serverPort", - default="8000", - help_txt="The port on which the chat server is listening for HTTP requests.", + "serverPort", default="8000", help_txt="The port on which the chat server is listening for HTTP requests.", ) model_name: str = configfield( - "modelName", - default="meta/llama3-70b-instruct", - help_txt="The name of the hosted LLM model.", + "modelName", default="meta/llama3-70b-instruct", help_txt="The name of the hosted LLM model.", ) diff --git a/RetrievalAugmentedGeneration/common/configuration_wizard.py b/RAG/src/rag_playground/default/configuration_wizard.py similarity index 89% rename from RetrievalAugmentedGeneration/common/configuration_wizard.py rename to RAG/src/rag_playground/default/configuration_wizard.py index d63d9e41..5e53a6b6 100644 --- a/RetrievalAugmentedGeneration/common/configuration_wizard.py +++ b/RAG/src/rag_playground/default/configuration_wizard.py @@ -30,14 +30,7 @@ from typing import Any, Callable, Dict, List, Optional, TextIO, Tuple, Union import yaml -from dataclass_wizard import ( - JSONWizard, - LoadMeta, - YAMLWizard, - errors, - fromdict, - json_field, -) +from dataclass_wizard import JSONWizard, LoadMeta, YAMLWizard, errors, fromdict, json_field from dataclass_wizard.models import JSONField from dataclass_wizard.utils.string_conv import to_camel_case @@ -46,9 +39,7 @@ _LOGGER = logging.getLogger(__name__) -def configfield( - name: str, *, env: bool = True, help_txt: str = "", **kwargs: Any -) -> JSONField: +def configfield(name: str, *, env: bool = True, help_txt: str = "", **kwargs: Any) -> JSONField: """Create a data class field with the specified name in JSON format. :param name: The name of the field. 
@@ -148,9 +139,7 @@ def print_help( default = "NO-DEFAULT-VALUE" else: default = val.default - help_printer( - f"{_Color.BOLD}{' ' * indent}{jsonname}:{_Color.END} {default}\n" - ) + help_printer(f"{_Color.BOLD}{' ' * indent}{jsonname}:{_Color.END} {default}\n") # print comments if is_embedded_config: @@ -158,9 +147,7 @@ def print_help( if val.metadata.get("help"): help_printer(f"{' ' * indent}# {val.metadata['help']}\n") if not is_embedded_config: - typestr = getattr(val.type, "__name__", None) or str(val.type).replace( - "typing.", "" - ) + typestr = getattr(val.type, "__name__", None) or str(val.type).replace("typing.", "") help_printer(f"{' ' * indent}# Type: {typestr}\n") if val.metadata.get("env", True): help_printer(f"{' ' * indent}# ENV Variable: {full_envname}\n") @@ -170,17 +157,13 @@ def print_help( if is_embedded_config: new_env_parent = f"{env_parent}_{envname}" new_json_parent = json_parent + (jsonname,) - val.type.print_help( - help_printer, env_parent=new_env_parent, json_parent=new_json_parent - ) + val.type.print_help(help_printer, env_parent=new_env_parent, json_parent=new_json_parent) help_printer("\n") @classmethod def envvars( - cls, - env_parent: Optional[str] = None, - json_parent: Optional[Tuple[str, ...]] = None, + cls, env_parent: Optional[str] = None, json_parent: Optional[Tuple[str, ...]] = None, ) -> List[Tuple[str, Tuple[str, ...], type]]: """Calculate valid environment variables and their config structure location. @@ -213,9 +196,7 @@ def envvars( if is_embedded_config: new_env_parent = f"{env_parent}_{envname}" new_json_parent = json_parent + (jsonname,) - output += val.type.envvars( - env_parent=new_env_parent, json_parent=new_json_parent - ) + output += val.type.envvars(env_parent=new_env_parent, json_parent=new_json_parent) elif val.metadata.get("env", True): output += [(full_envname, json_parent + (jsonname,), val.type)] @@ -246,10 +227,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "ConfigWizard": var_value = try_json_load(var_value) update_dict(data, conf_path, var_value) _LOGGER.debug( - "Found EnvVar Config - %s:%s = %s", - var_name, - str(var_type), - repr(var_value), + "Found EnvVar Config - %s:%s = %s", var_name, str(var_type), repr(var_value), ) LoadMeta(key_transform="CAMEL").bind_to(cls) @@ -272,9 +250,7 @@ def from_file(cls, filepath: str) -> Optional["ConfigWizard"]: _LOGGER.error("The configuration file cannot be found.") file = None except PermissionError: - _LOGGER.error( - "Permission denied when trying to read the configuration file." - ) + _LOGGER.error("Permission denied when trying to read the configuration file.") file = None if not file: return None @@ -284,8 +260,7 @@ def from_file(cls, filepath: str) -> Optional["ConfigWizard"]: data = read_json_or_yaml(file) except ValueError as err: _LOGGER.error( - "Configuration file must be valid JSON or YAML. The following errors occured:\n%s", - str(err), + "Configuration file must be valid JSON or YAML. 
The following errors occured:\n%s", str(err), ) data = None config = None @@ -297,9 +272,7 @@ def from_file(cls, filepath: str) -> Optional["ConfigWizard"]: try: config = cls.from_dict(data) except errors.MissingFields as err: - _LOGGER.error( - "Configuration is missing required fields: \n%s", str(err) - ) + _LOGGER.error("Configuration is missing required fields: \n%s", str(err)) config = None except errors.ParseError as err: _LOGGER.error("Invalid configuration value provided:\n%s", str(err)) @@ -352,9 +325,7 @@ def read_json_or_yaml(stream: TextIO) -> Dict[str, Any]: return data # neither json nor yaml - err_msg = "\n\n".join( - [key + " Parser Errors:\n" + str(val) for key, val in exceptions.items()] - ) + err_msg = "\n\n".join([key + " Parser Errors:\n" + str(val) for key, val in exceptions.items()]) raise ValueError(err_msg) @@ -372,12 +343,7 @@ def try_json_load(value: str) -> Any: return value -def update_dict( - data: Dict[str, Any], - path: Tuple[str, ...], - value: Any, - overwrite: bool = False, -) -> None: +def update_dict(data: Dict[str, Any], path: Tuple[str, ...], value: Any, overwrite: bool = False,) -> None: """Update a dictionary with a new value at a given path. :param data: The dictionary to be updated. diff --git a/RetrievalAugmentedGeneration/frontend/frontend/pages/__init__.py b/RAG/src/rag_playground/default/pages/__init__.py similarity index 100% rename from RetrievalAugmentedGeneration/frontend/frontend/pages/__init__.py rename to RAG/src/rag_playground/default/pages/__init__.py diff --git a/RAG/src/rag_playground/default/pages/converse.py b/RAG/src/rag_playground/default/pages/converse.py new file mode 100644 index 00000000..1b595d40 --- /dev/null +++ b/RAG/src/rag_playground/default/pages/converse.py @@ -0,0 +1,119 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""This module contains the frontend gui for having a conversation.""" +import functools +import logging +from typing import Any, Dict, List, Tuple, Union + +import gradio as gr + +from frontend import assets, chat_client + +_LOGGER = logging.getLogger(__name__) +PATH = "/converse" +TITLE = "Converse" +OUTPUT_TOKENS = 1024 +MAX_DOCS = 5 + +_LOCAL_CSS = """ + +#contextbox { + overflow-y: scroll !important; + max-height: 400px; +} +""" + + +def build_page(client: chat_client.ChatClient) -> gr.Blocks: + """Build the gradio page to be mounted in the frame.""" + kui_theme, kui_styles = assets.load_theme("kaizen") + + with gr.Blocks(title=TITLE, theme=kui_theme, css=kui_styles + _LOCAL_CSS) as page: + + # create the page header + gr.Markdown(f"# {TITLE}") + + # chat logs + with gr.Row(equal_height=True): + chatbot = gr.Chatbot(scale=2, label=client.model_name) + latest_response = gr.Textbox(visible=False) + context = gr.JSON(scale=1, label="Knowledge Base Context", visible=False, elem_id="contextbox",) + + # check boxes + with gr.Row(): + with gr.Column(scale=10, min_width=150): + kb_checkbox = gr.Checkbox(label="Use knowledge base", info="", value=False) + + # text input boxes + with gr.Row(): + with gr.Column(scale=10, min_width=500): + msg = gr.Textbox(show_label=False, placeholder="Enter text and press ENTER", container=False,) + + # user feedback + with gr.Row(): + # _ = gr.Button(value="👍 Upvote") + # _ = gr.Button(value="👎 Downvote") + # _ = gr.Button(value="⚠️ Flag") + submit_btn = gr.Button(value="Submit") + _ = gr.ClearButton(msg) + _ = gr.ClearButton([msg, chatbot], value="Clear History") + ctx_show = gr.Button(value="Show Context") + ctx_hide = gr.Button(value="Hide Context", visible=False) + + # hide/show context + def _toggle_context(btn: str) -> Dict[gr.component, Dict[Any, Any]]: + if btn == "Show Context": + out = [True, False, True] + if btn == "Hide Context": + out = [False, True, False] + return { + context: gr.update(visible=out[0]), + ctx_show: gr.update(visible=out[1]), + ctx_hide: gr.update(visible=out[2]), + } + + ctx_show.click(_toggle_context, [ctx_show], [context, ctx_show, ctx_hide]) + ctx_hide.click(_toggle_context, [ctx_hide], [context, ctx_show, ctx_hide]) + + # form actions + _my_build_stream = functools.partial(_stream_predict, client) + msg.submit(_my_build_stream, [kb_checkbox, msg, chatbot], [msg, chatbot, context, latest_response]) + submit_btn.click(_my_build_stream, [kb_checkbox, msg, chatbot], [msg, chatbot, context, latest_response]) + + page.queue() + return page + + +def _stream_predict( + client: chat_client.ChatClient, use_knowledge_base: bool, question: str, chat_history: List[Tuple[str, str]], +) -> Any: + """Make a prediction of the response to the prompt.""" + chunks = "" + chat_history = chat_history or [] + _LOGGER.info( + "processing inference request - %s", str({"prompt": question, "use_knowledge_base": use_knowledge_base}), + ) + + documents: Union[None, List[Dict[str, Union[str, float]]]] = None + if use_knowledge_base: + documents = client.search(prompt=question) + + for chunk in client.predict(query=question, use_knowledge_base=use_knowledge_base, num_tokens=OUTPUT_TOKENS): + if chunk: + chunks += chunk + yield "", chat_history + [[question, chunks]], documents, "" + else: + yield "", chat_history + [[question, chunks]], documents, chunks diff --git a/RetrievalAugmentedGeneration/frontend/frontend/pages/kb.py b/RAG/src/rag_playground/default/pages/kb.py similarity index 81% rename from 
RetrievalAugmentedGeneration/frontend/frontend/pages/kb.py rename to RAG/src/rag_playground/default/pages/kb.py index b66a2819..94e93666 100644 --- a/RetrievalAugmentedGeneration/frontend/frontend/pages/kb.py +++ b/RAG/src/rag_playground/default/pages/kb.py @@ -14,13 +14,12 @@ # limitations under the License. """This module contains the frontend gui for chat.""" +import os +from functools import partial from pathlib import Path from typing import List -import os import gradio as gr -from functools import partial - from frontend import assets, chat_client PATH = "/kb" @@ -37,9 +36,7 @@ def build_page(client: chat_client.ChatClient) -> gr.Blocks: gr.Markdown(f"# {TITLE}") with gr.Row(): - upload_button = gr.UploadButton( - "Add File", file_types=["pdf"], file_count="multiple" - ) + upload_button = gr.UploadButton("Add File", file_types=["pdf"], file_count="multiple") with gr.Row(): file_output = gr.File() @@ -48,37 +45,32 @@ def build_page(client: chat_client.ChatClient) -> gr.Blocks: headers=["File Uploaded"], datatype=["str"], col_count=(1, "fixed"), - value= lambda: get_uploaded_files(client), + value=lambda: get_uploaded_files(client), every=5, ) with gr.Row(): buffer_textbox = gr.Textbox(interactive=False, visible=False) - message_textbox = gr.Textbox( - label="Message", interactive=False, visible=True - ) - + message_textbox = gr.Textbox(label="Message", interactive=False, visible=True) + with gr.Row(): delete_button = gr.Button("Delete") # form actions # Upload dutton action - upload_button.upload( - lambda files: upload_file(files, client), upload_button, file_output - ) + upload_button.upload(lambda files: upload_file(files, client), upload_button, file_output) # Files dataframe action files_df.select(return_selected_file, inputs=[files_df], outputs=[buffer_textbox]) # Delete button action partial_delete_file = partial(delete_file, client=client) - delete_button.click( - fn=partial_delete_file, inputs=buffer_textbox, outputs=message_textbox - ) + delete_button.click(fn=partial_delete_file, inputs=buffer_textbox, outputs=message_textbox) page.queue() return page + def delete_file(input_text, client: chat_client.ChatClient): """Deletes selected files from knowledge base using client""" @@ -88,6 +80,7 @@ def delete_file(input_text, client: chat_client.ChatClient): except Exception as e: raise gr.Error(f"{e}") + def return_selected_file(selected_index: gr.SelectData, dataframe): """Returns selected files from DataFrame""" if selected_index: @@ -95,11 +88,12 @@ def return_selected_file(selected_index: gr.SelectData, dataframe): dataframe = dataframe.drop(selected_index.index[0]) return val.iloc[0] + def upload_file(files: List[Path], client: chat_client.ChatClient) -> List[str]: """Use the client to upload a file to the knowledge base.""" try: file_paths = [file.name for file in files] - client.upload_documents(file_paths = file_paths) + client.upload_documents(file_paths=file_paths) # Save the uploaded file names to the state file with open(STATE_FILE, 'a') as file: @@ -111,10 +105,11 @@ def upload_file(files: List[Path], client: chat_client.ChatClient) -> List[str]: except Exception as e: raise gr.Error(f"{e}") -def get_uploaded_files(client: chat_client.ChatClient)-> List[str]: + +def get_uploaded_files(client: chat_client.ChatClient) -> List[str]: """Load previously uploaded files if the file exists""" uploaded_files = [["No Files uploaded"]] resp = client.get_uploaded_documents() - if len(resp)>0: - uploaded_files=[[file] for file in resp] + if len(resp) > 0: + uploaded_files = 
[[file] for file in resp] return uploaded_files diff --git a/RetrievalAugmentedGeneration/frontend/frontend/static/404.html b/RAG/src/rag_playground/default/static/404.html similarity index 100% rename from RetrievalAugmentedGeneration/frontend/frontend/static/404.html rename to RAG/src/rag_playground/default/static/404.html diff --git a/RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/WuNGAl0x4o1D5HqLxhHMt/_buildManifest.js b/RAG/src/rag_playground/default/static/_next/static/WuNGAl0x4o1D5HqLxhHMt/_buildManifest.js similarity index 100% rename from RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/WuNGAl0x4o1D5HqLxhHMt/_buildManifest.js rename to RAG/src/rag_playground/default/static/_next/static/WuNGAl0x4o1D5HqLxhHMt/_buildManifest.js diff --git a/RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/WuNGAl0x4o1D5HqLxhHMt/_ssgManifest.js b/RAG/src/rag_playground/default/static/_next/static/WuNGAl0x4o1D5HqLxhHMt/_ssgManifest.js similarity index 100% rename from RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/WuNGAl0x4o1D5HqLxhHMt/_ssgManifest.js rename to RAG/src/rag_playground/default/static/_next/static/WuNGAl0x4o1D5HqLxhHMt/_ssgManifest.js diff --git a/RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/chunks/78-a36dca5d49fafb86.js b/RAG/src/rag_playground/default/static/_next/static/chunks/78-a36dca5d49fafb86.js similarity index 100% rename from RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/chunks/78-a36dca5d49fafb86.js rename to RAG/src/rag_playground/default/static/_next/static/chunks/78-a36dca5d49fafb86.js diff --git a/RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/chunks/framework-7a7e500878b44665.js b/RAG/src/rag_playground/default/static/_next/static/chunks/framework-7a7e500878b44665.js similarity index 100% rename from RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/chunks/framework-7a7e500878b44665.js rename to RAG/src/rag_playground/default/static/_next/static/chunks/framework-7a7e500878b44665.js diff --git a/RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/chunks/main-92011a1a7f336a6f.js b/RAG/src/rag_playground/default/static/_next/static/chunks/main-92011a1a7f336a6f.js similarity index 100% rename from RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/chunks/main-92011a1a7f336a6f.js rename to RAG/src/rag_playground/default/static/_next/static/chunks/main-92011a1a7f336a6f.js diff --git a/RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/chunks/pages/_app-f21c0780e30f5eb6.js b/RAG/src/rag_playground/default/static/_next/static/chunks/pages/_app-f21c0780e30f5eb6.js similarity index 100% rename from RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/chunks/pages/_app-f21c0780e30f5eb6.js rename to RAG/src/rag_playground/default/static/_next/static/chunks/pages/_app-f21c0780e30f5eb6.js diff --git a/RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/chunks/pages/_app-f55c3b932a623280.js b/RAG/src/rag_playground/default/static/_next/static/chunks/pages/_app-f55c3b932a623280.js similarity index 100% rename from RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/chunks/pages/_app-f55c3b932a623280.js rename to RAG/src/rag_playground/default/static/_next/static/chunks/pages/_app-f55c3b932a623280.js diff --git a/RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/chunks/pages/_error-54de1933a164a1ff.js 
b/RAG/src/rag_playground/default/static/_next/static/chunks/pages/_error-54de1933a164a1ff.js similarity index 100% rename from RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/chunks/pages/_error-54de1933a164a1ff.js rename to RAG/src/rag_playground/default/static/_next/static/chunks/pages/_error-54de1933a164a1ff.js diff --git a/RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/chunks/pages/converse-39686323b565eff0.js b/RAG/src/rag_playground/default/static/_next/static/chunks/pages/converse-39686323b565eff0.js similarity index 100% rename from RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/chunks/pages/converse-39686323b565eff0.js rename to RAG/src/rag_playground/default/static/_next/static/chunks/pages/converse-39686323b565eff0.js diff --git a/RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/chunks/pages/converse-61880f01babd873a.js b/RAG/src/rag_playground/default/static/_next/static/chunks/pages/converse-61880f01babd873a.js similarity index 100% rename from RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/chunks/pages/converse-61880f01babd873a.js rename to RAG/src/rag_playground/default/static/_next/static/chunks/pages/converse-61880f01babd873a.js diff --git a/RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/chunks/pages/index-1a1d31dae38463f7.js b/RAG/src/rag_playground/default/static/_next/static/chunks/pages/index-1a1d31dae38463f7.js similarity index 100% rename from RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/chunks/pages/index-1a1d31dae38463f7.js rename to RAG/src/rag_playground/default/static/_next/static/chunks/pages/index-1a1d31dae38463f7.js diff --git a/RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/chunks/pages/index-6a3f286eb0986c10.js b/RAG/src/rag_playground/default/static/_next/static/chunks/pages/index-6a3f286eb0986c10.js similarity index 100% rename from RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/chunks/pages/index-6a3f286eb0986c10.js rename to RAG/src/rag_playground/default/static/_next/static/chunks/pages/index-6a3f286eb0986c10.js diff --git a/RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/chunks/pages/kb-cf0d102293dc0a74.js b/RAG/src/rag_playground/default/static/_next/static/chunks/pages/kb-cf0d102293dc0a74.js similarity index 100% rename from RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/chunks/pages/kb-cf0d102293dc0a74.js rename to RAG/src/rag_playground/default/static/_next/static/chunks/pages/kb-cf0d102293dc0a74.js diff --git a/RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/chunks/pages/tuning-0b7bb1111c2d2a56.js b/RAG/src/rag_playground/default/static/_next/static/chunks/pages/tuning-0b7bb1111c2d2a56.js similarity index 100% rename from RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/chunks/pages/tuning-0b7bb1111c2d2a56.js rename to RAG/src/rag_playground/default/static/_next/static/chunks/pages/tuning-0b7bb1111c2d2a56.js diff --git a/RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js b/RAG/src/rag_playground/default/static/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js similarity index 100% rename from RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js rename to RAG/src/rag_playground/default/static/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js diff --git 
a/RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/chunks/webpack-5146130448d8adf7.js b/RAG/src/rag_playground/default/static/_next/static/chunks/webpack-5146130448d8adf7.js similarity index 100% rename from RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/chunks/webpack-5146130448d8adf7.js rename to RAG/src/rag_playground/default/static/_next/static/chunks/webpack-5146130448d8adf7.js diff --git a/RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/css/7636246223312442.css b/RAG/src/rag_playground/default/static/_next/static/css/7636246223312442.css similarity index 100% rename from RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/css/7636246223312442.css rename to RAG/src/rag_playground/default/static/_next/static/css/7636246223312442.css diff --git a/RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/css/98b512633409f7e1.css b/RAG/src/rag_playground/default/static/_next/static/css/98b512633409f7e1.css similarity index 100% rename from RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/css/98b512633409f7e1.css rename to RAG/src/rag_playground/default/static/_next/static/css/98b512633409f7e1.css diff --git a/RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/s7oUSppGTRWsY8BXJmxYB/_buildManifest.js b/RAG/src/rag_playground/default/static/_next/static/s7oUSppGTRWsY8BXJmxYB/_buildManifest.js similarity index 100% rename from RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/s7oUSppGTRWsY8BXJmxYB/_buildManifest.js rename to RAG/src/rag_playground/default/static/_next/static/s7oUSppGTRWsY8BXJmxYB/_buildManifest.js diff --git a/RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/s7oUSppGTRWsY8BXJmxYB/_ssgManifest.js b/RAG/src/rag_playground/default/static/_next/static/s7oUSppGTRWsY8BXJmxYB/_ssgManifest.js similarity index 100% rename from RetrievalAugmentedGeneration/frontend/frontend/static/_next/static/s7oUSppGTRWsY8BXJmxYB/_ssgManifest.js rename to RAG/src/rag_playground/default/static/_next/static/s7oUSppGTRWsY8BXJmxYB/_ssgManifest.js diff --git a/RetrievalAugmentedGeneration/frontend/frontend/static/converse.html b/RAG/src/rag_playground/default/static/converse.html similarity index 100% rename from RetrievalAugmentedGeneration/frontend/frontend/static/converse.html rename to RAG/src/rag_playground/default/static/converse.html diff --git a/RetrievalAugmentedGeneration/frontend/frontend/static/favicon.ico b/RAG/src/rag_playground/default/static/favicon.ico similarity index 100% rename from RetrievalAugmentedGeneration/frontend/frontend/static/favicon.ico rename to RAG/src/rag_playground/default/static/favicon.ico diff --git a/RetrievalAugmentedGeneration/frontend/frontend/static/index.html b/RAG/src/rag_playground/default/static/index.html similarity index 100% rename from RetrievalAugmentedGeneration/frontend/frontend/static/index.html rename to RAG/src/rag_playground/default/static/index.html diff --git a/RetrievalAugmentedGeneration/frontend/frontend/static/kb.html b/RAG/src/rag_playground/default/static/kb.html similarity index 100% rename from RetrievalAugmentedGeneration/frontend/frontend/static/kb.html rename to RAG/src/rag_playground/default/static/kb.html diff --git a/RetrievalAugmentedGeneration/frontend/frontend/static/next.svg b/RAG/src/rag_playground/default/static/next.svg similarity index 100% rename from RetrievalAugmentedGeneration/frontend/frontend/static/next.svg rename to RAG/src/rag_playground/default/static/next.svg diff 
--git a/RetrievalAugmentedGeneration/frontend/frontend/static/vercel.svg b/RAG/src/rag_playground/default/static/vercel.svg similarity index 100% rename from RetrievalAugmentedGeneration/frontend/frontend/static/vercel.svg rename to RAG/src/rag_playground/default/static/vercel.svg diff --git a/RetrievalAugmentedGeneration/frontend/frontend/tracing.py b/RAG/src/rag_playground/default/tracing.py similarity index 93% rename from RetrievalAugmentedGeneration/frontend/frontend/tracing.py rename to RAG/src/rag_playground/default/tracing.py index 945ae00f..2de40b13 100644 --- a/RetrievalAugmentedGeneration/frontend/frontend/tracing.py +++ b/RAG/src/rag_playground/default/tracing.py @@ -14,19 +14,18 @@ # limitations under the License. import os + from opentelemetry import trace +from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter +from opentelemetry.propagate import get_global_textmap, set_global_textmap +from opentelemetry.propagators.composite import CompositePropagator from opentelemetry.sdk.resources import SERVICE_NAME, Resource from opentelemetry.sdk.trace import TracerProvider from opentelemetry.sdk.trace.export import SimpleSpanProcessor -from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator -from opentelemetry.propagate import set_global_textmap, get_global_textmap -from opentelemetry.propagators.composite import CompositePropagator # Configure tracer used by the Frontend to create spans -resource = Resource.create({ - SERVICE_NAME: "frontend" -}) +resource = Resource.create({SERVICE_NAME: "frontend"}) provider = TracerProvider(resource=resource) if os.environ.get("ENABLE_TRACING") == "true": processor = SimpleSpanProcessor(OTLPSpanExporter()) @@ -38,7 +37,7 @@ if os.environ.get("ENABLE_TRACING") == "true": propagator = TraceContextTextMapPropagator() else: - propagator = CompositePropagator([]) # No-op propagator + propagator = CompositePropagator([]) # No-op propagator set_global_textmap(propagator) @@ -49,6 +48,7 @@ def inject_context(ctx): get_global_textmap().inject(carrier, context=ctx) return carrier + # Wrapper Function to perform instrumentation def instrumentation_wrapper(func): def wrapper(self, *args, **kwargs): @@ -60,8 +60,10 @@ def wrapper(self, *args, **kwargs): result = func(self, carrier, *args, **kwargs) span.end() return result + return wrapper + # Wrapper function for the streaming predict call def predict_instrumentation_wrapper(func): def wrapper(self, *args, **kwargs): @@ -72,9 +74,10 @@ def wrapper(self, *args, **kwargs): carrier = inject_context(span_ctx) constructed_response = "" for chunk in func(self, carrier, *args, **kwargs): - if chunk: + if chunk: constructed_response += chunk yield chunk span.set_attribute("response", constructed_response) span.end() - return wrapper \ No newline at end of file + + return wrapper diff --git a/RetrievalAugmentedGeneration/frontend/requirements.txt b/RAG/src/rag_playground/requirements.txt similarity index 100% rename from RetrievalAugmentedGeneration/frontend/requirements.txt rename to RAG/src/rag_playground/requirements.txt diff --git a/RAG/src/rag_playground/speech/__init__.py b/RAG/src/rag_playground/speech/__init__.py new file mode 100644 index 00000000..e57aa978 --- /dev/null +++ b/RAG/src/rag_playground/speech/__init__.py @@ -0,0 +1,92 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Document Retrieval Service. + +Handle document ingestion and retrieval from a VectorDB. +""" + +import logging +import os +import sys +import typing + +if typing.TYPE_CHECKING: + from frontend.api import APIServer + + +_LOG_FMT = f"[{os.getpid()}] %(asctime)15s [%(levelname)7s] - %(name)s - %(message)s" +_LOG_DATE_FMT = "%b %d %H:%M:%S" +_LOGGER = logging.getLogger(__name__) + + +def bootstrap_logging(verbosity: int = 0) -> None: + """Configure Python's logger according to the given verbosity level. + + :param verbosity: The desired verbosity level. Must be one of 0, 1, or 2. + :type verbosity: typing.Literal[0, 1, 2] + """ + # determine log level + verbosity = min(2, max(0, verbosity)) # limit verbosity to 0-2 + log_level = [logging.WARN, logging.INFO, logging.DEBUG][verbosity] + + # configure python's logger + logging.basicConfig(format=_LOG_FMT, datefmt=_LOG_DATE_FMT, level=log_level) + # update existing loggers + _LOGGER.setLevel(log_level) + for logger in [ + __name__, + "uvicorn", + "uvicorn.access", + "uvicorn.error", + ]: + for handler in logging.getLogger(logger).handlers: + handler.setFormatter(logging.Formatter(fmt=_LOG_FMT, datefmt=_LOG_DATE_FMT)) + + +def main() -> "APIServer": + """Bootstrap and Execute the application. + + :returns: 0 if the application completed successfully, 1 if an error occurred. + :rtype: Literal[0,1] + """ + # boostrap python loggers + verbosity = int(os.environ.get("APP_VERBOSITY", "1")) + bootstrap_logging(verbosity) + + # load the application libraries + # pylint: disable=import-outside-toplevel; this is intentional to allow for the environment to be configured before + # any of the application libraries are loaded. + from frontend import api, chat_client, configuration + + # load config + config_file = os.environ.get("APP_CONFIG_FILE", "/dev/null") + _LOGGER.info("Loading application configuration.") + config = configuration.AppConfig.from_file(config_file) + if not config: + sys.exit(1) + _LOGGER.info("Configuration: \n%s", config.to_yaml()) + + # connect to other services + client = chat_client.ChatClient(f"{config.server_url}:{config.server_port}", config.model_name) + + # create api server + _LOGGER.info("Instantiating the API Server.") + server = api.APIServer(client) + server.configure_routes() + + # run until complete + _LOGGER.info("Starting the API Server.") + return server diff --git a/RAG/src/rag_playground/speech/__main__.py b/RAG/src/rag_playground/speech/__main__.py new file mode 100644 index 00000000..df0bda71 --- /dev/null +++ b/RAG/src/rag_playground/speech/__main__.py @@ -0,0 +1,97 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Entrypoint for the Conversation GUI. + +The functions in this module are responsible for bootstrapping then executing the Conversation GUI server. +""" + +import argparse +import os +import sys + +import uvicorn + + +def parse_args() -> argparse.Namespace: + """Parse command-line arguments for the program. + + :returns: A namespace containing the parsed arguments. + :rtype: argparse.Namespace + """ + parser = argparse.ArgumentParser(description="Document Retrieval Service") + + parser.add_argument( + "--help-config", action="store_true", default=False, help="show the configuration help text", + ) + + parser.add_argument( + "-c", + "--config", + metavar="CONFIGURATION_FILE", + default="/dev/null", + help="path to the configuration file (json or yaml)", + ) + parser.add_argument( + "-v", "--verbose", action="count", default=1, help="increase output verbosity", + ) + parser.add_argument( + "-q", "--quiet", action="count", default=0, help="decrease output verbosity", + ) + + parser.add_argument( + "--host", + metavar="HOSTNAME", + type=str, + default="0.0.0.0", # nosec # this is intentional + help="Bind socket to this host.", + ) + parser.add_argument( + "--port", metavar="PORT_NUM", type=int, default=8080, help="Bind socket to this port.", + ) + parser.add_argument( + "--workers", metavar="NUM_WORKERS", type=int, default=1, help="Number of worker processes.", + ) + parser.add_argument("--ssl-keyfile", metavar="SSL_KEY", type=str, default=None, help="SSL key file") + parser.add_argument( + "--ssl-certfile", metavar="SSL_CERT", type=str, default=None, help="SSL certificate file", + ) + + cliargs = parser.parse_args() + if cliargs.help_config: + # pylint: disable=import-outside-toplevel; this is intentional to allow for the environment to be configured + # before any of the application libraries are loaded. + from frontend.configuration import AppConfig + + sys.stdout.write("\nconfiguration file format:\n") + AppConfig.print_help(sys.stdout.write) + sys.exit(0) + + return cliargs + + +if __name__ == "__main__": + args = parse_args() + os.environ["APP_VERBOSITY"] = f"{args.verbose - args.quiet}" + os.environ["APP_CONFIG_FILE"] = args.config + uvicorn.run( + "frontend:main", + factory=True, + host=args.host, + port=args.port, + workers=args.workers, + ssl_keyfile=args.ssl_keyfile, + ssl_certfile=args.ssl_certfile, + ) diff --git a/RAG/src/rag_playground/speech/api.py b/RAG/src/rag_playground/speech/api.py new file mode 100644 index 00000000..96b5ec38 --- /dev/null +++ b/RAG/src/rag_playground/speech/api.py @@ -0,0 +1,65 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This module contains the Server that will host the frontend and API.""" +import os + +import gradio as gr +from fastapi import FastAPI +from fastapi.responses import FileResponse +from fastapi.staticfiles import StaticFiles +from frontend import pages +from frontend.chat_client import ChatClient + +STATIC_DIR = os.path.join(os.path.abspath(os.path.dirname(__file__)), "static") + + +class APIServer(FastAPI): + """A class that hosts the service api. + + :cvar title: The title of the server. + :type title: str + :cvar desc: A description of the server. + :type desc: str + """ + + title = "Chat" + desc = "This service provides a sample conversation frontend flow." + + def __init__(self, client: ChatClient) -> None: + """Initialize the API server.""" + self._client = client + super().__init__(title=self.title, description=self.desc) + + def configure_routes(self) -> None: + """Configure the routes in the API Server.""" + _ = gr.mount_gradio_app( + self, blocks=pages.converse.build_page(self._client), path=f"/content{pages.converse.PATH}", + ) + _ = gr.mount_gradio_app(self, blocks=pages.kb.build_page(self._client), path=f"/content{pages.kb.PATH}",) + + @self.get("/") + async def root_redirect() -> FileResponse: + return FileResponse(os.path.join(STATIC_DIR, "converse.html")) + + @self.get("/converse") + async def converse_redirect() -> FileResponse: + return FileResponse(os.path.join(STATIC_DIR, "converse.html")) + + @self.get("/kb") + async def kb_redirect() -> FileResponse: + return FileResponse(os.path.join(STATIC_DIR, "kb.html")) + + self.mount("/", StaticFiles(directory=STATIC_DIR, html=True)) diff --git a/RetrievalAugmentedGeneration/frontend/frontend/asr_utils.py b/RAG/src/rag_playground/speech/asr_utils.py similarity index 86% rename from RetrievalAugmentedGeneration/frontend/frontend/asr_utils.py rename to RAG/src/rag_playground/speech/asr_utils.py index e707bfb2..f4cd20e5 100644 --- a/RetrievalAugmentedGeneration/frontend/frontend/asr_utils.py +++ b/RAG/src/rag_playground/speech/asr_utils.py @@ -13,18 +13,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +import logging import queue from threading import Thread -import logging -import grpc -import pycountry import gradio as gr +import grpc import numpy as np +import pycountry import riva.client import riva.client.proto.riva_asr_pb2 as riva_asr import riva.client.proto.riva_asr_pb2_grpc as rasr_srv + class ASRSession: def __init__(self): self.is_first_buffer = True @@ -33,12 +34,14 @@ def __init__(self): self.response_thread = None self.transcript = "" + _LOGGER = logging.getLogger(__name__) # Obtain the ASR languages available on the Riva server ASR_LANGS = dict() grpc_auth = None + def asr_init(auth): global ASR_LANGS global grpc_auth @@ -57,11 +60,16 @@ def asr_init(auth): except grpc.RpcError as e: _LOGGER.info(e.details()) ASR_LANGS["No ASR languages available"] = "No ASR languages available" - gr.Info('The app could not find any available ASR languages. Thus, none will appear in the "ASR Language" dropdown menu. 
Check that you are connected to a Riva server with ASR enabled.') - _LOGGER.info('The app could not find any available ASR languages. Thus, none will appear in the "ASR Language" dropdown menu. Check that you are connected to a Riva server with ASR enabled.') + gr.Info( + 'The app could not find any available ASR languages. Thus, none will appear in the "ASR Language" dropdown menu. Check that you are connected to a Riva server with ASR enabled.' + ) + _LOGGER.info( + 'The app could not find any available ASR languages. Thus, none will appear in the "ASR Language" dropdown menu. Check that you are connected to a Riva server with ASR enabled.' + ) ASR_LANGS = dict(sorted(ASR_LANGS.items())) + def print_streaming_response(asr_session): asr_session.transcript = "" final_transcript = "" @@ -88,12 +96,14 @@ def print_streaming_response(asr_session): asr_session.transcript = rpc_error.details() return + def start_recording(audio, language, asr_session): _LOGGER.info('start_recording') asr_session.is_first_buffer = True asr_session.request_queue = queue.Queue() return "", asr_session + def stop_recording(asr_session): _LOGGER.info('stop_recording') try: @@ -103,11 +113,16 @@ def stop_recording(asr_session): pass return asr_session + def transcribe_streaming(audio, language, asr_session): _LOGGER.info('transcribe_streaming') if language == 'No ASR languages available': - gr.Info('The app cannot access ASR services. Any attempt to transcribe audio will be unsuccessful. Check that you are connected to a Riva server with ASR enabled.') - _LOGGER.info('The app cannot access ASR services. Any attempt to transcribe audio will be unsuccessful. Check that you are connected to a Riva server with ASR enabled.') + gr.Info( + 'The app cannot access ASR services. Any attempt to transcribe audio will be unsuccessful. Check that you are connected to a Riva server with ASR enabled.' + ) + _LOGGER.info( + 'The app cannot access ASR services. Any attempt to transcribe audio will be unsuccessful. Check that you are connected to a Riva server with ASR enabled.' + ) return None, None rate, data = audio if len(data.shape) > 1: diff --git a/RetrievalAugmentedGeneration/common/base.py b/RAG/src/rag_playground/speech/assets/__init__.py similarity index 50% rename from RetrievalAugmentedGeneration/common/base.py rename to RAG/src/rag_playground/speech/assets/__init__.py index 7b61a51a..5e26052c 100644 --- a/RetrievalAugmentedGeneration/common/base.py +++ b/RAG/src/rag_playground/speech/assets/__init__.py @@ -13,21 +13,26 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Base interface that all RAG examples should implement.""" +"""This module contains theming assets.""" +import os.path +from typing import Tuple -from abc import ABC, abstractmethod -from typing import Generator +import gradio as gr -class BaseExample(ABC): +_ASSET_DIR = os.path.dirname(__file__) - @abstractmethod - def llm_chain(self, context: str, question: str, num_tokens: int) -> Generator[str, None, None]: - pass - @abstractmethod - def rag_chain(self, prompt: str, num_tokens: int) -> Generator[str, None, None]: - pass +def load_theme(name: str) -> Tuple[gr.Theme, str]: + """Load a pre-defined frontend theme. - @abstractmethod - def ingest_docs(self, data_dir: str, filename: str) -> None: - pass \ No newline at end of file + :param name: The name of the theme to load. + :type name: str + :returns: A tuple containing the Gradio theme and custom CSS. 
+ :rtype: Tuple[gr.Theme, str] + """ + theme_json_path = os.path.join(_ASSET_DIR, f"{name}-theme.json") + theme_css_path = os.path.join(_ASSET_DIR, f"{name}-theme.css") + return ( + gr.themes.Default().load(theme_json_path), + open(theme_css_path, encoding="UTF-8").read(), + ) diff --git a/RetrievalAugmentedGeneration/frontend/frontend/assets/kaizen-theme.css b/RAG/src/rag_playground/speech/assets/kaizen-theme.css similarity index 100% rename from RetrievalAugmentedGeneration/frontend/frontend/assets/kaizen-theme.css rename to RAG/src/rag_playground/speech/assets/kaizen-theme.css diff --git a/experimental/fm-asr-streaming-rag/frontend/frontend/assets/kaizen-theme.json b/RAG/src/rag_playground/speech/assets/kaizen-theme.json similarity index 100% rename from experimental/fm-asr-streaming-rag/frontend/frontend/assets/kaizen-theme.json rename to RAG/src/rag_playground/speech/assets/kaizen-theme.json diff --git a/RAG/src/rag_playground/speech/chat_client.py b/RAG/src/rag_playground/speech/chat_client.py new file mode 100644 index 00000000..21816997 --- /dev/null +++ b/RAG/src/rag_playground/speech/chat_client.py @@ -0,0 +1,177 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""The API client for the langchain-esque service.""" +import json +import logging +import mimetypes +import typing + +import requests +from frontend import tracing +from requests.exceptions import ConnectionError + +_LOGGER = logging.getLogger(__name__) + + +class ChatClient: + """A client for connecting the the lanchain-esque service.""" + + def __init__(self, server_url: str, model_name: str) -> None: + """Initialize the client.""" + self.server_url = server_url + self._model_name = model_name + self.default_model = "meta/llama3-70b-instruct" + + @property + def model_name(self) -> str: + """Return the friendly model name.""" + return self._model_name + + @tracing.instrumentation_wrapper + def search(self, carrier, prompt: str) -> typing.List[typing.Dict[str, typing.Union[str, float]]]: + """Search for relevant documents and return json data.""" + data = {"query": prompt, "top_k": 4} + headers = {**carrier, "accept": "application/json", "Content-Type": "application/json"} + url = f"{self.server_url}/search" + _LOGGER.debug("looking up documents - %s", str({"server_url": url, "post_data": data})) + + try: + with requests.post(url, headers=headers, json=data, timeout=30) as req: + req.raise_for_status() + response = req.json() + return typing.cast(typing.List[typing.Dict[str, typing.Union[str, float]]], response) + except Exception as e: + _LOGGER.error( + f"Failed to get response from /documentSearch endpoint of chain-server. Error details: {e}. Refer to chain-server logs for details." 
+ ) + return typing.cast(typing.List[typing.Dict[str, typing.Union[str, float]]], []) + + @tracing.predict_instrumentation_wrapper + def predict( + self, carrier, query: str, use_knowledge_base: bool, num_tokens: int + ) -> typing.Generator[str, None, None]: + """Make a model prediction.""" + data = { + "messages": [{"role": "user", "content": query}], + "use_knowledge_base": use_knowledge_base, + } + url = f"{self.server_url}/generate" + _LOGGER.debug("making inference request - %s", str({"server_url": url, "post_data": data})) + + try: + with requests.post(url, stream=True, json=data, timeout=50, headers=carrier) as req: + req.raise_for_status() + for chunk in req.iter_lines(): + raw_resp = chunk.decode("UTF-8") + if not raw_resp: + continue + resp_dict = None + try: + resp_dict = json.loads(raw_resp[6:]) + resp_choices = resp_dict.get("choices", []) + if len(resp_choices): + resp_str = resp_choices[0].get("message", {}).get("content", "") + yield resp_str + else: + yield "" + except Exception as e: + raise ValueError(f"Invalid response json: {raw_resp}") from e + + except Exception as e: + _LOGGER.error( + f"Failed to get response from /generate endpoint of chain-server. Error details: {e}. Refer to chain-server logs for details." + ) + yield str( + "Failed to get response from /generate endpoint of chain-server. Check if the fastapi server in chain-server is up. Refer to chain-server logs for details." + ) + + # Send None to indicate end of response + yield None + + @tracing.instrumentation_wrapper + def upload_documents(self, carrier, file_paths: typing.List[str]) -> None: + """Upload documents to the kb.""" + url = f"{self.server_url}/documents" + headers = { + **carrier, + "accept": "application/json", + } + + try: + for fpath in file_paths: + mime_type, _ = mimetypes.guess_type(fpath) + # pylint: disable-next=consider-using-with # with pattern is not intuitive here + files = {"file": (fpath, open(fpath, "rb"), mime_type)} + + _LOGGER.debug( + "uploading file - %s", str({"server_url": url, "file": fpath}), + ) + + resp = requests.post( + url, headers=headers, files=files, timeout=600 # type: ignore [arg-type] + ) + if resp.status_code == 500: + raise ValueError(f"{resp.json().get('message', 'Failed to upload document')}") + except Exception as e: + _LOGGER.error( + f"Failed to get response from /documents endpoint of chain-server. Error details: {e}. Refer to chain-server logs for details." + ) + raise ValueError(f"{e}") + + @tracing.instrumentation_wrapper + def delete_documents(self, carrier, file_name: str) -> str: + """ Delete Selected documents""" + headers = {**carrier, "accept": "application/json", "Content-Type": "application/json"} + params = {'filename': file_name} + url = f"{self.server_url}/documents" + + try: + _LOGGER.debug(f"Delete request received for file_name: {file_name}") + with requests.delete(url, headers=headers, params=params, timeout=30) as req: + req.raise_for_status() + response = req.json() + return response + except Exception as e: + _LOGGER.error( + f"Failed to delete {file_name} using /documents endpoint of chain-server. Error details: {e}. Refer to chain-server logs for details." 
+            )
+            return ""
+
+    @tracing.instrumentation_wrapper
+    def get_uploaded_documents(self, carrier) -> typing.List[str]:
+        """Get the list of uploaded documents."""
+        url = f"{self.server_url}/documents"
+        headers = {
+            **carrier,
+            "accept": "application/json",
+        }
+        uploaded_files = []
+        try:
+            resp = requests.get(url, headers=headers, timeout=600)
+            response = json.loads(resp.content)
+            if resp.status_code == 500:
+                raise ValueError(f"{resp.json().get('message', 'Failed to get uploaded documents')}")
+            else:
+                uploaded_files = response['documents']
+        except ConnectionError as e:
+            # Avoid playground crash when chain server starts after rag-playground
+            _LOGGER.error(f"Failed to connect /documents endpoint of chain-server. Error details: {e}.")
+        except Exception as e:
+            _LOGGER.error(
+                f"Failed to get response from /documents endpoint of chain-server. Error details: {e}. Refer to chain-server logs for details."
+            )
+            raise ValueError(f"{e}")
+        return uploaded_files
diff --git a/RAG/src/rag_playground/speech/configuration.py b/RAG/src/rag_playground/speech/configuration.py
new file mode 100644
index 00000000..2ae91fd8
--- /dev/null
+++ b/RAG/src/rag_playground/speech/configuration.py
@@ -0,0 +1,38 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""The definition of the application configuration."""
+from frontend.configuration_wizard import ConfigWizard, configclass, configfield
+
+
+@configclass
+class AppConfig(ConfigWizard):
+    """Configuration class for the application.
+
+    :cvar server_url: The location of the chat server.
+    :type server_url: str
+    :cvar model_name: The name of the hosted LLM model.
+    :type model_name: str
+    """
+
+    server_url: str = configfield(
+        "serverUrl", default="http://localhost", help_txt="The location of the chat server.",
+    )
+    server_port: str = configfield(
+        "serverPort", default="8000", help_txt="The port on which the chat server is listening for HTTP requests.",
+    )
+    model_name: str = configfield(
+        "modelName", default="meta/llama3-70b-instruct", help_txt="The name of the hosted LLM model.",
+    )
diff --git a/RAG/src/rag_playground/speech/configuration_wizard.py b/RAG/src/rag_playground/speech/configuration_wizard.py
new file mode 100644
index 00000000..5e53a6b6
--- /dev/null
+++ b/RAG/src/rag_playground/speech/configuration_wizard.py
@@ -0,0 +1,377 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A module containing utilities for defining application configuration. + +This module provides a configuration wizard class that can read configuration data from YAML, JSON, and environment +variables. The configuration wizard is based heavily off of the JSON and YAML wizards from the `dataclass-wizard` +Python package. That package is in-turn based heavily off of the built-in `dataclass` module. + +This module adds Environment Variable parsing to config file reading. +""" +# pylint: disable=too-many-lines; this file is meant to be portable between projects so everything is put into one file + +import json +import logging +import os +from dataclasses import _MISSING_TYPE, dataclass +from typing import Any, Callable, Dict, List, Optional, TextIO, Tuple, Union + +import yaml +from dataclass_wizard import JSONWizard, LoadMeta, YAMLWizard, errors, fromdict, json_field +from dataclass_wizard.models import JSONField +from dataclass_wizard.utils.string_conv import to_camel_case + +configclass = dataclass(frozen=True) +ENV_BASE = "APP" +_LOGGER = logging.getLogger(__name__) + + +def configfield(name: str, *, env: bool = True, help_txt: str = "", **kwargs: Any) -> JSONField: + """Create a data class field with the specified name in JSON format. + + :param name: The name of the field. + :type name: str + :param env: Whether this field should be configurable from an environment variable. + :type env: bool + :param help_txt: The description of this field that is used in help docs. + :type help_txt: str + :param **kwargs: Optional keyword arguments to customize the JSON field. More information here: + https://dataclass-wizard.readthedocs.io/en/latest/dataclass_wizard.html#dataclass_wizard.json_field + :type **kwargs: Any + :returns: A JSONField instance with the specified name and optional parameters. + :rtype: JSONField + + :raises TypeError: If the provided name is not a string. + """ + # sanitize specified name + if not isinstance(name, str): + raise TypeError("Provided name must be a string.") + json_name = to_camel_case(name) + + # update metadata + meta = kwargs.get("metadata", {}) + meta["env"] = env + meta["help"] = help_txt + kwargs["metadata"] = meta + + # create the data class field + field = json_field(json_name, **kwargs) + return field + + +class _Color: + """A collection of colors used when writing output to the shell.""" + + # pylint: disable=too-few-public-methods; this class does not require methods. + + PURPLE = "\033[95m" + BLUE = "\033[94m" + GREEN = "\033[92m" + YELLOW = "\033[93m" + RED = "\033[91m" + BOLD = "\033[1m" + UNDERLINE = "\033[4m" + END = "\033[0m" + + +class ConfigWizard(JSONWizard, YAMLWizard): # type: ignore[misc] # dataclass-wizard doesn't provide stubs + """A configuration wizard class that can read configuration data from YAML, JSON, and environment variables.""" + + # pylint: disable=arguments-differ,arguments-renamed; this class intentionally reduces arguments for some methods. 
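Note on usage: as the module docstring above explains, ConfigWizard merges environment variables into the data read from a configuration file. A minimal sketch of how that plays out for the AppConfig class defined in configuration.py follows, derived from the code above: each camelCase field name is upper-cased and prefixed with APP, and an environment value is only used when the field is missing from the supplied data. This is illustrative, not part of this change; it assumes the package is importable as frontend and that dataclass-wizard and PyYAML are installed, and the example values are hypothetical.

import os

from frontend.configuration import AppConfig  # assumption: the package is importable as "frontend"

# Derived naming: "serverUrl" -> APP_SERVERURL, "serverPort" -> APP_SERVERPORT, "modelName" -> APP_MODELNAME
os.environ["APP_SERVERURL"] = "http://chain-server"
os.environ["APP_MODELNAME"] = "meta/llama3-70b-instruct"

# Keys already present in the supplied data win; environment variables only fill in missing fields.
config = AppConfig.from_dict({"serverPort": "8081"})
print(config.server_url)   # http://chain-server (from APP_SERVERURL)
print(config.server_port)  # 8081 (from the dictionary)
print(config.model_name)   # meta/llama3-70b-instruct (from APP_MODELNAME)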
+ + @classmethod + def print_help( + cls, + help_printer: Callable[[str], Any], + *, + env_parent: Optional[str] = None, + json_parent: Optional[Tuple[str, ...]] = None, + ) -> None: + """Print the help documentation for the application configuration with the provided `write` function. + + :param help_printer: The `write` function that will be used to output the data. + :param help_printer: Callable[[str], None] + :param env_parent: The name of the parent environment variable. Leave blank, used for recursion. + :type env_parent: Optional[str] + :param json_parent: The name of the parent JSON key. Leave blank, used for recursion. + :type json_parent: Optional[Tuple[str, ...]] + :returns: A list of tuples with one item per configuration value. Each item will have the environment variable + and a tuple to the path in configuration. + :rtype: List[Tuple[str, Tuple[str, ...]]] + """ + if not env_parent: + env_parent = "" + help_printer("---\n") + if not json_parent: + json_parent = () + + for ( + _, + val, + ) in ( + cls.__dataclass_fields__.items() # pylint: disable=no-member; false positive + ): # pylint: disable=no-member; member is added by dataclass. + jsonname = val.json.keys[0] + envname = jsonname.upper() + full_envname = f"{ENV_BASE}{env_parent}_{envname}" + is_embedded_config = hasattr(val.type, "envvars") + + # print the help data + indent = len(json_parent) * 2 + if is_embedded_config: + default = "" + elif not isinstance(val.default_factory, _MISSING_TYPE): + default = val.default_factory() + elif isinstance(val.default, _MISSING_TYPE): + default = "NO-DEFAULT-VALUE" + else: + default = val.default + help_printer(f"{_Color.BOLD}{' ' * indent}{jsonname}:{_Color.END} {default}\n") + + # print comments + if is_embedded_config: + indent += 2 + if val.metadata.get("help"): + help_printer(f"{' ' * indent}# {val.metadata['help']}\n") + if not is_embedded_config: + typestr = getattr(val.type, "__name__", None) or str(val.type).replace("typing.", "") + help_printer(f"{' ' * indent}# Type: {typestr}\n") + if val.metadata.get("env", True): + help_printer(f"{' ' * indent}# ENV Variable: {full_envname}\n") + # if not is_embedded_config: + help_printer("\n") + + if is_embedded_config: + new_env_parent = f"{env_parent}_{envname}" + new_json_parent = json_parent + (jsonname,) + val.type.print_help(help_printer, env_parent=new_env_parent, json_parent=new_json_parent) + + help_printer("\n") + + @classmethod + def envvars( + cls, env_parent: Optional[str] = None, json_parent: Optional[Tuple[str, ...]] = None, + ) -> List[Tuple[str, Tuple[str, ...], type]]: + """Calculate valid environment variables and their config structure location. + + :param env_parent: The name of the parent environment variable. + :type env_parent: Optional[str] + :param json_parent: The name of the parent JSON key. + :type json_parent: Optional[Tuple[str, ...]] + :returns: A list of tuples with one item per configuration value. Each item will have the environment variable, + a tuple to the path in configuration, and they type of the value. + :rtype: List[Tuple[str, Tuple[str, ...], type]] + """ + if not env_parent: + env_parent = "" + if not json_parent: + json_parent = () + output = [] + + for ( + _, + val, + ) in ( + cls.__dataclass_fields__.items() # pylint: disable=no-member; false positive + ): # pylint: disable=no-member; member is added by dataclass. 
+ jsonname = val.json.keys[0] + envname = jsonname.upper() + full_envname = f"{ENV_BASE}{env_parent}_{envname}" + is_embedded_config = hasattr(val.type, "envvars") + + # add entry to output list + if is_embedded_config: + new_env_parent = f"{env_parent}_{envname}" + new_json_parent = json_parent + (jsonname,) + output += val.type.envvars(env_parent=new_env_parent, json_parent=new_json_parent) + elif val.metadata.get("env", True): + output += [(full_envname, json_parent + (jsonname,), val.type)] + + return output + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "ConfigWizard": + """Create a ConfigWizard instance from a dictionary. + + :param data: The dictionary containing the configuration data. + :type data: Dict[str, Any] + :returns: A ConfigWizard instance created from the input dictionary. + :rtype: ConfigWizard + + :raises RuntimeError: If the configuration data is not a dictionary. + """ + # sanitize data + if not data: + data = {} + if not isinstance(data, dict): + raise RuntimeError("Configuration data is not a dictionary.") + + # parse env variables + for envvar in cls.envvars(): + var_name, conf_path, var_type = envvar + var_value = os.environ.get(var_name) + if var_value: + var_value = try_json_load(var_value) + update_dict(data, conf_path, var_value) + _LOGGER.debug( + "Found EnvVar Config - %s:%s = %s", var_name, str(var_type), repr(var_value), + ) + + LoadMeta(key_transform="CAMEL").bind_to(cls) + return fromdict(cls, data) # type: ignore[no-any-return] # dataclass-wizard doesn't provide stubs + + @classmethod + def from_file(cls, filepath: str) -> Optional["ConfigWizard"]: + """Load the application configuration from the specified file. + + The file must be either in JSON or YAML format. + + :returns: The fully processed configuration file contents. If the file was unreadable, None will be returned. + :rtype: Optional["ConfigWizard"] + """ + # open the file + try: + # pylint: disable-next=consider-using-with; using a with would make exception handling even more ugly + file = open(filepath, encoding="utf-8") + except FileNotFoundError: + _LOGGER.error("The configuration file cannot be found.") + file = None + except PermissionError: + _LOGGER.error("Permission denied when trying to read the configuration file.") + file = None + if not file: + return None + + # read the file + try: + data = read_json_or_yaml(file) + except ValueError as err: + _LOGGER.error( + "Configuration file must be valid JSON or YAML. The following errors occured:\n%s", str(err), + ) + data = None + config = None + finally: + file.close() + + # parse the file + if data: + try: + config = cls.from_dict(data) + except errors.MissingFields as err: + _LOGGER.error("Configuration is missing required fields: \n%s", str(err)) + config = None + except errors.ParseError as err: + _LOGGER.error("Invalid configuration value provided:\n%s", str(err)) + config = None + else: + config = cls.from_dict({}) + + return config + + +def read_json_or_yaml(stream: TextIO) -> Dict[str, Any]: + """Read a file without knowing if it is JSON or YAML formatted. + + The file will first be assumed to be JSON formatted. If this fails, an attempt to parse the file with the YAML + parser will be made. If both of these fail, an exception will be raised that contains the exception strings returned + by both the parsers. + + :param stream: An IO stream that allows seeking. + :type stream: typing.TextIO + :returns: The parsed file contents. 
+ :rtype: typing.Dict[str, typing.Any]: + :raises ValueError: If the IO stream is not seekable or if the file doesn't appear to be JSON or YAML formatted. + """ + exceptions: Dict[str, Union[None, ValueError, yaml.error.YAMLError]] = { + "JSON": None, + "YAML": None, + } + data: Dict[str, Any] + + # ensure we can rewind the file + if not stream.seekable(): + raise ValueError("The provided stream must be seekable.") + + # attempt to read json + try: + data = json.loads(stream.read()) + except ValueError as err: + exceptions["JSON"] = err + else: + return data + finally: + stream.seek(0) + + # attempt to read yaml + try: + data = yaml.safe_load(stream.read()) + except (yaml.error.YAMLError, ValueError) as err: + exceptions["YAML"] = err + else: + return data + + # neither json nor yaml + err_msg = "\n\n".join([key + " Parser Errors:\n" + str(val) for key, val in exceptions.items()]) + raise ValueError(err_msg) + + +def try_json_load(value: str) -> Any: + """Try parsing the value as JSON and silently ignore errors. + + :param value: The value on which a JSON load should be attempted. + :type value: str + :returns: Either the parsed JSON or the provided value. + :rtype: typing.Any + """ + try: + return json.loads(value) + except json.JSONDecodeError: + return value + + +def update_dict(data: Dict[str, Any], path: Tuple[str, ...], value: Any, overwrite: bool = False,) -> None: + """Update a dictionary with a new value at a given path. + + :param data: The dictionary to be updated. + :type data: Dict[str, Any] + :param path: The path to the key that should be updated. + :type path: Tuple[str, ...] + :param value: The new value to be set at the specified path. + :type value: Any + :param overwrite: If True, overwrite the existing value. Otherwise, don't update if the key already exists. + :type overwrite: bool + :returns: None + """ + end = len(path) + target = data + for idx, key in enumerate(path, 1): + # on the last field in path, update the dict if necessary + if idx == end: + if overwrite or not target.get(key): + target[key] = value + return + + # verify the next hop exists + if not target.get(key): + target[key] = {} + + # if the next hop is not a dict, exit + if not isinstance(target.get(key), dict): + return + + # get next hop + target = target.get(key) # type: ignore[assignment] # type has already been enforced. diff --git a/RAG/src/rag_playground/speech/pages/__init__.py b/RAG/src/rag_playground/speech/pages/__init__.py new file mode 100644 index 00000000..f2b6c770 --- /dev/null +++ b/RAG/src/rag_playground/speech/pages/__init__.py @@ -0,0 +1,19 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
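For reference, a small illustration of the try_json_load and update_dict helpers defined at the end of configuration_wizard.py above. The values are hypothetical and the import path assumes the package is importable as frontend; this sketch is not part of the change itself.

from frontend.configuration_wizard import try_json_load, update_dict  # assumption: importable as "frontend"

# try_json_load parses JSON when it can and otherwise returns the raw string untouched.
assert try_json_load("8081") == 8081
assert try_json_load("http://localhost") == "http://localhost"

# update_dict fills in missing keys; existing values are kept unless overwrite=True.
data = {"serverUrl": "http://localhost"}
update_dict(data, ("serverUrl",), "http://chain-server")                  # key exists, left alone
update_dict(data, ("serverPort",), "8081")                                # key missing, added
update_dict(data, ("serverUrl",), "http://chain-server", overwrite=True)  # explicitly replaced
print(data)  # {'serverUrl': 'http://chain-server', 'serverPort': '8081'}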
+ +"""This module contains definitions for all the frontend pages.""" +from frontend.pages import converse, kb + +__all__ = ["converse", "kb"] diff --git a/RetrievalAugmentedGeneration/frontend/frontend/pages/converse.py b/RAG/src/rag_playground/speech/pages/converse.py similarity index 69% rename from RetrievalAugmentedGeneration/frontend/frontend/pages/converse.py rename to RAG/src/rag_playground/speech/pages/converse.py index 2a39e85b..a355e0a9 100644 --- a/RetrievalAugmentedGeneration/frontend/frontend/pages/converse.py +++ b/RAG/src/rag_playground/speech/pages/converse.py @@ -18,11 +18,10 @@ import logging import os from typing import Any, Dict, List, Tuple, Union -import riva.client import gradio as gr - -from frontend import assets, chat_client, asr_utils, tts_utils +import riva.client +from frontend import asr_utils, assets, chat_client, tts_utils _LOGGER = logging.getLogger(__name__) PATH = "/converse" @@ -40,34 +39,39 @@ # Extract environmental variables RIVA_API_URI = os.getenv("RIVA_API_URI", None) -RIVA_API_KEY = os.getenv("RIVA_API_KEY", None) -RIVA_FUNCTION_ID = os.getenv("RIVA_FUNCTION_ID", None) +NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY", None) +RIVA_ASR_FUNCTION_ID = os.getenv("RIVA_ASR_FUNCTION_ID", None) +RIVA_TTS_FUNCTION_ID = os.getenv("RIVA_TTS_FUNCTION_ID", None) # Establish a connection to the Riva server try: use_ssl = False - metadata = [] - if RIVA_API_KEY: + metadata_asr = [] + metadata_tts = [] + if NVIDIA_API_KEY: use_ssl = True - metadata.append(("authorization", "Bearer " + RIVA_API_KEY)) - if RIVA_FUNCTION_ID: + metadata_asr.append(("authorization", "Bearer " + NVIDIA_API_KEY)) + metadata_tts.append(("authorization", "Bearer " + NVIDIA_API_KEY)) + if RIVA_ASR_FUNCTION_ID: use_ssl = True - metadata.append(("function-id", RIVA_FUNCTION_ID)) - auth = riva.client.Auth( - None, use_ssl=use_ssl, - uri=RIVA_API_URI, - metadata_args=metadata - ) + metadata_asr.append(("function-id", RIVA_ASR_FUNCTION_ID)) + if RIVA_TTS_FUNCTION_ID: + use_ssl = True + metadata_tts.append(("function-id", RIVA_TTS_FUNCTION_ID)) + + auth_tts = riva.client.Auth(None, use_ssl=use_ssl, uri=RIVA_API_URI, metadata_args=metadata_tts) + auth_asr = riva.client.Auth(None, use_ssl=use_ssl, uri=RIVA_API_URI, metadata_args=metadata_asr) _LOGGER.info('Created riva.client.Auth success') except: _LOGGER.info('Error creating riva.client.Auth') + def build_page(client: chat_client.ChatClient) -> gr.Blocks: """Build the gradio page to be mounted in the frame.""" kui_theme, kui_styles = assets.load_theme("kaizen") - asr_utils.asr_init(auth) - tts_utils.tts_init(auth) + asr_utils.asr_init(auth_asr) + tts_utils.tts_init(auth_tts) with gr.Blocks(title=TITLE, theme=kui_theme, css=kui_styles + _LOCAL_CSS) as page: @@ -81,12 +85,7 @@ def build_page(client: chat_client.ChatClient) -> gr.Blocks: with gr.Row(equal_height=True): chatbot = gr.Chatbot(scale=2, label=client.model_name) latest_response = gr.Textbox(visible=False) - context = gr.JSON( - scale=1, - label="Knowledge Base Context", - visible=False, - elem_id="contextbox", - ) + context = gr.JSON(scale=1, label="Knowledge Base Context", visible=False, elem_id="contextbox",) # TTS output box # visible so that users can stop or replay playback @@ -97,34 +96,26 @@ def build_page(client: chat_client.ChatClient) -> gr.Blocks: interactive=False, streaming=True, visible=True, - show_download_button=False + show_download_button=False, ) # check boxes with gr.Row(): with gr.Column(scale=10, min_width=150): - kb_checkbox = gr.Checkbox( - label="Use knowledge 
base", info="", value=False - ) + kb_checkbox = gr.Checkbox(label="Use knowledge base", info="", value=False) with gr.Column(scale=10, min_width=150): - tts_checkbox = gr.Checkbox( - label="Enable TTS output", info="", value=False - ) + tts_checkbox = gr.Checkbox(label="Enable TTS output", info="", value=False) # dropdowns with gr.Accordion("ASR and TTS Settings"): with gr.Row(): asr_language_list = list(asr_utils.ASR_LANGS) asr_language_dropdown = gr.components.Dropdown( - label="ASR Language", - choices=asr_language_list, - value=asr_language_list[0], + label="ASR Language", choices=asr_language_list, value=asr_language_list[0], ) tts_language_list = list(tts_utils.TTS_MODELS) tts_language_dropdown = gr.components.Dropdown( - label="TTS Language", - choices=tts_language_list, - value=tts_language_list[0], + label="TTS Language", choices=tts_language_list, value=tts_language_list[0], ) all_voices = [] try: @@ -135,19 +126,13 @@ def build_page(client: chat_client.ChatClient) -> gr.Blocks: all_voices.append("No TTS voices available") default_voice = "No TTS voices available" tts_voice_dropdown = gr.components.Dropdown( - label="TTS Voice", - choices=all_voices, - value=default_voice, + label="TTS Voice", choices=all_voices, value=default_voice, ) # audio and text input boxes with gr.Row(): with gr.Column(scale=10, min_width=500): - msg = gr.Textbox( - show_label=False, - placeholder="Enter text and press ENTER", - container=False, - ) + msg = gr.Textbox(show_label=False, placeholder="Enter text and press ENTER", container=False,) # For (at least) Gradio 3.39.0 and lower, the first argument # in the list below is named `source`. If not None, it must # be a single string, namely either "upload" or "microphone". @@ -193,50 +178,27 @@ def _toggle_context(btn: str) -> Dict[gr.component, Dict[Any, Any]]: # form actions _my_build_stream = functools.partial(_stream_predict, client) - msg.submit( - _my_build_stream, [kb_checkbox, msg, chatbot], [msg, chatbot, context, latest_response] - ) - submit_btn.click( - _my_build_stream, [kb_checkbox, msg, chatbot], [msg, chatbot, context, latest_response] - ) + msg.submit(_my_build_stream, [kb_checkbox, msg, chatbot], [msg, chatbot, context, latest_response]) + submit_btn.click(_my_build_stream, [kb_checkbox, msg, chatbot], [msg, chatbot, context, latest_response]) tts_language_dropdown.change( - tts_utils.update_voice_dropdown, - [tts_language_dropdown], - [tts_voice_dropdown], - api_name=False + tts_utils.update_voice_dropdown, [tts_language_dropdown], [tts_voice_dropdown], api_name=False ) audio_mic.start_recording( - asr_utils.start_recording, - [audio_mic, asr_language_dropdown, state], - [msg, state], - api_name=False, - ) - audio_mic.stop_recording( - asr_utils.stop_recording, - [state], - [state], - api_name=False + asr_utils.start_recording, [audio_mic, asr_language_dropdown, state], [msg, state], api_name=False, ) + audio_mic.stop_recording(asr_utils.stop_recording, [state], [state], api_name=False) audio_mic.stream( - asr_utils.transcribe_streaming, - [audio_mic, asr_language_dropdown, state], - [msg, state], - api_name=False - ) - audio_mic.clear( - lambda: "", - [], - [msg], - api_name=False + asr_utils.transcribe_streaming, [audio_mic, asr_language_dropdown, state], [msg, state], api_name=False ) + audio_mic.clear(lambda: "", [], [msg], api_name=False) latest_response.change( tts_utils.text_to_speech, [latest_response, tts_language_dropdown, tts_voice_dropdown, tts_checkbox], [output_audio], - api_name=False + api_name=False, ) page.queue() @@ 
-244,22 +206,18 @@ def _toggle_context(btn: str) -> Dict[gr.component, Dict[Any, Any]]: def _stream_predict( - client: chat_client.ChatClient, - use_knowledge_base: bool, - question: str, - chat_history: List[Tuple[str, str]], + client: chat_client.ChatClient, use_knowledge_base: bool, question: str, chat_history: List[Tuple[str, str]], ) -> Any: """Make a prediction of the response to the prompt.""" chunks = "" chat_history = chat_history or [] _LOGGER.info( - "processing inference request - %s", - str({"prompt": question, "use_knowledge_base": use_knowledge_base}), + "processing inference request - %s", str({"prompt": question, "use_knowledge_base": use_knowledge_base}), ) documents: Union[None, List[Dict[str, Union[str, float]]]] = None if use_knowledge_base: - documents = client.search(prompt = question) + documents = client.search(prompt=question) for chunk in client.predict(query=question, use_knowledge_base=use_knowledge_base, num_tokens=OUTPUT_TOKENS): if chunk: diff --git a/RAG/src/rag_playground/speech/pages/kb.py b/RAG/src/rag_playground/speech/pages/kb.py new file mode 100644 index 00000000..94e93666 --- /dev/null +++ b/RAG/src/rag_playground/speech/pages/kb.py @@ -0,0 +1,115 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
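The converse.py hunk above ends in the middle of _stream_predict, so the streaming loop is cut off. As context only, here is a minimal sketch of how a Gradio handler can consume the ChatClient.predict generator from chat_client.py, which yields text fragments followed by a final None sentinel. The helper name stream_answer and the num_tokens default are hypothetical (the page itself uses its OUTPUT_TOKENS constant); this is not the repository's exact code.

def stream_answer(client, question, chat_history, use_knowledge_base=False, num_tokens=1024):
    """Accumulate streamed fragments and yield a growing chat transcript."""
    chunks = ""
    chat_history = chat_history or []
    for chunk in client.predict(query=question, use_knowledge_base=use_knowledge_base, num_tokens=num_tokens):
        if chunk:  # skip empty fragments and the trailing None sentinel
            chunks += chunk
            yield "", chat_history + [[question, chunks]], chunks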
+
+"""This module contains the frontend gui for chat."""
+import os
+from functools import partial
+from pathlib import Path
+from typing import List
+
+import gradio as gr
+from frontend import assets, chat_client
+
+PATH = "/kb"
+TITLE = "Knowledge Base Management"
+STATE_FILE = '/tmp/uploaded_files.txt'
+
+
+def build_page(client: chat_client.ChatClient) -> gr.Blocks:
+    """Build the gradio page to be mounted in the frame."""
+    kui_theme, kui_styles = assets.load_theme("kaizen")
+
+    with gr.Blocks(title=TITLE, theme=kui_theme, css=kui_styles) as page:
+        # create the page header
+        gr.Markdown(f"# {TITLE}")
+
+        with gr.Row():
+            upload_button = gr.UploadButton("Add File", file_types=["pdf"], file_count="multiple")
+        with gr.Row():
+            file_output = gr.File()
+
+        with gr.Row():
+            files_df = gr.Dataframe(
+                headers=["File Uploaded"],
+                datatype=["str"],
+                col_count=(1, "fixed"),
+                value=lambda: get_uploaded_files(client),
+                every=5,
+            )
+
+        with gr.Row():
+            buffer_textbox = gr.Textbox(interactive=False, visible=False)
+            message_textbox = gr.Textbox(label="Message", interactive=False, visible=True)
+
+        with gr.Row():
+            delete_button = gr.Button("Delete")
+
+        # form actions
+        # Upload button action
+        upload_button.upload(lambda files: upload_file(files, client), upload_button, file_output)
+
+        # Files dataframe action
+        files_df.select(return_selected_file, inputs=[files_df], outputs=[buffer_textbox])
+
+        # Delete button action
+        partial_delete_file = partial(delete_file, client=client)
+        delete_button.click(fn=partial_delete_file, inputs=buffer_textbox, outputs=message_textbox)
+
+    page.queue()
+    return page
+
+
+def delete_file(input_text, client: chat_client.ChatClient):
+    """Delete the selected file from the knowledge base using the client."""
+
+    try:
+        response = client.delete_documents(file_name=input_text)
+        return response
+    except Exception as e:
+        raise gr.Error(f"{e}")
+
+
+def return_selected_file(selected_index: gr.SelectData, dataframe):
+    """Return the file selected in the dataframe."""
+    if selected_index:
+        val = dataframe.iloc[selected_index.index[0]]
+        dataframe = dataframe.drop(selected_index.index[0])
+        return val.iloc[0]
+
+
+def upload_file(files: List[Path], client: chat_client.ChatClient) -> List[str]:
+    """Use the client to upload a file to the knowledge base."""
+    try:
+        file_paths = [file.name for file in files]
+        client.upload_documents(file_paths=file_paths)
+
+        # Save the uploaded file names to the state file
+        with open(STATE_FILE, 'a') as file:
+            for file_path in file_paths:
+                file_path = os.path.basename(file_path)
+                file.write(file_path + '\n')
+
+        return file_paths
+    except Exception as e:
+        raise gr.Error(f"{e}")
+
+
+def get_uploaded_files(client: chat_client.ChatClient) -> List[str]:
+    """List the files previously uploaded to the knowledge base."""
+    uploaded_files = [["No Files uploaded"]]
+    resp = client.get_uploaded_documents()
+    if len(resp) > 0:
+        uploaded_files = [[file] for file in resp]
+    return uploaded_files
diff --git a/RAG/src/rag_playground/speech/static/404.html b/RAG/src/rag_playground/speech/static/404.html
new file mode 100644
index 00000000..bafa01be
--- /dev/null
+++ b/RAG/src/rag_playground/speech/static/404.html
@@ -0,0 +1 @@
+
\ No newline at end of file diff --git a/RAG/src/rag_playground/speech/static/_next/static/WuNGAl0x4o1D5HqLxhHMt/_buildManifest.js b/RAG/src/rag_playground/speech/static/_next/static/WuNGAl0x4o1D5HqLxhHMt/_buildManifest.js new file mode 100644 index 00000000..9556320b --- /dev/null +++ b/RAG/src/rag_playground/speech/static/_next/static/WuNGAl0x4o1D5HqLxhHMt/_buildManifest.js @@ -0,0 +1 @@ +self.__BUILD_MANIFEST=function(e){return{__rewrites:{beforeFiles:[],afterFiles:[],fallback:[]},"/":["static/chunks/pages/index-1a1d31dae38463f7.js"],"/_error":["static/chunks/pages/_error-54de1933a164a1ff.js"],"/converse":[e,"static/chunks/pages/converse-61880f01babd873a.js"],"/kb":[e,"static/chunks/pages/kb-cf0d102293dc0a74.js"],sortedPages:["/","/_app","/_error","/converse","/kb"]}}("static/chunks/78-a36dca5d49fafb86.js"),self.__BUILD_MANIFEST_CB&&self.__BUILD_MANIFEST_CB(); \ No newline at end of file diff --git a/experimental/fm-asr-streaming-rag/frontend/frontend/static/_next/static/ZuyLfTn0WWGPn0wKTmN0V/_ssgManifest.js b/RAG/src/rag_playground/speech/static/_next/static/WuNGAl0x4o1D5HqLxhHMt/_ssgManifest.js similarity index 100% rename from experimental/fm-asr-streaming-rag/frontend/frontend/static/_next/static/ZuyLfTn0WWGPn0wKTmN0V/_ssgManifest.js rename to RAG/src/rag_playground/speech/static/_next/static/WuNGAl0x4o1D5HqLxhHMt/_ssgManifest.js diff --git a/RAG/src/rag_playground/speech/static/_next/static/chunks/78-a36dca5d49fafb86.js b/RAG/src/rag_playground/speech/static/_next/static/chunks/78-a36dca5d49fafb86.js new file mode 100644 index 00000000..c26ec333 --- /dev/null +++ b/RAG/src/rag_playground/speech/static/_next/static/chunks/78-a36dca5d49fafb86.js @@ -0,0 +1 @@ +(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[78],{6649:function(e,n,t){"use strict";var i,o,r=t(5697),a=t.n(r),s=t(7294);function c(){return(c=Object.assign||function(e){for(var n=1;n=0||(o[t]=e[t]);return o}var u=(i=o={exports:{}},o.exports,function(e){if("undefined"!=typeof window){var n,t=0,o=!1,r=!1,a=7,s="[iFrameSizer]",c=s.length,d=null,u=window.requestAnimationFrame,l={max:1,scroll:1,bodyScroll:1,documentElementScroll:1},f={},m=null,g={autoResize:!0,bodyBackground:null,bodyMargin:null,bodyMarginV1:8,bodyPadding:null,checkOrigin:!0,inPageLinks:!1,enablePublicMethods:!0,heightCalculationMethod:"bodyOffset",id:"iFrameResizer",interval:32,log:!1,maxHeight:1/0,maxWidth:1/0,minHeight:0,minWidth:0,resizeFrom:"parent",scrolling:!1,sizeHeight:!0,sizeWidth:!1,warningTimeout:5e3,tolerance:0,widthCalculationMethod:"scroll",onClose:function(){return!0},onClosed:function(){},onInit:function(){},onMessage:function(){z("onMessage function not defined")},onMouseEnter:function(){},onMouseLeave:function(){},onResized:function(){},onScroll:function(){return!0}},h={};window.jQuery&&((n=window.jQuery).fn?n.fn.iFrameResize||(n.fn.iFrameResize=function(e){return this.filter("iframe").each(function(n,t){S(t,e)}).end()}):M("","Unable to bind to jQuery, it is not fully loaded.")),"function"==typeof e&&e.amd?e([],A):i.exports=A(),window.iFrameResize=window.iFrameResize||A()}function p(){return window.MutationObserver||window.WebKitMutationObserver||window.MozMutationObserver}function w(e,n,t){e.addEventListener(n,t,!1)}function b(e,n,t){e.removeEventListener(n,t,!1)}function y(e){return f[e]?f[e].log:o}function v(e,n){k("log",e,n,y(e))}function M(e,n){k("info",e,n,y(e))}function z(e,n){k("warn",e,n,!0)}function k(e,n,t,i){if(!0===i&&"object"==typeof window.console){var o;console[e](s+"["+(o="Host page: 
"+n,window.top!==window.self&&(o=window.parentIFrame&&window.parentIFrame.getId?window.parentIFrame.getId()+": "+n:"Nested host page: "+n),o)+"]",t)}}function x(e){function n(){t("Height"),t("Width"),W(function(){var e;C(L),T(A),R(A,"onResized",L)},L,"init")}function t(e){var n=Number(f[A]["max"+e]),t=Number(f[A]["min"+e]),i=e.toLowerCase(),o=Number(L[i]);v(A,"Checking "+i+" is in range "+t+"-"+n),on&&(o=n,v(A,"Set "+i+" to max value")),L[i]=""+o}function i(e){return j.substr(j.indexOf(":")+a+e)}function o(e,n){var t;t=function(){var t,i;P("Send Page Info","pageInfo:"+(t=document.body.getBoundingClientRect(),JSON.stringify({iframeHeight:(i=L.iframe.getBoundingClientRect()).height,iframeWidth:i.width,clientHeight:Math.max(document.documentElement.clientHeight,window.innerHeight||0),clientWidth:Math.max(document.documentElement.clientWidth,window.innerWidth||0),offsetTop:parseInt(i.top-t.top,10),offsetLeft:parseInt(i.left-t.left,10),scrollTop:window.pageYOffset,scrollLeft:window.pageXOffset,documentHeight:document.documentElement.clientHeight,documentWidth:document.documentElement.clientWidth,windowHeight:window.innerHeight,windowWidth:window.innerWidth})),e,n)},h[n]||(h[n]=setTimeout(function(){h[n]=null,t()},32))}function r(e){var n=e.getBoundingClientRect();return F(A),{x:Math.floor(Number(n.left)+Number(d.x)),y:Math.floor(Number(n.top)+Number(d.y))}}function u(e){var n=e?r(L.iframe):{x:0,y:0},t={x:Number(L.width)+n.x,y:Number(L.height)+n.y};v(A,"Reposition requested from iFrame (offset x:"+n.x+" y:"+n.y+")"),window.top!==window.self?window.parentIFrame?window.parentIFrame["scrollTo"+(e?"Offset":"")](t.x,t.y):z(A,"Unable to scroll to requested position, window.parentIFrame not found"):(d=t,l(),v(A,"--"))}function l(){var e;!1!==R(A,"onScroll",d)?T(A):d=null}function m(e){var n;R(A,e,{iframe:L.iframe,screenX:L.width,screenY:L.height,type:L.type})}var g,p,y,k,x,I,S,N,j=e.data,L={},A=null;"[iFrameResizerChild]Ready"===j?!function(){for(var e in f)P("iFrame requested init",H(e),f[e].iframe,e)}():s===(""+j).substr(0,c)&&j.substr(c).split(":")[0]in f?(p=(g=j.substr(c).split(":"))[1]?parseInt(g[1],10):0,y=f[g[0]]&&f[g[0]].iframe,k=getComputedStyle(y),f[A=(L={iframe:y,id:g[0],height:p+("border-box"!==k.boxSizing?0:(k.paddingTop?parseInt(k.paddingTop,10):0)+(k.paddingBottom?parseInt(k.paddingBottom,10):0))+("border-box"!==k.boxSizing?0:(k.borderTopWidth?parseInt(k.borderTopWidth,10):0)+(k.borderBottomWidth?parseInt(k.borderBottomWidth,10):0)),width:g[2],type:g[3]}).id]&&(f[A].loaded=!0),(x=L.type in{true:1,false:1,undefined:1})&&v(A,"Ignoring init message from meta parent page"),!x&&(S=!0,f[I=A]||(S=!1,z(L.type+" No settings for "+I+". 
Message was: "+j)),S)&&(v(A,"Received: "+j),N=!0,null===L.iframe&&(z(A,"IFrame ("+L.id+") not found"),N=!1),N&&function(){var n,t=e.origin,i=f[A]&&f[A].checkOrigin;if(i&&""+t!="null"&&!(i.constructor===Array?function(){var e=0,n=!1;for(v(A,"Checking connection is from allowed list of origins: "+i);ef[c]["max"+e])throw Error("Value for min"+e+" can not be greater than max"+e)}t("Height"),t("Width"),e("maxHeight"),e("minHeight"),e("maxWidth"),e("minWidth")}(),("number"==typeof(f[c]&&f[c].bodyMargin)||"0"===(f[c]&&f[c].bodyMargin))&&(f[c].bodyMarginV1=f[c].bodyMargin,f[c].bodyMargin=""+f[c].bodyMargin+"px"),a=H(c),(s=p())&&n.parentNode&&new s(function(e){e.forEach(function(e){Array.prototype.slice.call(e.removedNodes).forEach(function(e){e===n&&O(n)})})}).observe(n.parentNode,{childList:!0}),w(n,"load",function(){var t,i;P("iFrame.onload",a,n,e,!0),t=f[c]&&f[c].firstRun,i=f[c]&&f[c].heightCalculationMethod in l,!t&&i&&E({iframe:n,height:0,width:0,type:"init"})}),P("init",a,n,e,!0),f[c]&&(f[c].iframe.iFrameResizer={close:O.bind(null,f[c].iframe),removeListeners:I.bind(null,f[c].iframe),resize:P.bind(null,"Window resize","resize",f[c].iframe),moveToAnchor:function(e){P("Move to anchor","moveToAnchor:"+e,f[c].iframe,c)},sendMessage:function(e){P("Send Message","message:"+(e=JSON.stringify(e)),f[c].iframe,c)}}))}function N(e,n){null===m&&(m=setTimeout(function(){m=null,e()},n))}function j(){"hidden"!==document.visibilityState&&(v("document","Trigger event: Visiblity change"),N(function(){L("Tab Visable","resize")},16))}function L(e,n){Object.keys(f).forEach(function(t){f[t]&&"parent"===f[t].resizeFrom&&f[t].autoResize&&!f[t].firstRun&&P(e,n,f[t].iframe,t)})}function A(){var n;function t(e,t){t&&(function(){if(t.tagName){if("IFRAME"!==t.tagName.toUpperCase())throw TypeError("Expected