diff --git a/.dockerignore b/.dockerignore index b4d50964..91540e39 100644 --- a/.dockerignore +++ b/.dockerignore @@ -5,7 +5,7 @@ .gitmodules # Ignore temperory volumes -deploy/compose/volumes +RAG/examples/**/volumes # creating a docker image .dockerignore diff --git a/.gitattributes b/.gitattributes index c8a8d73b..82f8bfb0 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1 @@ -notebooks/dataset.zip filter=lfs diff=lfs merge=lfs -text +notebooks/dataset.zip filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/.github/workflows/docs-build.yaml b/.github/workflows/docs-build.yaml deleted file mode 100644 index c0abe122..00000000 --- a/.github/workflows/docs-build.yaml +++ /dev/null @@ -1,153 +0,0 @@ -name: docs-build - -on: - pull_request: - branches: [ main, release-* ] - types: [ opened, synchronize ] - - push: - branches: [ main ] - tags: - - v* - workflow_dispatch: - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true - -defaults: - run: - shell: bash - -jobs: - build-docs: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Build image - run: | - docker build --pull --tag docs-builder:latest --file docs/Dockerfile . - - name: Build docs - run: | - docker run -v $(pwd):/work -w /work docs-builder:latest sphinx-build -b html -d /tmp docs docs/_build/output - - name: Delete unnecessary files - run: | - sudo rm -rf docs/_build/jupyter_execute - sudo rm -rf docs/_build/.buildinfo - - name: Upload HTML - uses: actions/upload-artifact@v4 - with: - name: html-build-artifact - path: docs/_build/ - if-no-files-found: error - retention-days: 1 - - name: Store PR information - if: ${{ github.event_name == 'pull_request' }} - run: | - mkdir ./pr - echo ${{ github.event.number }} > ./pr/pr.txt - echo ${{ github.event.pull_request.merged }} > ./pr/merged.txt - echo ${{ github.event.action }} > ./pr/action.txt - - name: Upload PR information - if: ${{ github.event_name == 'pull_request' }} - uses: actions/upload-artifact@v4 - with: - name: pr - path: pr/ - - store-html: - needs: [ build-docs ] - if: ${{ github.event_name == 'push' }} - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - with: - ref: "gh-pages" - - name: Initialize Git configuration - run: | - git config user.name docs-build - git config user.email do-not-send@github.com - - name: Download artifacts - uses: actions/download-artifact@v4 - with: - name: html-build-artifact - - name: Copy HTML directories - run: | - ls -asl - - name: Store bleeding edge docs from main - if: ${{ github.ref == 'refs/heads/main' }} - run: | - mkdir main || true - rsync -av --progress --delete output/ main/ - git add main - - name: Store docs for a release tag - if: ${{ startsWith(github.ref, 'refs/tags/v') }} - env: - LATEST: ${{ contains(github.event.head_commit.message, '/not-latest') && 'not-true' || 'true' }} - run: | - printenv LATEST - if [[ "${GITHUB_REF}" =~ "-rc" ]]; then - echo "Not saving documents for release candidates." - exit 0 - fi - if [[ "${GITHUB_REF}" =~ v([0-9]+\.[0-9]+\.[0-9]+) ]]; then - TAG="${BASH_REMATCH[1]}" - mkdir "${TAG}" || true - rsync -av --progress --delete output/ "${TAG}/" - git add "${TAG}/" - if [[ "${LATEST}" == 'true' ]]; then - mkdir latest || true - rsync -av --progress --delete output/ latest/ - cp output/versions.json . 
- git add latest - git add versions.json - fi - fi - - name: Check or create dot-no-jekyll file - run: | - if [ -f ".nojekyll" ]; then - echo "The dot-no-jekyll file already exists." - exit 0 - fi - touch .nojekyll - git add .nojekyll - - name: Check or create redirect page - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - resp=$(grep 'http-equiv="refresh"' index.html 2>/dev/null) || true - if [ -n "${resp}" ]; then - echo "The redirect file already exists." - exit 0 - fi - # If any of these commands fail, fail the build. - html_url=$(gh api "repos/${GITHUB_REPOSITORY}/pages" --jq ".html_url") - # Beware ugly quotation mark avoidance in the foll lines. - echo '' > index.html - echo '' >> index.html - echo '
' >> index.html - echo 'Please follow the link to the ' >> index.html - echo 'latest documentation.
' >> index.html - echo ' ' >> index.html - echo '' >> index.html - git add index.html - - name: Commit changes to the GitHub Pages branch - run: | - git status - if git commit -m 'Pushing changes to GitHub Pages.'; then - git push -f - else - echo "Nothing changed." - fi diff --git a/.github/workflows/docs-preview-pr.yaml b/.github/workflows/docs-preview-pr.yaml deleted file mode 100644 index 362db16e..00000000 --- a/.github/workflows/docs-preview-pr.yaml +++ /dev/null @@ -1,117 +0,0 @@ -name: docs-preview-pr - -on: - workflow_run: - workflows: [docs-build] - types: [completed] - -env: - WF_ID: ${{ github.event.workflow_run.id }} - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - -jobs: - # Always determine if GitHub Pages are configured for this repo. - get-gh-pages-url: - if: - github.event.workflow_run.event == 'pull_request' && - github.event.workflow_run.conclusion == 'success' - runs-on: ubuntu-latest - outputs: - url: ${{ steps.api-resp.outputs.html_url || '' }} - branch: ${{ steps.api-resp.outputs.branch || '' }} - steps: - - name: Check for GitHub Pages - id: api-resp - run: | - has_pages=$(gh api "repos/${GITHUB_REPOSITORY}" -q '.has_pages') - if [ "true" != "${has_pages}" ]; then - echo "GitHub pages is not active for the repository. Quitting." - return - fi - - url=$(gh api "repos/${GITHUB_REPOSITORY}/pages" -q '.html_url') - branch=$(gh api "repos/${GITHUB_REPOSITORY}/pages" -q '.source.branch') - - echo "html_url=${url}" >> $GITHUB_OUTPUT - echo "branch=${branch}" >> $GITHUB_OUTPUT - - # Identify the dir for the HTML. - store-html: - runs-on: ubuntu-latest - needs: [get-gh-pages-url] - if: needs.get-gh-pages-url.outputs.url != '' - steps: - - uses: actions/checkout@v4 - with: - ref: ${{ needs.get-gh-pages-url.outputs.branch }} - - name: Initialize Git configuration - run: | - git config user.name docs-preview - git config user.email do-not-send-@github.com - - name: Download artifacts - run: | - gh run view "${WF_ID}" - gh run download "${WF_ID}" - PR=$(cat ./pr/pr.txt) - MERGED=$(cat ./pr/merged.txt) - ACTION=$(cat ./pr/action.txt) - echo "PR_NO=${PR}" >> $GITHUB_ENV - echo "MERGE_STATUS=${MERGED}" >> $GITHUB_ENV - echo "PR_ACTION=${ACTION}" >> $GITHUB_ENV - echo "REVIEW_DIR=review/" >> $GITHUB_ENV - echo "PR_REVIEW_DIR=review/pr-${PR}" >> $GITHUB_ENV - - # Remove the pr artifact directory so that it does not - # appear in listings or confuse git with untracked files. - rm -rf ./pr - - # Permutations: - # - PR was updated, PR_ACTION is !closed, need to delete review directory and update it. - # - PR was closed (regardless of merge), PR_ACTION is closed, need to delete review directory. - - # If this PR is still open, store HTML in a review directory. - - name: Handle HTML review directory for open PRs and updates to PRs - if: env.MERGE_STATUS == 'false' && env.PR_ACTION != 'closed' - run: | - rm -rf "${{ env.PR_REVIEW_DIR }}" 2>/dev/null || true - if [ ! -d "${{ env.REVIEW_DIR }}" ]; then - mkdir "${{ env.REVIEW_DIR }}" - fi - mv ./html-build-artifact/latest/ "${{ env.PR_REVIEW_DIR }}" - git add "${{ env.PR_REVIEW_DIR }}" - # If the PR was closed, merged or not, delete review directory. 
- - name: Delete HTML review directory for closed PRs - if: env.PR_ACTION == 'closed' - run: | - if [ -d ./html-build-artifact/ ]; then - rm -rf ./html-build-artifact/ 2>/dev/null - fi - if [ -d "${{ env.PR_REVIEW_DIR }}" ]; then - git rm -rf "${{ env.PR_REVIEW_DIR }}" - fi - - name: Commit changes to the GitHub Pages branch - run: | - git status - if git commit -m 'Pushing changes to GitHub Pages.'; then - git push -f - else - echo "Nothing changed." - fi - - name: Check for existing documentation review comment - run: | - result=$(gh pr view ${{ env.PR_NO }} --json comments -q 'any(.comments[].body; contains("Documentation preview"))') - echo "COMMENT_EXISTS=${result}" >> $GITHUB_ENV - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Add HTML review URL comment to a newly opened PR - if: env.MERGE_STATUS == 'false' && env.COMMENT_EXISTS == 'false' - env: - URL: ${{ needs.get-gh-pages-url.outputs.url }} - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - shell: bash - run: | - echo -e "## Documentation preview" > body - echo -e "" >> body - echo -e "<${{ env.URL }}${{ env.PR_REVIEW_DIR }}>" >> body - cat body - gh pr comment ${{ env.PR_NO }} --body-file body diff --git a/.github/workflows/docs-remove-stale-reviews.yaml b/.github/workflows/docs-remove-stale-reviews.yaml deleted file mode 100644 index 8b758c37..00000000 --- a/.github/workflows/docs-remove-stale-reviews.yaml +++ /dev/null @@ -1,11 +0,0 @@ -name: docs-remove-stale-reviews - -on: - schedule: - # 42 minutes after 0:00 UTC on Sundays - - cron: "42 0 * * 0" - workflow_dispatch: - -jobs: - remove: - uses: nvidia-merlin/.github/.github/workflows/docs-remove-stale-reviews-common.yaml@main diff --git a/.gitignore b/.gitignore index 7094b42f..9d14994c 100644 --- a/.gitignore +++ b/.gitignore @@ -3,14 +3,10 @@ **__pycache__** # Helm Exclusions -**/charts/*.tgz - -# project temp files -deploy/*.log -deploy/*.txt +**/helm-charts/*.tgz # Docker Compose exclusions -volumes/ +RAG/examples/**/volumes uploaded_files/ # Visual Studio Code @@ -26,5 +22,10 @@ docs/experimental docs/tools # Developing examples -RetrievalAugmentedGeneration/examples/simple_rag_api_catalog/ -deploy/compose/simple-rag-api-catalog.yaml +RAG/examples/simple_rag_api_catalog/ +RAG/examples/simple-rag-api-catalog.yaml + +# Notebook checkpoints +RAG/notebooks/langchain/.ipynb_checkpoints +RAG/notebooks/langchain/data/nv_embedding +RAG/notebooks/langchain/data/save_embedding \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ab5a3e8b..11fada31 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -13,11 +13,15 @@ repos: rev: 19.10b0 hooks: - id: black + files: ^RAG/ args: ["--skip-string-normalization", "--line-length=119"] additional_dependencies: ['click==8.0.4'] - repo: https://github.com/pycqa/isort rev: 5.12.0 hooks: - id: isort + files: ^RAG/ name: isort (python) args: ["--multi-line=3", "--trailing-comma", "--force-grid-wrap=0", "--use-parenthese", "--line-width=119", "--ws"] + + diff --git a/CHANGELOG.md b/CHANGELOG.md index 6112a884..810186d3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,60 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.8.0] - 2024-08-19 + +This release completely refactors the directory structure of the repository for a more seamless and intuitive developer journey. 
It also adds support to deploy the latest accelerated embedding and reranking models across the cloud, data center, and workstation using [NVIDIA NeMo Retriever NIM microservices](https://docs.nvidia.com/nim/index.html#nemo-retriever).
+
+### Added
+- [End-to-end RAG examples](./RAG/examples/) enhancements
+  - [Single-command deployment](./README.md#try-it-now) for all the examples using Docker Compose.
+  - All end-to-end RAG examples are now more self-contained, with documentation, code, and deployment assets residing in a dedicated example-specific directory.
+  - Segregated examples into [basic and advanced RAG](./RAG/examples/) with dedicated READMEs.
+  - Added reranker model support to the [multi-turn RAG example](./RAG/examples/advanced_rag/multi_turn_rag/).
+  - Added a [dedicated prompt configuration file for every example](./docs/prompt-customization.md).
+  - Removed Python dev packages from containers to enhance security.
+  - Updated to the latest version of [langchain-nvidia-ai-endpoints](https://python.langchain.com/v0.2/docs/integrations/providers/nvidia/).
+- [Speech support using RAG Playground](./docs/riva-asr-tts.md)
+  - Added support to access [Riva speech models from the NVIDIA API Catalog](https://build.nvidia.com/explore/speech).
+  - Speech support in RAG Playground is opt-in.
+- Documentation enhancements
+  - Added more comprehensive [how-to guides](./README.md#how-to-guides) for the end-to-end RAG examples.
+  - Added [example-specific architecture diagrams](./RAG/examples/basic_rag/langchain/) in each example directory.
+- Added a new industry-specific [top-level directory](./industries/)
+  - Added a [healthcare domain-specific Medical Device Training Assistant RAG](./industries/healthcare/medical-device-training-assistant/).
+- Added notebooks showcasing new use cases
+  - [Basic LangChain-based RAG pipeline](./RAG/notebooks/langchain/langchain_basic_RAG.ipynb) using the latest NVIDIA API Catalog connectors.
+  - [Basic LlamaIndex-based RAG pipeline](./RAG/notebooks/llamaindex/llamaindex_basic_RAG.ipynb) using the latest NVIDIA API Catalog connectors.
+  - [NeMo Guardrails with basic LangChain RAG](./RAG/notebooks/langchain/NeMo_Guardrails_with_LangChain_RAG/).
+  - [Using NVIDIA NIM microservices with NeMo Guardrails in a RAG pipeline](./RAG/notebooks/langchain/Using_NVIDIA_NIMs_with_NeMo_Guardrails/).
+  - [Using NeMo Evaluator with Llama 3.1 8B Instruct](./RAG/notebooks/nemo/Nemo%20Evaluator%20Llama%203.1%20Workbook/).
+  - [Agentic RAG pipeline with NeMo Retriever and NIM for LLMs](./RAG/notebooks/langchain/agentic_rag_with_nemo_retriever_nim.ipynb).
+- Added a new `community` (previously `experimental`) example
+  - A simple web interface for interacting with different [selectable NIM endpoints](./community/llm-prompt-design-helper/). The interface supports designing a system prompt for calling the LLM.
+
+### Changed
+- Major restructuring and reorganisation of the assets within the repository
+  - The top-level `experimental` directory has been renamed to `community`.
+  - The top-level `RetrievalAugmentedGeneration` directory has been renamed to `RAG`.
+  - The Docker Compose files inside the top-level `deploy` directory have been migrated to example-specific directories under `RAG/examples`. The vector database and on-prem NIM microservices deployment files are under `RAG/examples/local_deploy`.
+  - The top-level `models` directory has been renamed to `finetuning`.
+  - The top-level `notebooks` directory has been moved under `RAG/notebooks` and organised by framework.
+  - The top-level `tools` directory has been migrated to `RAG/tools`.
+  - The top-level `integrations` directory has been moved into `RAG/src`.
+  - `RetrievalAugmentedGeneration/common` now resides under `RAG/src/chain_server`.
+  - `RetrievalAugmentedGeneration/frontend` now resides under `RAG/src/rag_playground/default`.
+  - The `5 mins RAG No GPU` example under the top-level `examples` directory is now under `community`.
+
+### Deprecated
+ - GitHub Pages-based documentation is now replaced with Markdown-based documentation.
+ - The top-level `examples` directory has been removed.
+ - The following notebooks were removed:
+   - [02_Option(1)_NVIDIA_AI_endpoint_simple.ipynb](https://github.com/NVIDIA/GenerativeAIExamples/blob/v0.7.0/notebooks/02_Option(1)_NVIDIA_AI_endpoint_simple.ipynb)
+   - [notebooks/02_Option(2)_minimalistic_RAG_with_langchain_local_HF_LLM.ipynb](https://github.com/NVIDIA/GenerativeAIExamples/blob/v0.7.0/notebooks/02_Option(2)_minimalistic_RAG_with_langchain_local_HF_LLM.ipynb)
+   - [notebooks/03_Option(1)_llama_index_with_NVIDIA_AI_endpoint.ipynb](https://github.com/NVIDIA/GenerativeAIExamples/blob/v0.7.0/notebooks/03_Option(1)_llama_index_with_NVIDIA_AI_endpoint.ipynb)
+   - [notebooks/03_Option(2)_llama_index_with_HF_local_LLM.ipynb](https://github.com/NVIDIA/GenerativeAIExamples/blob/v0.7.0/notebooks/03_Option(2)_llama_index_with_HF_local_LLM.ipynb)
+
+
 ## [0.7.0] - 2024-06-18
 
 This release switches all examples to use cloud hosted GPU accelerated LLM and embedding models from [Nvidia API Catalog](https://build.nvidia.com) as default. It also deprecates support to deploy on-prem models using NeMo Inference Framework Container and adds support to deploy accelerated generative AI models across the cloud, data center, and workstation using [latest Nvidia NIM-LLM](https://docs.nvidia.com/nim/large-language-models/latest/introduction.html).
@@ -17,7 +71,7 @@ This release switches all examples to use cloud hosted GPU accelerated LLM and e
 - Improved accuracy of image parsing by using [tesseract-ocr](https://pypi.org/project/tesseract-ocr/)
 - Added a [new notebook showcasing RAG usecase using accelerated NIM based on-prem deployed models](./notebooks/08_RAG_Langchain_with_Local_NIM.ipynb)
 - Added a [new experimental example](./experimental/rag-developer-chatbot/) showcasing how to create a developer-focused RAG chatbot using RAPIDS cuDF source code and API documentation.
-- Added a [new experimental example](./experimental/event-driven-rag-cve-analysis/) demonstrating how NVIDIA Morpheus, NIMs, and RAG pipelines can be integrated to create LLM-based agent pipelines.
+- Added a [new experimental example](./experimental/event-driven-rag-cve-analysis/) demonstrating how NVIDIA Morpheus, NIM microservices, and RAG pipelines can be integrated to create LLM-based agent pipelines.
 
 ### Changed
 - All examples now use llama3 models from [Nvidia API Catalog](https://build.nvidia.com/search?term=llama3) as default. Summary of updated examples and the model it uses is available [here](https://nvidia.github.io/GenerativeAIExamples/latest/index.html#developer-rag-examples).
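The 0.8.0 entries above point to the updated [langchain-nvidia-ai-endpoints](https://python.langchain.com/v0.2/docs/integrations/providers/nvidia/) connectors and to the reranker support added to the multi-turn RAG example, whose README appears below. As a rough sketch of how those pieces typically fit together (illustrative only, not code taken from this repository), the snippet below wires the API Catalog LLM, embedding, and reranking models into a small LCEL chain with a document retriever and a conversation-history retriever. The Milvus connection settings, collection names, prompt, and query are assumptions, and the sketch presumes `NVIDIA_API_KEY` is exported as described in the example README.

```python
# Illustrative sketch only; connection details, collection names, prompt, and query are assumptions.
from langchain_community.vectorstores import Milvus
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings, NVIDIARerank

# Models named in the multi-turn RAG example's README below (requires NVIDIA_API_KEY in the environment).
llm = ChatNVIDIA(model="meta/llama3-8b-instruct")
embedder = NVIDIAEmbeddings(model="nvidia/nv-embedqa-e5-v5")
reranker = NVIDIARerank(model="nvidia/nv-rerankqa-mistral-4b-v3", top_n=4)

# Two vector stores, mirroring the example: ingested documents and conversation history.
connection = {"host": "localhost", "port": "19530"}  # assumed local Milvus deployment
doc_store = Milvus(embedding_function=embedder, collection_name="multi_turn_rag", connection_args=connection)
conv_store = Milvus(embedding_function=embedder, collection_name="conv_store", connection_args=connection)

def rerank(inputs: dict) -> str:
    # Keep only the most relevant retrieved chunks for the prompt.
    docs = reranker.compress_documents(documents=inputs["docs"], query=inputs["question"])
    return "\n\n".join(doc.page_content for doc in docs)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "Answer using the context and the prior conversation.\n"
                   "Context:\n{context}\n\nHistory:\n{history}"),
        ("user", "{question}"),
    ]
)

chain = (
    RunnableParallel(
        docs=doc_store.as_retriever(),
        history=conv_store.as_retriever() | (lambda docs: "\n".join(d.page_content for d in docs)),
        question=RunnablePassthrough(),
    )
    | RunnablePassthrough.assign(context=rerank)
    | prompt
    | llm
    | StrOutputParser()
)

print(chain.invoke("How does the multi-turn example use conv_store?"))
```

The example's actual chain server also writes each new question and answer back into the conversation store after every turn; that write-back is omitted here for brevity.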
diff --git a/RetrievalAugmentedGeneration/common/__init__.py b/RAG/__init__.py
similarity index 100%
rename from RetrievalAugmentedGeneration/common/__init__.py
rename to RAG/__init__.py
diff --git a/RetrievalAugmentedGeneration/examples/multimodal_rag/llm/__init__.py b/RAG/examples/README.md
similarity index 100%
rename from RetrievalAugmentedGeneration/examples/multimodal_rag/llm/__init__.py
rename to RAG/examples/README.md
diff --git a/RetrievalAugmentedGeneration/examples/developer_rag/__init__.py b/RAG/examples/__init__.py
similarity index 100%
rename from RetrievalAugmentedGeneration/examples/developer_rag/__init__.py
rename to RAG/examples/__init__.py
diff --git a/RAG/examples/advanced_rag/multi_turn_rag/README.md b/RAG/examples/advanced_rag/multi_turn_rag/README.md
new file mode 100644
index 00000000..59d9dbc8
--- /dev/null
+++ b/RAG/examples/advanced_rag/multi_turn_rag/README.md
@@ -0,0 +1,85 @@
+
+
+# Multi-Turn RAG Example
+
+## Example Features
+
+This example showcases multi-turn conversational AI in a RAG pipeline.
+The chain server stores the conversation history and knowledge base in a vector database and retrieves them at runtime to understand contextual queries.
+
+The example supports ingestion of PDF and text files.
+The documents are ingested into a dedicated document vector store, `multi_turn_rag`.
+The prompt for the example is tuned to act as a document chatbot.
+To maintain the conversation history, the chain server stores each previously asked query and the model's generated answer as a text entry in a separate, dedicated conversation history vector store, `conv_store`.
+Both of these vector stores are part of a LangChain [LCEL](https://python.langchain.com/docs/expression_language/) chain as LangChain retrievers.
+When the chain is invoked with a query, the query passes through both retrievers.
+The retrievers fetch context from the document vector store and the closest-matching conversation history from the conversation history vector store.
+The document chunks retrieved from the document vector store are then passed through a reranker model to determine the most relevant top_k context, which is added to the LLM prompt for response generation as part of the chain.
+
+| Model | Embedding | Ranking (Optional) | Framework | Vector Database | File Types |
+| ----------------------- | ----------------------- | -------------------------------- | --------- | --------------- | ------------ |
+| meta/llama3-8b-instruct | nvidia/nv-embedqa-e5-v5 | nvidia/nv-rerankqa-mistral-4b-v3 | LangChain | Milvus | TXT, PDF, MD |
+
+![Diagram](../../../../docs/images/multiturn_rag_arch.png)
+
+## Prerequisites
+
+Complete the [common prerequisites](../../../../docs/common-prerequisites.md).
+
+## Build and Start the Containers
+
+1. Export your NVIDIA API key as an environment variable:
+
+   ```text
+   export NVIDIA_API_KEY="nvapi-<...>"
+   ```
+
+1. Start the containers:
+
+   ```console
+   cd RAG/examples/advanced_rag/multi_turn_rag/
+   docker compose up -d --build
+   ```
+
+   *Example Output*
+
+   ```output
+   ✔ Network nvidia-rag          Created
+   ✔ Container milvus-etcd       Running
+   ✔ Container milvus-minio      Running
+   ✔ Container milvus-standalone Running
+   ✔ Container chain-server      Started
+   ✔ Container rag-playground    Started
+   ```
+
+1. Confirm the containers are running:
+
+   ```console
+   docker ps --format "table {{.ID}}\t{{.Names}}\t{{.Status}}"
+   ```
+
+   *Example Output*
+
+   ```output
+   CONTAINER ID   NAMES               STATUS
+   dd4fc3da6c9c   rag-playground      Up About a minute
+   ac1f039a1db8   chain-server        Up About a minute
+   cd0a57ee20e0   milvus-standalone   Up 2 hours
+   a36370e7ed75   milvus-minio        Up 2 hours (healthy)
+   a796a4e59b68   milvus-etcd         Up 2 hours (healthy)
+   ```
+
+1. Open a web browser and access