Pushing changes to GitHub Pages.

NVIDIA · Mar 20, 2024 · 5e0daa4 · 5e0daa4
1 parent 6bafbeb
commit 5e0daa4
Show file tree

Hide file tree

Showing 99 changed files with 1,060 additions and 622 deletions.
diff --git a/0.5.0/_images/ai-foundations-topology.png b/0.5.0/_images/ai-foundations-topology.png
diff --git a/0.5.0/_images/api-catalog-generate-api-key.png b/0.5.0/_images/api-catalog-generate-api-key.png
diff --git a/0.5.0/_images/catalog-and-vector-db.png b/0.5.0/_images/catalog-and-vector-db.png
diff --git a/0.5.0/_images/chrome-flags-fix-media-device-access-error.png b/0.5.0/_images/chrome-flags-fix-media-device-access-error.png
diff --git a/0.5.0/_images/evaluation-topology.png b/0.5.0/_images/evaluation-topology.png
diff --git a/0.5.0/_images/image8.png b/0.5.0/_images/image8.png
diff --git a/0.5.0/_images/key-generated.png b/0.5.0/_images/key-generated.png
diff --git a/0.5.0/_images/llama-2-70b-card.png b/0.5.0/_images/llama-2-70b-card.png
diff --git a/0.5.0/_images/llama-2-generate-key.png b/0.5.0/_images/llama-2-generate-key.png
diff --git a/0.5.0/_images/local-gpus-topology.png b/0.5.0/_images/local-gpus-topology.png
diff --git a/0.5.0/_images/media-device-access-error.png b/0.5.0/_images/media-device-access-error.png
diff --git a/0.5.0/_images/mixtral-8x7b-instruct.png b/0.5.0/_images/mixtral-8x7b-instruct.png
diff --git a/0.5.0/_static/openapi_schema.json b/0.5.0/_static/openapi_schema.json
@@ -194,6 +194,7 @@
 				"properties": {
 					"id": {
 						"type": "string",
+						"maxLength": 100000,
 						"title": "Id",
 						"default": ""
 					},
@@ -202,17 +203,9 @@
 							"$ref": "#/components/schemas/ChainResponseChoices"
 						},
 						"type": "array",
+						"maxItems": 256,
 						"title": "Choices",
-						"default": [
-							{
-								"index": 0,
-								"message": {
-									"content": "",
-									"role": "assistant"
-								},
-								"finish_reason": ""
-							}
-						]
+						"default": []
 					}
 				},
 				"type": "object",
@@ -223,6 +216,8 @@
 				"properties": {
 					"index": {
 						"type": "integer",
+						"maximum": 256,
+						"minimum": 0,
 						"title": "Index",
 						"default": 0
 					},
@@ -232,13 +227,15 @@
 								"$ref": "#/components/schemas/Message"
 							}
 						],
+						"title": "Message",
 						"default": {
 							"role": "assistant",
 							"content": ""
 						}
 					},
 					"finish_reason": {
 						"type": "string",
+						"maxLength": 4096,
 						"title": "Finish Reason",
 						"default": ""
 					}
@@ -251,11 +248,13 @@
 				"properties": {
 					"content": {
 						"type": "string",
+						"maxLength": 131072,
 						"title": "Content",
 						"description": "The content of the document chunk."
 					},
 					"filename": {
 						"type": "string",
+						"maxLength": 4096,
 						"title": "Filename",
 						"description": "The name of the file the chunk belongs to."
 					},
@@ -278,11 +277,14 @@
 				"properties": {
 					"query": {
 						"type": "string",
+						"maxLength": 131072,
 						"title": "Query",
 						"description": "The content or keywords to search for within documents."
 					},
 					"top_k": {
 						"type": "integer",
+						"maximum": 256,
+						"minimum": 0,
 						"title": "Top K",
 						"description": "The maximum number of documents to return in the response.",
 						"default": 4
@@ -302,6 +304,7 @@
 							"$ref": "#/components/schemas/DocumentChunk"
 						},
 						"type": "array",
+						"maxItems": 256,
 						"title": "Chunks",
 						"description": "List of document chunks."
 					}
@@ -317,9 +320,11 @@
 				"properties": {
 					"documents": {
 						"items": {
-							"type": "string"
+							"type": "string",
+							"maxLength": 131072
 						},
 						"type": "array",
+						"maxItems": 1000000,
 						"title": "Documents",
 						"description": "List of filenames."
 					}
@@ -348,20 +353,20 @@
 				"properties": {
 					"role": {
 						"type": "string",
+						"maxLength": 256,
 						"title": "Role",
-						"description": "Role for a message AI, User and System"
+						"description": "Role for a message AI, User and System",
+						"default": "user"
 					},
 					"content": {
 						"type": "string",
+						"maxLength": 131072,
 						"title": "Content",
-						"description": "The input query/prompt to the pipeline."
+						"description": "The input query/prompt to the pipeline.",
+						"default": "I am going to Paris, what should I see?"
 					}
 				},
 				"type": "object",
-				"required": [
-					"role",
-					"content"
-				],
 				"title": "Message",
 				"description": "Definition of the Chat Message type."
 			},
@@ -372,6 +377,7 @@
 							"$ref": "#/components/schemas/Message"
 						},
 						"type": "array",
+						"maxItems": 50000,
 						"title": "Messages",
 						"description": "A list of messages comprising the conversation so far. The roles of the messages must be alternating between user and assistant. The last input message should have role user. A message with the system role is optional, and must be the very first message if it is present."
 					},
@@ -382,27 +388,35 @@
 					},
 					"temperature": {
 						"type": "number",
+						"maximum": 1,
+						"minimum": 0.1,
 						"title": "Temperature",
 						"description": "The sampling temperature to use for text generation. The higher the temperature value is, the less deterministic the output text will be. It is not recommended to modify both temperature and top_p in the same call.",
 						"default": 0.2
 					},
 					"top_p": {
 						"type": "number",
+						"maximum": 1,
+						"minimum": 0.1,
 						"title": "Top P",
 						"description": "The top-p sampling mass used for text generation. The top-p value determines the probability mass that is sampled at sampling time. For example, if top_p = 0.2, only the most likely tokens (summing to 0.2 cumulative probability) will be sampled. It is not recommended to modify both temperature and top_p in the same call.",
 						"default": 0.7
 					},
 					"max_tokens": {
 						"type": "integer",
+						"maximum": 1024,
+						"minimum": 0,
 						"title": "Max Tokens",
 						"description": "The maximum number of tokens to generate in any given call. Note that the model is not aware of this value, and generation will simply stop at the number of tokens specified.",
 						"default": 1024
 					},
 					"stop": {
 						"items": {
-							"type": "string"
+							"type": "string",
+							"maxLength": 256
 						},
 						"type": "array",
+						"maxItems": 256,
 						"title": "Stop",
 						"description": "A string or a list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence."
 					}

diff --git a/latest/ai-foundation-models.html → 0.5.0/api-catalog.html b/latest/ai-foundation-models.html → 0.5.0/api-catalog.html
@@ -4,7 +4,7 @@
   <meta charset="utf-8" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
 
   <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-  <title>Using the NVIDIA AI Foundation Models &mdash; NVIDIA Generative AI Examples 0.5.0 documentation</title>
+  <title>Using the NVIDIA API Catalog &mdash; NVIDIA Generative AI Examples 0.5.0 documentation</title>
       <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
       <link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
       <link rel="stylesheet" href="_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" type="text/css" />
@@ -54,7 +54,7 @@
 <ul class="current">
 <li class="toctree-l1"><a class="reference internal" href="index.html">About the RAG Pipelines</a></li>
 <li class="toctree-l1"><a class="reference internal" href="support-matrix.html">Support Matrix</a></li>
-<li class="toctree-l1 current"><a class="current reference internal" href="#">AI Foundation Models</a></li>
+<li class="toctree-l1 current"><a class="current reference internal" href="#">API Catalog Models</a></li>
 <li class="toctree-l1"><a class="reference internal" href="local-gpu.html">Local GPUs</a></li>
 <li class="toctree-l1"><a class="reference internal" href="multi-gpu.html">Multi-GPU for Inference</a></li>
 <li class="toctree-l1"><a class="reference internal" href="query-decomposition.html">Query Decomposition</a></li>
@@ -109,7 +109,7 @@
           <div role="navigation" aria-label="Page navigation">
   <ul class="wy-breadcrumbs">
       <li><a href="index.html" class="icon icon-home"></a> &raquo;</li>
-      <li>Using the NVIDIA AI Foundation Models</li>
+      <li>Using the NVIDIA API Catalog</li>
       <li class="wy-breadcrumbs-aside">
       </li>
   </ul>
@@ -134,8 +134,8 @@
   See the License for the specific language governing permissions and
   limitations under the License.
 -->
-<section id="using-the-nvidia-ai-foundation-models">
-<h1>Using the NVIDIA AI Foundation Models<a class="headerlink" href="#using-the-nvidia-ai-foundation-models" title="Permalink to this headline"></a></h1>
+<section id="using-the-nvidia-api-catalog">
+<h1>Using the NVIDIA API Catalog<a class="headerlink" href="#using-the-nvidia-api-catalog" title="Permalink to this headline"></a></h1>
 <div class="contents local topic" id="contents">
 <ul class="simple">
 <li><p><a class="reference internal" href="#example-features" id="id1">Example Features</a></p></li>
@@ -147,7 +147,7 @@ <h1>Using the NVIDIA AI Foundation Models<a class="headerlink" href="#using-the-
 </div>
 <section id="example-features">
 <h2>Example Features<a class="headerlink" href="#example-features" title="Permalink to this headline"></a></h2>
-<p>This example deploys a developer RAG pipeline for chat Q&amp;A and serves inferencing from an NVIDIA AI Foundation Models endpoint
+<p>This example deploys a developer RAG pipeline for chat Q&amp;A and serves inferencing from an NVIDIA API Catalog endpoint
 instead of NVIDIA Triton Inference Server, a local Llama 2 model, or local GPUs.</p>
 <p>Developers get free credits for 10K requests to any of the available models.</p>
 <table class="docutils align-default">
@@ -169,19 +169,19 @@ <h2>Example Features<a class="headerlink" href="#example-features" title="Permal
 <th class="head"><p>Description</p></th>
 <th class="head"><p>Multi-GPU</p></th>
 <th class="head"><p>TRT-LLM</p></th>
-<th class="head"><p>NVIDIA AI Foundation</p></th>
+<th class="head"><p>Model Location</p></th>
 <th class="head"><p>Triton</p></th>
 <th class="head"><p>Vector Database</p></th>
 </tr>
 </thead>
 <tbody>
-<tr class="row-even"><td><p>mixtral_8x7b</p></td>
+<tr class="row-even"><td><p>ai-mixtral-8x7b-instruct</p></td>
 <td><p>nvolveqa_40k</p></td>
 <td><p>Langchain</p></td>
 <td><p>QA chatbot</p></td>
 <td><p>NO</p></td>
 <td><p>NO</p></td>
-<td><p>YES</p></td>
+<td><p>API Catalog</p></td>
 <td><p>NO</p></td>
 <td><p>Milvus</p></td>
 </tr>
@@ -190,11 +190,11 @@ <h2>Example Features<a class="headerlink" href="#example-features" title="Permal
 <p>The following figure shows the sample topology:</p>
 <ul class="simple">
 <li><p>The sample chat bot web application communicates with the chain server.
-The chain server sends inference requests to an NVIDIA AI Foundation Models endpoint.</p></li>
+The chain server sends inference requests to an NVIDIA API Catalog endpoint.</p></li>
 <li><p>Optionally, you can deploy NVIDIA Riva. Riva can use automatic speech recognition to transcribe
 your questions and use text-to-speech to speak the answers aloud.</p></li>
 </ul>
-<p><img alt="Using NVIDIA AI Foundation Models endpoints for inference instead of local components." src="_images/ai-foundations-topology.png" /></p>
+<p><img alt="Using NVIDIA API Catalog endpoints for inference instead of local components." src="_images/ai-foundations-topology.png" /></p>
 </section>
 <section id="prerequisites">
 <h2>Prerequisites<a class="headerlink" href="#prerequisites" title="Permalink to this headline"></a></h2>
@@ -234,32 +234,38 @@ <h2>Get an API Key for the Mixtral 8x7B Instruct API Endpoint<a class="headerlin
 <p>Perform the following steps if you do not already have an API key.
 You can use different model API endpoints with the same API key.</p>
 <ol class="arabic">
-<li><p>Navigate to <a class="reference external" href="https://catalog.ngc.nvidia.com/ai-foundation-models">https://catalog.ngc.nvidia.com/ai-foundation-models</a>.</p></li>
-<li><p>Find the <strong>Mixtral 8x7B Instruct</strong> card and click <strong>Learn More</strong>.</p>
+<li><p>Navigate to <a class="reference external" href="https://build.nvidia.com/explore/discover">https://build.nvidia.com/explore/discover</a>.</p></li>
+<li><p>Find the <strong>Mixtral 8x7B Instruct</strong> card and click the card.</p>
 <p><img alt="Mixtral 8x7B Instruct model card" src="_images/mixtral-8x7b-instruct.png" /></p>
 </li>
-<li><p>Click the <strong>API</strong> button and then click <strong>Generate Key</strong>.</p>
-<p><img alt="API section of the playground tab." src="_images/image8.png" /></p>
+<li><p>Click <strong>Get API Key</strong>.</p>
+<p><img alt="API section of the model page." src="_images/image8.png" /></p>
+</li>
+<li><p>Click <strong>Generate Key</strong>.</p>
+<p><img alt="Generate key window." src="_images/api-catalog-generate-api-key.png" /></p>
+</li>
+<li><p>Click <strong>Copy Key</strong> and then save the API key.
+The key begins with the letters nvapi-.</p>
+<p><img alt="Key Generated window." src="_images/key-generated.png" /></p>
 </li>
-<li><p>Save the generated API key.</p></li>
 </ol>
 </section>
 <section id="build-and-start-the-containers">
 <h2>Build and Start the Containers<a class="headerlink" href="#build-and-start-the-containers" title="Permalink to this headline"></a></h2>
 <ol class="arabic">
 <li><p>In the Generative AI examples repository, export this variable in terminal.</p>
-<p>Add the API for the model endpoint:</p>
+<p>Add the API key for the model endpoint:</p>
 <div class="highlight-text notranslate"><div class="highlight"><pre><span></span>export NVIDIA_API_KEY=&quot;nvapi-&lt;...&gt;&quot;
 </pre></div>
 </div>
 </li>
 <li><p>From the root of the repository, build the containers:</p>
-<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>docker<span class="w"> </span>compose<span class="w"> </span>--env-file<span class="w"> </span>deploy/compose/compose.env<span class="w"> </span>-f<span class="w"> </span>deploy/compose/rag-app-ai-foundation-text-chatbot.yaml<span class="w"> </span>build
+<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>docker<span class="w"> </span>compose<span class="w"> </span>--env-file<span class="w"> </span>deploy/compose/compose.env<span class="w"> </span>-f<span class="w"> </span>deploy/compose/rag-app-api-catalog-text-chatbot.yaml<span class="w"> </span>build
 </pre></div>
 </div>
 </li>
 <li><p>Start the containers:</p>
-<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>docker<span class="w"> </span>compose<span class="w"> </span>--env-file<span class="w"> </span>deploy/compose/compose.env<span class="w"> </span>-f<span class="w"> </span>deploy/compose/rag-app-ai-foundation-text-chatbot.yaml<span class="w"> </span>up<span class="w"> </span>-d
+<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>docker<span class="w"> </span>compose<span class="w"> </span>--env-file<span class="w"> </span>deploy/compose/compose.env<span class="w"> </span>-f<span class="w"> </span>deploy/compose/rag-app-api-catalog-text-chatbot.yaml<span class="w"> </span>up<span class="w"> </span>-d
 </pre></div>
 </div>
 <p><em>Example Output</em></p>
@@ -302,7 +308,7 @@ <h2>Next Steps<a class="headerlink" href="#next-steps" title="Permalink to this
 <li><p>Access the web interface for the chat server.
 Refer to <a class="reference internal" href="using-sample-web-application.html"><span class="doc std std-doc">Using the Sample Chat Web Application</span></a> for information about using the web interface.</p></li>
 <li><p><a class="reference internal" href="vector-database.html"><span class="doc std std-doc">Configuring an Alternative Vector Database</span></a></p></li>
-<li><p>Stop the containers by running <code class="docutils literal notranslate"><span class="pre">docker</span> <span class="pre">compose</span> <span class="pre">-f</span> <span class="pre">deploy/compose/rag-app-ai-foundation-text-chatbot.yaml</span> <span class="pre">down</span></code> and
+<li><p>Stop the containers by running <code class="docutils literal notranslate"><span class="pre">docker</span> <span class="pre">compose</span> <span class="pre">-f</span> <span class="pre">deploy/compose/rag-app-api-catalog-text-chatbot.yaml</span> <span class="pre">down</span></code> and
 <code class="docutils literal notranslate"><span class="pre">docker</span> <span class="pre">compose</span> <span class="pre">-f</span> <span class="pre">deploy/compose/docker-compose-vectordb.yaml</span> <span class="pre">down</span></code>.</p></li>
 </ul>
 </section>

diff --git a/0.5.0/architecture.html b/0.5.0/architecture.html
@@ -26,7 +26,7 @@
     <script src="_static/js/theme.js"></script>
     <link rel="search" title="Search" href="search.html" />
     <link rel="next" title="NeMo Framework Inference Server" href="llm-inference-server.html" />
-    <link rel="prev" title="Multimodal Models from NVIDIA AI Endpoints with LangChain Agent" href="notebooks/09_Agent_use_tools_leveraging_NVIDIA_AI_endpoints.html" />
+    <link rel="prev" title="Build a RAG chain by generating embeddings for NVIDIA Triton documentation" href="notebooks/10_RAG_for_HTML_docs_with_Langchain_NVIDIA_AI_Endpoints.html" />
 
 
 
@@ -54,8 +54,7 @@
 <ul>
 <li class="toctree-l1"><a class="reference internal" href="index.html">About the RAG Pipelines</a></li>
 <li class="toctree-l1"><a class="reference internal" href="support-matrix.html">Support Matrix</a></li>
-<li class="toctree-l1"><a class="reference internal" href="release-notes.html">Release Notes</a></li>
-<li class="toctree-l1"><a class="reference internal" href="ai-foundation-models.html">AI Foundation Models</a></li>
+<li class="toctree-l1"><a class="reference internal" href="api-catalog.html">API Catalog Models</a></li>
 <li class="toctree-l1"><a class="reference internal" href="local-gpu.html">Local GPUs</a></li>
 <li class="toctree-l1"><a class="reference internal" href="multi-gpu.html">Multi-GPU for Inference</a></li>
 <li class="toctree-l1"><a class="reference internal" href="query-decomposition.html">Query Decomposition</a></li>
@@ -84,6 +83,7 @@
 <li class="toctree-l1"><a class="reference internal" href="notebooks/08_Option%281%29_llama_index_with_NVIDIA_AI_endpoint.html">NVIDIA AI Endpoints, LlamaIndex, and LangChain</a></li>
 <li class="toctree-l1"><a class="reference internal" href="notebooks/08_Option%282%29_llama_index_with_HF_local_LLM.html">HF Checkpoints with LlamaIndex and LangChain</a></li>
 <li class="toctree-l1"><a class="reference internal" href="notebooks/09_Agent_use_tools_leveraging_NVIDIA_AI_endpoints.html">Multimodal Models from NVIDIA AI Endpoints with LangChain Agent</a></li>
+<li class="toctree-l1"><a class="reference internal" href="notebooks/10_RAG_for_HTML_docs_with_Langchain_NVIDIA_AI_Endpoints.html">Build a RAG chain by generating embeddings for NVIDIA Triton documentation</a></li>
 </ul>
 <p class="caption" role="heading"><span class="caption-text">Software Components</span></p>
 <ul class="current">
@@ -254,7 +254,7 @@ <h2>Vector DB<a class="headerlink" href="#vector-db" title="Permalink to this he
            </div>
           </div>
           <footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
-        <a href="notebooks/09_Agent_use_tools_leveraging_NVIDIA_AI_endpoints.html" class="btn btn-neutral float-left" title="Multimodal Models from NVIDIA AI Endpoints with LangChain Agent" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
+        <a href="notebooks/10_RAG_for_HTML_docs_with_Langchain_NVIDIA_AI_Endpoints.html" class="btn btn-neutral float-left" title="Build a RAG chain by generating embeddings for NVIDIA Triton documentation" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
         <a href="llm-inference-server.html" class="btn btn-neutral float-right" title="NeMo Framework Inference Server" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
     </div>