Skip to content

Commit

Permalink
Pushing changes to GitHub Pages.
Browse files Browse the repository at this point in the history
  • Loading branch information
docs-build committed Mar 20, 2024
1 parent 6bafbeb commit 5e0daa4
Show file tree
Hide file tree
Showing 99 changed files with 1,060 additions and 622 deletions.
Binary file modified 0.5.0/_images/ai-foundations-topology.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added 0.5.0/_images/api-catalog-generate-api-key.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added 0.5.0/_images/catalog-and-vector-db.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified 0.5.0/_images/evaluation-topology.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified 0.5.0/_images/image8.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added 0.5.0/_images/key-generated.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified 0.5.0/_images/llama-2-70b-card.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified 0.5.0/_images/llama-2-generate-key.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified 0.5.0/_images/local-gpus-topology.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added 0.5.0/_images/media-device-access-error.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified 0.5.0/_images/mixtral-8x7b-instruct.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
50 changes: 32 additions & 18 deletions 0.5.0/_static/openapi_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,7 @@
"properties": {
"id": {
"type": "string",
"maxLength": 100000,
"title": "Id",
"default": ""
},
Expand All @@ -202,17 +203,9 @@
"$ref": "#/components/schemas/ChainResponseChoices"
},
"type": "array",
"maxItems": 256,
"title": "Choices",
"default": [
{
"index": 0,
"message": {
"content": "",
"role": "assistant"
},
"finish_reason": ""
}
]
"default": []
}
},
"type": "object",
Expand All @@ -223,6 +216,8 @@
"properties": {
"index": {
"type": "integer",
"maximum": 256,
"minimum": 0,
"title": "Index",
"default": 0
},
Expand All @@ -232,13 +227,15 @@
"$ref": "#/components/schemas/Message"
}
],
"title": "Message",
"default": {
"role": "assistant",
"content": ""
}
},
"finish_reason": {
"type": "string",
"maxLength": 4096,
"title": "Finish Reason",
"default": ""
}
Expand All @@ -251,11 +248,13 @@
"properties": {
"content": {
"type": "string",
"maxLength": 131072,
"title": "Content",
"description": "The content of the document chunk."
},
"filename": {
"type": "string",
"maxLength": 4096,
"title": "Filename",
"description": "The name of the file the chunk belongs to."
},
Expand All @@ -278,11 +277,14 @@
"properties": {
"query": {
"type": "string",
"maxLength": 131072,
"title": "Query",
"description": "The content or keywords to search for within documents."
},
"top_k": {
"type": "integer",
"maximum": 256,
"minimum": 0,
"title": "Top K",
"description": "The maximum number of documents to return in the response.",
"default": 4
Expand All @@ -302,6 +304,7 @@
"$ref": "#/components/schemas/DocumentChunk"
},
"type": "array",
"maxItems": 256,
"title": "Chunks",
"description": "List of document chunks."
}
Expand All @@ -317,9 +320,11 @@
"properties": {
"documents": {
"items": {
"type": "string"
"type": "string",
"maxLength": 131072
},
"type": "array",
"maxItems": 1000000,
"title": "Documents",
"description": "List of filenames."
}
Expand Down Expand Up @@ -348,20 +353,20 @@
"properties": {
"role": {
"type": "string",
"maxLength": 256,
"title": "Role",
"description": "Role for a message AI, User and System"
"description": "Role for a message AI, User and System",
"default": "user"
},
"content": {
"type": "string",
"maxLength": 131072,
"title": "Content",
"description": "The input query/prompt to the pipeline."
"description": "The input query/prompt to the pipeline.",
"default": "I am going to Paris, what should I see?"
}
},
"type": "object",
"required": [
"role",
"content"
],
"title": "Message",
"description": "Definition of the Chat Message type."
},
Expand All @@ -372,6 +377,7 @@
"$ref": "#/components/schemas/Message"
},
"type": "array",
"maxItems": 50000,
"title": "Messages",
"description": "A list of messages comprising the conversation so far. The roles of the messages must be alternating between user and assistant. The last input message should have role user. A message with the system role is optional, and must be the very first message if it is present."
},
Expand All @@ -382,27 +388,35 @@
},
"temperature": {
"type": "number",
"maximum": 1,
"minimum": 0.1,
"title": "Temperature",
"description": "The sampling temperature to use for text generation. The higher the temperature value is, the less deterministic the output text will be. It is not recommended to modify both temperature and top_p in the same call.",
"default": 0.2
},
"top_p": {
"type": "number",
"maximum": 1,
"minimum": 0.1,
"title": "Top P",
"description": "The top-p sampling mass used for text generation. The top-p value determines the probability mass that is sampled at sampling time. For example, if top_p = 0.2, only the most likely tokens (summing to 0.2 cumulative probability) will be sampled. It is not recommended to modify both temperature and top_p in the same call.",
"default": 0.7
},
"max_tokens": {
"type": "integer",
"maximum": 1024,
"minimum": 0,
"title": "Max Tokens",
"description": "The maximum number of tokens to generate in any given call. Note that the model is not aware of this value, and generation will simply stop at the number of tokens specified.",
"default": 1024
},
"stop": {
"items": {
"type": "string"
"type": "string",
"maxLength": 256
},
"type": "array",
"maxItems": 256,
"title": "Stop",
"description": "A string or a list of strings where the API will stop generating further tokens. The returned text will not contain the stop sequence."
}
Expand Down
46 changes: 26 additions & 20 deletions latest/ai-foundation-models.html → 0.5.0/api-catalog.html
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />

<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Using the NVIDIA AI Foundation Models &mdash; NVIDIA Generative AI Examples 0.5.0 documentation</title>
<title>Using the NVIDIA API Catalog &mdash; NVIDIA Generative AI Examples 0.5.0 documentation</title>
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" type="text/css" />
Expand Down Expand Up @@ -54,7 +54,7 @@
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="index.html">About the RAG Pipelines</a></li>
<li class="toctree-l1"><a class="reference internal" href="support-matrix.html">Support Matrix</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">AI Foundation Models</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">API Catalog Models</a></li>
<li class="toctree-l1"><a class="reference internal" href="local-gpu.html">Local GPUs</a></li>
<li class="toctree-l1"><a class="reference internal" href="multi-gpu.html">Multi-GPU for Inference</a></li>
<li class="toctree-l1"><a class="reference internal" href="query-decomposition.html">Query Decomposition</a></li>
Expand Down Expand Up @@ -109,7 +109,7 @@
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="index.html" class="icon icon-home"></a> &raquo;</li>
<li>Using the NVIDIA AI Foundation Models</li>
<li>Using the NVIDIA API Catalog</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
Expand All @@ -134,8 +134,8 @@
See the License for the specific language governing permissions and
limitations under the License.
-->
<section id="using-the-nvidia-ai-foundation-models">
<h1>Using the NVIDIA AI Foundation Models<a class="headerlink" href="#using-the-nvidia-ai-foundation-models" title="Permalink to this headline"></a></h1>
<section id="using-the-nvidia-api-catalog">
<h1>Using the NVIDIA API Catalog<a class="headerlink" href="#using-the-nvidia-api-catalog" title="Permalink to this headline"></a></h1>
<div class="contents local topic" id="contents">
<ul class="simple">
<li><p><a class="reference internal" href="#example-features" id="id1">Example Features</a></p></li>
Expand All @@ -147,7 +147,7 @@ <h1>Using the NVIDIA AI Foundation Models<a class="headerlink" href="#using-the-
</div>
<section id="example-features">
<h2>Example Features<a class="headerlink" href="#example-features" title="Permalink to this headline"></a></h2>
<p>This example deploys a developer RAG pipeline for chat Q&amp;A and serves inferencing from an NVIDIA AI Foundation Models endpoint
<p>This example deploys a developer RAG pipeline for chat Q&amp;A and serves inferencing from an NVIDIA API Catalog endpoint
instead of NVIDIA Triton Inference Server, a local Llama 2 model, or local GPUs.</p>
<p>Developers get free credits for 10K requests to any of the available models.</p>
<table class="docutils align-default">
Expand All @@ -169,19 +169,19 @@ <h2>Example Features<a class="headerlink" href="#example-features" title="Permal
<th class="head"><p>Description</p></th>
<th class="head"><p>Multi-GPU</p></th>
<th class="head"><p>TRT-LLM</p></th>
<th class="head"><p>NVIDIA AI Foundation</p></th>
<th class="head"><p>Model Location</p></th>
<th class="head"><p>Triton</p></th>
<th class="head"><p>Vector Database</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p>mixtral_8x7b</p></td>
<tr class="row-even"><td><p>ai-mixtral-8x7b-instruct</p></td>
<td><p>nvolveqa_40k</p></td>
<td><p>Langchain</p></td>
<td><p>QA chatbot</p></td>
<td><p>NO</p></td>
<td><p>NO</p></td>
<td><p>YES</p></td>
<td><p>API Catalog</p></td>
<td><p>NO</p></td>
<td><p>Milvus</p></td>
</tr>
Expand All @@ -190,11 +190,11 @@ <h2>Example Features<a class="headerlink" href="#example-features" title="Permal
<p>The following figure shows the sample topology:</p>
<ul class="simple">
<li><p>The sample chat bot web application communicates with the chain server.
The chain server sends inference requests to an NVIDIA AI Foundation Models endpoint.</p></li>
The chain server sends inference requests to an NVIDIA API Catalog endpoint.</p></li>
<li><p>Optionally, you can deploy NVIDIA Riva. Riva can use automatic speech recognition to transcribe
your questions and use text-to-speech to speak the answers aloud.</p></li>
</ul>
<p><img alt="Using NVIDIA AI Foundation Models endpoints for inference instead of local components." src="_images/ai-foundations-topology.png" /></p>
<p><img alt="Using NVIDIA API Catalog endpoints for inference instead of local components." src="_images/ai-foundations-topology.png" /></p>
</section>
<section id="prerequisites">
<h2>Prerequisites<a class="headerlink" href="#prerequisites" title="Permalink to this headline"></a></h2>
Expand Down Expand Up @@ -234,32 +234,38 @@ <h2>Get an API Key for the Mixtral 8x7B Instruct API Endpoint<a class="headerlin
<p>Perform the following steps if you do not already have an API key.
You can use different model API endpoints with the same API key.</p>
<ol class="arabic">
<li><p>Navigate to <a class="reference external" href="https://catalog.ngc.nvidia.com/ai-foundation-models">https://catalog.ngc.nvidia.com/ai-foundation-models</a>.</p></li>
<li><p>Find the <strong>Mixtral 8x7B Instruct</strong> card and click <strong>Learn More</strong>.</p>
<li><p>Navigate to <a class="reference external" href="https://build.nvidia.com/explore/discover">https://build.nvidia.com/explore/discover</a>.</p></li>
<li><p>Find the <strong>Mixtral 8x7B Instruct</strong> card and click the card.</p>
<p><img alt="Mixtral 8x7B Instruct model card" src="_images/mixtral-8x7b-instruct.png" /></p>
</li>
<li><p>Click the <strong>API</strong> button and then click <strong>Generate Key</strong>.</p>
<p><img alt="API section of the playground tab." src="_images/image8.png" /></p>
<li><p>Click <strong>Get API Key</strong>.</p>
<p><img alt="API section of the model page." src="_images/image8.png" /></p>
</li>
<li><p>Click <strong>Generate Key</strong>.</p>
<p><img alt="Generate key window." src="_images/api-catalog-generate-api-key.png" /></p>
</li>
<li><p>Click <strong>Copy Key</strong> and then save the API key.
The key begins with the letters nvapi-.</p>
<p><img alt="Key Generated window." src="_images/key-generated.png" /></p>
</li>
<li><p>Save the generated API key.</p></li>
</ol>
</section>
<section id="build-and-start-the-containers">
<h2>Build and Start the Containers<a class="headerlink" href="#build-and-start-the-containers" title="Permalink to this headline"></a></h2>
<ol class="arabic">
<li><p>In the Generative AI examples repository, export this variable in a terminal.</p>
<p>Add the API for the model endpoint:</p>
<p>Add the API key for the model endpoint:</p>
<div class="highlight-text notranslate"><div class="highlight"><pre><span></span>export NVIDIA_API_KEY=&quot;nvapi-&lt;...&gt;&quot;
</pre></div>
</div>
</li>
<li><p>From the root of the repository, build the containers:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>docker<span class="w"> </span>compose<span class="w"> </span>--env-file<span class="w"> </span>deploy/compose/compose.env<span class="w"> </span>-f<span class="w"> </span>deploy/compose/rag-app-ai-foundation-text-chatbot.yaml<span class="w"> </span>build
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>docker<span class="w"> </span>compose<span class="w"> </span>--env-file<span class="w"> </span>deploy/compose/compose.env<span class="w"> </span>-f<span class="w"> </span>deploy/compose/rag-app-api-catalog-text-chatbot.yaml<span class="w"> </span>build
</pre></div>
</div>
</li>
<li><p>Start the containers:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>docker<span class="w"> </span>compose<span class="w"> </span>--env-file<span class="w"> </span>deploy/compose/compose.env<span class="w"> </span>-f<span class="w"> </span>deploy/compose/rag-app-ai-foundation-text-chatbot.yaml<span class="w"> </span>up<span class="w"> </span>-d
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>docker<span class="w"> </span>compose<span class="w"> </span>--env-file<span class="w"> </span>deploy/compose/compose.env<span class="w"> </span>-f<span class="w"> </span>deploy/compose/rag-app-api-catalog-text-chatbot.yaml<span class="w"> </span>up<span class="w"> </span>-d
</pre></div>
</div>
<p><em>Example Output</em></p>
Expand Down Expand Up @@ -302,7 +308,7 @@ <h2>Next Steps<a class="headerlink" href="#next-steps" title="Permalink to this
<li><p>Access the web interface for the chat server.
Refer to <a class="reference internal" href="using-sample-web-application.html"><span class="doc std std-doc">Using the Sample Chat Web Application</span></a> for information about using the web interface.</p></li>
<li><p><a class="reference internal" href="vector-database.html"><span class="doc std std-doc">Configuring an Alternative Vector Database</span></a></p></li>
<li><p>Stop the containers by running <code class="docutils literal notranslate"><span class="pre">docker</span> <span class="pre">compose</span> <span class="pre">-f</span> <span class="pre">deploy/compose/rag-app-ai-foundation-text-chatbot.yaml</span> <span class="pre">down</span></code> and
<li><p>Stop the containers by running <code class="docutils literal notranslate"><span class="pre">docker</span> <span class="pre">compose</span> <span class="pre">-f</span> <span class="pre">deploy/compose/rag-app-api-catalog-text-chatbot.yaml</span> <span class="pre">down</span></code> and
<code class="docutils literal notranslate"><span class="pre">docker</span> <span class="pre">compose</span> <span class="pre">-f</span> <span class="pre">deploy/compose/docker-compose-vectordb.yaml</span> <span class="pre">down</span></code>.</p></li>
</ul>
</section>
Expand Down
8 changes: 4 additions & 4 deletions 0.5.0/architecture.html
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
<script src="_static/js/theme.js"></script>
<link rel="search" title="Search" href="search.html" />
<link rel="next" title="NeMo Framework Inference Server" href="llm-inference-server.html" />
<link rel="prev" title="Multimodal Models from NVIDIA AI Endpoints with LangChain Agent" href="notebooks/09_Agent_use_tools_leveraging_NVIDIA_AI_endpoints.html" />
<link rel="prev" title="Build a RAG chain by generating embeddings for NVIDIA Triton documentation" href="notebooks/10_RAG_for_HTML_docs_with_Langchain_NVIDIA_AI_Endpoints.html" />



Expand Down Expand Up @@ -54,8 +54,7 @@
<ul>
<li class="toctree-l1"><a class="reference internal" href="index.html">About the RAG Pipelines</a></li>
<li class="toctree-l1"><a class="reference internal" href="support-matrix.html">Support Matrix</a></li>
<li class="toctree-l1"><a class="reference internal" href="release-notes.html">Release Notes</a></li>
<li class="toctree-l1"><a class="reference internal" href="ai-foundation-models.html">AI Foundation Models</a></li>
<li class="toctree-l1"><a class="reference internal" href="api-catalog.html">API Catalog Models</a></li>
<li class="toctree-l1"><a class="reference internal" href="local-gpu.html">Local GPUs</a></li>
<li class="toctree-l1"><a class="reference internal" href="multi-gpu.html">Multi-GPU for Inference</a></li>
<li class="toctree-l1"><a class="reference internal" href="query-decomposition.html">Query Decomposition</a></li>
Expand Down Expand Up @@ -84,6 +83,7 @@
<li class="toctree-l1"><a class="reference internal" href="notebooks/08_Option%281%29_llama_index_with_NVIDIA_AI_endpoint.html">NVIDIA AI Endpoints, LlamaIndex, and LangChain</a></li>
<li class="toctree-l1"><a class="reference internal" href="notebooks/08_Option%282%29_llama_index_with_HF_local_LLM.html">HF Checkpoints with LlamaIndex and LangChain</a></li>
<li class="toctree-l1"><a class="reference internal" href="notebooks/09_Agent_use_tools_leveraging_NVIDIA_AI_endpoints.html">Multimodal Models from NVIDIA AI Endpoints with LangChain Agent</a></li>
<li class="toctree-l1"><a class="reference internal" href="notebooks/10_RAG_for_HTML_docs_with_Langchain_NVIDIA_AI_Endpoints.html">Build a RAG chain by generating embeddings for NVIDIA Triton documentation</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Software Components</span></p>
<ul class="current">
Expand Down Expand Up @@ -254,7 +254,7 @@ <h2>Vector DB<a class="headerlink" href="#vector-db" title="Permalink to this he
</div>
</div>
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
<a href="notebooks/09_Agent_use_tools_leveraging_NVIDIA_AI_endpoints.html" class="btn btn-neutral float-left" title="Multimodal Models from NVIDIA AI Endpoints with LangChain Agent" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
<a href="notebooks/10_RAG_for_HTML_docs_with_Langchain_NVIDIA_AI_Endpoints.html" class="btn btn-neutral float-left" title="Build a RAG chain by generating embeddings for NVIDIA Triton documentation" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
<a href="llm-inference-server.html" class="btn btn-neutral float-right" title="NeMo Framework Inference Server" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
</div>

Expand Down
Loading

0 comments on commit 5e0daa4

Please sign in to comment.