
Commit 87348ef
Merge branch 'main' into xren/cp_debug
xrennvidia committed Aug 12, 2024
2 parents c7dff10 + d6cfdc0 commit 87348ef
Showing 175 changed files with 7,321 additions and 2,543 deletions.
31 changes: 11 additions & 20 deletions .github/workflows/_test_template.yml
@@ -34,30 +34,18 @@ on:
description: Last 2000 characters of the test step's log
value: ${{ jobs.main.outputs.log }}
jobs:
runner-auto-clean:
runs-on: ${{ inputs.RUNNER }}
steps:
- name: Docker system cleanup
run: |
docker system prune -a --filter "until=48h" --force
main:
runs-on: ${{ inputs.RUNNER }}
outputs:
conclusion: ${{ steps.main.conclusion }}
log: ${{ steps.main.outputs.log }}
container:
image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
options:
--device=/dev/nvidia0
--gpus all
--shm-size=8g
--env TRANSFORMERS_OFFLINE=0
--env HYDRA_FULL_ERROR=1
--volume /mnt/datadrive/TestData:/home/TestData
permissions:
actions: write # Required for cancelling workflows
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Docker system cleanup
run: |
docker system prune -a --filter "until=48h" --force
- id: main
name: Run main script
timeout-minutes: ${{ inputs.TIMEOUT }}
@@ -66,7 +54,7 @@ jobs:
(
set -e
${{ inputs.SCRIPT }}
docker run --rm --device=/dev/nvidia0 --gpus all --shm-size=8g --env TRANSFORMERS_OFFLINE=0 --env HYDRA_FULL_ERROR=1 --volume /mnt/datadrive/TestData:/home/TestData nemoci.azurecr.io/nemo_container_${{ github.run_id }} bash -c '${{ inputs.SCRIPT }}'
) 2> >(tee err.log)
EXIT_CODE=$?
@@ -77,6 +65,9 @@ jobs:
- uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
if: failure() && inputs.IS_OPTIONAL == false

- name: after_script
if: always() && inputs.AFTER_SCRIPT != ':'
run: ${{ inputs.AFTER_SCRIPT }}
run: |
docker run --rm --device=/dev/nvidia0 --gpus all --shm-size=8g --env TRANSFORMERS_OFFLINE=0 --env HYDRA_FULL_ERROR=1 --volume /mnt/datadrive/TestData:/home/TestData nemoci.azurecr.io/nemo_container_${{ github.run_id }} bash -c '${{ inputs.AFTER_SCRIPT }}'
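
The net effect of this template change: the separate runner-auto-clean job and the job-level container: block are removed, and the main job instead checks out the repository, prunes Docker on the self-hosted runner, and executes both SCRIPT and AFTER_SCRIPT inside the CI image via explicit docker run calls. A minimal sketch of the resulting pattern is below; the run id and test command are hypothetical stand-ins for the workflow expressions ${{ github.run_id }} and ${{ inputs.SCRIPT }}.

    # Minimal sketch of the new execution pattern (values are hypothetical stand-ins).
    IMAGE=nemoci.azurecr.io/nemo_container_12345          # hypothetical run id
    SCRIPT='python -m pytest tests/collections/asr -x'    # hypothetical test command

    # Reclaim disk space before pulling the CI image, as in the new cleanup step.
    docker system prune -a --filter "until=48h" --force

    # Run the test command inside the CI image with GPUs, shared memory, and test data mounted.
    docker run --rm --device=/dev/nvidia0 --gpus all --shm-size=8g \
      --env TRANSFORMERS_OFFLINE=0 --env HYDRA_FULL_ERROR=1 \
      --volume /mnt/datadrive/TestData:/home/TestData \
      "$IMAGE" bash -c "$SCRIPT"
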
1,505 changes: 727 additions & 778 deletions .github/workflows/cicd-main.yml

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions .github/workflows/import-test.yml
@@ -12,7 +12,7 @@ jobs:
test-asr-imports:
runs-on: ubuntu-latest
container:
image: pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime
image: pytorch/pytorch:2.4.0-cuda11.8-cudnn9-runtime
steps:
- name: Checkout repo
uses: actions/checkout@v2
@@ -43,7 +43,7 @@ jobs:
test-tts-imports:
runs-on: ubuntu-latest
container:
image: pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime
image: pytorch/pytorch:2.4.0-cuda11.8-cudnn9-runtime
steps:
- name: Checkout repo
uses: actions/checkout@v2
@@ -70,4 +70,4 @@ jobs:
# Run import checks
python tests/core_ptl/check_imports.py --domain "tts"
# Uninstall NeMo
pip uninstall -y nemo_toolkit
pip uninstall -y nemo_toolkit
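
The import-test workflow changes only its base image, from pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime to pytorch/pytorch:2.4.0-cuda11.8-cudnn9-runtime; the check itself (install NeMo, run check_imports.py for the domain, uninstall) is untouched. A rough way to reproduce the TTS check locally under the new image is sketched below; the clone and the ".[tts]" extras install are assumptions, since the workflow's install commands fall outside the hunks shown.

    # Rough local reproduction of the TTS import check (clone and extras name are assumed).
    docker run --rm pytorch/pytorch:2.4.0-cuda11.8-cudnn9-runtime bash -c '
      apt-get update && apt-get install -y --no-install-recommends git &&
      git clone --depth 1 https://github.com/NVIDIA/NeMo.git /workspace/NeMo &&
      cd /workspace/NeMo &&
      pip install ".[tts]" &&
      python tests/core_ptl/check_imports.py --domain "tts" &&
      pip uninstall -y nemo_toolkit'
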
4 changes: 2 additions & 2 deletions Dockerfile.ci
@@ -33,8 +33,8 @@ WORKDIR /workspace

# Install NeMo requirements
ARG TE_TAG=7d576ed25266a17a7b651f2c12e8498f67e0baea
ARG MODELOPT_VERSION=0.13.0
ARG MCORE_TAG=2bbe55be32e2d478c4b2ce575af1cccb8fc3d9b9
ARG MODELOPT_VERSION=0.15.0
ARG MCORE_TAG=2fd6e2b74efca73a1f2d27b89bb5419384b4d3bf
ARG APEX_TAG=810ffae374a2b9cb4b5c5e28eaeca7d7998fca0c
RUN \
--mount=type=bind,source=requirements,target=requirements \
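
Dockerfile.ci bumps two dependency pins: MODELOPT_VERSION moves from 0.13.0 to 0.15.0 and MCORE_TAG moves to a newer Megatron Core commit, while the Transformer Engine and Apex pins stay the same. The RUN block that consumes these ARGs is truncated above; a purely hypothetical sketch of how such pins are typically applied looks like this (the package spec and install lines are assumptions, not the Dockerfile's actual commands):

    # Hypothetical illustration only; the real install commands are not shown in the hunk.
    MODELOPT_VERSION=0.15.0
    MCORE_TAG=2fd6e2b74efca73a1f2d27b89bb5419384b4d3bf

    pip install "nvidia-modelopt[torch]~=${MODELOPT_VERSION}"   # assumed package spec
    pip install "git+https://github.com/NVIDIA/Megatron-LM.git@${MCORE_TAG}"
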
116 changes: 76 additions & 40 deletions README.md
@@ -10,10 +10,38 @@
# **NVIDIA NeMo Framework**

## Latest News

<!-- markdownlint-disable -->
<details open>
<summary><b>Large Language Models and Multimodal</b></summary>
<summary><b>Large Language Models and Multimodal Models</b></summary>
<details>
<summary>
<a href="https://docs.nvidia.com/nemo-framework/user-guide/latest/llms/llama/index.html#new-llama-3-1-support for more information/">
New Llama 3.1 Support
</a> (2024-07-23)
</summary>
The NeMo Framework now supports training and customizing the Llama 3.1 collection of LLMs from Meta.
<br><br>
</details>
<details>
<summary>
<a href="https://aws.amazon.com/blogs/machine-learning/accelerate-your-generative-ai-distributed-training-workloads-with-the-nvidia-nemo-framework-on-amazon-eks/">
Accelerate your Generative AI Distributed Training Workloads with the NVIDIA NeMo Framework on Amazon EKS
</a> (2024-07-16)
</summary>
NVIDIA NeMo Framework now runs distributed training workloads on an Amazon Elastic Kubernetes Service (Amazon EKS) cluster. For step-by-step instructions on creating an EKS cluster and running distributed training workloads with NeMo, see the GitHub repository <a href="https://github.com/aws-samples/awsome-distributed-training/tree/main/3.test_cases/2.nemo-launcher/EKS/"> here.</a>
<br><br>
</details>
<details>
<summary>
<a href="https://developer.nvidia.com/blog/nvidia-nemo-accelerates-llm-innovation-with-hybrid-state-space-model-support/">
NVIDIA NeMo Accelerates LLM Innovation with Hybrid State Space Model Support
</a> (2024/06/17)
</summary>
NVIDIA NeMo and Megatron Core now support pre-training and fine-tuning of state space models (SSMs). NeMo also supports training models based on the Griffin architecture as described by Google DeepMind.
<br><br>
</details>
<details>
<summary>
<a href="https://huggingface.co/models?sort=trending&search=nvidia%2Fnemotron-4-340B">
NVIDIA releases 340B base, instruct, and reward models pretrained on a total of 9T tokens.
@@ -46,45 +74,6 @@
The walkthrough includes detailed instructions on how to set up a Google Cloud Project and pre-train a GPT model using the NeMo Framework.
<br><br>
</details>
<details>
<summary>
<a href="https://blogs.nvidia.com/blog/bria-builds-responsible-generative-ai-using-nemo-picasso/">
Bria Builds Responsible Generative AI for Enterprises Using NVIDIA NeMo, Picasso
</a> (2024/03/06)
</summary>
Bria, a Tel Aviv startup at the forefront of visual generative AI for enterprises, now leverages the NVIDIA NeMo Framework.
The Bria.ai platform uses reference implementations from the NeMo Multimodal collection, trained on NVIDIA Tensor Core GPUs, to enable high-throughput and low-latency image generation.
Bria has also adopted NVIDIA Picasso, a foundry for visual generative AI models, to run inference.
<br><br>
</details>
<details>
<summary>
<a href="https://developer.nvidia.com/blog/new-nvidia-nemo-framework-features-and-nvidia-h200-supercharge-llm-training-performance-and-versatility/">
New NVIDIA NeMo Framework Features and NVIDIA H200
</a> (2023/12/06)
</summary>
NVIDIA NeMo Framework now includes several optimizations and enhancements,
including:
1) Fully Sharded Data Parallelism (FSDP) to improve the efficiency of training large-scale AI models,
2) Mixture of Experts (MoE)-based LLM architectures with expert parallelism for efficient LLM training at scale,
3) Reinforcement Learning from Human Feedback (RLHF) with TensorRT-LLM for inference stage acceleration, and
4) up to 4.2x speedups for Llama 2 pre-training on NVIDIA H200 Tensor Core GPUs.
<br><br>
<a href="https://developer.nvidia.com/blog/new-nvidia-nemo-framework-features-and-nvidia-h200-supercharge-llm-training-performance-and-versatility">
<img src="https://github.com/sbhavani/TransformerEngine/blob/main/docs/examples/H200-NeMo-performance.png" alt="H200-NeMo-performance" style="width: 600px;"></a>
<br><br>
</details>
<details>
<summary>
<a href="https://blogs.nvidia.com/blog/nemo-amazon-titan/">
NVIDIA now powers training for Amazon Titan Foundation models
</a> (2023/11/28)
</summary>
NVIDIA NeMo Framework now empowers the Amazon Titan foundation models (FM) with efficient training of large language models (LLMs).
The Titan FMs form the basis of Amazon’s generative AI service, Amazon Bedrock.
The NeMo Framework provides a versatile framework for building, customizing, and running LLMs.
<br><br>
</details>
</details>

<details open>
@@ -604,6 +593,53 @@ to the `gh-pages-src` branch of this repository. For detailed
information, please consult the README located at the [gh-pages-src
branch](https://github.com/NVIDIA/NeMo/tree/gh-pages-src#readme).

## Blogs

<!-- markdownlint-disable -->
<details open>
<summary><b>Large Language Models and Multimodal Models</b></summary>
<details>
<summary>
<a href="https://blogs.nvidia.com/blog/bria-builds-responsible-generative-ai-using-nemo-picasso/">
Bria Builds Responsible Generative AI for Enterprises Using NVIDIA NeMo, Picasso
</a> (2024/03/06)
</summary>
Bria, a Tel Aviv startup at the forefront of visual generative AI for enterprises, now leverages the NVIDIA NeMo Framework.
The Bria.ai platform uses reference implementations from the NeMo Multimodal collection, trained on NVIDIA Tensor Core GPUs, to enable high-throughput and low-latency image generation.
Bria has also adopted NVIDIA Picasso, a foundry for visual generative AI models, to run inference.
<br><br>
</details>
<details>
<summary>
<a href="https://developer.nvidia.com/blog/new-nvidia-nemo-framework-features-and-nvidia-h200-supercharge-llm-training-performance-and-versatility/">
New NVIDIA NeMo Framework Features and NVIDIA H200
</a> (2023/12/06)
</summary>
NVIDIA NeMo Framework now includes several optimizations and enhancements,
including:
1) Fully Sharded Data Parallelism (FSDP) to improve the efficiency of training large-scale AI models,
2) Mixture of Experts (MoE)-based LLM architectures with expert parallelism for efficient LLM training at scale,
3) Reinforcement Learning from Human Feedback (RLHF) with TensorRT-LLM for inference stage acceleration, and
4) up to 4.2x speedups for Llama 2 pre-training on NVIDIA H200 Tensor Core GPUs.
<br><br>
<a href="https://developer.nvidia.com/blog/new-nvidia-nemo-framework-features-and-nvidia-h200-supercharge-llm-training-performance-and-versatility">
<img src="https://github.com/sbhavani/TransformerEngine/blob/main/docs/examples/H200-NeMo-performance.png" alt="H200-NeMo-performance" style="width: 600px;"></a>
<br><br>
</details>
<details>
<summary>
<a href="https://blogs.nvidia.com/blog/nemo-amazon-titan/">
NVIDIA now powers training for Amazon Titan Foundation models
</a> (2023/11/28)
</summary>
NVIDIA NeMo Framework now empowers the Amazon Titan foundation models (FM) with efficient training of large language models (LLMs).
The Titan FMs form the basis of Amazon’s generative AI service, Amazon Bedrock.
The NeMo Framework provides a versatile framework for building, customizing, and running LLMs.
<br><br>
</details>
</details>
<!-- markdownlint-enable -->

## Licenses

- [NeMo GitHub Apache 2.0
4 changes: 2 additions & 2 deletions docs/source/asr/speech_intent_slot/api.rst
@@ -15,10 +15,10 @@ Mixins
.. autoclass:: nemo.collections.asr.parts.mixins.ASRModuleMixin
:show-inheritance:
:members:
:no-index:
:noindex:

.. autoclass:: nemo.collections.asr.parts.mixins.ASRBPEMixin
:show-inheritance:
:members:
:no-index:
:noindex:
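
Note: this edit, and the identical ones in the documentation files below, swap the newer :no-index: option for the long-standing :noindex: spelling on Sphinx autoclass directives. :noindex: is recognized by every Sphinx release, whereas :no-index: was only introduced in Sphinx 7.2, so the older form presumably avoids unknown-option warnings with the docs toolchain pinned by the repository.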

4 changes: 2 additions & 2 deletions docs/source/asr/ssl/api.rst
@@ -15,12 +15,12 @@ Mixins
.. autoclass:: nemo.collections.asr.parts.mixins.mixins.ASRModuleMixin
:show-inheritance:
:members:
:no-index:
:noindex:

.. autoclass:: nemo.core.classes.mixins.access_mixins.AccessMixin
:show-inheritance:
:members:
:no-index:
:noindex:



14 changes: 7 additions & 7 deletions docs/source/core/adapters/api.rst
@@ -9,7 +9,7 @@ Core
:members:
:member-order: bysource
:undoc-members: adapter_module_names
:no-index:
:noindex:

-----

@@ -18,7 +18,7 @@ Core
:members:
:member-order: bysource
:undoc-members: adapter_module_names
:no-index:
:noindex:

-----

@@ -30,15 +30,15 @@ Adapter Networks
:show-inheritance:
:members:
:member-order: bysource
:no-index:
:noindex:

-----

.. autoclass:: nemo.collections.common.parts.adapter_modules.LinearAdapter
:show-inheritance:
:members:
:member-order: bysource
:no-index:
:noindex:

-----

@@ -51,7 +51,7 @@ Adapter Strategies
:members:
:member-order: bysource
:undoc-members: adapter_module_names
:no-index:
:noindex:

-----

@@ -60,7 +60,7 @@ Adapter Strategies
:members:
:member-order: bysource
:undoc-members: adapter_module_names
:no-index:
:noindex:

-----

@@ -69,4 +69,4 @@ Adapter Strategies
:members:
:member-order: bysource
:undoc-members: adapter_module_names
:no-index:
:noindex:
8 changes: 4 additions & 4 deletions docs/source/core/adapters/components.rst
@@ -28,15 +28,15 @@ Adapter modules represent the functional form of the adapter. We discuss an exam
:show-inheritance:
:members:
:member-order: bysource
:no-index:
:noindex:

-----

.. autoclass:: nemo.collections.common.parts.adapter_modules.LinearAdapter
:show-inheritance:
:members:
:member-order: bysource
:no-index:
:noindex:


Insertion Form - Module Adapters
@@ -72,7 +72,7 @@ We discuss a simple residual additional connection strategy below - that accepts
:members:
:member-order: bysource
:undoc-members: adapter_module_names
:no-index:
:noindex:

-----

@@ -81,7 +81,7 @@ We discuss a simple residual additional connection strategy below - that accepts
:members:
:member-order: bysource
:undoc-members: adapter_module_names
:no-index:
:noindex:

-----

