Skip to content

Commit

Permalink
Merge branch 'main' into xren/nsys_profiling
Browse files Browse the repository at this point in the history
  • Loading branch information
xrennvidia committed Sep 11, 2024
2 parents 5d330be + fd8c6a4 commit 87104c1
Show file tree
Hide file tree
Showing 7 changed files with 131 additions and 63 deletions.
78 changes: 45 additions & 33 deletions .github/workflows/cicd-main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -123,25 +123,27 @@ jobs:
### \'\'
# L0: GPU unit tests
L0_Unit_Tests_GPU_ASR:
OPTIONAL_L0_Unit_Tests_GPU_ASR:
needs: [cicd-test-container-setup]
uses: ./.github/workflows/_test_template.yml
if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L0_Unit_Tests_GPU_ASR') || needs.cicd-test-container-setup.outputs.all == 'true'
if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'OPTIONAL_L0_Unit_Tests_GPU_ASR') || needs.cicd-test-container-setup.outputs.all == 'true'
with:
RUNNER: self-hosted-azure
TIMEOUT: 20
SCRIPT: |
NEMO_NUMBA_MINVER=0.53 pytest tests/collections/asr -m "not pleasefixme" --with_downloads
IS_OPTIONAL: true

L0_Unit_Tests_GPU_Audio:
OPTIONAL_L0_Unit_Tests_GPU_Audio:
needs: [cicd-test-container-setup]
uses: ./.github/workflows/_test_template.yml
if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L0_Unit_Tests_GPU_Audio') || needs.cicd-test-container-setup.outputs.all == 'true'
if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'OPTIONAL_L0_Unit_Tests_GPU_Audio') || needs.cicd-test-container-setup.outputs.all == 'true'
with:
RUNNER: self-hosted-azure
TIMEOUT: 20
SCRIPT: |
NEMO_NUMBA_MINVER=0.53 pytest tests/collections/audio -m "not pleasefixme" --with_downloads
IS_OPTIONAL: true

L0_Unit_Tests_GPU_Common:
needs: [cicd-test-container-setup]
Expand Down Expand Up @@ -170,23 +172,25 @@ jobs:
SCRIPT: |
NEMO_NUMBA_MINVER=0.53 pytest tests/collections/multimodal -m "not pleasefixme" --with_downloads
L0_Unit_Tests_GPU_NLP:
OPTIONAL_L0_Unit_Tests_GPU_NLP:
needs: [cicd-test-container-setup]
uses: ./.github/workflows/_test_template.yml
if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L0_Unit_Tests_GPU_NLP') || needs.cicd-test-container-setup.outputs.all == 'true'
if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'OPTIONAL_L0_Unit_Tests_GPU_NLP') || needs.cicd-test-container-setup.outputs.all == 'true'
with:
RUNNER: self-hosted-azure
SCRIPT: |
NEMO_NUMBA_MINVER=0.53 pytest tests/collections/nlp -m "not pleasefixme" --with_downloads
IS_OPTIONAL: true

L0_Unit_Tests_GPU_TTS:
OPTIONAL_L0_Unit_Tests_GPU_TTS:
needs: [cicd-test-container-setup]
uses: ./.github/workflows/_test_template.yml
if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L0_Unit_Tests_GPU_TTS') || needs.cicd-test-container-setup.outputs.all == 'true'
if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'OPTIONAL_L0_Unit_Tests_GPU_TTS') || needs.cicd-test-container-setup.outputs.all == 'true'
with:
RUNNER: self-hosted-azure
SCRIPT: |
NEMO_NUMBA_MINVER=0.53 pytest tests/collections/tts -m "not pleasefixme" --with_downloads
IS_OPTIONAL: true

OPTIONAL_L0_Unit_Tests_GPU_Core:
needs: [cicd-test-container-setup]
Expand Down Expand Up @@ -240,24 +244,25 @@ jobs:
--ignore=tests/utils
# L0: CPU unit tests
L0_Unit_Tests_CPU_ASR:
OPTIONAL_L0_Unit_Tests_CPU_ASR:
needs: [cicd-test-container-setup]
uses: ./.github/workflows/_test_template.yml
if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L0_Unit_Tests_CPU_ASR') || needs.cicd-test-container-setup.outputs.all == 'true'
if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'OPTIONAL_L0_Unit_Tests_CPU_ASR') || needs.cicd-test-container-setup.outputs.all == 'true'
with:
RUNNER: self-hosted-azure-cpu
TIMEOUT: 20
SCRIPT: |
CUDA_VISIBLE_DEVICES="" NEMO_NUMBA_MINVER=0.53 pytest tests/collections/asr -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat
L0_Unit_Tests_CPU_Audio:
OPTIONAL_L0_Unit_Tests_CPU_Audio:
needs: [cicd-test-container-setup]
uses: ./.github/workflows/_test_template.yml
if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L0_Unit_Tests_CPU_Audio') || needs.cicd-test-container-setup.outputs.all == 'true'
if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'OPTIONAL_L0_Unit_Tests_CPU_Audio') || needs.cicd-test-container-setup.outputs.all == 'true'
with:
RUNNER: self-hosted-azure-cpu
SCRIPT: |
CUDA_VISIBLE_DEVICES="" NEMO_NUMBA_MINVER=0.53 pytest tests/collections/audio -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat
IS_OPTIONAL: true

L0_Unit_Tests_CPU_Common:
needs: [cicd-test-container-setup]
Expand Down Expand Up @@ -286,32 +291,37 @@ jobs:
SCRIPT: |
CUDA_VISIBLE_DEVICES="" NEMO_NUMBA_MINVER=0.53 pytest tests/collections/multimodal -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat
L0_Unit_Tests_CPU_NLP:
OPTIONAL_L0_Unit_Tests_CPU_NLP:
needs: [cicd-test-container-setup]
uses: ./.github/workflows/_test_template.yml
if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L0_Unit_Tests_CPU_NLP') || needs.cicd-test-container-setup.outputs.all == 'true'
if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'OPTIONAL_L0_Unit_Tests_CPU_NLP') || needs.cicd-test-container-setup.outputs.all == 'true'
with:
RUNNER: self-hosted-azure-cpu
TIMEOUT: 20
SCRIPT: |
CUDA_VISIBLE_DEVICES="" NEMO_NUMBA_MINVER=0.53 pytest tests/collections/nlp -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat
IS_OPTIONAL: true

L0_Unit_Tests_CPU_TTS:
OPTIONAL_L0_Unit_Tests_CPU_TTS:
needs: [cicd-test-container-setup]
uses: ./.github/workflows/_test_template.yml
if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L0_Unit_Tests_CPU_TTS') || needs.cicd-test-container-setup.outputs.all == 'true'
if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'OPTIONAL_L0_Unit_Tests_CPU_TTS') || needs.cicd-test-container-setup.outputs.all == 'true'
with:
RUNNER: self-hosted-azure-cpu
SCRIPT: |
CUDA_VISIBLE_DEVICES="" NEMO_NUMBA_MINVER=0.53 pytest tests/collections/tts -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat
IS_OPTIONAL: true

L0_Unit_Tests_CPU_Core:
OPTIONAL_L0_Unit_Tests_CPU_Core:
needs: [cicd-test-container-setup]
uses: ./.github/workflows/_test_template.yml
if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L0_Unit_Tests_CPU_Core') || needs.cicd-test-container-setup.outputs.all == 'true'
if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'OPTIONAL_L0_Unit_Tests_CPU_Core') || needs.cicd-test-container-setup.outputs.all == 'true'
with:
RUNNER: self-hosted-azure-cpu
TIMEOUT: 20
SCRIPT: |
CUDA_VISIBLE_DEVICES="" NEMO_NUMBA_MINVER=0.53 pytest tests/core tests/core_ptl -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat
IS_OPTIONAL: true

L0_Unit_Tests_CPU_Hydra:
needs: [cicd-test-container-setup]
Expand Down Expand Up @@ -5073,10 +5083,10 @@ jobs:
rm -rf examples/llm/gpt_pretrain_results
rm -rf examples/llm/gpt_index_mappings
L2_NeMo_2_SSM_Pretraining:
OPTIONAL_L2_NeMo_2_SSM_Pretraining:
needs: [cicd-test-container-setup]
uses: ./.github/workflows/_test_template.yml
if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_SSM_Pretraining') || needs.cicd-test-container-setup.outputs.all == 'true'
if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'OPTIONAL_L2_NeMo_2_SSM_Pretraining') || needs.cicd-test-container-setup.outputs.all == 'true'
with:
RUNNER: self-hosted-azure
SCRIPT: |
Expand All @@ -5089,11 +5099,12 @@ jobs:
AFTER_SCRIPT: |
rm -rf /home/TestData/nlp/megatron_mamba/nemo-ux-mamba/cicd_test_pretrain
IS_OPTIONAL: true

L2_NeMo_2_SSM_Finetuning:
OPTIONAL_L2_NeMo_2_SSM_Finetuning:
needs: [cicd-test-container-setup]
uses: ./.github/workflows/_test_template.yml
if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_SSM_Finetuning') || needs.cicd-test-container-setup.outputs.all == 'true'
if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'OPTIONAL_L2_NeMo_2_SSM_Finetuning') || needs.cicd-test-container-setup.outputs.all == 'true'
with:
RUNNER: self-hosted-azure
SCRIPT: |
Expand All @@ -5106,33 +5117,34 @@ jobs:
AFTER_SCRIPT: |
rm -rf /home/TestData/nlp/megatron_mamba/nemo-ux-mamba/cicd_test_sft
IS_OPTIONAL: true

Nemo_CICD_Test:
needs:
- pre-flight
- gpu-test
- cicd-test-container-setup

- L0_Unit_Tests_GPU_ASR
- L0_Unit_Tests_GPU_Audio
#- OPTIONAL_L0_Unit_Tests_GPU_ASR
#- OPTIONAL_L0_Unit_Tests_GPU_Audio
- L0_Unit_Tests_GPU_Common
- L0_Unit_Tests_GPU_LLM
- L0_Unit_Tests_GPU_Multimodal
- L0_Unit_Tests_GPU_NLP
- L0_Unit_Tests_GPU_TTS
#- OPTIONAL_L0_Unit_Tests_GPU_NLP
#- OPTIONAL_L0_Unit_Tests_GPU_TTS
#- OPTIONAL_L0_Unit_Tests_GPU_Core
- L0_Unit_Tests_GPU_Hydra
#- OPTIONAL_L0_Unit_Tests_GPU_Lightning
- L0_Unit_Tests_GPU_Others

- L0_Unit_Tests_CPU_ASR
- L0_Unit_Tests_CPU_Audio
#- OPTIONAL_L0_Unit_Tests_CPU_ASR
#- OPTIONAL_L0_Unit_Tests_CPU_Audio
- L0_Unit_Tests_CPU_Common
- L0_Unit_Tests_CPU_LLM
- L0_Unit_Tests_CPU_Multimodal
- L0_Unit_Tests_CPU_NLP
- L0_Unit_Tests_CPU_TTS
- L0_Unit_Tests_CPU_Core
#- OPTIONAL_L0_Unit_Tests_CPU_NLP
#- OPTIONAL_L0_Unit_Tests_CPU_TTS
#- OPTIONAL_L0_Unit_Tests_CPU_Core
- L0_Unit_Tests_CPU_Hydra
- L0_Unit_Tests_CPU_Lightning
- L0_Unit_Tests_CPU_Others
Expand Down Expand Up @@ -5238,8 +5250,8 @@ jobs:
#- OPTIONAL_L2_Stable_Diffusion_Training
- L2_NeMo_2_GPT_Pretraining_no_transformer_engine
- L2_NeMo_2_GPT_DDP_Param_Parity_check
- L2_NeMo_2_SSM_Pretraining
- L2_NeMo_2_SSM_Finetuning
#- OPTIONAL_L2_NeMo_2_SSM_Pretraining
#- OPTIONAL_L2_NeMo_2_SSM_Finetuning
if: always()
runs-on: ubuntu-latest
steps:
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/secrets-detector.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ jobs:
uses: actions/checkout@v4
with:
path: ${{ github.run_id }}
ref: ${{ inputs.branch-name || github.head_ref }}
fetch-depth: 0

- name: Install secrets detector
Expand Down
6 changes: 6 additions & 0 deletions nemo/collections/asr/models/hybrid_rnnt_ctc_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -667,4 +667,10 @@ def list_available_models(cls) -> Optional[PretrainedModelInfo]:
List of available pre-trained models.
"""
results = []
model = PretrainedModelInfo(
pretrained_model_name="parakeet-tdt_ctc-110m",
description="For details on this model, please refer to https://ngc.nvidia.com/catalog/models/nvidia:nemo:parakeet-tdt_ctc-110m",
location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/parakeet-tdt_ctc-110m/versions/v1/files/parakeet-tdt_ctc-110m.nemo",
)
results.append(model)
return results
4 changes: 2 additions & 2 deletions nemo/utils/exp_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -951,8 +951,8 @@ def get_git_hash():
True,
subprocess.check_output(['git', 'rev-parse', 'HEAD'], stderr=subprocess.STDOUT).decode(),
)
except subprocess.CalledProcessError as err:
return False, "{}\n".format(err.output.decode("utf-8"))
except (subprocess.CalledProcessError, FileNotFoundError) as err:
return False, "{}\n".format(err)


def get_git_diff():
Expand Down
58 changes: 58 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,64 @@
# See the License for the specific language governing permissions and
# limitations under the License.

[build-system]
requires = ["setuptools >= 61.0"]
build-backend = "setuptools.build_meta"

[project]
name = "nemo-toolkit"
dynamic = ["dependencies", "optional-dependencies", "version"]
description = "NeMo - a toolkit for Conversational AI"
readme = "README.md"
license = {file = "LICENSE"}
requires-python = ">=3.10"
authors = [{ name = "NVIDIA", email = "nemo-toolkit@nvidia.com" }]
maintainers = [{ name = "NVIDIA", email = "nemo-toolkit@nvidia.com" }]
keywords = [
"NLP",
"NeMo",
"deep",
"gpu",
"language",
"learning",
"learning",
"machine",
"nvidia",
"pytorch",
"speech",
"torch",
"tts",
]
classifiers = [
"Development Status :: 5 - Production/Stable",
"Environment :: Console",
"Intended Audience :: Developers",
"Intended Audience :: Information Technology",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: Apache Software License",
"Natural Language :: English",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.10",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"Topic :: Scientific/Engineering :: Image Recognition",
"Topic :: Scientific/Engineering :: Mathematics",
"Topic :: Scientific/Engineering",
"Topic :: Software Development :: Libraries :: Python Modules",
"Topic :: Software Development :: Libraries",
"Topic :: Utilities",
]

[tool.setuptools.dynamic]
dependencies = { file = ["requirements/requirements.txt"] }

[project.entry-points."run.factories"]
llm = "nemo.collections.llm"

[project.urls]
Download = "https://github.com/NVIDIA/NeMo/releases"
Homepage = "https://github.com/nvidia/nemo"

[tool.isort]
profile = "black" # black-compatible
line_length = 119 # should match black parameters
Expand Down
1 change: 1 addition & 0 deletions requirements/requirements_nlp.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ markdown2
matplotlib>=3.3.2
#megatron_core>0.6.0 # add back once mcore on pypi is compatible again
nltk>=3.6.5
numpy<2 # tensorstore has an implicit compiled dependency on numpy<2
opencc<1.1.7
pangu
rapidfuzz
Expand Down
46 changes: 19 additions & 27 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,54 +80,46 @@ def req_file(filename, folder="requirements"):
}


extras_require['all'] = list(chain(extras_require.values()))
extras_require['all'] = list(chain(*extras_require.values()))

# Add lightning requirements as needed
extras_require['common'] = list(chain([extras_require['common'], extras_require['core']]))
extras_require['common'] = list(chain(extras_require['common'], extras_require['core']))
extras_require['test'] = list(
chain(
[
extras_require['tts'],
extras_require['core'],
extras_require['common'],
]
extras_require['tts'],
extras_require['core'],
extras_require['common'],
)
)
extras_require['asr'] = list(chain([extras_require['asr'], extras_require['core'], extras_require['common']]))
extras_require['asr'] = list(chain(extras_require['asr'], extras_require['core'], extras_require['common']))
extras_require['nlp'] = list(
chain(
[
extras_require['nlp'],
extras_require['core'],
extras_require['common'],
]
extras_require['nlp'],
extras_require['core'],
extras_require['common'],
)
)
extras_require['tts'] = list(
chain(
[
extras_require['tts'],
extras_require['core'],
extras_require['common'],
]
extras_require['tts'],
extras_require['core'],
extras_require['common'],
)
)
extras_require['multimodal'] = list(
chain(
[
extras_require['multimodal'],
extras_require['nlp'],
extras_require['core'],
extras_require['common'],
]
extras_require['multimodal'],
extras_require['nlp'],
extras_require['core'],
extras_require['common'],
)
)
extras_require['audio'] = list(chain([extras_require['audio'], extras_require['core'], extras_require['common']]))
extras_require['audio'] = list(chain(extras_require['audio'], extras_require['core'], extras_require['common']))

# TTS has extra dependencies
extras_require['tts'] = list(chain([extras_require['tts'], extras_require['asr']]))
extras_require['tts'] = list(chain(extras_require['tts'], extras_require['asr']))

extras_require['slu'] = list(chain([extras_require['slu'], extras_require['asr']]))
extras_require['slu'] = list(chain(extras_require['slu'], extras_require['asr']))


###############################################################################
Expand Down

0 comments on commit 87104c1

Please sign in to comment.