Skip to content

Commit

Permalink
Merge branch 'main' into speechllm-develop
Browse files Browse the repository at this point in the history
Signed-off-by: Piotr Żelasko <[email protected]>
  • Loading branch information
pzelasko committed Dec 3, 2024
2 parents 09b9f4b + 9abd81b commit a672ecc
Show file tree
Hide file tree
Showing 1,268 changed files with 333,632 additions and 12,811 deletions.
39 changes: 35 additions & 4 deletions .github/workflows/_test_template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,21 +33,43 @@ on:
log:
description: Last 2000 characters of the test step's log
value: ${{ jobs.main.outputs.log }}
potential_infra_failure:
description: Boolean flag when infra-related keyword spotted in logs.
value: ${{ jobs.main.outputs.potential_infra_failure }}
jobs:

main:
runs-on: ${{ inputs.RUNNER }}
outputs:
conclusion: ${{ steps.main.conclusion }}
log: ${{ steps.main.outputs.log }}
potential_infra_failure: ${{ steps.main.outputs.potential_infra_failure }}
steps:
- name: Docker system cleanup
run: |
docker system prune -a --filter "until=48h" --force
docker system prune -a --filter "until=48h" --force || true
- name: Docker pull image
run: |
docker pull nemoci.azurecr.io/nemo_container_${{ github.run_id }}
docker pull nemoci.azurecr.io/nemo_container:${{ github.run_id }}
- name: Start container
run: |
ARG=("")
if [[ "${{ inputs.RUNNER }}" != *cpu* ]]; then
ARG=("--runtime=nvidia --gpus all")
fi
docker run \
--rm \
-d \
--name nemo_container_${{ github.run_id }} ${ARG[@]} \
--shm-size=64g \
--env TRANSFORMERS_OFFLINE=0 \
--env HYDRA_FULL_ERROR=1 \
--env HF_HOME=/home/TestData/HF_HOME \
--volume /mnt/datadrive/TestData:/home/TestData nemoci.azurecr.io/nemo_container:${{ github.run_id }} \
bash -c "sleep $(( ${{ inputs.TIMEOUT }} * 60 + 60 ))"
- id: main
name: Run main script
Expand All @@ -59,18 +81,27 @@ jobs:
(
set -e
docker run --rm --device=/dev/nvidia0 --gpus all --shm-size=8g --env TRANSFORMERS_OFFLINE=0 --env HYDRA_FULL_ERROR=1 --volume /mnt/datadrive/TestData:/home/TestData nemoci.azurecr.io/nemo_container_${{ github.run_id }} bash -c '${{ inputs.SCRIPT }}'
docker exec nemo_container_${{ github.run_id }} bash -c '${{ inputs.SCRIPT }}'
) 2> >(tee err.log)
EXIT_CODE=$?
echo "log=$(tail -c 2000 err.log | base64 -w 0)" >> "$GITHUB_OUTPUT"
potential_infra_failure=$(cat err.log | grep -Eqiw "device" && echo true || echo false)
echo "potential_infra_failure=$potential_infra_failure" >> "$GITHUB_OUTPUT"
exit $EXIT_CODE
- uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
if: failure() && inputs.IS_OPTIONAL == false
- name: after_script
if: always() && inputs.AFTER_SCRIPT != ':'
run: |
docker run --rm --device=/dev/nvidia0 --gpus all --shm-size=8g --env TRANSFORMERS_OFFLINE=0 --env HYDRA_FULL_ERROR=1 --volume /mnt/datadrive/TestData:/home/TestData nemoci.azurecr.io/nemo_container_${{ github.run_id }} bash -c '${{ inputs.AFTER_SCRIPT }}'
docker exec nemo_container_${{ github.run_id }} bash -c '${{ inputs.AFTER_SCRIPT }}'
- name: Container shutdown
if: always()
run: |
docker container stop nemo_container_${{ github.run_id }} || true
docker container rm nemo_container_${{ github.run_id }} || true
45 changes: 45 additions & 0 deletions .github/workflows/build-test-publish-wheel.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Workflow: builds, tests, and publishes the Python wheel by delegating all
# of the work to a reusable workflow hosted in NVIDIA/NeMo-FW-CI-templates.
# This file only selects the trigger branches and forwards inputs/secrets.
name: Build, test, and publish a PyPi wheel (to testpypi)

# Runs on every push to `main` and to any branch matching the glob 'r**'.
on:
  push:
    branches:
      - main
      - 'r**'

# Shell used by `run:` steps: trace commands (-x), stop on the first error
# (-e), treat unset variables as errors (-u), and fail a pipeline when any
# stage of it fails (pipefail).
defaults:
  run:
    shell: bash -x -e -u -o pipefail {0}

jobs:
  build-test-publish-wheel:
    # NOTE(review): the ref at the end of this path ("[email protected]") looks
    # like e-mail-obfuscation residue from the page capture; the original
    # "<workflow-file>.yml@<tag>" pin must be restored before this can run.
    uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/[email protected]
    # Inputs are interpreted by the called workflow, which is not visible
    # here; the per-key semantics are presumed from the names — confirm
    # against the template's `workflow_call.inputs`.
    with:
      image-name: nemo_container
      dockerfile: Dockerfile.ci
      image-label: nemo-core
      # Literal block scalar: one KEY=VALUE build argument per line.
      build-args: |
        IMAGE_LABEL=nemo-core
      prune-filter-timerange: 24h
      # NOTE(review): presumably keeps the publish step from uploading for
      # real — confirm in the template.
      dry-run: true
      python-package: nemo
      container-workdir: /workspace
      environment: public
    # Repository secrets handed to the called workflow under the same names.
    secrets:
      TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }}
      TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }}
      SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }}
      SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
30 changes: 8 additions & 22 deletions .github/workflows/cherry-pick-release-commit.yml
Original file line number Diff line number Diff line change
@@ -1,28 +1,14 @@
name: Create PR to main with cherry-pick from release

on:
pull_request_target:
push:
branches:
- 'r*.*.*'
types: ["closed"]
- main

jobs:
cherry-pick-release-commit:
name: Cherry-pick release commit
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: github-cherry-pick-action v1.0.3
uses: carloscastrojumo/github-cherry-pick-action@bb0869df47c27be4ae4c7a2d93d22827aa5a0054
with:
branch: main
labels: |
cherry-pick
reviewers: |
${{ github.event.pull_request.user.login }}
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
cherry-pick:
uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/[email protected]
secrets:
PAT: ${{ secrets.PAT }}
SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
Loading

0 comments on commit a672ecc

Please sign in to comment.