From 9d4aa0d3bf82814be6575fbf79a6b66c039be4a1 Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Wed, 7 Jun 2023 20:29:18 -0700 Subject: [PATCH 01/21] Add 2 ranks tests --- .github/workflows/ci.yml | 1 + cmake/versions.json | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c147cd17da..5c855bd46d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -76,6 +76,7 @@ jobs: - {name: 2 CPUs test, options: --cpus 2 --debug, log: cpus} - {name: GPU test, options: --use cuda --gpus 1 --debug, log: gpu} - {name: 2 GPUs test, options: --use cuda --gpus 2 --debug, log: gpus} + - {name: 2 ranks test, options: --use cuda --gpus 1 --debug --ranks-per-node 2, log: ranks} - {name: OpenMP test, options: --use openmp --omps 1 --ompthreads 2 --debug, log: omp} - {name: 2 NUMA OpenMPs test, options: --use openmp --omps 2 --ompthreads 2 --numamem 2048 --debug, log: omps} - {name: Eager execution test, options: --use eager --debug, log: eager} diff --git a/cmake/versions.json b/cmake/versions.json index d7e1d81336..3c82379943 100644 --- a/cmake/versions.json +++ b/cmake/versions.json @@ -5,7 +5,7 @@ "git_url" : "https://github.com/nv-legate/legate.core.git", "git_shallow": false, "always_download": false, - "git_tag" : "4b79075eb5d7035d501c334c87a87939af79abc2" + "git_tag" : "4b0205179ea59ff387fe61afeb9175d2ee4cdac4" } } } From 98aa5c10a09912e1db9c7ba6dfece7431c738dc2 Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Wed, 28 Jun 2023 13:47:42 -0700 Subject: [PATCH 02/21] Moving multi-rank CI into a separate workflow. --- .github/workflows/ci-ucx.yml | 138 +++++++++++++++++++++++++++++++++++ .github/workflows/ci.yml | 1 - 2 files changed, 138 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/ci-ucx.yml diff --git a/.github/workflows/ci-ucx.yml b/.github/workflows/ci-ucx.yml new file mode 100644 index 0000000000..9d2112c777 --- /dev/null +++ b/.github/workflows/ci-ucx.yml @@ -0,0 +1,138 @@ +name: Build cunumeric +on: + push: + branches-ignore: + - gh-pages # deployment target branch (this workflow should not exist on that branch anyway) + pull_request: + branches-ignore: + - gh-pages # deployment target branch (this workflow should not exist on that branch anyway) + schedule: + # * is a special character in YAML so you have to quote this string + - cron: '0 */6 * * *' +env: + COMMIT: ${{ github.event.pull_request.head.sha || github.sha }} + PROJECT: github-cunumeric-ci + REF: ${{ github.event.pull_request.head.ref || github.ref }} + BASE_REF: ${{ github.event.pull_request.base.ref || github.ref }} + EVENT_NAME: ${{ github.event_name }} + LABEL: ${{ github.event.pull_request.head.label }} + # Prevent output buffering + PYTHONUNBUFFERED: 1 +jobs: + build: + permissions: + id-token: write # This is required for configure-aws-credentials + contents: read # This is required for actions/checkout + + if: ${{ github.repository == 'nv-legate/cunumeric' }} + runs-on: self-hosted + steps: + - name: Dump GitHub context + env: + GITHUB_CONTEXT: ${{ toJSON(github) }} + run: echo "$GITHUB_CONTEXT" + - name: Dump job context + env: + JOB_CONTEXT: ${{ toJSON(job) }} + run: echo "$JOB_CONTEXT" + - name: Dump steps context + env: + STEPS_CONTEXT: ${{ toJSON(steps) }} + run: echo "$STEPS_CONTEXT" + - name: Dump runner context + env: + RUNNER_CONTEXT: ${{ toJSON(runner) }} + run: echo "$RUNNER_CONTEXT" + - name: Dump strategy context + env: + STRATEGY_CONTEXT: ${{ toJSON(strategy) }} + run: echo "$STRATEGY_CONTEXT" + - name: Dump matrix context + env: + MATRIX_CONTEXT: ${{ toJSON(matrix) }} + run: echo "$MATRIX_CONTEXT" + - name: Run CI build + run: | + /data/github-runner/legate-bin/setup.sh + cd legate-ci/github-ci/cunumeric + rm -rf ngc-artifacts || true + ./build-separate-ucx.sh > ${COMMIT}-build.log 2>&1 + - name: Process Output + run: | + cd legate-ci/github-ci/cunumeric + cat *artifacts/*/* + if: always() + - name: Upload Build Log + if: always() + uses: actions/upload-artifact@v3 + with: + name: build-log + path: ./**/${{ env.COMMIT }}-build.log.gpg + test: + if: ${{ github.repository == 'nv-legate/cunumeric' }} + runs-on: self-hosted + needs: build + strategy: + fail-fast: false + matrix: + include: + - {name: 1 CPU test, options: --ranks-per-node 2 --launcher mpirun --cpus 1 --unit --debug, log: cpu, instance: 1} + - {name: 2 CPUs test, options: --ranks-per-node 2 --launcher mpirun --cpus 2 --debug, log: cpus, instance: 1} + - {name: GPU test, options: --ranks-per-node 2 --launcher mpirun --use cuda --gpus 1 --debug, log: gpu, instance: 2} + - {name: 2 GPUs test, options: --ranks-per-node 2 --launcher mpirun --use cuda --gpus 2 --debug, log: gpus, instance: 4} + - {name: OpenMP test, options: --ranks-per-node 2 --launcher mpirun --use openmp --omps 1 --ompthreads 2 --debug, log: omp, instance: 1} + - {name: 2 NUMA OpenMPs test, options: --ranks-per-node 2 --launcher mpirun --use openmp --omps 2 --ompthreads 2 --numamem 2048 --debug, log: omps, instance: 1} + - {name: Eager execution test, options: --use eager --debug, log: eager, instance: 1} + name: ${{ matrix.name }} + steps: + - name: Dump GitHub context + env: + GITHUB_CONTEXT: ${{ toJSON(github) }} + run: echo "$GITHUB_CONTEXT" + - name: Dump job context + env: + JOB_CONTEXT: ${{ toJSON(job) }} + run: echo "$JOB_CONTEXT" + - name: Dump steps context + env: + STEPS_CONTEXT: ${{ toJSON(steps) }} + run: echo "$STEPS_CONTEXT" + - name: Dump runner context + env: + RUNNER_CONTEXT: ${{ toJSON(runner) }} + run: echo "$RUNNER_CONTEXT" + - name: Dump strategy context + env: + STRATEGY_CONTEXT: ${{ toJSON(strategy) }} + run: echo "$STRATEGY_CONTEXT" + - name: Dump matrix context + env: + MATRIX_CONTEXT: ${{ toJSON(matrix) }} + run: echo "$MATRIX_CONTEXT" + - name: Prepare + run: | + /data/github-runner/legate-bin/setup.sh + cd legate-ci/github-ci/cunumeric + if [[ ! -d ngc-artifacts ]] + then + mkdir ngc-artifacts + else + rm -rf ngc-artifacts/* + fi + - name: Test + run: | + cd legate-ci/github-ci/cunumeric + [[ "${{ matrix.name }}" == "Eager"* ]] && export PYTHONFAULTHANDLER=1 + INSTANCE=${{ matrix.instance }} ./test.sh ${{ matrix.options }} > ${COMMIT}-test-${{ matrix.log }}.log 2>&1 + - name: Process output + if: always() + run: | + cd legate-ci/github-ci/cunumeric + /data/github-runner/legate-bin/encrypt.sh ${COMMIT}-test-${{ matrix.log }}.log + cat *artifacts/*/* + - name: Upload Log + if: always() + uses: actions/upload-artifact@v3 + with: + name: test-${{ matrix.log }}-log + path: ./**/${{ env.COMMIT }}-test-${{ matrix.log }}.log.gpg diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5c855bd46d..c147cd17da 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -76,7 +76,6 @@ jobs: - {name: 2 CPUs test, options: --cpus 2 --debug, log: cpus} - {name: GPU test, options: --use cuda --gpus 1 --debug, log: gpu} - {name: 2 GPUs test, options: --use cuda --gpus 2 --debug, log: gpus} - - {name: 2 ranks test, options: --use cuda --gpus 1 --debug --ranks-per-node 2, log: ranks} - {name: OpenMP test, options: --use openmp --omps 1 --ompthreads 2 --debug, log: omp} - {name: 2 NUMA OpenMPs test, options: --use openmp --omps 2 --ompthreads 2 --numamem 2048 --debug, log: omps} - {name: Eager execution test, options: --use eager --debug, log: eager} From e9df8bf2b46cedaf78e8289548367f7062c72a0a Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Wed, 28 Jun 2023 14:07:31 -0700 Subject: [PATCH 03/21] Try to use a generic build script --- .github/workflows/ci-ucx.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-ucx.yml b/.github/workflows/ci-ucx.yml index 9d2112c777..29c51d2b18 100644 --- a/.github/workflows/ci-ucx.yml +++ b/.github/workflows/ci-ucx.yml @@ -56,7 +56,7 @@ jobs: /data/github-runner/legate-bin/setup.sh cd legate-ci/github-ci/cunumeric rm -rf ngc-artifacts || true - ./build-separate-ucx.sh > ${COMMIT}-build.log 2>&1 + DOCKERFILE_EXT=.separate.ucx ./build.sh > ${COMMIT}-build.log 2>&1 - name: Process Output run: | cd legate-ci/github-ci/cunumeric From f7eddea1943ef6eb0a9cf41254ff246648ab5353 Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Thu, 6 Jul 2023 16:27:24 -0700 Subject: [PATCH 04/21] Update workflow names --- .github/workflows/ci-ucx.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci-ucx.yml b/.github/workflows/ci-ucx.yml index 29c51d2b18..3f164f050e 100644 --- a/.github/workflows/ci-ucx.yml +++ b/.github/workflows/ci-ucx.yml @@ -1,4 +1,4 @@ -name: Build cunumeric +name: Build UCX cunumeric on: push: branches-ignore: @@ -76,12 +76,12 @@ jobs: fail-fast: false matrix: include: - - {name: 1 CPU test, options: --ranks-per-node 2 --launcher mpirun --cpus 1 --unit --debug, log: cpu, instance: 1} - - {name: 2 CPUs test, options: --ranks-per-node 2 --launcher mpirun --cpus 2 --debug, log: cpus, instance: 1} - - {name: GPU test, options: --ranks-per-node 2 --launcher mpirun --use cuda --gpus 1 --debug, log: gpu, instance: 2} - - {name: 2 GPUs test, options: --ranks-per-node 2 --launcher mpirun --use cuda --gpus 2 --debug, log: gpus, instance: 4} - - {name: OpenMP test, options: --ranks-per-node 2 --launcher mpirun --use openmp --omps 1 --ompthreads 2 --debug, log: omp, instance: 1} - - {name: 2 NUMA OpenMPs test, options: --ranks-per-node 2 --launcher mpirun --use openmp --omps 2 --ompthreads 2 --numamem 2048 --debug, log: omps, instance: 1} + - {name: 2 rank 1 CPU test, options: --ranks-per-node 2 --launcher mpirun --cpus 1 --unit --debug, log: cpu, instance: 1} + - {name: 2 rank 2 CPUs test, options: --ranks-per-node 2 --launcher mpirun --cpus 2 --debug, log: cpus, instance: 1} + - {name: 2 rank GPU test, options: --ranks-per-node 2 --launcher mpirun --use cuda --gpus 1 --debug, log: gpu, instance: 2} + - {name: 2 rank 2 GPUs test, options: --ranks-per-node 2 --launcher mpirun --use cuda --gpus 2 --debug, log: gpus, instance: 4} + - {name: 2 rank OpenMP test, options: --ranks-per-node 2 --launcher mpirun --use openmp --omps 1 --ompthreads 2 --debug, log: omp, instance: 1} + - {name: 2 rank 2 NUMA OpenMPs test, options: --ranks-per-node 2 --launcher mpirun --use openmp --omps 2 --ompthreads 2 --numamem 2048 --debug, log: omps, instance: 1} - {name: Eager execution test, options: --use eager --debug, log: eager, instance: 1} name: ${{ matrix.name }} steps: From 23f51059a675a6c8626257a6df2b953cf7611b5f Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Tue, 8 Aug 2023 20:59:16 -0700 Subject: [PATCH 05/21] Update legion --- cmake/versions.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/versions.json b/cmake/versions.json index 3c82379943..98dbd8c3a1 100644 --- a/cmake/versions.json +++ b/cmake/versions.json @@ -5,7 +5,7 @@ "git_url" : "https://github.com/nv-legate/legate.core.git", "git_shallow": false, "always_download": false, - "git_tag" : "4b0205179ea59ff387fe61afeb9175d2ee4cdac4" + "git_tag" : "a405f595603238c8557cb5fefd3981d190a2fb1d" } } } From 64fd6c8c8b1dc18776fd0f0622dd67c0f8f3aa1e Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Thu, 10 Aug 2023 02:01:16 -0700 Subject: [PATCH 06/21] add ext to testing --- .github/workflows/ci-ucx.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-ucx.yml b/.github/workflows/ci-ucx.yml index 3f164f050e..f1149409cd 100644 --- a/.github/workflows/ci-ucx.yml +++ b/.github/workflows/ci-ucx.yml @@ -123,7 +123,7 @@ jobs: run: | cd legate-ci/github-ci/cunumeric [[ "${{ matrix.name }}" == "Eager"* ]] && export PYTHONFAULTHANDLER=1 - INSTANCE=${{ matrix.instance }} ./test.sh ${{ matrix.options }} > ${COMMIT}-test-${{ matrix.log }}.log 2>&1 + INSTANCE=${{ matrix.instance }} DOCKERFILE_EXT=.separate.ucx ./test.sh ${{ matrix.options }} > ${COMMIT}-test-${{ matrix.log }}.log 2>&1 - name: Process output if: always() run: | From 6c91535ee2d589b573236679c650197ee79995ad Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Thu, 17 Aug 2023 14:09:46 -0700 Subject: [PATCH 07/21] Only on push request --- .github/workflows/ci-ucx.yml | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/.github/workflows/ci-ucx.yml b/.github/workflows/ci-ucx.yml index f1149409cd..c8254e44e4 100644 --- a/.github/workflows/ci-ucx.yml +++ b/.github/workflows/ci-ucx.yml @@ -1,14 +1,10 @@ name: Build UCX cunumeric on: - push: - branches-ignore: - - gh-pages # deployment target branch (this workflow should not exist on that branch anyway) - pull_request: - branches-ignore: - - gh-pages # deployment target branch (this workflow should not exist on that branch anyway) - schedule: - # * is a special character in YAML so you have to quote this string - - cron: '0 */6 * * *' + on: + push: + branches: + - "pull-request/[0-9]+" + - "branch-*" env: COMMIT: ${{ github.event.pull_request.head.sha || github.sha }} PROJECT: github-cunumeric-ci From 9453b94e1f9e5278b9c0bdc973f8c8105fd3d1ff Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Thu, 17 Aug 2023 14:23:46 -0700 Subject: [PATCH 08/21] fix a typo --- .github/workflows/ci-ucx.yml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci-ucx.yml b/.github/workflows/ci-ucx.yml index c8254e44e4..3202a91dc6 100644 --- a/.github/workflows/ci-ucx.yml +++ b/.github/workflows/ci-ucx.yml @@ -1,10 +1,9 @@ name: Build UCX cunumeric on: - on: - push: - branches: - - "pull-request/[0-9]+" - - "branch-*" + push: + branches: + - "pull-request/[0-9]+" + - "branch-*" env: COMMIT: ${{ github.event.pull_request.head.sha || github.sha }} PROJECT: github-cunumeric-ci From 1ea026f46f87f14038130396f734578c4ce6f83a Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Tue, 29 Aug 2023 08:53:46 -0700 Subject: [PATCH 09/21] Point legate.core at a version that understands launcher arguments --- cmake/versions.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/versions.json b/cmake/versions.json index 98dbd8c3a1..28d3ff3fb8 100644 --- a/cmake/versions.json +++ b/cmake/versions.json @@ -5,7 +5,7 @@ "git_url" : "https://github.com/nv-legate/legate.core.git", "git_shallow": false, "always_download": false, - "git_tag" : "a405f595603238c8557cb5fefd3981d190a2fb1d" + "git_tag" : "4736c3706b6b8dfdcd4b19b015fd186f7fe53f3a" } } } From 1bf728db6d4fdec1debedcc73c795201b1056b9a Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Thu, 31 Aug 2023 19:46:20 -0700 Subject: [PATCH 10/21] update legate sha --- cmake/versions.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/versions.json b/cmake/versions.json index 28d3ff3fb8..30e0437f02 100644 --- a/cmake/versions.json +++ b/cmake/versions.json @@ -5,7 +5,7 @@ "git_url" : "https://github.com/nv-legate/legate.core.git", "git_shallow": false, "always_download": false, - "git_tag" : "4736c3706b6b8dfdcd4b19b015fd186f7fe53f3a" + "git_tag" : "920499de5f778dc351a8dda6931ad1330a536335" } } } From e9e5ba18327ca0385a4fd03899c6316849624ab3 Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Thu, 31 Aug 2023 19:48:55 -0700 Subject: [PATCH 11/21] add oversubscribe --- .github/workflows/ci-ucx.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci-ucx.yml b/.github/workflows/ci-ucx.yml index 3202a91dc6..298c5e77f8 100644 --- a/.github/workflows/ci-ucx.yml +++ b/.github/workflows/ci-ucx.yml @@ -71,12 +71,12 @@ jobs: fail-fast: false matrix: include: - - {name: 2 rank 1 CPU test, options: --ranks-per-node 2 --launcher mpirun --cpus 1 --unit --debug, log: cpu, instance: 1} - - {name: 2 rank 2 CPUs test, options: --ranks-per-node 2 --launcher mpirun --cpus 2 --debug, log: cpus, instance: 1} - - {name: 2 rank GPU test, options: --ranks-per-node 2 --launcher mpirun --use cuda --gpus 1 --debug, log: gpu, instance: 2} - - {name: 2 rank 2 GPUs test, options: --ranks-per-node 2 --launcher mpirun --use cuda --gpus 2 --debug, log: gpus, instance: 4} - - {name: 2 rank OpenMP test, options: --ranks-per-node 2 --launcher mpirun --use openmp --omps 1 --ompthreads 2 --debug, log: omp, instance: 1} - - {name: 2 rank 2 NUMA OpenMPs test, options: --ranks-per-node 2 --launcher mpirun --use openmp --omps 2 --ompthreads 2 --numamem 2048 --debug, log: omps, instance: 1} + - {name: 2 rank 1 CPU test, options: --ranks-per-node 2 --launcher mpirun --launcher-extra=--oversubscribe --cpus 1 --unit --debug, log: cpu, instance: 1} + - {name: 2 rank 2 CPUs test, options: --ranks-per-node 2 --launcher mpirun --launcher-extra=--oversubscribe --cpus 2 --debug, log: cpus, instance: 1} + - {name: 2 rank GPU test, options: --ranks-per-node 2 --launcher mpirun --launcher-extra=--oversubscribe --use cuda --gpus 1 --debug, log: gpu, instance: 2} + - {name: 2 rank 2 GPUs test, options: --ranks-per-node 2 --launcher mpirun --launcher-extra=--oversubscribe --use cuda --gpus 2 --debug, log: gpus, instance: 4} + - {name: 2 rank OpenMP test, options: --ranks-per-node 2 --launcher mpirun --launcher-extra=--oversubscribe --use openmp --omps 1 --ompthreads 2 --debug, log: omp, instance: 1} + - {name: 2 rank 2 NUMA OpenMPs test, options: --ranks-per-node 2 --launcher mpirun --launcher-extra=--oversubscribe --use openmp --omps 2 --ompthreads 2 --numamem 2048 --debug, log: omps, instance: 1} - {name: Eager execution test, options: --use eager --debug, log: eager, instance: 1} name: ${{ matrix.name }} steps: From dd443ac56c844313444cbbeb8a0ee9b1d1e1ffa6 Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Fri, 1 Sep 2023 15:01:44 -0700 Subject: [PATCH 12/21] Update legate sha --- cmake/versions.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/versions.json b/cmake/versions.json index 30e0437f02..9774077a4c 100644 --- a/cmake/versions.json +++ b/cmake/versions.json @@ -5,7 +5,7 @@ "git_url" : "https://github.com/nv-legate/legate.core.git", "git_shallow": false, "always_download": false, - "git_tag" : "920499de5f778dc351a8dda6931ad1330a536335" + "git_tag" : "5bb6f50c4232684dd671eec5ceca88b6a69fa863" } } } From 9e47361c4d157f892c867157d1a60ee9abc44243 Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Tue, 5 Sep 2023 19:22:46 -0700 Subject: [PATCH 13/21] bump up legate --- cmake/versions.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/versions.json b/cmake/versions.json index 9774077a4c..3303ee7df5 100644 --- a/cmake/versions.json +++ b/cmake/versions.json @@ -5,7 +5,7 @@ "git_url" : "https://github.com/nv-legate/legate.core.git", "git_shallow": false, "always_download": false, - "git_tag" : "5bb6f50c4232684dd671eec5ceca88b6a69fa863" + "git_tag" : "72acbb5cf4237233ff6b0143bd4530375a87b903" } } } From c71a2f9eb601025404c7f90460d3e13dd9ee7894 Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Wed, 6 Sep 2023 11:54:38 -0700 Subject: [PATCH 14/21] bump up legate --- cmake/versions.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/versions.json b/cmake/versions.json index 3303ee7df5..644d8f6d4f 100644 --- a/cmake/versions.json +++ b/cmake/versions.json @@ -5,7 +5,7 @@ "git_url" : "https://github.com/nv-legate/legate.core.git", "git_shallow": false, "always_download": false, - "git_tag" : "72acbb5cf4237233ff6b0143bd4530375a87b903" + "git_tag" : "25518c64508a126395282fe76f38586d631a4eb2" } } } From 0351cf31f50792d6b364ea7417d7cf20ba5cb43c Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Wed, 6 Sep 2023 19:03:20 -0700 Subject: [PATCH 15/21] Reduce the number of jobs to 1 --- .github/workflows/ci-ucx.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci-ucx.yml b/.github/workflows/ci-ucx.yml index 298c5e77f8..1dde80d191 100644 --- a/.github/workflows/ci-ucx.yml +++ b/.github/workflows/ci-ucx.yml @@ -71,12 +71,12 @@ jobs: fail-fast: false matrix: include: - - {name: 2 rank 1 CPU test, options: --ranks-per-node 2 --launcher mpirun --launcher-extra=--oversubscribe --cpus 1 --unit --debug, log: cpu, instance: 1} - - {name: 2 rank 2 CPUs test, options: --ranks-per-node 2 --launcher mpirun --launcher-extra=--oversubscribe --cpus 2 --debug, log: cpus, instance: 1} - - {name: 2 rank GPU test, options: --ranks-per-node 2 --launcher mpirun --launcher-extra=--oversubscribe --use cuda --gpus 1 --debug, log: gpu, instance: 2} - - {name: 2 rank 2 GPUs test, options: --ranks-per-node 2 --launcher mpirun --launcher-extra=--oversubscribe --use cuda --gpus 2 --debug, log: gpus, instance: 4} - - {name: 2 rank OpenMP test, options: --ranks-per-node 2 --launcher mpirun --launcher-extra=--oversubscribe --use openmp --omps 1 --ompthreads 2 --debug, log: omp, instance: 1} - - {name: 2 rank 2 NUMA OpenMPs test, options: --ranks-per-node 2 --launcher mpirun --launcher-extra=--oversubscribe --use openmp --omps 2 --ompthreads 2 --numamem 2048 --debug, log: omps, instance: 1} + - {name: 2 rank 1 CPU test, options: -j 1 --ranks-per-node 2 --launcher mpirun --launcher-extra=--oversubscribe --cpus 1 --unit --debug, log: cpu, instance: 1} + - {name: 2 rank 2 CPUs test, options: -j 1 --ranks-per-node 2 --launcher mpirun --launcher-extra=--oversubscribe --cpus 2 --debug, log: cpus, instance: 1} + - {name: 2 rank GPU test, options: -j 1 --ranks-per-node 2 --launcher mpirun --launcher-extra=--oversubscribe --use cuda --gpus 1 --debug, log: gpu, instance: 2} + - {name: 2 rank 2 GPUs test, options: -j 1 --ranks-per-node 2 --launcher mpirun --launcher-extra=--oversubscribe --use cuda --gpus 2 --debug, log: gpus, instance: 4} + - {name: 2 rank OpenMP test, options: -j 1 --ranks-per-node 2 --launcher mpirun --launcher-extra=--oversubscribe --use openmp --omps 1 --ompthreads 2 --debug, log: omp, instance: 1} + - {name: 2 rank 2 NUMA OpenMPs test, options: -j 1 --ranks-per-node 2 --launcher mpirun --launcher-extra=--oversubscribe --use openmp --omps 2 --ompthreads 2 --numamem 2048 --debug, log: omps, instance: 1} - {name: Eager execution test, options: --use eager --debug, log: eager, instance: 1} name: ${{ matrix.name }} steps: From f5c292707a540f9fd816a93c8a399ec2c8607ae1 Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Thu, 7 Sep 2023 08:48:17 -0700 Subject: [PATCH 16/21] Try more workers --- .github/workflows/ci-ucx.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci-ucx.yml b/.github/workflows/ci-ucx.yml index 1dde80d191..4496a98239 100644 --- a/.github/workflows/ci-ucx.yml +++ b/.github/workflows/ci-ucx.yml @@ -71,12 +71,12 @@ jobs: fail-fast: false matrix: include: - - {name: 2 rank 1 CPU test, options: -j 1 --ranks-per-node 2 --launcher mpirun --launcher-extra=--oversubscribe --cpus 1 --unit --debug, log: cpu, instance: 1} - - {name: 2 rank 2 CPUs test, options: -j 1 --ranks-per-node 2 --launcher mpirun --launcher-extra=--oversubscribe --cpus 2 --debug, log: cpus, instance: 1} - - {name: 2 rank GPU test, options: -j 1 --ranks-per-node 2 --launcher mpirun --launcher-extra=--oversubscribe --use cuda --gpus 1 --debug, log: gpu, instance: 2} - - {name: 2 rank 2 GPUs test, options: -j 1 --ranks-per-node 2 --launcher mpirun --launcher-extra=--oversubscribe --use cuda --gpus 2 --debug, log: gpus, instance: 4} - - {name: 2 rank OpenMP test, options: -j 1 --ranks-per-node 2 --launcher mpirun --launcher-extra=--oversubscribe --use openmp --omps 1 --ompthreads 2 --debug, log: omp, instance: 1} - - {name: 2 rank 2 NUMA OpenMPs test, options: -j 1 --ranks-per-node 2 --launcher mpirun --launcher-extra=--oversubscribe --use openmp --omps 2 --ompthreads 2 --numamem 2048 --debug, log: omps, instance: 1} + - {name: 2 rank 1 CPU test, options: -j 8 --ranks-per-node 2 --launcher mpirun --launcher-extra=--oversubscribe --cpus 1 --unit --debug, log: cpu, instance: 1} + - {name: 2 rank 2 CPUs test, options: -j 8 --ranks-per-node 2 --launcher mpirun --launcher-extra=--oversubscribe --cpus 2 --debug, log: cpus, instance: 1} + - {name: 2 rank GPU test, options: -j 4 --ranks-per-node 2 --launcher mpirun --launcher-extra=--oversubscribe --use cuda --gpus 1 --debug, log: gpu, instance: 2} + - {name: 2 rank 2 GPUs test, options: -j 4 --ranks-per-node 2 --launcher mpirun --launcher-extra=--oversubscribe --use cuda --gpus 2 --debug, log: gpus, instance: 4} + - {name: 2 rank OpenMP test, options: -j 8 --ranks-per-node 2 --launcher mpirun --launcher-extra=--oversubscribe --use openmp --omps 1 --ompthreads 2 --debug, log: omp, instance: 1} + - {name: 2 rank 2 NUMA OpenMPs test, options: -j 4 --ranks-per-node 2 --launcher mpirun --launcher-extra=--oversubscribe --use openmp --omps 2 --ompthreads 2 --numamem 2048 --debug, log: omps, instance: 1} - {name: Eager execution test, options: --use eager --debug, log: eager, instance: 1} name: ${{ matrix.name }} steps: From 34cdee0bd8cdeb8df261d1d79b24225b9bdd8beb Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Mon, 11 Sep 2023 09:55:50 -0700 Subject: [PATCH 17/21] bump up legate --- cmake/versions.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/versions.json b/cmake/versions.json index 644d8f6d4f..6762ea02bf 100644 --- a/cmake/versions.json +++ b/cmake/versions.json @@ -5,7 +5,7 @@ "git_url" : "https://github.com/nv-legate/legate.core.git", "git_shallow": false, "always_download": false, - "git_tag" : "25518c64508a126395282fe76f38586d631a4eb2" + "git_tag" : "d30e062848565c9556a4b8716ba335cf1aa499d2" } } } From 3b7a0bed9020aff4e6102271574e061db992fff1 Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Tue, 12 Sep 2023 13:55:11 -0700 Subject: [PATCH 18/21] bump up legate --- cmake/versions.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/versions.json b/cmake/versions.json index 6762ea02bf..f3264d6510 100644 --- a/cmake/versions.json +++ b/cmake/versions.json @@ -5,7 +5,7 @@ "git_url" : "https://github.com/nv-legate/legate.core.git", "git_shallow": false, "always_download": false, - "git_tag" : "d30e062848565c9556a4b8716ba335cf1aa499d2" + "git_tag" : "8736b2995c9e01b3f9ee80e2225f5bd153d98177" } } } From ed425188153749bef886157b6c94afa68f8a8b3c Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Tue, 19 Sep 2023 16:29:54 -0700 Subject: [PATCH 19/21] bump up legate --- cmake/versions.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/versions.json b/cmake/versions.json index f3264d6510..a41ef2d173 100644 --- a/cmake/versions.json +++ b/cmake/versions.json @@ -5,7 +5,7 @@ "git_url" : "https://github.com/nv-legate/legate.core.git", "git_shallow": false, "always_download": false, - "git_tag" : "8736b2995c9e01b3f9ee80e2225f5bd153d98177" + "git_tag" : "f545f9326c21c12fc3a69a83a2bea12b2d2bfa54" } } } From 4e2fa7fdbf537a2b6d61376b3b233203a718f2f3 Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Fri, 22 Sep 2023 11:38:48 -0700 Subject: [PATCH 20/21] bump up legate --- cmake/versions.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/versions.json b/cmake/versions.json index a41ef2d173..eeefd8b1ee 100644 --- a/cmake/versions.json +++ b/cmake/versions.json @@ -5,7 +5,7 @@ "git_url" : "https://github.com/nv-legate/legate.core.git", "git_shallow": false, "always_download": false, - "git_tag" : "f545f9326c21c12fc3a69a83a2bea12b2d2bfa54" + "git_tag" : "c23e4dd1be998bb7e17adc763405f0206f3528e6" } } } From d4d066c249c07e7dcc17b9cec3eb282f9fa2cd86 Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Thu, 5 Oct 2023 17:15:24 -0700 Subject: [PATCH 21/21] bump up legate --- cmake/versions.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/versions.json b/cmake/versions.json index d5cfcf0d53..4bc9b9a7e3 100644 --- a/cmake/versions.json +++ b/cmake/versions.json @@ -5,7 +5,7 @@ "git_url" : "https://github.com/nv-legate/legate.core.git", "git_shallow": false, "always_download": false, - "git_tag" : "c23e4dd1be998bb7e17adc763405f0206f3528e6" + "git_tag" : "ac33eb20af330bfada2710fdfe7ac4848bc0d990" } } }