diff --git a/.github/workflows/e2e_tests.yaml b/.github/workflows/e2e_tests.yaml index 816536b6c..733ce9760 100644 --- a/.github/workflows/e2e_tests.yaml +++ b/.github/workflows/e2e_tests.yaml @@ -38,8 +38,8 @@ jobs: - name: Checkout common repo code uses: actions/checkout@v4 with: - repository: 'project-codeflare/codeflare-common' - ref: 'main' + repository: 'sutaakar/codeflare-common' + ref: 'gpu-time-slicing' path: 'common' - name: Set Go @@ -61,6 +61,8 @@ jobs: - name: Install NVidia GPU operator for KinD uses: ./common/github-actions/nvidia-gpu-operator + with: + enable-time-slicing: true - name: Deploy CodeFlare stack id: deploy @@ -115,7 +117,7 @@ jobs: - name: Upload logs uses: actions/upload-artifact@v4 - if: always() && steps.deploy.outcome == 'success' + if: always() && steps.kind-install.outcome == 'success' with: name: logs retention-days: 10 diff --git a/test/e2e/mnist_pytorch_appwrapper_test.go b/test/e2e/mnist_pytorch_appwrapper_test.go index 94239f57c..5f9a6efcb 100644 --- a/test/e2e/mnist_pytorch_appwrapper_test.go +++ b/test/e2e/mnist_pytorch_appwrapper_test.go @@ -41,6 +41,7 @@ func TestMnistPyTorchAppWrapperGpu(t *testing.T) { // Trains the MNIST dataset as a batch Job in an AppWrapper, and asserts successful completion of the training job. func runMnistPyTorchAppWrapper(t *testing.T, accelerator string) { test := With(t) + test.T().Parallel() // Create a namespace and localqueue in that namespace namespace := test.NewTestNamespace() diff --git a/test/e2e/mnist_rayjob_raycluster_test.go b/test/e2e/mnist_rayjob_raycluster_test.go index 0f2490c21..d27b9162a 100644 --- a/test/e2e/mnist_rayjob_raycluster_test.go +++ b/test/e2e/mnist_rayjob_raycluster_test.go @@ -48,6 +48,7 @@ func TestMnistRayJobRayClusterGpu(t *testing.T) { func runMnistRayJobRayCluster(t *testing.T, accelerator string, numberOfGpus int) { test := With(t) + test.T().Parallel() // Create a namespace and localqueue in that namespace namespace := test.NewTestNamespace() @@ -108,6 +109,7 @@ func TestMnistRayJobRayClusterAppWrapperGpu(t *testing.T) { // Same as TestMNISTRayJobRayCluster, except the RayCluster is wrapped in an AppWrapper func runMnistRayJobRayClusterAppWrapper(t *testing.T, accelerator string, numberOfGpus int) { test := With(t) + test.T().Parallel() // Create a namespace and localqueue in that namespace namespace := test.NewTestNamespace() diff --git a/test/e2e/setup.sh b/test/e2e/setup.sh index a7f442e55..2eddda1b3 100755 --- a/test/e2e/setup.sh +++ b/test/e2e/setup.sh @@ -93,5 +93,5 @@ spec: - name: "memory" nominalQuota: "20G" - name: "nvidia.com/gpu" - nominalQuota: "1" + nominalQuota: "4" EOF