From 31e544a9306fcbe2c29fc709d6ef0bf6e3e1acb8 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Tue, 30 Jul 2024 23:02:48 -0700 Subject: [PATCH 1/3] Using lustre for CI tests; Using new cleanup strategy. --- .gitlab/jobs-mpi.yml | 20 ++++++++++++++++++++ .gitlab/jobs-prod.yml | 7 +++++-- .gitlab/jobs-seq.yml | 12 ++++++++++++ .gitlab/machines.yml | 2 ++ .gitlab/os.yml | 3 +++ .gitlab/scripts.yml | 24 ++++++++++++++++++++---- 6 files changed, 62 insertions(+), 6 deletions(-) diff --git a/.gitlab/jobs-mpi.yml b/.gitlab/jobs-mpi.yml index d187c0953..2f9ed1556 100644 --- a/.gitlab/jobs-mpi.yml +++ b/.gitlab/jobs-mpi.yml @@ -8,6 +8,10 @@ toss_gcc_mvapich2_cxxonly_build: extends: [.toss_resource1, .gcc_mvapich2_cxxonly, .build] needs: [toss_gcc_mvapich2_cxxonly_tpls] +toss_gcc_mvapich2_cxxonly_cleanup: + extends: [.toss_resource1, .gcc_mvapich2_cxxonly, .cleanup_dir] + needs: [toss_gcc_mvapich2_cxxonly_build] + toss_gcc_mvapich2_tpls: @@ -21,6 +25,10 @@ toss_gcc_mvapich2_test: extends: [.toss_resource2, .gcc_mvapich2, .run_ats] needs: [toss_gcc_mvapich2_build] +toss_gcc_mvapich2_cleanup: + extends: [.toss_resource2, .gcc_mvapich2, .cleanup_dir] + needs: [toss_gcc_mvapich2_test] + toss_clang_mvapich2_tpls: @@ -34,6 +42,10 @@ toss_clang_mvapich2_test: extends: [.toss_resource2, .clang_mvapich2, .run_ats] needs: [toss_clang_mvapich2_build] +toss_clang_mvapich2_cleanup: + extends: [.toss_resource2, .clang_mvapich2, .cleanup_dir] + needs: [toss_clang_mvapich2_test] + #toss_oneapi_2022_1_mvapich2: @@ -52,6 +64,10 @@ blueos_gcc_spectrum_test: extends: [.blueos_resource1, .gcc_spectrum, .run_ats] needs: [blueos_gcc_spectrum_build] +blueos_gcc_spectrum_cleanup: + extends: [.blueos_resource1, .gcc_spectrum, .cleanup_dir] + needs: [blueos_gcc_spectrum_test] + blueos_cuda_11_gcc_spectrum_tpls: @@ -66,5 +82,9 @@ blueos_cuda_11_gcc_spectrum_test: needs: [blueos_cuda_11_gcc_spectrum_build] allow_failure: true +blueos_cuda_11_gcc_spectrum_cleanup: + extends: [.blueos_resource2, .cuda_11_gcc_spectrum, .cleanup_dir] + needs: [blueos_cuda_11_gcc_spectrum_test] + diff --git a/.gitlab/jobs-prod.yml b/.gitlab/jobs-prod.yml index 488ac824e..7bd4a7cef 100644 --- a/.gitlab/jobs-prod.yml +++ b/.gitlab/jobs-prod.yml @@ -34,6 +34,9 @@ toss_release_permissions: # ------------------------------------------------------------------------------ # CLEAN OLD BUILD DIRS -cleanup_build_dirs: - extends: [.clean_dirs] +cleanup_old_dirs_toss: + extends: [.toss_resource_general, .clean_old_dirs] + +cleanup_old_dirs_blueos: + extends: [.blueos_resource_general, .clean_old_dirs] diff --git a/.gitlab/jobs-seq.yml b/.gitlab/jobs-seq.yml index 34847a7b9..d423f620d 100644 --- a/.gitlab/jobs-seq.yml +++ b/.gitlab/jobs-seq.yml @@ -12,6 +12,10 @@ toss_gcc_~mpi_test: extends: [.gcc_~mpi, .run_ats, .toss_resource1] needs: [toss_gcc_~mpi_build] +toss_gcc_~mpi_cleanup: + extends: [.gcc_~mpi, .cleanup_dir, .toss_resource1] + needs: [toss_gcc_~mpi_test] + blueos_cuda_11_gcc_~mpi_tpls: extends: [.blueos_resource2, .cuda_11_gcc_~mpi, .tpls] @@ -24,6 +28,10 @@ blueos_cuda_11_gcc_~mpi_test: extends: [.blueos_resource2, .cuda_11_gcc_~mpi, .run_ats] needs: [blueos_cuda_11_gcc_~mpi_build] +blueos_cuda_11_gcc_~mpi_cleanup: + extends: [.blueos_resource2, .cuda_11_gcc_~mpi, .cleanup_dir] + needs: [blueos_cuda_11_gcc_~mpi_test] + blueos_gcc_~mpi_Debug_tpls: extends: [.blueos_resource1, .gcc_~mpi_Debug, .tpls] @@ -35,3 +43,7 @@ blueos_gcc_~mpi_Debug_build: blueos_gcc_~mpi_Debug_test: extends: [.blueos_resource1, .gcc_~mpi_Debug, .run_ats] needs: [blueos_gcc_~mpi_Debug_build] + +blueos_gcc_~mpi_Debug_cleanup: + extends: [.blueos_resource1, .gcc_~mpi_Debug, .cleanup_dir] + needs: [blueos_gcc_~mpi_Debug_test] diff --git a/.gitlab/machines.yml b/.gitlab/machines.yml index 8657ceeb4..95b01e765 100644 --- a/.gitlab/machines.yml +++ b/.gitlab/machines.yml @@ -10,6 +10,7 @@ PARTITION: pdebug BUILD_ALLOC: srun -N 1 -c 36 -p pdebug -t 60 TEST_ALLOC: '' + CLEAN_ALLOC: srun -n 20 extends: [.on_toss_4_x86] .on_lassen: @@ -20,6 +21,7 @@ HOSTNAME: 'lassen' BUILD_ALLOC: lalloc 1 -W 60 TEST_ALLOC: $BUILD_ALLOC + CLEAN_ALLOC: lalloc 1 lrun -n 20 LC_MODULES: "cuda/11.1.0" extends: [.on_blueos_3_ppc64] diff --git a/.gitlab/os.yml b/.gitlab/os.yml index 380b48312..8ba613b90 100644 --- a/.gitlab/os.yml +++ b/.gitlab/os.yml @@ -12,6 +12,7 @@ ARCH: 'toss_3_x86_64_ib' GCC_VERSION: '8.3.1' CLANG_VERSION: '9.0.0' + SPHERAL_BUILDS_DIR: /p/lustre1/sphapp/spheral-ci-builds extends: [.sys_config] .on_toss_4_x86: @@ -19,6 +20,7 @@ ARCH: 'toss_4_x86_64_ib' GCC_VERSION: '10.3.1' CLANG_VERSION: '14.0.6' + SPHERAL_BUILDS_DIR: /p/lustre1/sphapp/spheral-ci-builds extends: [.sys_config] .on_blueos_3_ppc64: @@ -26,5 +28,6 @@ ARCH: 'blueos_3_ppc64le_ib_p9' GCC_VERSION: '8.3.1' CLANG_VERSION: '9.0.0' + SPHERAL_BUILDS_DIR: /p/gpfs1/sphapp/spheral-ci-builds extends: [.sys_config] diff --git a/.gitlab/scripts.yml b/.gitlab/scripts.yml index f78314f5b..7f7d81abc 100644 --- a/.gitlab/scripts.yml +++ b/.gitlab/scripts.yml @@ -6,6 +6,7 @@ .tpls: stage: tpls script: + - echo $USER - CI_BUILD_DIR=$SPHERAL_BUILDS_DIR/$CI_JOB_ID/project - echo $CI_BUILD_DIR &> ci-dir.txt && echo $CI_JOB_NAME &> job-name.txt - echo $CI_BUILD_DIR && echo $CI_PROJECT_DIR @@ -62,6 +63,17 @@ exit_codes: - 80 +.cleanup_dir: + stage: cleanup + variables: + GIT_STRATEGY: none + script: + - CI_BUILD_DIR=$(cat ci-dir.txt) + + - ml load mpifileutils + - cd $SPHERAL_BUILDS_DIR + - $CLEAN_ALLOC drm $CI_BUILD_DIR/.. + # ------------------------------------------------------------------------------ # Shared TPL scripts. # ------------------------------------------------------------------------------ @@ -153,15 +165,19 @@ # This job searches our SPHERAL_BUILDS_DIR and deletes all but the N most recent builds. # This should be enough of a buffer that we likely won't delete a build mid pipeline, # and never fill the sphapp workspace storage. -.clean_dirs: +.clean_old_dirs: stage: cleanup variables: GIT_STRATEGY: none script: - - ml load mpifileutils - cd $SPHERAL_BUILDS_DIR - - source $CI_PROJECT_DIR/$SCRIPT_DIR/gitlab/clean_spheral_builds.sh 40 - extends: [.toss_resource_general] + + - MAX_DIR=30 + - DIR_LIST=$(ls -ltd * | sed "1, $MAX_DIR d" | rev | cut -d ' ' -f1 | rev | paste -sd ' ' - ) + - echo $DIR_LIST + + - ml load mpifileutils + - if [[ $DIR_LIST ]]; then $CLEAN_ALLOC drm $DIR_LIST; else echo "No directories to remove at this time."; fi when: always .merge_pr_rule: From c636fcf58db27fa9b62dea91ab5053b6748581e0 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Tue, 30 Jul 2024 23:16:10 -0700 Subject: [PATCH 2/3] Notes on gitlab CI cleanup strategy. --- RELEASE_NOTES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index cdb57b0bc..a0f78836a 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -19,6 +19,7 @@ Notable changes include: * Build changes / improvements: * Distributed source directory must always be built now. * Git strategies in the Gitlab CI are fixed so a clone only occurs on the first stage for each job, instead of for all stages for each job. + * New Gitlab CI pipeline cleanup strategy deletes job directories immediately upon successful completion. * Bug Fixes / improvements: * Wrappers for MPI calls are simplified and improved. From 20fa6e409a925ba35b4d998cb9b225f6ccdaed9c Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Tue, 30 Jul 2024 23:28:11 -0700 Subject: [PATCH 3/3] Pass artifacts out of build only stage for cxxonly cleanup. --- .gitlab/scripts.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitlab/scripts.yml b/.gitlab/scripts.yml index 7f7d81abc..5b44f650f 100644 --- a/.gitlab/scripts.yml +++ b/.gitlab/scripts.yml @@ -28,6 +28,10 @@ - CI_BUILD_DIR=$(cat ci-dir.txt) - cd $CI_BUILD_DIR && cat job-name.txt - $BUILD_ALLOC ./$SCRIPT_DIR/devtools/host-config-build.py --host-config gitlab.cmake --build $EXTRA_CMAKE_ARGS + artifacts: + paths: + - ci-dir.txt + - job-name.txt .build_and_test: extends: [.build]