diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index f456a23404e..d477f799025 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -39,20 +39,20 @@ jobs: packages: write steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up QEMU - uses: docker/setup-qemu-action@v2 + uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 + uses: docker/setup-buildx-action@v3 - name: Login to DockerHub - uses: docker/login-action@v2 + uses: docker/login-action@v3 with: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - name: Docker meta id: meta - uses: docker/metadata-action@v4 + uses: docker/metadata-action@v5 with: images: ghcr.io/ESMCI/cime tags: | @@ -60,7 +60,7 @@ jobs: type=ref,event=pr,enable=${{ github.event_name == 'pull_request' }} type=sha,format=long - name: Build and push - uses: docker/build-push-action@v3 + uses: docker/build-push-action@v6 with: target: base context: docker/ @@ -76,7 +76,7 @@ jobs: timeout-minutes: 2 steps: - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Set up python uses: actions/setup-python@v2 with: @@ -102,7 +102,7 @@ jobs: python-version: ['3.8', '3.9', '3.10'] steps: - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Run tests shell: bash env: @@ -149,7 +149,7 @@ jobs: driver: "mct" steps: - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Cache inputdata uses: actions/cache@v2 with: @@ -174,6 +174,14 @@ jobs: conda activate base + # container libnetcdf is 4.9.2 as cesm requires esmf >8.6.1 + # e3sm scorpio incompatible with 4.9.2, downgrade to 4.9.1 + # only reference found about scorpio incompatibility with 4.9.2 (https://github.com/E3SM-Project/scorpio/issues/554#issuecomment-1877361470) + # TODO open scorpio issue, possible solutions; 1. 
support two conda environments in container 2. maybe move from conda to spack? build all libraries in image + if [[ "${CIME_MODEL}" == "e3sm" ]]; then + mamba install -y 'libnetcdf=4.9.1' + fi + pytest -vvv --cov=CIME --machine docker --no-fortran-run --no-teardown CIME/tests/test_sys* - uses: mxschmitt/action-tmate@v3 if: ${{ !always() }} diff --git a/CIME/SystemTests/eri.py b/CIME/SystemTests/eri.py index 0aff0431373..c7ac3142158 100644 --- a/CIME/SystemTests/eri.py +++ b/CIME/SystemTests/eri.py @@ -83,7 +83,7 @@ def run_phase(self): start_1 = run_startdate stop_n2 = stop_n - stop_n1 - rest_n2 = int(stop_n2 / 2 + 1) + hist_n = stop_n2 start_1_year, start_1_month, start_1_day = [ @@ -93,10 +93,28 @@ def run_phase(self): start_2 = "{:04d}-{:02d}-{:02d}".format( start_2_year, start_1_month, start_1_day ) + rest_n2 = self._set_restart_interval( + stop_n=stop_n2, + stop_option=stop_option, + startdate=start_2, + starttime=start_tod, + ) stop_n3 = stop_n2 - rest_n2 - rest_n3 = int(stop_n3 / 2 + 1) + ninst = self._case.get_value("NINST") + drvrest = "rpointer.cpl" + if ninst > 1: + drvrest += "_0001" + drvrest += self._rest_time + self._set_drv_restart_pointer(drvrest) + + rest_n3 = self._set_restart_interval( + stop_n=stop_n3, + stop_option=stop_option, + startdate=start_2, + starttime=start_tod, + ) stop_n4 = stop_n3 - rest_n3 expect(stop_n4 >= 1 and stop_n1 >= 1, "Run length too short") @@ -223,8 +241,10 @@ def run_phase(self): self._case.set_value("GET_REFCASE", False) self._case.set_value("CONTINUE_RUN", False) self._case.set_value("STOP_N", stop_n3) - self._case.set_value("REST_OPTION", stop_option) - self._case.set_value("REST_N", rest_n3) + self._set_restart_interval( + stop_n=stop_n3, startdate=refdate_3, starttime=refsec_3 + ) + self._case.set_value("HIST_OPTION", stop_option) self._case.set_value("HIST_N", stop_n2) self._case.set_value("DOUT_S", False) @@ -266,6 +286,12 @@ def run_phase(self): self._case.set_value("DOUT_S", False) 
self._case.set_value("HIST_OPTION", stop_option) self._case.set_value("HIST_N", hist_n) + drvrest = "rpointer.cpl" + if ninst > 1: + drvrest += "_0001" + drvrest += self._rest_time + + self._set_drv_restart_pointer(drvrest) self._case.flush() # do the restart run (short term archiving is off) diff --git a/CIME/SystemTests/ers.py b/CIME/SystemTests/ers.py index 0e4bccd953a..42cc83d3481 100644 --- a/CIME/SystemTests/ers.py +++ b/CIME/SystemTests/ers.py @@ -36,7 +36,13 @@ def _ers_second_phase(self): pfile, os.path.join(os.path.dirname(pfile), "run1." + os.path.basename(pfile)), ) + ninst = self._case.get_value("NINST") + drvrest = "rpointer.cpl" + if ninst > 1: + drvrest += "_0001" + drvrest += self._rest_time + self._set_drv_restart_pointer(drvrest) self._case.set_value("HIST_N", stop_n) self._case.set_value("STOP_N", stop_new) self._case.set_value("CONTINUE_RUN", True) diff --git a/CIME/SystemTests/restart_tests.py b/CIME/SystemTests/restart_tests.py index 4252739d326..d89acf70070 100644 --- a/CIME/SystemTests/restart_tests.py +++ b/CIME/SystemTests/restart_tests.py @@ -50,3 +50,9 @@ def _case_two_setup(self): self._case.set_value("STOP_N", stop_new) self._case.set_value("CONTINUE_RUN", True) self._case.set_value("REST_OPTION", "never") + ninst = self._case.get_value("NINST") + drvrest = "rpointer.cpl" + if ninst > 1: + drvrest += "_0001" + drvrest += self._rest_time + self._set_drv_restart_pointer(drvrest) diff --git a/CIME/SystemTests/system_tests_common.py b/CIME/SystemTests/system_tests_common.py index 42e1b897e58..179d4f614d8 100644 --- a/CIME/SystemTests/system_tests_common.py +++ b/CIME/SystemTests/system_tests_common.py @@ -37,8 +37,9 @@ load_coupler_customization, ) import CIME.build as build +from datetime import datetime, timedelta +import glob, gzip, time, traceback, os, math, calendar -import glob, gzip, time, traceback, os, math from contextlib import ExitStack logger = logging.getLogger(__name__) @@ -111,6 +112,7 @@ def __init__( 
self._init_environment(caseroot) self._init_locked_files(caseroot, expected) self._skip_pnl = False + self._rest_time = None self._cpllog = ( "med" if self._case.get_value("COMP_INTERFACE") == "nuopc" else "cpl" ) @@ -119,10 +121,22 @@ def __init__( self._user_separate_builds = False self._expected_num_cmp = None self._rest_n = None + # Does the model support this variable? + self._drv_restart_pointer = self._case.get_value("DRV_RESTART_POINTER") + + def _set_drv_restart_pointer(self, value): + if self._drv_restart_pointer: + logger.info("setting DRV_RESTART_POINTER={}".format(value)) + self._case.set_value("DRV_RESTART_POINTER", value) + + def _set_restart_interval( + self, stop_n=None, stop_option=None, startdate=None, starttime=None + ): + if not stop_n: + stop_n = self._case.get_value("STOP_N") + if not stop_option: + stop_option = self._case.get_value("STOP_OPTION") - def _set_restart_interval(self): - stop_n = self._case.get_value("STOP_N") - stop_option = self._case.get_value("STOP_OPTION") self._case.set_value("REST_OPTION", stop_option) # We need to make sure the run is long enough and to set REST_N to a # value that makes sense for all components @@ -172,19 +186,69 @@ def _set_restart_interval(self): factor = 315360000 else: expect(False, f"stop_option {stop_option} not available for this test") - stop_n = int(stop_n * factor // coupling_secs) rest_n = math.ceil((stop_n // 2 + 1) * coupling_secs / factor) expect(stop_n > 0, "Bad STOP_N: {:d}".format(stop_n)) - expect(stop_n > 2, "ERROR: stop_n value {:d} too short".format(stop_n)) + if not starttime: + starttime = self._case.get_value("START_TOD") + if not startdate: + startdate = self._case.get_value("RUN_STARTDATE") + if "-" in startdate: + startdatetime = datetime.fromisoformat(startdate) + timedelta( + seconds=int(starttime) + ) + else: + startdatetime = datetime.strptime(startdate, "%Y%m%d") + timedelta( + seconds=int(starttime) + ) + + cal = self._case.get_value("CALENDAR") + + if stop_option == 
"nsteps": + rtd = timedelta(seconds=rest_n * factor) + elif stop_option == "nminutes": + rtd = timedelta(minutes=rest_n) + elif stop_option == "nhours": + rtd = timedelta(hours=rest_n) + elif stop_option == "ndays": + rtd = timedelta(days=rest_n) + elif stop_option == "nyears": + rtd = timedelta(days=rest_n * 365) + else: + expect(False, f"stop_option {stop_option} not available for this test") + + restdatetime = startdatetime + rtd + if cal == "NO_LEAP": + dayscorrected = 0 + syr = startdatetime.year + smon = startdatetime.month + ryr = restdatetime.year + rmon = restdatetime.month + while ryr > syr: + if rmon > 2 and calendar.isleap(ryr): + dayscorrected += 1 + ryr = ryr - 1 + if rmon > 2 and smon <= 2: + if calendar.isleap(syr): + dayscorrected += 1 + restdatetime = restdatetime + timedelta(days=dayscorrected) + self._rest_time = ( + f".{restdatetime.year:04d}-{restdatetime.month:02d}-{restdatetime.day:02d}-" + ) + h = restdatetime.hour + m = restdatetime.minute + s = restdatetime.second + self._rest_time += f"{(h*3600+m*60+s):05d}" + logger.info( "doing an {0} {1} initial test with restart file at {2} {1}".format( str(stop_n), stop_option, str(rest_n) ) ) self._case.set_value("REST_N", rest_n) + return rest_n def _init_environment(self, caseroot): @@ -261,7 +325,9 @@ def build( sharedlib_only=(phase_name == SHAREDLIB_BUILD_PHASE), model_only=(phase_name == MODEL_BUILD_PHASE), ) - except BaseException as e: # We want KeyboardInterrupts to generate FAIL status + except ( + BaseException + ) as e: # We want KeyboardInterrupts to generate FAIL status success = False if isinstance(e, CIMEError): # Don't want to print stacktrace for a build failure since that diff --git a/CIME/data/config/config_tests.xml b/CIME/data/config/config_tests.xml index 0d5a4b86c68..82fd600ac96 100644 --- a/CIME/data/config/config_tests.xml +++ b/CIME/data/config/config_tests.xml @@ -261,7 +261,6 @@ NODEFAIL Tests restart upon detected node failure. 
Generates fake failu 11 FALSE TRUE - $STOP_N / 2 + 1 $STOP_OPTION $STOP_OPTION $STOP_N @@ -272,7 +271,6 @@ NODEFAIL Tests restart upon detected node failure. Generates fake failu 1 ndays 11 - $STOP_N / 2 + 1 $STOP_OPTION $STOP_N $STOP_OPTION @@ -298,7 +296,6 @@ NODEFAIL Tests restart upon detected node failure. Generates fake failu 1 ndays 11 - $STOP_N / 2 - 1 $STOP_OPTION $STOP_N $STOP_OPTION diff --git a/CIME/hist_utils.py b/CIME/hist_utils.py index 2dab6e2df72..253cae0b926 100644 --- a/CIME/hist_utils.py +++ b/CIME/hist_utils.py @@ -37,6 +37,7 @@ FIELDLISTS_DIFFER = "had a different field list from" DIFF_COMMENT = "did NOT match" FAILED_OPEN = "Failed to open file" +IDENTICAL = "the two files seem to be IDENTICAL" # COMPARISON_COMMENT_OPTIONS should include all of the above: these are any of the special # comment strings that describe the reason for a comparison failure COMPARISON_COMMENT_OPTIONS = set( @@ -719,6 +720,8 @@ def get_ts_synopsis(comments): >>> get_ts_synopsis('') '' + >>> get_ts_synopsis('\n') + '' >>> get_ts_synopsis('big error') 'big error' >>> get_ts_synopsis('big error\n') @@ -740,13 +743,19 @@ def get_ts_synopsis(comments): >>> get_ts_synopsis('file1=\nfile2=\nFailed to open file\n') 'ERROR CPRNC failed to open files' >>> get_ts_synopsis('file1=\nfile2=\nSome other error\n') - 'Could not interpret CPRNC output' + 'ERROR Could not interpret CPRNC output' + >>> get_ts_synopsis('file1=\nfile2=\n diff_test: the two files seem to be IDENTICAL \n') + '' """ comments = comments.strip() if comments == "" or "\n" not in comments: return comments + # Empty synopsis when files are identical + if re.search(IDENTICAL, comments) is not None: + return "" + fieldlist_differences = re.search(FIELDLISTS_DIFFER, comments) is not None baseline_fail = re.search(NO_COMPARE, comments) is not None real_fail = [ @@ -779,6 +788,6 @@ def get_ts_synopsis(comments): elif open_fail: synopsis = "ERROR CPRNC failed to open files" else: - synopsis = "Could not interpret
CPRNC output" + synopsis = "ERROR Could not interpret CPRNC output" return synopsis diff --git a/CIME/tests/test_unit_system_tests.py b/CIME/tests/test_unit_system_tests.py index 1c05bed45be..0ada347183b 100644 --- a/CIME/tests/test_unit_system_tests.py +++ b/CIME/tests/test_unit_system_tests.py @@ -103,6 +103,7 @@ def test_check_for_memleak_runtime_error( str(caseroot), "ERIO.ne30_g16_rx1.A.docker_gnu", "mct", + "rpointer.cpl", 0.01, ) @@ -156,6 +157,7 @@ def test_check_for_memleak_not_enough_samples( str(caseroot), "ERIO.ne30_g16_rx1.A.docker_gnu", "mct", + None, 0.01, ) @@ -211,6 +213,7 @@ def test_check_for_memleak_found( str(caseroot), "ERIO.ne30_g16_rx1.A.docker_gnu", "mct", + None, 0.01, ) @@ -268,6 +271,7 @@ def test_check_for_memleak( str(caseroot), "ERIO.ne30_g16_rx1.A.docker_gnu", "mct", + None, 0.01, ) @@ -300,6 +304,7 @@ def test_compare_throughput(self, append_testlog, perf_compare_throughput_baseli str(Path(tempdir) / "caseroot"), "ERIO.ne30_g16_rx1.A.docker_gnu", "mct", + None, ) common = SystemTestsCommon(case) @@ -329,6 +334,7 @@ def test_compare_throughput_error_diff( str(Path(tempdir) / "caseroot"), "ERIO.ne30_g16_rx1.A.docker_gnu", "mct", + "rpointer.cpl.0001-01-01", ) common = SystemTestsCommon(case) @@ -358,6 +364,7 @@ def test_compare_throughput_fail( str(Path(tempdir) / "caseroot"), "ERIO.ne30_g16_rx1.A.docker_gnu", "mct", + None, ) common = SystemTestsCommon(case) @@ -388,6 +395,7 @@ def test_compare_memory(self, append_testlog, perf_compare_memory_baseline): str(caseroot), "ERIO.ne30_g16_rx1.A.docker_gnu", "mct", + "rpointer.cpl", ) common = SystemTestsCommon(case) @@ -403,7 +411,7 @@ def test_compare_memory(self, append_testlog, perf_compare_memory_baseline): @mock.patch("CIME.SystemTests.system_tests_common.perf_compare_memory_baseline") @mock.patch("CIME.SystemTests.system_tests_common.append_testlog") - def test_compare_memory_erorr_diff( + def test_compare_memory_error_diff( self, append_testlog, perf_compare_memory_baseline ): 
perf_compare_memory_baseline.return_value = (None, "Error diff value") @@ -417,6 +425,7 @@ def test_compare_memory_erorr_diff( str(caseroot), "ERIO.ne30_g16_rx1.A.docker_gnu", "mct", + None, ) common = SystemTestsCommon(case) @@ -429,7 +438,7 @@ def test_compare_memory_erorr_diff( @mock.patch("CIME.SystemTests.system_tests_common.perf_compare_memory_baseline") @mock.patch("CIME.SystemTests.system_tests_common.append_testlog") - def test_compare_memory_erorr_fail( + def test_compare_memory_error_fail( self, append_testlog, perf_compare_memory_baseline ): perf_compare_memory_baseline.return_value = ( @@ -446,6 +455,7 @@ def test_compare_memory_erorr_fail( str(caseroot), "ERIO.ne30_g16_rx1.A.docker_gnu", "mct", + "rpointer.cpl", ) common = SystemTestsCommon(case) @@ -469,6 +479,7 @@ def test_generate_baseline(self): str(caseroot), "ERIO.ne30_g16_rx1.A.docker_gnu", "mct", + None, str(run_dir), "case.std", str(baseline_root), @@ -500,7 +511,6 @@ def test_generate_baseline(self): common._generate_baseline() baseline_dir = baseline_root / "master" / "ERIO.ne30_g16_rx1.A.docker_gnu" - assert (baseline_dir / "cpl.log.gz").exists() assert (baseline_dir / "cpl-tput.log").exists() assert (baseline_dir / "cpl-mem.log").exists() @@ -577,6 +587,7 @@ def test_dry_run(self): "/caseroot", "SMS.f19_g16.S", "cpl", + None, "/caseroot", "SMS.f19_g16.S", ) @@ -595,6 +606,7 @@ def test_dry_run(self): "/caseroot", "SMS.f19_g16.S", "cpl", + "rpointer.cpl", "/caseroot", "SMS.f19_g16.S", ) diff --git a/CIME/tests/test_unit_xml_tests.py b/CIME/tests/test_unit_xml_tests.py index 88a9750130d..a79bb3b9c0a 100644 --- a/CIME/tests/test_unit_xml_tests.py +++ b/CIME/tests/test_unit_xml_tests.py @@ -71,6 +71,7 @@ def test_support_single_exe_error(self, _setup_cases_if_not_yet_done): f"{caseroot}", "ERP.f19_g16.S", "cpl", + None, "ERP.f19_g16.S", f"{caseroot}", "ERP.f19_g16.S", diff --git a/docker/cime.yaml b/docker/cime.yaml index b8ffad68709..f0f2871a976 100644 --- a/docker/cime.yaml +++ 
b/docker/cime.yaml @@ -16,12 +16,12 @@ dependencies: - openssh - lapack - blas - - libnetcdf=4.9.1=*openmpi* + - libnetcdf=4.9.2=*openmpi* - netcdf-fortran=*=*openmpi* - esmf=*=*openmpi* - - gcc_linux-64=10.* - - gxx_linux-64=10.* - - gfortran_linux-64=10.* + - gcc_linux-64=12.* + - gxx_linux-64=12.* + - gfortran_linux-64=12.* - openmpi-mpifort - gcc - gxx