ESMCI · jedwards4b · Aug 29, 2024 · Aug 29, 2024 · Sep 3, 2024 · Sep 12, 2024
diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml
@@ -39,28 +39,28 @@ jobs:
       packages: write
     steps:
       - name: Checkout code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
       - name: Set up QEMU
-        uses: docker/setup-qemu-action@v2
+        uses: docker/setup-qemu-action@v3
       - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v2
+        uses: docker/setup-buildx-action@v3
       - name: Login to DockerHub
-        uses: docker/login-action@v2
+        uses: docker/login-action@v3
         with:
           registry: ghcr.io
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
       - name: Docker meta
         id: meta
-        uses: docker/metadata-action@v4
+        uses: docker/metadata-action@v5
         with:
           images: ghcr.io/ESMCI/cime
           tags: |
             type=raw,value=latest,enable=${{ github.event_name == 'push' }}
             type=ref,event=pr,enable=${{ github.event_name == 'pull_request' }}
             type=sha,format=long
       - name: Build and push
-        uses: docker/build-push-action@v3
+        uses: docker/build-push-action@v6
         with:
           target: base
           context: docker/
@@ -76,7 +76,7 @@ jobs:
     timeout-minutes: 2
     steps:
       - name: Checkout code
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
       - name: Set up python
         uses: actions/setup-python@v2
         with:
@@ -102,7 +102,7 @@ jobs:
         python-version: ['3.8', '3.9', '3.10']
     steps:
       - name: Checkout code
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
       - name: Run tests
         shell: bash
         env:
@@ -149,7 +149,7 @@ jobs:
             driver: "mct"
     steps:
       - name: Checkout code
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
       - name: Cache inputdata
         uses: actions/cache@v2
         with:
@@ -174,6 +174,14 @@ jobs:
 
           conda activate base
 
+          # The container ships libnetcdf 4.9.2 because CESM requires ESMF > 8.6.1.
+          # E3SM's scorpio is incompatible with 4.9.2, so downgrade to 4.9.1 for E3SM runs.
+          # The only reference found for the scorpio incompatibility with 4.9.2 is https://github.com/E3SM-Project/scorpio/issues/554#issuecomment-1877361470
+          # TODO: open a scorpio issue. Possible solutions: 1. support two conda environments in the container; 2. move from conda to spack and build all libraries in the image.
+          if [[ "${CIME_MODEL}" == "e3sm" ]]; then
+            mamba install -y 'libnetcdf=4.9.1'
+          fi
+
           pytest -vvv --cov=CIME --machine docker --no-fortran-run --no-teardown CIME/tests/test_sys*
       - uses: mxschmitt/action-tmate@v3
         if: ${{ !always() }}

diff --git a/CIME/SystemTests/eri.py b/CIME/SystemTests/eri.py
@@ -83,7 +83,7 @@ def run_phase(self):
         start_1 = run_startdate
 
         stop_n2 = stop_n - stop_n1
-        rest_n2 = int(stop_n2 / 2 + 1)
+
         hist_n = stop_n2
 
         start_1_year, start_1_month, start_1_day = [
@@ -93,10 +93,28 @@ def run_phase(self):
         start_2 = "{:04d}-{:02d}-{:02d}".format(
             start_2_year, start_1_month, start_1_day
         )
+        rest_n2 = self._set_restart_interval(
+            stop_n=stop_n2,
+            stop_option=stop_option,
+            startdate=start_2,
+            starttime=start_tod,
+        )
 
         stop_n3 = stop_n2 - rest_n2
-        rest_n3 = int(stop_n3 / 2 + 1)
 
+        ninst = self._case.get_value("NINST")
+        drvrest = "rpointer.cpl"
+        if ninst > 1:
+            drvrest += "_0001"
+        drvrest += self._rest_time
+        self._set_drv_restart_pointer(drvrest)
+
+        rest_n3 = self._set_restart_interval(
+            stop_n=stop_n3,
+            stop_option=stop_option,
+            startdate=start_2,
+            starttime=start_tod,
+        )
         stop_n4 = stop_n3 - rest_n3
 
         expect(stop_n4 >= 1 and stop_n1 >= 1, "Run length too short")
@@ -223,8 +241,10 @@ def run_phase(self):
         self._case.set_value("GET_REFCASE", False)
         self._case.set_value("CONTINUE_RUN", False)
         self._case.set_value("STOP_N", stop_n3)
-        self._case.set_value("REST_OPTION", stop_option)
-        self._case.set_value("REST_N", rest_n3)
+        self._set_restart_interval(
+            stop_n=stop_n3, startdate=refdate_3, starttime=refsec_3
+        )
+
         self._case.set_value("HIST_OPTION", stop_option)
         self._case.set_value("HIST_N", stop_n2)
         self._case.set_value("DOUT_S", False)
@@ -266,6 +286,12 @@ def run_phase(self):
         self._case.set_value("DOUT_S", False)
         self._case.set_value("HIST_OPTION", stop_option)
         self._case.set_value("HIST_N", hist_n)
+        drvrest = "rpointer.cpl"
+        if ninst > 1:
+            drvrest += "_0001"
+        drvrest += self._rest_time
+
+        self._set_drv_restart_pointer(drvrest)
         self._case.flush()
 
         # do the restart run (short term archiving is off)

diff --git a/CIME/SystemTests/ers.py b/CIME/SystemTests/ers.py
@@ -36,7 +36,13 @@ def _ers_second_phase(self):
                 pfile,
                 os.path.join(os.path.dirname(pfile), "run1." + os.path.basename(pfile)),
             )
+        ninst = self._case.get_value("NINST")
+        drvrest = "rpointer.cpl"
+        if ninst > 1:
+            drvrest += "_0001"
+        drvrest += self._rest_time
 
+        self._set_drv_restart_pointer(drvrest)
         self._case.set_value("HIST_N", stop_n)
         self._case.set_value("STOP_N", stop_new)
         self._case.set_value("CONTINUE_RUN", True)

diff --git a/CIME/SystemTests/restart_tests.py b/CIME/SystemTests/restart_tests.py
@@ -50,3 +50,9 @@ def _case_two_setup(self):
         self._case.set_value("STOP_N", stop_new)
         self._case.set_value("CONTINUE_RUN", True)
         self._case.set_value("REST_OPTION", "never")
+        ninst = self._case.get_value("NINST")
+        drvrest = "rpointer.cpl"
+        if ninst > 1:
+            drvrest += "_0001"
+        drvrest += self._rest_time
+        self._set_drv_restart_pointer(drvrest)
diff --git a/CIME/SystemTests/system_tests_common.py b/CIME/SystemTests/system_tests_common.py
@@ -37,8 +37,9 @@
     load_coupler_customization,
 )
 import CIME.build as build
+from datetime import datetime, timedelta
+import glob, gzip, time, traceback, os, math, calendar
 
-import glob, gzip, time, traceback, os, math
 from contextlib import ExitStack
 
 logger = logging.getLogger(__name__)
@@ -111,6 +112,7 @@ def __init__(
         self._init_environment(caseroot)
         self._init_locked_files(caseroot, expected)
         self._skip_pnl = False
+        self._rest_time = None
         self._cpllog = (
             "med" if self._case.get_value("COMP_INTERFACE") == "nuopc" else "cpl"
         )
@@ -119,10 +121,22 @@ def __init__(
         self._user_separate_builds = False
         self._expected_num_cmp = None
         self._rest_n = None
+        # Does the model support this variable?
+        self._drv_restart_pointer = self._case.get_value("DRV_RESTART_POINTER")
+
+    def _set_drv_restart_pointer(self, value):
+        if self._drv_restart_pointer:
+            logger.info("setting DRV_RESTART_POINTER={}".format(value))
+            self._case.set_value("DRV_RESTART_POINTER", value)
+
+    def _set_restart_interval(
+        self, stop_n=None, stop_option=None, startdate=None, starttime=None
+    ):
+        if not stop_n:
+            stop_n = self._case.get_value("STOP_N")
+        if not stop_option:
+            stop_option = self._case.get_value("STOP_OPTION")
 
-    def _set_restart_interval(self):
-        stop_n = self._case.get_value("STOP_N")
-        stop_option = self._case.get_value("STOP_OPTION")
         self._case.set_value("REST_OPTION", stop_option)
         # We need to make sure the run is long enough and to set REST_N to a
         # value that makes sense for all components
@@ -172,19 +186,69 @@ def _set_restart_interval(self):
             factor = 315360000
         else:
             expect(False, f"stop_option {stop_option} not available for this test")
-
         stop_n = int(stop_n * factor // coupling_secs)
         rest_n = math.ceil((stop_n // 2 + 1) * coupling_secs / factor)
 
         expect(stop_n > 0, "Bad STOP_N: {:d}".format(stop_n))
-
         expect(stop_n > 2, "ERROR: stop_n value {:d} too short".format(stop_n))
+        if not starttime:
+            starttime = self._case.get_value("START_TOD")
+        if not startdate:
+            startdate = self._case.get_value("RUN_STARTDATE")
+        if "-" in startdate:
+            startdatetime = datetime.fromisoformat(startdate) + timedelta(
+                seconds=int(starttime)
+            )
+        else:
+            startdatetime = datetime.strptime(startdate, "%Y%m%d") + timedelta(
+                seconds=int(starttime)
+            )
+
+        cal = self._case.get_value("CALENDAR")
+
+        if stop_option == "nsteps":
+            rtd = timedelta(seconds=rest_n * factor)
+        elif stop_option == "nminutes":
+            rtd = timedelta(minutes=rest_n)
+        elif stop_option == "nhours":
+            rtd = timedelta(hours=rest_n)
+        elif stop_option == "ndays":
+            rtd = timedelta(days=rest_n)
+        elif stop_option == "nyears":
+            rtd = timedelta(days=rest_n * 365)
+        else:
+            expect(False, f"stop_option {stop_option} not available for this test")
+
+        restdatetime = startdatetime + rtd
+        if cal == "NO_LEAP":
+            dayscorrected = 0
+            syr = startdatetime.year
+            smon = startdatetime.month
+            ryr = restdatetime.year
+            rmon = restdatetime.month
+            while ryr > syr:
+                if rmon > 2 and calendar.isleap(ryr):
+                    dayscorrected += 1
+                ryr = ryr - 1
+            if rmon > 2 and smon <= 2:
+                if calendar.isleap(syr):
+                    dayscorrected += 1
+            restdatetime = restdatetime + timedelta(days=dayscorrected)
+        self._rest_time = (
+            f".{restdatetime.year:04d}-{restdatetime.month:02d}-{restdatetime.day:02d}-"
+        )
+        h = restdatetime.hour
+        m = restdatetime.minute
+        s = restdatetime.second
+        self._rest_time += f"{(h*3600+m*60+s):05d}"
+
         logger.info(
             "doing an {0} {1} initial test with restart file at {2} {1}".format(
                 str(stop_n), stop_option, str(rest_n)
             )
         )
         self._case.set_value("REST_N", rest_n)
+
         return rest_n
 
     def _init_environment(self, caseroot):
@@ -261,7 +325,9 @@ def build(
                         sharedlib_only=(phase_name == SHAREDLIB_BUILD_PHASE),
                         model_only=(phase_name == MODEL_BUILD_PHASE),
                     )
-                except BaseException as e:  # We want KeyboardInterrupts to generate FAIL status
+                except (
+                    BaseException
+                ) as e:  # We want KeyboardInterrupts to generate FAIL status
                     success = False
                     if isinstance(e, CIMEError):
                         # Don't want to print stacktrace for a build failure since that

diff --git a/CIME/data/config/config_tests.xml b/CIME/data/config/config_tests.xml
@@ -261,7 +261,6 @@ NODEFAIL          Tests restart upon detected node failure. Generates fake failu
     <STOP_N>11</STOP_N>
     <DOUT_S>FALSE</DOUT_S>
     <FORCE_BUILD_SMP>TRUE</FORCE_BUILD_SMP>
-    <REST_N>$STOP_N / 2 + 1 </REST_N>
     <REST_OPTION>$STOP_OPTION</REST_OPTION>
     <HIST_OPTION>$STOP_OPTION</HIST_OPTION>
     <HIST_N>$STOP_N</HIST_N>
@@ -272,7 +271,6 @@ NODEFAIL          Tests restart upon detected node failure. Generates fake failu
     <INFO_DBUG>1</INFO_DBUG>
     <STOP_OPTION>ndays</STOP_OPTION>
     <STOP_N>11</STOP_N>
-    <REST_N>$STOP_N / 2 + 1</REST_N>
     <REST_OPTION>$STOP_OPTION</REST_OPTION>
     <HIST_N>$STOP_N</HIST_N>
     <HIST_OPTION>$STOP_OPTION</HIST_OPTION>
@@ -298,7 +296,6 @@ NODEFAIL          Tests restart upon detected node failure. Generates fake failu
     <INFO_DBUG>1</INFO_DBUG>
     <STOP_OPTION>ndays</STOP_OPTION>
     <STOP_N>11</STOP_N>
-    <REST_N>$STOP_N / 2 - 1</REST_N>
     <REST_OPTION>$STOP_OPTION</REST_OPTION>
     <HIST_N>$STOP_N</HIST_N>
     <HIST_OPTION>$STOP_OPTION</HIST_OPTION>

diff --git a/CIME/hist_utils.py b/CIME/hist_utils.py
@@ -37,6 +37,7 @@
 FIELDLISTS_DIFFER = "had a different field list from"
 DIFF_COMMENT = "did NOT match"
 FAILED_OPEN = "Failed to open file"
+IDENTICAL = "the two files seem to be IDENTICAL"
 # COMPARISON_COMMENT_OPTIONS should include all of the above: these are any of the special
 # comment strings that describe the reason for a comparison failure
 COMPARISON_COMMENT_OPTIONS = set(
@@ -719,6 +720,8 @@ def get_ts_synopsis(comments):
 
     >>> get_ts_synopsis('')
     ''
+    >>> get_ts_synopsis('\n')
+    ''
     >>> get_ts_synopsis('big error')
     'big error'
     >>> get_ts_synopsis('big error\n')
@@ -740,13 +743,19 @@ def get_ts_synopsis(comments):
     >>> get_ts_synopsis('file1=\nfile2=\nFailed to open file\n')
     'ERROR CPRNC failed to open files'
     >>> get_ts_synopsis('file1=\nfile2=\nSome other error\n')
-    'Could not interpret CPRNC output'
+    'ERROR Could not interpret CPRNC output'
+    >>> get_ts_synopsis('file1=\nfile2=\n  diff_test: the two files seem to be IDENTICAL \n')
+    ''
     """
     comments = comments.strip()
 
     if comments == "" or "\n" not in comments:
         return comments
 
+    # Empty synopsis when files are identical
+    if re.search(IDENTICAL, comments) is not None:
+        return ""
+
     fieldlist_differences = re.search(FIELDLISTS_DIFFER, comments) is not None
     baseline_fail = re.search(NO_COMPARE, comments) is not None
     real_fail = [
@@ -779,6 +788,6 @@ def get_ts_synopsis(comments):
     elif open_fail:
         synopsis = "ERROR CPRNC failed to open files"
     else:
-        synopsis = "Could not interpret CPRNC output"
+        synopsis = "ERROR Could not interpret CPRNC output"
 
     return synopsis