Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add timestamp to rpointers #4689

Open
wants to merge 22 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
82e019e
add timestep to rpointer files
jedwards4b Aug 29, 2024
25968d9
Merge remote-tracking branch 'origin/master' into add_timestamp_to_rp…
jedwards4b Aug 29, 2024
3172762
work in progress
jedwards4b Sep 3, 2024
9ed839f
improve name of rpointer
jedwards4b Sep 12, 2024
8aa960b
better test logic for rpointers
jedwards4b Sep 12, 2024
504a36b
fix eri test
jedwards4b Sep 13, 2024
403d693
remove unused import
jedwards4b Sep 24, 2024
15530e6
It is actually used - case object spans multiple files
jedwards4b Sep 25, 2024
d879bf1
Fixes catch all to be an error
jasonb5 Sep 27, 2024
fac7f46
Fixes checking if synopsis is required
jasonb5 Sep 27, 2024
e07befb
Updates workflow action versions
jasonb5 Sep 27, 2024
9e0e5c5
fix issue with translating date string to datetime format
jedwards4b Sep 30, 2024
0ef28fc
fix issue with translating date string to datetime format
jedwards4b Sep 30, 2024
a3e1faf
remove cdeps cime_config file
jedwards4b Oct 1, 2024
4338a77
correct NO_LEAP correction
jedwards4b Oct 1, 2024
de9d555
Updates compilers, libnetcdf to bump esmf to greater than 4.6.1
jasonb5 Oct 2, 2024
c31db27
Downgrades libnetcdf for e3sm
jasonb5 Oct 2, 2024
4dd2b64
fix eri test setting of DRV_RESTART_POINTER
jedwards4b Oct 2, 2024
731e440
fix eri test setting of DRV_RESTART_POINTER and merge in fix_tx_synopsis
jedwards4b Oct 2, 2024
89ee9d3
allow for DRV_RESTART_POINTER to be undefined
jedwards4b Oct 3, 2024
93859d1
update unit tests
jedwards4b Oct 3, 2024
ffe42e2
fix test_unit_xml_tests
jedwards4b Oct 3, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 17 additions & 9 deletions .github/workflows/testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,28 +39,28 @@ jobs:
packages: write
steps:
- name: Checkout code
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Set up QEMU
uses: docker/setup-qemu-action@v2
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
uses: docker/setup-buildx-action@v3
- name: Login to DockerHub
uses: docker/login-action@v2
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Docker meta
id: meta
uses: docker/metadata-action@v4
uses: docker/metadata-action@v5
with:
images: ghcr.io/ESMCI/cime
tags: |
type=raw,value=latest,enable=${{ github.event_name == 'push' }}
type=ref,event=pr,enable=${{ github.event_name == 'pull_request' }}
type=sha,format=long
- name: Build and push
uses: docker/build-push-action@v3
uses: docker/build-push-action@v6
with:
target: base
context: docker/
Expand All @@ -76,7 +76,7 @@ jobs:
timeout-minutes: 2
steps:
- name: Checkout code
uses: actions/checkout@v2
uses: actions/checkout@v3
- name: Set up python
uses: actions/setup-python@v2
with:
Expand All @@ -102,7 +102,7 @@ jobs:
python-version: ['3.8', '3.9', '3.10']
steps:
- name: Checkout code
uses: actions/checkout@v2
uses: actions/checkout@v3
- name: Run tests
shell: bash
env:
Expand Down Expand Up @@ -149,7 +149,7 @@ jobs:
driver: "mct"
steps:
- name: Checkout code
uses: actions/checkout@v2
uses: actions/checkout@v3
- name: Cache inputdata
uses: actions/cache@v2
with:
Expand All @@ -174,6 +174,14 @@ jobs:

conda activate base

# container libnetcdf is 4.9.2 as cesm requires esmf >8.6.1
# e3sm scorpio incompatible with 4.9.2, downgrade to 4.9.1
# only reference found about scorpio incompatibility with 4.9.2 (https://github.com/E3SM-Project/scorpio/issues/554#issuecomment-1877361470)
# TODO open scorpio issue, possible solutions; 1. support two conda environments in container 2. maybe move from conda to spack? build all libraries in image
if [[ "${CIME_MODEL}" == "e3sm" ]]; then
mamba install -y 'libnetcdf=4.9.1'
fi

pytest -vvv --cov=CIME --machine docker --no-fortran-run --no-teardown CIME/tests/test_sys*
- uses: mxschmitt/action-tmate@v3
if: ${{ !always() }}
Expand Down
34 changes: 30 additions & 4 deletions CIME/SystemTests/eri.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def run_phase(self):
start_1 = run_startdate

stop_n2 = stop_n - stop_n1
rest_n2 = int(stop_n2 / 2 + 1)

hist_n = stop_n2

start_1_year, start_1_month, start_1_day = [
Expand All @@ -93,10 +93,28 @@ def run_phase(self):
start_2 = "{:04d}-{:02d}-{:02d}".format(
start_2_year, start_1_month, start_1_day
)
rest_n2 = self._set_restart_interval(
stop_n=stop_n2,
stop_option=stop_option,
startdate=start_2,
starttime=start_tod,
)

stop_n3 = stop_n2 - rest_n2
rest_n3 = int(stop_n3 / 2 + 1)

ninst = self._case.get_value("NINST")
drvrest = "rpointer.cpl"
if ninst > 1:
drvrest += "_0001"
drvrest += self._rest_time
self._set_drv_restart_pointer(drvrest)

rest_n3 = self._set_restart_interval(
stop_n=stop_n3,
stop_option=stop_option,
startdate=start_2,
starttime=start_tod,
)
stop_n4 = stop_n3 - rest_n3

expect(stop_n4 >= 1 and stop_n1 >= 1, "Run length too short")
Expand Down Expand Up @@ -223,8 +241,10 @@ def run_phase(self):
self._case.set_value("GET_REFCASE", False)
self._case.set_value("CONTINUE_RUN", False)
self._case.set_value("STOP_N", stop_n3)
self._case.set_value("REST_OPTION", stop_option)
self._case.set_value("REST_N", rest_n3)
self._set_restart_interval(
stop_n=stop_n3, startdate=refdate_3, starttime=refsec_3
)

self._case.set_value("HIST_OPTION", stop_option)
self._case.set_value("HIST_N", stop_n2)
self._case.set_value("DOUT_S", False)
Expand Down Expand Up @@ -266,6 +286,12 @@ def run_phase(self):
self._case.set_value("DOUT_S", False)
self._case.set_value("HIST_OPTION", stop_option)
self._case.set_value("HIST_N", hist_n)
drvrest = "rpointer.cpl"
if ninst > 1:
drvrest += "_0001"
drvrest += self._rest_time

self._set_drv_restart_pointer(drvrest)
self._case.flush()

# do the restart run (short term archiving is off)
Expand Down
6 changes: 6 additions & 0 deletions CIME/SystemTests/ers.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,13 @@ def _ers_second_phase(self):
pfile,
os.path.join(os.path.dirname(pfile), "run1." + os.path.basename(pfile)),
)
ninst = self._case.get_value("NINST")
drvrest = "rpointer.cpl"
if ninst > 1:
drvrest += "_0001"
drvrest += self._rest_time

self._set_drv_restart_pointer(drvrest)
self._case.set_value("HIST_N", stop_n)
self._case.set_value("STOP_N", stop_new)
self._case.set_value("CONTINUE_RUN", True)
Expand Down
6 changes: 6 additions & 0 deletions CIME/SystemTests/restart_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,9 @@ def _case_two_setup(self):
self._case.set_value("STOP_N", stop_new)
self._case.set_value("CONTINUE_RUN", True)
self._case.set_value("REST_OPTION", "never")
ninst = self._case.get_value("NINST")
drvrest = "rpointer.cpl"
if ninst > 1:
drvrest += "_0001"
drvrest += self._rest_time
self._set_drv_restart_pointer(drvrest)
80 changes: 73 additions & 7 deletions CIME/SystemTests/system_tests_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,9 @@
load_coupler_customization,
)
import CIME.build as build
from datetime import datetime, timedelta
import glob, gzip, time, traceback, os, math, calendar

import glob, gzip, time, traceback, os, math
from contextlib import ExitStack

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -111,6 +112,7 @@ def __init__(
self._init_environment(caseroot)
self._init_locked_files(caseroot, expected)
self._skip_pnl = False
self._rest_time = None
self._cpllog = (
"med" if self._case.get_value("COMP_INTERFACE") == "nuopc" else "cpl"
)
Expand All @@ -119,10 +121,22 @@ def __init__(
self._user_separate_builds = False
self._expected_num_cmp = None
self._rest_n = None
# Does the model support this variable?
self._drv_restart_pointer = self._case.get_value("DRV_RESTART_POINTER")

def _set_drv_restart_pointer(self, value):
    """
    Store ``value`` in the case's DRV_RESTART_POINTER variable.

    The update (and its log message) is skipped entirely when
    ``self._drv_restart_pointer`` is falsy.
    """
    # Guard clause: nothing to do unless the cached pointer value is truthy.
    if not self._drv_restart_pointer:
        return

    message = "setting DRV_RESTART_POINTER={}".format(value)
    logger.info(message)
    self._case.set_value("DRV_RESTART_POINTER", value)

def _set_restart_interval(
self, stop_n=None, stop_option=None, startdate=None, starttime=None
):
if not stop_n:
stop_n = self._case.get_value("STOP_N")
if not stop_option:
stop_option = self._case.get_value("STOP_OPTION")

def _set_restart_interval(self):
stop_n = self._case.get_value("STOP_N")
stop_option = self._case.get_value("STOP_OPTION")
self._case.set_value("REST_OPTION", stop_option)
# We need to make sure the run is long enough and to set REST_N to a
# value that makes sense for all components
Expand Down Expand Up @@ -172,19 +186,69 @@ def _set_restart_interval(self):
factor = 315360000
else:
expect(False, f"stop_option {stop_option} not available for this test")

stop_n = int(stop_n * factor // coupling_secs)
rest_n = math.ceil((stop_n // 2 + 1) * coupling_secs / factor)

expect(stop_n > 0, "Bad STOP_N: {:d}".format(stop_n))

expect(stop_n > 2, "ERROR: stop_n value {:d} too short".format(stop_n))
if not starttime:
starttime = self._case.get_value("START_TOD")
if not startdate:
startdate = self._case.get_value("RUN_STARTDATE")
if "-" in startdate:
startdatetime = datetime.fromisoformat(startdate) + timedelta(
seconds=int(starttime)
)
else:
startdatetime = datetime.strptime(startdate, "%Y%m%d") + timedelta(
seconds=int(starttime)
)

cal = self._case.get_value("CALENDAR")

if stop_option == "nsteps":
rtd = timedelta(seconds=rest_n * factor)
elif stop_option == "nminutes":
rtd = timedelta(minutes=rest_n)
elif stop_option == "nhours":
rtd = timedelta(hours=rest_n)
elif stop_option == "ndays":
rtd = timedelta(days=rest_n)
elif stop_option == "nyears":
rtd = timedelta(days=rest_n * 365)
else:
expect(False, f"stop_option {stop_option} not available for this test")

restdatetime = startdatetime + rtd
if cal == "NO_LEAP":
dayscorrected = 0
syr = startdatetime.year
smon = startdatetime.month
ryr = restdatetime.year
rmon = restdatetime.month
while ryr > syr:
if rmon > 2 and calendar.isleap(ryr):
dayscorrected += 1
ryr = ryr - 1
if rmon > 2 and smon <= 2:
if calendar.isleap(syr):
dayscorrected += 1
restdatetime = restdatetime + timedelta(days=dayscorrected)
self._rest_time = (
f".{restdatetime.year:04d}-{restdatetime.month:02d}-{restdatetime.day:02d}-"
)
h = restdatetime.hour
m = restdatetime.minute
s = restdatetime.second
self._rest_time += f"{(h*3600+m*60+s):05d}"

logger.info(
"doing an {0} {1} initial test with restart file at {2} {1}".format(
str(stop_n), stop_option, str(rest_n)
)
)
self._case.set_value("REST_N", rest_n)

return rest_n

def _init_environment(self, caseroot):
Expand Down Expand Up @@ -261,7 +325,9 @@ def build(
sharedlib_only=(phase_name == SHAREDLIB_BUILD_PHASE),
model_only=(phase_name == MODEL_BUILD_PHASE),
)
except BaseException as e: # We want KeyboardInterrupts to generate FAIL status
except (
BaseException
) as e: # We want KeyboardInterrupts to generate FAIL status
success = False
if isinstance(e, CIMEError):
# Don't want to print stacktrace for a build failure since that
Expand Down
3 changes: 0 additions & 3 deletions CIME/data/config/config_tests.xml
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,6 @@ NODEFAIL Tests restart upon detected node failure. Generates fake failu
<STOP_N>11</STOP_N>
<DOUT_S>FALSE</DOUT_S>
<FORCE_BUILD_SMP>TRUE</FORCE_BUILD_SMP>
<REST_N>$STOP_N / 2 + 1 </REST_N>
<REST_OPTION>$STOP_OPTION</REST_OPTION>
<HIST_OPTION>$STOP_OPTION</HIST_OPTION>
<HIST_N>$STOP_N</HIST_N>
Expand All @@ -272,7 +271,6 @@ NODEFAIL Tests restart upon detected node failure. Generates fake failu
<INFO_DBUG>1</INFO_DBUG>
<STOP_OPTION>ndays</STOP_OPTION>
<STOP_N>11</STOP_N>
<REST_N>$STOP_N / 2 + 1</REST_N>
<REST_OPTION>$STOP_OPTION</REST_OPTION>
<HIST_N>$STOP_N</HIST_N>
<HIST_OPTION>$STOP_OPTION</HIST_OPTION>
Expand All @@ -298,7 +296,6 @@ NODEFAIL Tests restart upon detected node failure. Generates fake failu
<INFO_DBUG>1</INFO_DBUG>
<STOP_OPTION>ndays</STOP_OPTION>
<STOP_N>11</STOP_N>
<REST_N>$STOP_N / 2 - 1</REST_N>
<REST_OPTION>$STOP_OPTION</REST_OPTION>
<HIST_N>$STOP_N</HIST_N>
<HIST_OPTION>$STOP_OPTION</HIST_OPTION>
Expand Down
13 changes: 11 additions & 2 deletions CIME/hist_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
FIELDLISTS_DIFFER = "had a different field list from"
DIFF_COMMENT = "did NOT match"
FAILED_OPEN = "Failed to open file"
IDENTICAL = "the two files seem to be IDENTICAL"
# COMPARISON_COMMENT_OPTIONS should include all of the above: these are any of the special
# comment strings that describe the reason for a comparison failure
COMPARISON_COMMENT_OPTIONS = set(
Expand Down Expand Up @@ -719,6 +720,8 @@ def get_ts_synopsis(comments):

>>> get_ts_synopsis('')
''
>>> get_ts_synopsis('\n')
''
>>> get_ts_synopsis('big error')
'big error'
>>> get_ts_synopsis('big error\n')
Expand All @@ -740,13 +743,19 @@ def get_ts_synopsis(comments):
>>> get_ts_synopsis('file1=\nfile2=\nFailed to open file\n')
'ERROR CPRNC failed to open files'
>>> get_ts_synopsis('file1=\nfile2=\nSome other error\n')
'Could not interpret CPRNC output'
'ERROR Could not interpret CPRNC output'
>>> get_ts_synopsis('file1=\nfile2=\n diff_test: the two files seem to be IDENTICAL \n')
''
"""
comments = comments.strip()

if comments == "" or "\n" not in comments:
return comments

# Empty synopsis when files are identical
if re.search(IDENTICAL, comments) is not None:
return ""

fieldlist_differences = re.search(FIELDLISTS_DIFFER, comments) is not None
baseline_fail = re.search(NO_COMPARE, comments) is not None
real_fail = [
Expand Down Expand Up @@ -779,6 +788,6 @@ def get_ts_synopsis(comments):
elif open_fail:
synopsis = "ERROR CPRNC failed to open files"
else:
synopsis = "Could not interpret CPRNC output"
synopsis = "ERROR Could not interpret CPRNC output"

return synopsis
Loading
Loading