Skip to content

Commit

Permalink
Changes towards a new run sbatch script.
Browse files Browse the repository at this point in the history
  • Loading branch information
nwieters authored and pgierz committed Feb 3, 2025
1 parent 46c5d69 commit e8cab33
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 48 deletions.
34 changes: 24 additions & 10 deletions src/esm_runscripts/batch_system.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@
from .pbs import Pbs
from .slurm import Slurm

import pdb

known_batch_systems = ["slurm", "pbs"]
reserved_jobtypes = ["prepcompute", "compute", "prepare", "tidy", "inspect"]

Expand Down Expand Up @@ -348,7 +346,7 @@ def append_start_statement(config, subjob):
config["general"]["run_number"],
config["general"]["current_date"],
config["general"]["jobid"],
"- start",
"- start from run script",
],
timestampStr_from_Unix=True,
)
Expand All @@ -364,7 +362,7 @@ def append_done_statement(config, subjob):
config["general"]["run_number"],
config["general"]["current_date"],
config["general"]["jobid"],
"- done",
"- done from run script",
],
timestampStr_from_Unix=True,
)
Expand Down Expand Up @@ -559,7 +557,9 @@ def write_run_batch_script(config, cluster, batch_or_shell="batch"):

command = phase["run_command"]
if phase["phase_type"] == "SimulationSetup":
runfile.write("cd " + config["general"]["experiment_scripts_dir"] + "\n")
runfile.write(
"cd " + config["general"]["experiment_scripts_dir"] + "\n"
)
runfile.write(f"{command} --run-from-batch-script\n")
elif phase["phase_type"] == "compute":
runfile.write("cd " + config["general"]["thisrun_work_dir"] + "\n")
Expand Down Expand Up @@ -587,14 +587,25 @@ def write_run_batch_script(config, cluster, batch_or_shell="batch"):
runfile.write(f"{command}\n")
runfile.write("process=$!\n")
runfile.write("\n")
runfile.write("#********** Start to observe " + phase["name"] + " *************\n")
runfile.write("echo start observe >> " + config["general"]["experiment_log_file"] + "\n")
runfile.write("cd " + config["general"]["experiment_scripts_dir"] + "\n")
runfile.write(
"#********** Start to observe "
+ phase["name"]
+ " *************\n"
)
runfile.write(self.append_start_statement(config, "observe") + "\n")
runfile.write(
"cd " + config["general"]["experiment_scripts_dir"] + "\n"
)
runfile.write(f"{observe_call}\n")
runfile.write("\n")
runfile.write("wait\n")
runfile.write("echo end observe >> " + config["general"]["experiment_log_file"] + "\n")
doneline = "echo " + line + " >> " + config["general"]["experiment_log_file"]
runfile.write(self.append_done_statement(config, "observe") + "\n")
doneline = (
"echo "
+ line
+ " >> "
+ config["general"]["experiment_log_file"]
)
else:
runfile.write(f"{command}\n")
runfile.write(self.append_done_statement(config, phase["name"]) + "\n")
Expand All @@ -615,10 +626,13 @@ def write_run_batch_script(config, cluster, batch_or_shell="batch"):

run_number = config["general"].get("run_number", None)

runfile.write(self.append_start_statement(config, "restart") + "\n")
runfile.write(
f"esm_runscripts {scriptname} -e {expid} -t restart --open-run -v --no-motd --run-from-batch-script --open-run -s {current_date} -r {run_number}"
)
runfile.write("\n")
runfile.write(self.append_done_statement(config, "restart") + "\n")
runfile.write("\n")

runfile.write("\n")
runfile.write("wait\n")
Expand Down
9 changes: 7 additions & 2 deletions src/esm_runscripts/logfiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,13 @@ def initialize_logfiles(config, org_jobtype):
log_stuff = False
if os.path.isdir(os.path.dirname(config["general"]["experiment_log_file"])):
if not org_jobtype == "inspect":
#if not os.path.isfile(config["general"]["experiment_log_file"]):
log_stuff = True

# Set name of logfile into config
config = set_logfile_name(config, "")

# Writes some line to the logfile defined in config.
if log_stuff:

helpers.write_to_log(
Expand All @@ -28,9 +31,10 @@ def initialize_logfiles(config, org_jobtype):
logfile_run_number,
str(config["general"]["current_date"]),
str(config["general"]["jobid"]),
"- start",
"- start in initialize_logfiles",
],
)
# Creates a logfile object/handle for stdout/phase log file
logfile = RuntimeLogger(
config["general"]["logfile_path"],
"w",
Expand All @@ -39,6 +43,7 @@ def initialize_logfiles(config, org_jobtype):
else:
logfile = sys.stdout

# Writes logfile handle into global variable
logfile_handle = logfile
return config

Expand All @@ -56,7 +61,7 @@ def finalize_logfiles(config, org_jobtype):
logfile_run_number,
str(config["general"]["current_date"]),
str(config["general"]["jobid"]),
"- done",
"- done in finalize_logfiles",
],
)

Expand Down
14 changes: 10 additions & 4 deletions src/esm_runscripts/prepexp.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,9 +355,15 @@ def initialize_experiment_logfile(config):
it_coupled_model = config["general"]["iterative_coupled_model"]
datestamp = config["general"]["run_datestamp"]

if config["general"]["run_number"] == 1:
if os.path.isfile(config["general"]["experiment_log_file"]):
os.remove(config["general"]["experiment_log_file"])
fromdir = os.path.realpath(config["general"]["started_from"])
scriptsdir = os.path.realpath(config["general"]["experiment_scripts_dir"])

# if (fromdir == scriptsdir):
# TODO: Check the next if statements
if not config["general"]["run_from_batch_script"]:
#if config["general"]["run_number"] == 1:
#if os.path.isfile(config["general"]["experiment_log_file"]):
# os.remove(config["general"]["experiment_log_file"])

log_msg = f"# Beginning of Experiment {expid}"
write_to_log(config, [log_msg], message_sep="")
Expand All @@ -369,7 +375,7 @@ def initialize_experiment_logfile(config):
str(config["general"]["run_number"]),
str(config["general"]["current_date"]),
str(config["general"]["jobid"]),
"- start",
"- start in prepexp",
],
)

Expand Down
6 changes: 3 additions & 3 deletions src/esm_runscripts/resubmit.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,8 +242,8 @@ def maybe_resubmit(config):
"""
task = config["general"].get("task", None)
# if task is start, restart, run_workflow -> write new *.run file
if end_of_experiment(config):
print("test")
# if end_of_experiment(config):
# print('test')
if task in ["start", "run_workflow", "restart"]:
jobtype = config["general"]["jobtype"] # current phase
workflow = config["general"]["workflow"]["object"]
Expand All @@ -259,7 +259,7 @@ def maybe_resubmit(config):

elif task in ["run_phase"]:
# check if phase type compute
print("execute a compute job")
print("execute a phase")

# TODO: Check if run from *.run file
# TODO: Create *.run file
Expand Down
60 changes: 31 additions & 29 deletions src/esm_runscripts/sim_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,41 +121,43 @@ def __call__(self, kill_after_submit=True):
# self.pseudocall(kill_after_submit)
# call to observe here..
org_jobtype = str(self.config["general"]["jobtype"])
self.config = logfiles.initialize_logfiles(self.config, org_jobtype)

# write *.run file
# submit batch script
resubmit.maybe_resubmit(self.config)

# if not check run???
# set stdout and stderr to logfile
if self.config["general"]["submitted"]:
old_stdout = sys.stdout
old_stderr = sys.stderr
sys.stdout = logfiles.logfile_handle
sys.stderr = logfiles.logfile_handle

# breakpoint()
if self.config["general"]["task"].startswith("observe"):
pid = self.config["general"]["command_line_config"].get(
"launcher_pid", -666
)
if not pid == -666:
self.observe()
else:
try:
getattr(self, self.config["general"]["jobtype"])()
except AttributeError:
print(
f"No method for jobtype {self.config['general']['jobtype']} found."
if self.config["general"]["task"] == "run_phase":
# Writes to the general log file and creates a (global) logfile handle for the current jobtype
self.config = logfiles.initialize_logfiles(self.config, org_jobtype)

# if not check run???
# set stdout and stderr to logfile
if self.config["general"]["submitted"]:
old_stdout = sys.stdout
old_stderr = sys.stderr
sys.stdout = logfiles.logfile_handle
sys.stderr = logfiles.logfile_handle

if self.config["general"]["task"].startswith("observe"):
pid = self.config["general"]["command_line_config"].get(
"launcher_pid", -666
)

# if this line is reached, the run is submitted and running or finished
self.config = logfiles.finalize_logfiles(self.config, org_jobtype)

if self.config["general"]["submitted"]:
sys.stdout = old_stdout
sys.stderr = old_stderr
if not pid == -666:
self.observe()
else:
try:
getattr(self, self.config["general"]["jobtype"])()
except AttributeError:
print(
f"No method for jobtype {self.config['general']['jobtype']} found."
)

# if this line is reached, the run is submitted and running or finished
self.config = logfiles.finalize_logfiles(self.config, org_jobtype)

if self.config["general"]["submitted"]:
sys.stdout = old_stdout
sys.stderr = old_stderr

if kill_after_submit:
if self.config["general"].get("experiment_over", False):
Expand Down

0 comments on commit e8cab33

Please sign in to comment.