From a5da22b8863f5a48750bbc6b418e095ce782fe8f Mon Sep 17 00:00:00 2001
From: David Huber <david.huber@noaa.gov>
Date: Thu, 19 Sep 2024 18:13:12 +0000
Subject: [PATCH] Combine get_valid_runs and get_task_names

---
 workflow/applications/applications.py      | 34 +++++---------------
 workflow/applications/gefs.py              | 10 +-----
 workflow/applications/gfs_cycled.py        | 36 +++++-----------------
 workflow/applications/gfs_forecast_only.py |  4 +--
 4 files changed, 18 insertions(+), 66 deletions(-)

diff --git a/workflow/applications/applications.py b/workflow/applications/applications.py
index ee721907e87..d262c76fa5a 100644
--- a/workflow/applications/applications.py
+++ b/workflow/applications/applications.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-from typing import Dict, List, Any
+from typing import Dict, List, Tuple, Any
 from datetime import timedelta
 from hosts import Host
 from wxflow import Configuration, to_timedelta
@@ -100,20 +100,16 @@ def _init_finalize(self, conf: Configuration):
         # Get a list of all possible config files that would be part of the application
         self.configs_names = self._get_app_configs()
 
-        # Get the list of valid runs for the configuration
-        self.runs = self.get_valid_runs()
+        # Get the list of valid runs and the task names for each run
+        self.runs, self.task_names = self.get_task_names()
 
-        # Initialize the task_names, configs, and model_apps dictionaries
-        self.task_names = dict.fromkeys(self.runs)
+        # Initialize the configs and model_apps dictionaries
         self.model_apps = dict.fromkeys(self.runs)
         self.configs = dict.fromkeys(self.runs)
 
         # Now configure the experiment for each valid run
         for run in self.runs:
 
-            # Get task names, configs, and APPs for the application
-            self.task_names[run] = self.get_task_names(run)
-
             self.configs[run] = self._source_configs(conf, run=run, log=False)
 
             self.model_apps[run] = self.configs[run]['base'].get('APP', 'ATM')
@@ -183,25 +179,9 @@ def _source_configs(self, conf: Configuration, run: str = "gfs", log: bool = Tru
         return configs
 
     @abstractmethod
-    def get_valid_runs(self) -> List[str]:
-        '''
-        Create a list of RUNs for the configuation.
-
-        Parameters
-        ----------
-        None
-
-        Returns
-        -------
-        Dict[str, List[str]]: Lists of tasks for each RUN.
-
-        '''
-        pass
-
-    @abstractmethod
-    def get_task_names(self, run: str) -> Dict[str, List[str]]:
+    def get_task_names(self, run: str) -> Tuple[List[str], Dict[str, List[str]]]:
         '''
-        Create a list of task names for each RUN valid for the configuation.
+        Create a list of valid RUNs and a dict of task names for each RUN valid for the configuration.
 
         Parameters
         ----------
@@ -209,7 +189,7 @@ def get_task_names(self, run: str) -> Dict[str, List[str]]:
 
         Returns
         -------
-        Dict[str, List[str]]: Lists of tasks for each RUN.
+        Tuple[List[str], Dict[str, List[str]]]: List of valid runs and lists of all tasks for each RUN.
 
         '''
         pass
diff --git a/workflow/applications/gefs.py b/workflow/applications/gefs.py
index 3a27808045d..049b9dfc575 100644
--- a/workflow/applications/gefs.py
+++ b/workflow/applications/gefs.py
@@ -47,14 +47,6 @@ def _update_base(base_in):
 
         return base_out
 
-    def get_valid_runs(self):
-        """
-        Return the GEFS RUN.
-        """
-
-        # Only one RUN (should be gefs) is allowed as specified in __init__
-        return [self.run]
-
     def get_task_names(self, run):
 
         tasks = ['stage_ic']
@@ -92,4 +84,4 @@ def get_task_names(self, run):
 
         tasks += ['arch', 'cleanup']
 
-        return tasks
+        return [self.run], {f"{self.run}": tasks}
diff --git a/workflow/applications/gfs_cycled.py b/workflow/applications/gfs_cycled.py
index 87a6ab43e24..de1c6336b17 100644
--- a/workflow/applications/gfs_cycled.py
+++ b/workflow/applications/gfs_cycled.py
@@ -130,34 +130,15 @@ def _update_base(base_in):
 
         return GFSCycledAppConfig.get_gfs_cyc_dates(base_in)
 
-    def get_valid_runs(self):
+    def get_task_names(self):
         """
-        Get a list of valid RUNs in cycled MODE.
-        """
-
-        # The gdas run is always present for the cycled application
-        runs = ["gdas"]
-
-        # Are we running the early cycle deterministic forecast?
-        if self.gfs_cyc > 0:
-            runs.append("gfs")
-
-        # Ensembles?  Add the valid run based on eupd_runs.
-        if self.do_hybvar:
-            if 'gdas' in self.eupd_runs:
-                runs.append("enkfgdas")
-            if 'gfs' in self.eupd_runs:
-                runs.append("enkfgfs")
-
-        return runs
-
-    def get_task_names(self, run: str):
-        """
-        Get the task names for all the tasks in the cycled application.
+        Get the task names in this cycled configuration and all of the valid runs.
         Note that the order of the task names matters in the XML.
         This is the place where that order is set.
         """
 
+        runs = ["gdas"]
+
         gdas_gfs_common_tasks_before_fcst = ['prep']
         gdas_gfs_common_cleanup_tasks = ['arch', 'cleanup']
 
@@ -314,24 +295,23 @@ def get_task_names(self, run: str):
         tasks['gdas'] = gdas_tasks
 
         if self.do_hybvar and 'gdas' in self.eupd_runs:
+            runs.append("enkfgdas")
             enkfgdas_tasks = hybrid_tasks + hybrid_after_eupd_tasks
             tasks['enkfgdas'] = enkfgdas_tasks
 
         # Add RUN=gfs tasks if running early cycle
         if self.gfs_cyc > 0:
+            runs.append("gfs")
             tasks['gfs'] = gfs_tasks
 
             if self.do_hybvar and 'gfs' in self.eupd_runs:
+                runs.append("enkfgfs")
                 enkfgfs_tasks = hybrid_tasks + hybrid_after_eupd_tasks
                 enkfgfs_tasks.remove("echgres")
                 enkfgfs_tasks.remove("esnowrecen")
                 tasks['enkfgfs'] = enkfgfs_tasks
 
-        if run not in tasks:
-            raise KeyError(f"FATAL ERROR: GFS cycled experiment is not configured "
-                           f"for the input run ({run})")
-
-        return tasks[run]
+        return runs, tasks
 
     @staticmethod
     def get_gfs_cyc_dates(base: Dict[str, Any]) -> Dict[str, Any]:
diff --git a/workflow/applications/gfs_forecast_only.py b/workflow/applications/gfs_forecast_only.py
index 421d046b80c..bc751b7d17b 100644
--- a/workflow/applications/gfs_forecast_only.py
+++ b/workflow/applications/gfs_forecast_only.py
@@ -90,7 +90,7 @@ def get_valid_runs(self):
         # Only one RUN is allowed for forecast-only mode as specified in __init__
         return [self.run]
 
-    def get_task_names(self, run: str):
+    def get_task_names(self):
         """
         Get the task names for all the tasks in the forecast-only application.
         Note that the order of the task names matters in the XML.
@@ -165,4 +165,4 @@ def get_task_names(self, run: str):
 
         tasks += ['arch', 'cleanup']  # arch and cleanup **must** be the last tasks
 
-        return tasks
+        return [self.run], {f"{self.run}": tasks}