Support for Intel code-coverage analysis

CABLE-LSM · Jul 9, 2024 · ef27b5b · ef27b5b
1 parent fc625c0
commit ef27b5b
Show file tree

Hide file tree

Showing 13 changed files with 357 additions and 41 deletions.
diff --git a/docs/user_guide/config_options.md b/docs/user_guide/config_options.md
@@ -471,6 +471,15 @@ science_configurations: [
 ]
 ```
 
+## build_option
+
+: **Default:** release, _optional key_. :octicons-dash-24: Specifies the build type (`Debug` or `Release`), along with additional flags depending on the required post-run analysis. Supported options are: `debug`, `debug-codecov`, `release`.
+
+```yaml
+build_option:
+  release
+```
+
 [meorg]: https://modelevaluation.org/
 [forty-two-me]: https://modelevaluation.org/experiment/display/s6k22L3WajmiS9uGv
 [five-me]: https://modelevaluation.org/experiment/display/Nb37QxkAz3FczWDd7

diff --git a/src/benchcab/benchcab.py b/src/benchcab/benchcab.py
@@ -14,6 +14,11 @@
 from benchcab import fluxsite, internal, spatial
 from benchcab.comparison import run_comparisons, run_comparisons_in_parallel
 from benchcab.config import read_config
+from benchcab.coverage import (
+    get_coverage_tasks_default,
+    run_coverage_tasks,
+    run_coverages_in_parallel,
+)
 from benchcab.environment_modules import EnvironmentModules, EnvironmentModulesInterface
 from benchcab.internal import get_met_forcing_file_names
 from benchcab.model import Model
@@ -226,6 +231,23 @@ def fluxsite_submit_job(self, config_path: str, skip: list[str]) -> None:
         logger.info("The NetCDF output for each task is written to:")
         logger.info(f"{internal.FLUXSITE_DIRS['OUTPUT']}/<task_name>_out.nc")
 
+    def codecov(self, config_path: str):
+        """Endpoint for `benchcab codecov`: runs one coverage task per configured model."""
+        logger = self._get_logger()
+        config = self._get_config(config_path)
+        self._validate_environment(project=config["project"], modules=config["modules"])
+
+        coverage_tasks = get_coverage_tasks_default(
+            models=self._get_models(config=config)
+        )
+        logger.info("Running coverage tasks...")
+        if config["fluxsite"]["multiprocess"]:  # reuses the fluxsite multiprocessing settings
+            ncpus = config["fluxsite"]["pbs"]["ncpus"]
+            run_coverages_in_parallel(coverage_tasks, n_processes=ncpus)
+        else:
+            run_coverage_tasks(coverage_tasks)
+        logger.info("Successfully ran coverage tasks")
+
     def checkout(self, config_path: str):
         """Endpoint for `benchcab checkout`."""
         logger = self._get_logger()
@@ -271,7 +293,11 @@ def build(self, config_path: str, mpi=False):
                 logger.info(
                     f"Compiling CABLE {build_mode} for realisation {repo.name}..."
                 )
-                repo.build(modules=config["modules"], mpi=mpi)
+                repo.build(
+                    modules=config["modules"],
+                    mpi=mpi,
+                    build_option=config["build_option"],
+                )
             logger.info(f"Successfully compiled CABLE for realisation {repo.name}")
 
     def fluxsite_setup_work_directory(self, config_path: str):

diff --git a/src/benchcab/cli.py b/src/benchcab/cli.py
@@ -244,6 +244,17 @@ def generate_parser(app: Benchcab) -> argparse.ArgumentParser:
         submissions: deletes runs/ and benchmark submission files
         all: deletes in both stages of submissions and realisations""",
     )
-
     parser_clean.set_defaults(func=app.clean)
+
+    # subcommand: 'benchcab codecov'
+    parser_codecov = subparsers.add_parser(
+        "codecov",
+        parents=[args_help, args_subcommand],
+        help="Runs code coverage if runs are finished.",
+        description="""Uses profmerge and codecov utilities to do code coverage
+        analysis. Note: All sources must be built using Intel compiler.
+        """,
+        add_help=False,
+    )
+    parser_codecov.set_defaults(func=app.codecov)
     return main_parser
diff --git a/src/benchcab/config.py b/src/benchcab/config.py
@@ -120,6 +120,8 @@ def read_optional_key(config: dict):
         "pbs", {}
     )
 
+    config["build_option"] = config.get("build_option", internal.DEFAULT_BUILD_TYPE)
+
 
 def read_config_file(config_path: str) -> dict:
     """Load the config file in a dict.

diff --git a/src/benchcab/coverage.py b/src/benchcab/coverage.py
@@ -0,0 +1,81 @@
+# Copyright 2024 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details.
+# SPDX-License-Identifier: Apache-2.0
+
+"""A module containing functions and data structures for running coverage tasks."""
+
+import multiprocessing
+import operator
+from typing import Optional
+
+from benchcab import internal
+from benchcab.environment_modules import EnvironmentModules, EnvironmentModulesInterface
+from benchcab.model import Model
+from benchcab.utils import get_logger
+from benchcab.utils.fs import chdir
+from benchcab.utils.subprocess import SubprocessWrapper, SubprocessWrapperInterface
+
+
+class CoverageTask:
+    """A class used to represent a single code coverage task."""
+
+    subprocess_handler: SubprocessWrapperInterface = SubprocessWrapper()
+    modules_handler: EnvironmentModulesInterface = EnvironmentModules()
+
+    def __init__(
+        self,
+        coverage_dir: str,
+        project_name: Optional[str] = "CABLE",
+        dpi_file: Optional[str] = "pgopti.dpi",
+        spi_file: Optional[str] = "pgopti.spi",
+    ) -> None:
+        """Constructor.
+
+        Parameters
+        ----------
+        coverage_dir:
+            Directory in which `profmerge` and `codecov` are run
+
+        project_name:
+            Name of the project, passed to `codecov` via `-prj`
+        dpi_file:
+            Name of the DPI file created by merging the .dyn files produced by all runs
+        spi_file:
+            Name of the SPI file (static profile information created on compilation)
+
+        """
+        self.logger = get_logger()
+        self.coverage_dir = coverage_dir
+        self.project_name = project_name
+        self.dpi_file = dpi_file
+        self.spi_file = spi_file
+
+    def run(self) -> None:
+        """Executes `profmerge` then `codecov` inside `coverage_dir` with the Intel compiler module loaded."""
+        with chdir(self.coverage_dir), self.modules_handler.load(
+            [internal.DEFAULT_MODULES["intel-compiler"]]
+        ):
+            self.subprocess_handler.run_cmd(f"profmerge -prof-dpi {self.dpi_file}")
+            self.subprocess_handler.run_cmd(
+                f"codecov -prj {self.project_name} -dpi {self.dpi_file} -spi {self.spi_file}"
+            )
+
+
+def run_coverage_tasks(coverage_tasks: list[CoverageTask]) -> None:
+    """Runs coverage tasks serially, one after another, in the calling process."""
+    for task in coverage_tasks:
+        task.run()
+
+
+def get_coverage_tasks_default(models: list[Model]) -> list[CoverageTask]:
+    """Returns one CoverageTask per model, using default values for the optional parameters."""
+    return [CoverageTask(model.get_coverage_dir()) for model in models]
+
+
+def run_coverages_in_parallel(
+    coverage_tasks: list[CoverageTask],
+    n_processes=internal.FLUXSITE_DEFAULT_PBS["ncpus"],
+) -> None:
+    """Runs coverage tasks in parallel across a pool of `n_processes` worker processes."""
+    run_task = operator.methodcaller("run")
+    with multiprocessing.Pool(n_processes) as pool:
+        pool.map(run_task, coverage_tasks, chunksize=1)
diff --git a/src/benchcab/data/config-schema.yml b/src/benchcab/data/config-schema.yml
@@ -130,4 +130,13 @@ spatial:
         args:
           nullable: true
           type: "string"
-          required: false
+          required: false
+
+build_option:
+  type: "string"
+  required: false
+  allowed: [
+    "debug",
+    "debug-codecov",
+    "release",
+  ]
diff --git a/src/benchcab/data/test/config-optional.yml b/src/benchcab/data/test/config-optional.yml
@@ -35,6 +35,8 @@ realisations:
         branch_path: branches/Users/ccc561/v3.0-YP-changes
     name: git_branch
 
+build_option:
+  debug-codecov
 
 modules: [
   intel-compiler/2021.1.1,

diff --git a/src/benchcab/internal.py b/src/benchcab/internal.py
@@ -13,7 +13,10 @@
 CONFIG_REQUIRED_KEYS = ["realisations", "modules"]
 
 # CMake module used for compilation:
-CMAKE_MODULE = "cmake/3.24.2"
+DEFAULT_MODULES = {
+    "cmake": "cmake/3.24.2",
+    "intel-compiler": "intel-compiler/2021.10.0",
+}
 
 # Number of parallel jobs used when compiling with CMake:
 CMAKE_BUILD_PARALLEL_LEVEL = 4
@@ -51,6 +54,9 @@
 # Path CABLE grid info file
 GRID_FILE = CABLE_AUX_DIR / "offline" / "gridinfo_CSIRO_1x1.nc"
 
+# Relative path to directory that stores codecov files
+CODECOV_DIR = RUN_DIR / "coverage"
+
 # Fluxsite directory tree
 FLUXSITE_DIRS: dict[str, Path] = {}
 
@@ -142,6 +148,8 @@
     "science/pop/*90",
 ]
 
+DEFAULT_BUILD_TYPE = "release"
+
 # Contains the default science configurations used to run the CABLE test suite
 # (when a science config file is not provided by the user)
 DEFAULT_SCIENCE_CONFIGURATIONS = [

diff --git a/src/benchcab/model.py b/src/benchcab/model.py
@@ -13,7 +13,7 @@
 from benchcab import internal
 from benchcab.environment_modules import EnvironmentModules, EnvironmentModulesInterface
 from benchcab.utils import get_logger
-from benchcab.utils.fs import chdir, prepend_path
+from benchcab.utils.fs import chdir, mkdir, prepend_path
 from benchcab.utils.repo import GitRepo, LocalRepo, Repo
 from benchcab.utils.subprocess import SubprocessWrapper, SubprocessWrapperInterface
 
@@ -82,6 +82,36 @@ def model_id(self) -> int:
     def model_id(self, value: int):
         self._model_id = value
 
+    def get_coverage_dir(self) -> Path:
+        """Get the absolute path of this realisation's coverage directory (`CODECOV_DIR/R<model_id>`)."""
+        return (internal.CODECOV_DIR / f"R{self.model_id}").absolute()
+
+    def get_build_flags(self, mpi: bool, build_option: str, compiler_id: str) -> dict:
+        """Get the CMake build flags as a dict with keys `build_type`, `mpi` and `flags_init`."""
+        build_flags = {}
+
+        build_flags["build_type"] = "Debug" if "debug" in build_option else "Release"
+        build_flags["mpi"] = "ON" if mpi else "OFF"
+
+        build_flags["flags_init"] = ""
+        if "codecov" in build_option:
+            if compiler_id != "ifort":
+                msg = f"""For code coverage, the only supported compiler is `ifort`
+                User has {compiler_id} in their environment"""
+                raise RuntimeError(msg)
+
+            codecov_dir = self.get_coverage_dir()
+
+            self.logger.info("Building with Intel code coverage using PGO")
+
+            # `ifort` checks for pre-existing profile directories before compilation
+            mkdir(codecov_dir, parents=True, exist_ok=True)
+
+            self.logger.debug(f"Analysis directory set as {codecov_dir}")
+            build_flags["flags_init"] += f'"-prof-gen=srcpos -prof-dir={codecov_dir}"'
+
+        return build_flags
+
     def get_exe_path(self, mpi=False) -> Path:
         """Return the path to the built executable."""
         exe = internal.CABLE_MPI_EXE if mpi else internal.CABLE_EXE
@@ -118,46 +148,66 @@ def custom_build(self, modules: list[str]):
         with chdir(build_script_path.parent), self.modules_handler.load(modules):
             self.subprocess_handler.run_cmd(f"./{tmp_script_path.name}")
 
-    def build(self, modules: list[str], mpi=False):
+    def build(self, modules: list[str], mpi: bool, build_option: str):
         """Build CABLE with CMake."""
         path_to_repo = internal.SRC_DIR / self.name
-        cmake_args = [
-            "-DCMAKE_BUILD_TYPE=Release",
-            "-DCABLE_MPI=" + ("ON" if mpi else "OFF"),
-        ]
-        with chdir(path_to_repo), self.modules_handler.load(
-            [internal.CMAKE_MODULE, *modules]
-        ):
-            env = os.environ.copy()
-
-            # This is required to prevent CMake from finding the conda
-            # installation of netcdf-fortran (#279):
-            env.pop("LDFLAGS", None)
-
-            # This is required to prevent CMake from finding MPI libraries in
-            # the conda environment (#279):
-            env.pop("CMAKE_PREFIX_PATH", None)
-
-            # This is required so that the netcdf-fortran library is discoverable by
-            # pkg-config:
-            prepend_path(
-                "PKG_CONFIG_PATH", f"{env['NETCDF_BASE']}/lib/Intel/pkgconfig", env=env
+
+        with self.modules_handler.load([internal.DEFAULT_MODULES["cmake"], *modules]):
+
+            # $FC is only set once the compiler module is loaded, and the coverage
+            # dir must be resolved relative to the project root dir (before chdir)
+            env_fc = os.environ.get("FC", "")
+            self.logger.debug(
+                f"Getting environment variable for compiler $FC = {env_fc}"
             )
+            build_flags = self.get_build_flags(mpi, build_option, env_fc)
+            env_fc = None
+
+            with chdir(path_to_repo):
+                env = os.environ.copy()
+
+                cmake_args = [
+                    f"-DCABLE_MPI={build_flags['mpi']}",
+                    f"-DCMAKE_BUILD_TYPE={build_flags['build_type']}",
+                    f"-DCMAKE_Fortran_FLAGS_INIT={build_flags['flags_init']}",
+                ]
+
+                # This is required to prevent CMake from finding the conda
+                # installation of netcdf-fortran (#279):
+                env.pop("LDFLAGS", None)
+
+                # This is required to prevent CMake from finding MPI libraries in
+                # the conda environment (#279):
+                env.pop("CMAKE_PREFIX_PATH", None)
 
-            if self.modules_handler.module_is_loaded("openmpi"):
-                # This is required so that the openmpi MPI libraries are discoverable
-                # via CMake's `find_package` mechanism:
+                # This is required so that the netcdf-fortran library is discoverable by
+                # pkg-config:
                 prepend_path(
-                    "CMAKE_PREFIX_PATH", f"{env['OPENMPI_BASE']}/include/Intel", env=env
+                    "PKG_CONFIG_PATH",
+                    f"{env['NETCDF_BASE']}/lib/Intel/pkgconfig",
+                    env=env,
                 )
 
-            env["CMAKE_BUILD_PARALLEL_LEVEL"] = str(internal.CMAKE_BUILD_PARALLEL_LEVEL)
+                if self.modules_handler.module_is_loaded("openmpi"):
+                    # This is required so that the openmpi MPI libraries are discoverable
+                    # via CMake's `find_package` mechanism:
+                    prepend_path(
+                        "CMAKE_PREFIX_PATH",
+                        f"{env['OPENMPI_BASE']}/include/Intel",
+                        env=env,
+                    )
+
+                env["CMAKE_BUILD_PARALLEL_LEVEL"] = str(
+                    internal.CMAKE_BUILD_PARALLEL_LEVEL
+                )
 
-            self.subprocess_handler.run_cmd(
-                "cmake -S . -B build " + " ".join(cmake_args), env=env
-            )
-            self.subprocess_handler.run_cmd("cmake --build build ", env=env)
-            self.subprocess_handler.run_cmd("cmake --install build --prefix .", env=env)
+                self.subprocess_handler.run_cmd(
+                    "cmake -S . -B build " + " ".join(cmake_args), env=env
+                )
+                self.subprocess_handler.run_cmd("cmake --build build ", env=env)
+                self.subprocess_handler.run_cmd(
+                    "cmake --install build --prefix .", env=env
+                )
 
 
 def remove_module_lines(file_path: Path) -> None:

diff --git a/src/benchcab/utils/pbs.py b/src/benchcab/utils/pbs.py
@@ -3,7 +3,7 @@
 
 """Contains helper functions for manipulating PBS job scripts."""
 
-from typing import TypedDict
+from typing import Optional, TypedDict
 
 from benchcab.utils import interpolate_file_template
 
@@ -23,8 +23,8 @@ def render_job_script(
     modules: list,
     benchcab_path: str,
     pbs_config: PBSConfig,
-    verbose=False,
-    skip_bitwise_cmp=False,
+    verbose: Optional[bool] = False,
+    skip_bitwise_cmp: Optional[bool] = False,
 ) -> str:
     """Returns the text for a PBS job script that executes all computationally expensive commands.