Merge pull request #298 from CABLE-LSM/91-code-coverage-analysis

Code coverage intel analysis
CABLE-LSM · Jul 15, 2024 · ab1d22d · ab1d22d
2 parents a314471 + 1dcd270
commit ab1d22d
Show file tree

Hide file tree

Showing 18 changed files with 433 additions and 47 deletions.
diff --git a/docs/user_guide/config_options.md b/docs/user_guide/config_options.md
@@ -471,6 +471,15 @@ science_configurations: [
 ]
 ```
 
+## codecov
+
+: **Default:** False, _optional key_. :octicons-dash-24: Specifies whether to build CABLE with code-coverage flags, which can then be used in post-run analysis (`benchcab gen_codecov`).
+
+```yaml
+codecov:
+  true
+```
+
 [meorg]: https://modelevaluation.org/
 [forty-two-me]: https://modelevaluation.org/experiment/display/s6k22L3WajmiS9uGv
 [five-me]: https://modelevaluation.org/experiment/display/Nb37QxkAz3FczWDd7

diff --git a/src/benchcab/benchcab.py b/src/benchcab/benchcab.py
@@ -14,6 +14,11 @@
 from benchcab import fluxsite, internal, spatial
 from benchcab.comparison import run_comparisons, run_comparisons_in_parallel
 from benchcab.config import read_config
+from benchcab.coverage import (
+    get_coverage_tasks_default,
+    run_coverage_tasks,
+    run_coverages_in_parallel,
+)
 from benchcab.environment_modules import EnvironmentModules, EnvironmentModulesInterface
 from benchcab.internal import get_met_forcing_file_names
 from benchcab.model import Model
@@ -213,6 +218,7 @@ def fluxsite_submit_job(self, config_path: str, skip: list[str]) -> None:
                 modules=config["modules"],
                 pbs_config=config["fluxsite"]["pbs"],
                 skip_bitwise_cmp="fluxsite-bitwise-cmp" in skip,
+                skip_codecov="gen_codecov" in skip or not config["codecov"],
                 verbose=is_verbose(),
                 benchcab_path=str(self.benchcab_exe_path),
             )
@@ -236,6 +242,29 @@ def fluxsite_submit_job(self, config_path: str, skip: list[str]) -> None:
         logger.info("The NetCDF output for each task is written to:")
         logger.info(f"{internal.FLUXSITE_DIRS['OUTPUT']}/<task_name>_out.nc")
 
+    def gen_codecov(self, config_path: str):
+        """Endpoint for `benchcab gen_codecov`.
+
+        Builds one coverage task per configured model and runs them, either
+        across multiple processes or serially depending on the
+        `fluxsite.multiprocess` configuration value.
+
+        Parameters
+        ----------
+        config_path:
+            Path of the configuration file handed to `_get_config`.
+
+        Raises
+        ------
+        ValueError
+            If `codecov` is not enabled in the configuration.
+
+        """
+        logger = self._get_logger()
+        config = self._get_config(config_path)
+        self._validate_environment(project=config["project"], modules=config["modules"])
+
+        # Fail fast: without coverage-enabled builds there is nothing to
+        # analyse, so check the flag before constructing any task.
+        if not config["codecov"]:
+            msg = """`config.yaml` should have set `codecov: true` before building and
+            running `gen_codecov`."""
+            raise ValueError(msg)
+
+        coverage_tasks = get_coverage_tasks_default(
+            models=self._get_models(config=config)
+        )
+
+        logger.info("Running coverage tasks...")
+        if config["fluxsite"]["multiprocess"]:
+            # Reuse the CPU count requested for the fluxsite PBS job.
+            ncpus = config["fluxsite"]["pbs"]["ncpus"]
+            run_coverages_in_parallel(coverage_tasks, n_processes=ncpus)
+        else:
+            run_coverage_tasks(coverage_tasks)
+        logger.info("Successfully ran coverage tasks")
+
     def checkout(self, config_path: str):
         """Endpoint for `benchcab checkout`."""
         logger = self._get_logger()
@@ -281,7 +310,11 @@ def build(self, config_path: str, mpi=False):
                 logger.info(
                     f"Compiling CABLE {build_mode} for realisation {repo.name}..."
                 )
-                repo.build(modules=config["modules"], mpi=mpi)
+                repo.build(
+                    modules=config["modules"],
+                    mpi=mpi,
+                    coverage=config["codecov"],
+                )
             logger.info(f"Successfully compiled CABLE for realisation {repo.name}")
 
     def fluxsite_setup_work_directory(self, config_path: str):
@@ -356,6 +389,8 @@ def fluxsite(self, config_path: str, no_submit: bool, skip: list[str]):
             self.fluxsite_run_tasks(config_path)
             if "fluxsite-bitwise-cmp" not in skip:
                 self.fluxsite_bitwise_cmp(config_path)
+            # Coverage is optional: honour the `gen_codecov` skip token and only
+            # run when `codecov` is enabled, otherwise `gen_codecov` raises.
+            if "gen_codecov" not in skip and self._get_config(config_path)["codecov"]:
+                self.gen_codecov(config_path)
         else:
             self.fluxsite_submit_job(config_path, skip)
 

diff --git a/src/benchcab/cli.py b/src/benchcab/cli.py
@@ -244,6 +244,17 @@ def generate_parser(app: Benchcab) -> argparse.ArgumentParser:
         submissions: deletes runs/ and benchmark submission files
         all: deletes in both stages of submissions and realisations""",
     )
-
     parser_clean.set_defaults(func=app.clean)
+
+    # subcommand: 'benchcab gen_codecov'
+    parser_codecov = subparsers.add_parser(
+        "gen_codecov",
+        parents=[args_help, args_subcommand],
+        help="Runs code coverage tasks when runs are finished.",
+        description="""Uses profmerge and codecov utilities to do code coverage
+        analysis. Note: All sources must be built using Intel compiler.
+        """,
+        add_help=False,
+    )
+    parser_codecov.set_defaults(func=app.gen_codecov)
     return main_parser
diff --git a/src/benchcab/config.py b/src/benchcab/config.py
@@ -120,6 +120,8 @@ def read_optional_key(config: dict):
         "pbs", {}
     )
 
+    config["codecov"] = config.get("codecov", False)
+
 
 def read_config_file(config_path: str) -> dict:
     """Load the config file in a dict.

diff --git a/src/benchcab/coverage.py b/src/benchcab/coverage.py
@@ -0,0 +1,95 @@
+# Copyright 2024 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details.
+# SPDX-License-Identifier: Apache-2.0
+
+"""A module containing functions and data structures for running coverage tasks."""
+
+import multiprocessing
+import operator
+from contextlib import nullcontext
+from pathlib import Path
+from typing import Optional
+
+from benchcab import internal
+from benchcab.environment_modules import EnvironmentModules, EnvironmentModulesInterface
+from benchcab.model import Model
+from benchcab.utils import get_logger
+from benchcab.utils.fs import chdir
+from benchcab.utils.subprocess import SubprocessWrapper, SubprocessWrapperInterface
+
+
+class CoverageTask:
+    """A class used to represent a single coverage report generation task.
+
+    A task is bound to one coverage directory; `run` merges the profiling
+    data found there with `profmerge` and produces a report with `codecov`.
+    """
+
+    # Class-level handlers shared by all instances; declared against their
+    # interfaces so an alternative implementation can be assigned.
+    subprocess_handler: SubprocessWrapperInterface = SubprocessWrapper()
+    modules_handler: EnvironmentModulesInterface = EnvironmentModules()
+
+    def __init__(
+        self,
+        coverage_dir: str,
+        project_name: Optional[str] = "CABLE",
+        dpi_file: Optional[str] = "pgopti.dpi",
+        spi_file: Optional[str] = "pgopti.spi",
+    ) -> None:
+        """Constructor.
+
+        Parameters
+        ----------
+        coverage_dir:
+            Name of directory where coverage analysis is to be done
+        project_name:
+            Name of project on which codecov is run
+        dpi_file:
+            Name of DPI file created after merging .dyn files created after all runs
+        spi_file:
+            Static profile information on compilation
+
+        """
+        self.logger = get_logger()
+        self.coverage_dir = coverage_dir
+        self.project_name = project_name
+        self.dpi_file = dpi_file
+        self.spi_file = spi_file
+
+    def run(self) -> None:
+        """Executes `profmerge` and `codecov` to run codecov analysis for a given realisation.
+
+        Both commands are executed from inside `coverage_dir`.
+
+        Raises
+        ------
+        OSError
+            If `coverage_dir` is not an existing directory.
+
+        """
+        if not Path(self.coverage_dir).is_dir():
+            # The configuration key is `codecov`, so name it correctly in the hint.
+            msg = f"""The coverage directory: {self.coverage_dir}
+            does not exist. Did you run the jobs and/or set `codecov: true` in `config.yaml`
+            before the building stage"""
+            raise OSError(msg)
+
+        self.logger.info(f"Generating coverage report in {self.coverage_dir}")
+
+        # Load intel-compiler in case we run from CLI, otherwise assuming
+        # PBS jobscript loads
+        with chdir(self.coverage_dir), (
+            nullcontext()
+            if self.modules_handler.module_is_loaded("intel-compiler")
+            else self.modules_handler.load([internal.DEFAULT_MODULES["intel-compiler"]])
+        ):
+            # Merge the profiling data into the DPI file ...
+            self.subprocess_handler.run_cmd(f"profmerge -prof-dpi {self.dpi_file}")
+            # ... then generate the report from the DPI and SPI files.
+            self.subprocess_handler.run_cmd(
+                f"codecov -prj {self.project_name} -dpi {self.dpi_file} -spi {self.spi_file}"
+            )
+
+
+def run_coverage_tasks(coverage_tasks: list[CoverageTask]) -> None:
+    """Run each coverage task in turn, in list order, in the calling process."""
+    for coverage_task in coverage_tasks:
+        coverage_task.run()
+
+
+def get_coverage_tasks_default(models: list[Model]) -> list[CoverageTask]:
+    """Return one coverage task per model, keeping the optional parameters at their defaults."""
+    coverage_dirs = (model.get_coverage_dir() for model in models)
+    return [CoverageTask(coverage_dir) for coverage_dir in coverage_dirs]
+
+
+def run_coverages_in_parallel(
+    coverage_tasks: list[CoverageTask],
+    n_processes=internal.FLUXSITE_DEFAULT_PBS["ncpus"],
+) -> None:
+    """Run the coverage tasks across a pool of `n_processes` worker processes."""
+    with multiprocessing.Pool(n_processes) as worker_pool:
+        # chunksize=1 hands the tasks to the workers one at a time.
+        worker_pool.map(operator.methodcaller("run"), coverage_tasks, chunksize=1)
diff --git a/src/benchcab/data/config-schema.yml b/src/benchcab/data/config-schema.yml
@@ -130,4 +130,8 @@ spatial:
         args:
           nullable: true
           type: "string"
-          required: false
+          required: false
+
+codecov:
+  type: "boolean"
+  required: false
diff --git a/src/benchcab/data/pbs_jobscript.j2 b/src/benchcab/data/pbs_jobscript.j2
@@ -16,4 +16,9 @@ module load {{module}}
 set -ev
 
 {{benchcab_path}} fluxsite-run-tasks --config={{config_path}}{{verbose_flag}}
-{% if skip_bitwise_cmp == False %}{{benchcab_path}} fluxsite-bitwise-cmp --config={{config_path}}{{verbose_flag}}{% endif %}
+{%- if skip_bitwise_cmp == False %}
+{{benchcab_path}} fluxsite-bitwise-cmp --config={{config_path}}{{verbose_flag}}
+{%- endif %}
+{%- if skip_codecov == False %}
+{{benchcab_path}} gen_codecov --config={{config_path}}{{verbose_flag}}
+{%- endif %}
diff --git a/src/benchcab/data/test/config-optional.yml b/src/benchcab/data/test/config-optional.yml
@@ -35,6 +35,8 @@ realisations:
         branch_path: branches/Users/ccc561/v3.0-YP-changes
     name: git_branch
 
+codecov:
+  true
 
 modules: [
   intel-compiler/2021.1.1,

diff --git a/src/benchcab/data/test/pbs_jobscript_no_skip_codecov.sh b/src/benchcab/data/test/pbs_jobscript_no_skip_codecov.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+#PBS -l wd
+#PBS -l ncpus=18
+#PBS -l mem=30GB
+#PBS -l walltime=6:00:00
+#PBS -q normal
+#PBS -P tm70
+#PBS -j oe
+#PBS -m e
+#PBS -l storage=gdata/ks32+gdata/hh5+gdata/wd9
+
+module purge
+module load foo
+module load bar
+module load baz
+
+set -ev
+
+/absolute/path/to/benchcab fluxsite-run-tasks --config=/path/to/config.yaml
+/absolute/path/to/benchcab fluxsite-bitwise-cmp --config=/path/to/config.yaml
+/absolute/path/to/benchcab gen_codecov --config=/path/to/config.yaml
diff --git a/...b/data/test/pbs_jobscript_skip_bitwise.sh → .../data/test/pbs_jobscript_skip_optional.sh b/...b/data/test/pbs_jobscript_skip_bitwise.sh → .../data/test/pbs_jobscript_skip_optional.sh
@@ -16,4 +16,4 @@ module load baz
 
 set -ev
 
-/absolute/path/to/benchcab fluxsite-run-tasks --config=/path/to/config.yaml
+/absolute/path/to/benchcab fluxsite-run-tasks --config=/path/to/config.yaml
diff --git a/src/benchcab/internal.py b/src/benchcab/internal.py
@@ -13,7 +13,10 @@
 CONFIG_REQUIRED_KEYS = ["realisations", "modules"]
 
 # CMake module used for compilation:
-CMAKE_MODULE = "cmake/3.24.2"
+DEFAULT_MODULES = {
+    "cmake": "cmake/3.24.2",
+    "intel-compiler": "intel-compiler/2021.10.0",
+}
 
 # Number of parallel jobs used when compiling with CMake:
 CMAKE_BUILD_PARALLEL_LEVEL = 4
@@ -55,6 +58,9 @@
 # Path CABLE grid info file
 GRID_FILE = CABLE_AUX_DIR / "offline" / "gridinfo_CSIRO_1x1.nc"
 
+# Relative path to directory that stores codecov files
+CODECOV_DIR = RUN_DIR / "coverage"
+
 # Fluxsite directory tree
 FLUXSITE_DIRS: dict[str, Path] = {}
 
@@ -247,7 +253,7 @@
 
 FLUXSITE_DEFAULT_EXPERIMENT = "forty-two-site-test"
 
-OPTIONAL_COMMANDS = ["fluxsite-bitwise-cmp"]
+OPTIONAL_COMMANDS = ["fluxsite-bitwise-cmp", "gen_codecov"]
 
 
 def get_met_forcing_file_names(experiment: str) -> list[str]: