diff --git a/darshan-util/pydarshan/darshan/backend/cffi_backend.py b/darshan-util/pydarshan/darshan/backend/cffi_backend.py
index 6e6d76f4c..2480bd53e 100644
--- a/darshan-util/pydarshan/darshan/backend/cffi_backend.py
+++ b/darshan-util/pydarshan/darshan/backend/cffi_backend.py
@@ -708,3 +708,99 @@ def _df_to_rec(rec_dict, mod_name, rec_index_of_interest=None):
rec_arr.rank = counters_df.iloc[rec_index_of_interest, 1]
buf = rec_arr.tobytes()
return buf
+
+
+@functools.lru_cache()
+def log_get_derived_metrics(log_path: str, mod_name: str):
+    """
+    Returns the darshan_derived_metrics struct from CFFI/C accumulator code.
+
+    Every record of the requested module is read from the log and injected
+    into a C-level accumulator, which then emits the derived metrics. The
+    log handle, the record buffer and the accumulator are released on both
+    the success and the error paths.
+
+    Parameters:
+        log_path: Path to the darshan log file
+        mod_name: The name of the module to retrieve derived metrics for
+
+    Returns:
+        darshan_derived_metrics struct (cdata object)
+
+    Raises:
+        ValueError: if mod_name is not among the modules of the log file
+        KeyError: if mod_name has no entry in _structdefs
+        RuntimeError: if a darshan_accumulator_*() call returns a nonzero
+            code, or if no record could be read for the module
+    """
+    # TODO: eventually add support for i.e., a regex filter on the records
+    # the user wants to get derived metrics for--like filtering to records
+    # with a single filename involved before accumulating the data?
+    log_handle = log_open(log_path)
+    try:
+        jobrec = ffi.new("struct darshan_job *")
+        libdutil.darshan_log_get_job(log_handle['handle'], jobrec)
+        modules = log_get_modules(log_handle)
+
+        if mod_name not in modules:
+            raise ValueError(f"{mod_name} is not in the available log file "
+                             f"modules: {modules.keys()}")
+
+        mod_type = _structdefs[mod_name]
+        mod_idx = modules[mod_name]['idx']
+        darshan_accumulator = ffi.new("darshan_accumulator *")
+        r = libdutil.darshan_accumulator_create(mod_idx,
+                                                jobrec[0].nprocs,
+                                                darshan_accumulator)
+        if r != 0:
+            raise RuntimeError("A nonzero exit code was received from "
+                               "darshan_accumulator_create() at the C level. "
+                               f"This could mean that the {mod_name} module does not "
+                               "support derived metric calculation, or that "
+                               "another kind of error occurred. It may be possible "
+                               "to retrieve additional information from the stderr "
+                               "stream.")
+
+        buf = ffi.new("void **")
+        # rbuf stays None until a record has been read, so that a module
+        # without readable records is reported as a RuntimeError instead
+        # of failing with an UnboundLocalError at the emit call
+        rbuf = None
+        try:
+            # reading stops at the first return value below 1
+            while libdutil.darshan_log_get_record(log_handle['handle'],
+                                                  mod_idx,
+                                                  buf) >= 1:
+                rbuf = ffi.cast(mod_type, buf)
+                r_i = libdutil.darshan_accumulator_inject(darshan_accumulator[0],
+                                                          rbuf[0],
+                                                          1)
+                if r_i != 0:
+                    raise RuntimeError("A nonzero exit code was received from "
+                                       "darshan_accumulator_inject() at the C level. "
+                                       "It may be possible "
+                                       "to retrieve additional information from the stderr "
+                                       "stream.")
+            if rbuf is None:
+                raise RuntimeError(f"No {mod_name} records could be read from "
+                                   "the log file, so derived metrics cannot be "
+                                   "calculated.")
+            darshan_derived_metrics = ffi.new("struct darshan_derived_metrics *")
+            r = libdutil.darshan_accumulator_emit(darshan_accumulator[0],
+                                                  darshan_derived_metrics,
+                                                  rbuf[0])
+            if r != 0:
+                raise RuntimeError("A nonzero exit code was received from "
+                                   "darshan_accumulator_emit() at the C level. "
+                                   "It may be possible "
+                                   "to retrieve additional information from the stderr "
+                                   "stream.")
+        finally:
+            # release the C-side record buffer and the accumulator even
+            # when one of the calls above raised
+            if buf[0] != ffi.NULL:
+                libdutil.darshan_free(buf[0])
+            libdutil.darshan_accumulator_destroy(darshan_accumulator[0])
+    finally:
+        log_close(log_handle)
+    return darshan_derived_metrics
diff --git a/darshan-util/pydarshan/darshan/cli/base.html b/darshan-util/pydarshan/darshan/cli/base.html
index 99382cb34..302858046 100644
--- a/darshan-util/pydarshan/darshan/cli/base.html
+++ b/darshan-util/pydarshan/darshan/cli/base.html
@@ -49,7 +49,8 @@
${fig_title}
${fig.fig_description}
% else:
-
+
+
${fig.fig_description}
% endif
diff --git a/darshan-util/pydarshan/darshan/cli/summary.py b/darshan-util/pydarshan/darshan/cli/summary.py
index 56050d5c0..efc9cb0cd 100644
--- a/darshan-util/pydarshan/darshan/cli/summary.py
+++ b/darshan-util/pydarshan/darshan/cli/summary.py
@@ -14,6 +14,7 @@
import darshan
import darshan.cli
+from darshan.lib.accum import log_get_bytes_bandwidth
from darshan.experimental.plots import (
plot_dxt_heatmap,
plot_io_cost,
@@ -53,6 +54,11 @@ def __init__(
fig_args: dict,
fig_description: str = "",
fig_width: int = 500,
+ # when there is no HTML data generated
+ # for the figure (i.e., no image/plot),
+ # we have the option of changing the caption
+ # text color for a warning/important standalone text
+ text_only_color: str = "red",
):
self.section_title = section_title
if not fig_title:
@@ -65,7 +71,11 @@ def __init__(
# temporary handling for DXT disabled cases
# so special error message can be passed
# in place of an encoded image
+ # NOTE: this code path is now also
+ # being used for adding the bandwidth
+ # text, which doesn't really have an image...
self.fig_html = None
+ self.text_only_color = text_only_color
if self.fig_func:
self.generate_fig()
@@ -487,6 +497,30 @@ def register_figures(self):
)
self.figures.append(opcount_fig)
+ try:
+ # this is really just some text
+ # so using ReportFigure feels awkward...
+ bandwidth_fig = ReportFigure(
+ section_title=sect_title,
+ fig_title="",
+ fig_func=None,
+ fig_args=None,
+ fig_description=log_get_bytes_bandwidth(log_path=self.log_path,
+ mod_name=mod),
+ text_only_color="blue")
+ self.figures.append(bandwidth_fig)
+ except (RuntimeError, KeyError):
+ # the module probably doesn't support derived metrics
+ # calculations, but the C code doesn't distinguish other
+ # types of errors
+
+ # the KeyError appears to be needed for a subset of logs
+ # for which _structdefs lacks APMPI or APXC entries;
+ # for example `e3sm_io_heatmap_only.darshan` in logs
+ # repo
+ pass
+
+
#########################
# Data Access by Category
if not {"POSIX", "STDIO"}.isdisjoint(set(self.report.modules)):
diff --git a/darshan-util/pydarshan/darshan/lib/accum.py b/darshan-util/pydarshan/darshan/lib/accum.py
new file mode 100644
index 000000000..2bfe8898a
--- /dev/null
+++ b/darshan-util/pydarshan/darshan/lib/accum.py
@@ -0,0 +1,57 @@
+from darshan.backend.cffi_backend import log_get_derived_metrics
+
+
+def log_get_bytes_bandwidth(log_path: str, mod_name: str) -> str:
+    """
+    Summarize I/O performance for a given darshan module.
+
+    Parameters
+    ----------
+    log_path : str
+        Path to the darshan binary log file.
+    mod_name : str
+        Name of the darshan module to summarize the I/O
+        performance for.
+
+    Returns
+    -------
+    out: str
+        A short string reporting the total data transferred (in MiB)
+        and the bandwidth (in MiB/s) of the given module in the
+        provided log file.
+
+    Raises
+    ------
+    RuntimeError
+        When a provided module name is not supported for the accumulator
+        interface for provision of the summary data, or for any other
+        error that occurs in the C/CFFI interface.
+    ValueError
+        When a provided module name does not exist in the log file.
+    KeyError
+        When a provided module name exists in the log file but has no
+        record structure definition in the CFFI backend (``APMPI``, for
+        example).
+
+    Examples
+    --------
+
+    >>> from darshan.log_utils import get_log_path
+    >>> from darshan.lib.accum import log_get_bytes_bandwidth
+
+    >>> log_path = get_log_path("imbalanced-io.darshan")
+    >>> log_get_bytes_bandwidth(log_path, "POSIX")
+    'I/O performance estimate (at the POSIX layer): transferred 101785.8 MiB at 164.99 MiB/s'
+
+    >>> log_get_bytes_bandwidth(log_path, "MPI-IO")
+    'I/O performance estimate (at the MPI-IO layer): transferred 126326.8 MiB at 101.58 MiB/s'
+    """
+    # the total amount of data (in MiB) and the bandwidth (in MiB/s) of
+    # a module were commonly reported in the old perl-based summary
+    # reports, so the same two quantities are reported here
+    metrics = log_get_derived_metrics(log_path=log_path, mod_name=mod_name)
+    mib_transferred = metrics.total_bytes / 2 ** 20
+    bandwidth = metrics.agg_perf_by_slowest
+    return (f"I/O performance estimate (at the {mod_name} layer): "
+            f"transferred {mib_transferred:.1f} MiB "
+            f"at {bandwidth:.2f} MiB/s")
diff --git a/darshan-util/pydarshan/darshan/tests/test_lib_accum.py b/darshan-util/pydarshan/darshan/tests/test_lib_accum.py
new file mode 100644
index 000000000..786796823
--- /dev/null
+++ b/darshan-util/pydarshan/darshan/tests/test_lib_accum.py
@@ -0,0 +1,92 @@
+from darshan.lib.accum import log_get_bytes_bandwidth
+from darshan.log_utils import get_log_path
+
+import pytest
+
+
+@pytest.mark.parametrize("log_path, mod_name, expected_str", [
+    # the expected bytes/bandwidth strings are pasted
+    # directly from the old perl summary reports;
+    # exceptions noted below
+    # in some cases we defer to darshan-parser for the expected
+    # values; see discussion in gh-839
+    ("imbalanced-io.darshan",
+     "STDIO",
+     "I/O performance estimate (at the STDIO layer): transferred 1.1 MiB at 0.01 MiB/s"),
+    ("imbalanced-io.darshan",
+     "MPI-IO",
+     "I/O performance estimate (at the MPI-IO layer): transferred 126326.8 MiB at 101.58 MiB/s"),
+    # imbalanced-io.darshan does have LUSTRE data,
+    # but it doesn't support derived metrics at time
+    # of writing
+    ("imbalanced-io.darshan",
+     "LUSTRE",
+     "RuntimeError"),
+    ("imbalanced-io.darshan",
+     "POSIX",
+     "I/O performance estimate (at the POSIX layer): transferred 101785.8 MiB at 164.99 MiB/s"),
+    ("laytonjb_test1_id28730_6-7-43012-2131301613401632697_1.darshan",
+     "STDIO",
+     "I/O performance estimate (at the STDIO layer): transferred 0.0 MiB at 4.22 MiB/s"),
+    ("runtime_and_dxt_heatmaps_diagonal_write_only.darshan",
+     "POSIX",
+     "I/O performance estimate (at the POSIX layer): transferred 0.0 MiB at 0.02 MiB/s"),
+    ("treddy_mpi-io-test_id4373053_6-2-60198-9815401321915095332_1.darshan",
+     "STDIO",
+     "I/O performance estimate (at the STDIO layer): transferred 0.0 MiB at 16.47 MiB/s"),
+    ("e3sm_io_heatmap_only.darshan",
+     "STDIO",
+     "I/O performance estimate (at the STDIO layer): transferred 0.0 MiB at 3.26 MiB/s"),
+    ("e3sm_io_heatmap_only.darshan",
+     "MPI-IO",
+     "I/O performance estimate (at the MPI-IO layer): transferred 73880.2 MiB at 105.69 MiB/s"),
+    ("partial_data_stdio.darshan",
+     "MPI-IO",
+     "I/O performance estimate (at the MPI-IO layer): transferred 32.0 MiB at 2317.98 MiB/s"),
+    ("partial_data_stdio.darshan",
+     "STDIO",
+     "I/O performance estimate (at the STDIO layer): transferred 16336.0 MiB at 2999.14 MiB/s"),
+    # the C derived metrics code can't distinguish
+    # between different kinds of errors at this time,
+    # but we can still intercept in some cases...
+    ("partial_data_stdio.darshan",
+     "GARBAGE",
+     "ValueError"),
+    # TODO: determine if the lack of APMPI and
+    # any other "add-ons" in _structdefs is a bug
+    # in the control flow for `log_get_derived_metrics()`?
+    ("e3sm_io_heatmap_only.darshan",
+     "APMPI",
+     "KeyError"),
+    ("skew-app.darshan",
+     "POSIX",
+     "I/O performance estimate (at the POSIX layer): transferred 41615.8 MiB at 157.49 MiB/s"),
+    ("skew-app.darshan",
+     "MPI-IO",
+     "I/O performance estimate (at the MPI-IO layer): transferred 41615.8 MiB at 55.22 MiB/s"),
+])
+def test_derived_metrics_bytes_and_bandwidth(log_path, mod_name, expected_str):
+    # basic scenario only: the total data transferred and the
+    # bandwidth are retrieved for all records of a given module;
+    # accumulating derived metrics with filtering (i.e., for a
+    # single filename) is not tested here
+
+    # an expected_str naming an exception type means that the call
+    # should raise it with a message matching the paired pattern
+    expected_errors = {
+        "RuntimeError": (RuntimeError,
+                         f"{mod_name} module does not support derived"),
+        "ValueError": (ValueError,
+                       f"{mod_name} is not in the available log"),
+        "KeyError": (KeyError, f"{mod_name}"),
+    }
+    log_path = get_log_path(log_path)
+    if expected_str in expected_errors:
+        exc_type, msg_pattern = expected_errors[expected_str]
+        with pytest.raises(exc_type, match=msg_pattern):
+            log_get_bytes_bandwidth(log_path=log_path,
+                                    mod_name=mod_name)
+    else:
+        actual_str = log_get_bytes_bandwidth(log_path=log_path,
+                                             mod_name=mod_name)
+        assert actual_str == expected_str
diff --git a/darshan-util/pydarshan/darshan/tests/test_summary.py b/darshan-util/pydarshan/darshan/tests/test_summary.py
index 282ab7d36..1c1acd2fa 100644
--- a/darshan-util/pydarshan/darshan/tests/test_summary.py
+++ b/darshan-util/pydarshan/darshan/tests/test_summary.py
@@ -236,6 +236,12 @@ def test_main_all_logs_repo_files(tmpdir, log_filepath):
else:
assert actual_runtime_heatmap_titles == 0
+ # check for presence of bandwidth summary strings
+ # (more detailed per-module probes are present
+ # in test_derived_metrics_bytes_and_bandwidth())
+ assert "I/O performance estimate" in report_str
+ assert "color: blue" in report_str
+
class TestReportData: