MAINT: PR 839 revisions

* `log_get_derived_metrics()` was adjusted to inject all of the records for a given module, because our initial target is to replicate the stats in the old perl summary report

* a new `log_get_bytes_bandwidth()` function was added as a convenience wrapper to get the MiB (total bytes) and bandwidth (MiB/s) values printed out in the old perl report

* the regression test for this PR was renamed and adjusted to compare against the bytes/bandwidth strings present in the perl reports; so far, only a small subset of the STDIO results are working properly (see the xfails in this test)
darshan-hpc · Nov 14, 2022 · fbb726e · fbb726e
1 parent 30849ec
commit fbb726e
Show file tree

Hide file tree

Showing 2 changed files with 58 additions and 34 deletions.
diff --git a/darshan-util/pydarshan/darshan/backend/cffi_backend.py b/darshan-util/pydarshan/darshan/backend/cffi_backend.py
@@ -676,9 +676,6 @@ def log_get_derived_metrics(log_path: str, mod_name: str):
         return None
     mod_type = _structdefs[mod_name]
 
-    buf = ffi.new("void **")
-    r = libdutil.darshan_log_get_record(log_handle['handle'], modules[mod_name]['idx'], buf)
-    rbuf = ffi.cast(mod_type, buf)
 
     darshan_accumulator = ffi.new("darshan_accumulator *")
     print("before create")
@@ -697,14 +694,21 @@ def log_get_derived_metrics(log_path: str, mod_name: str):
     print("after create")
 
     print("before inject")
-    r = libdutil.darshan_accumulator_inject(darshan_accumulator[0], rbuf[0], 1)
-    if r != 0:
-        libdutil.darshan_free(buf[0])
-        raise RuntimeError("A nonzero exit code was received from "
-                           "darshan_accumulator_inject() at the C level. "
-                           "It may be possible "
-                           "to retrieve additional information from the stderr "
-                           "stream.")
+    buf = ffi.new("void **")
+    r = 1
+    while r >= 1:
+        r = libdutil.darshan_log_get_record(log_handle['handle'], modules[mod_name]['idx'], buf)
+        if r < 1:
+            break
+        rbuf = ffi.cast(mod_type, buf)
+        r_i = libdutil.darshan_accumulator_inject(darshan_accumulator[0], rbuf[0], 1)
+        if r_i != 0:
+            libdutil.darshan_free(buf[0])
+            raise RuntimeError("A nonzero exit code was received from "
+                               "darshan_accumulator_inject() at the C level. "
+                               "It may be possible "
+                               "to retrieve additional information from the stderr "
+                               "stream.")
     print("after inject")
     darshan_derived_metrics = ffi.new("struct darshan_derived_metrics *")
     print("before emit")
@@ -722,3 +726,20 @@ def log_get_derived_metrics(log_path: str, mod_name: str):
     #libdutil.darshan_accumulator_destroy(darshan_accumulator)
     libdutil.darshan_free(buf[0])
     return darshan_derived_metrics
+
+
+def log_get_bytes_bandwidth(log_path: str, mod_name: str) -> str:
+    # get total bytes (in MiB) and bandwidth (in MiB/s) for
+    # a given module -- this information was commonly reported
+    # in the old perl-based summary reports
+    darshan_derived_metrics = log_get_derived_metrics(log_path=log_path,
+                                                      mod_name=mod_name)
+    total_mib = darshan_derived_metrics.total_bytes / 2 ** 20
+    report = darshan.DarshanReport(log_path, read_all=True)
+    fcounters_df = report.records[f"{mod_name}"].to_df()['fcounters']
+    total_rw_time = (fcounters_df[f"{mod_name}_F_READ_TIME"].sum() +
+                     fcounters_df[f"{mod_name}_F_WRITE_TIME"].sum() +
+                     fcounters_df[f"{mod_name}_F_META_TIME"].sum())
+    total_bw = total_mib / total_rw_time
+    ret_str = f"I/O performance estimate (at the {mod_name} layer): transferred {total_mib:.1f} MiB at {total_bw:.2f} MiB/s"
+    return ret_str
diff --git a/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py b/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py
@@ -161,29 +161,32 @@ def test_log_get_generic_record(dtype):
         assert actual_fcounter_names == expected_fcounter_names
 
 
-@pytest.mark.parametrize("log_path", [
-    "imbalanced-io.darshan",
+@pytest.mark.parametrize("log_path, mod_name, expected_str", [
+    # the expected bytes/bandwidth strings are pasted
+    # directly from the old perl summary reports
+    ("imbalanced-io.darshan",
+     "STDIO",
+     "I/O performance estimate (at the STDIO layer): transferred 1.1 MiB at 0.01 MiB/s"),
+    ("laytonjb_test1_id28730_6-7-43012-2131301613401632697_1.darshan",
+     "STDIO",
+     "I/O performance estimate (at the STDIO layer): transferred 0.0 MiB at 4.22 MiB/s"),
+    pytest.param("runtime_and_dxt_heatmaps_diagonal_write_only.darshan",
+     "POSIX",
+     "I/O performance estimate (at the POSIX layer): transferred 0.0 MiB at 0.02 MiB/s",
+     marks=pytest.mark.xfail(reason="Not sure why modules other than STDIO fail yet...")),
+    pytest.param("treddy_mpi-io-test_id4373053_6-2-60198-9815401321915095332_1.darshan",
+     "STDIO",
+     "I/O performance estimate (at the STDIO layer): transferred 0.0 MiB at 16.47 MiB/s",
+     marks=pytest.mark.xfail(reason="Something extra needed to account for MPI-IO?")),
 ])
-def test_derived_metrics_basic(log_path):
+def test_derived_metrics_bytes_and_bandwidth(log_path, mod_name, expected_str):
     # test the basic scenario of retrieving
-    # the derived metrics from all records for a given
-    # module; the situation where you'd like to
-    # retrieve derived metrics for a subset of records (i.e.,
-    # a particular filename) is not tested here
+    # the total data transferred and bandwidth
+    # for all records in a given module; the situation
+    # of accumulating derived metrics with filtering
+    # (i.e., for a single filename) is not tested here
+
     log_path = get_log_path(log_path)
-    report = darshan.DarshanReport(log_path, read_all=True)
-    for mod_name in report.modules:
-        # if support is added for accumulator work on these
-        # modules later on, the test will fail to raise an error,
-        # causing the test to ultimately fail; that is good, it will
-        # force us to acknowledge that the support was added intentionally
-        # under the hood
-        print("testing mod_name:", mod_name)
-        if mod_name in {"LUSTRE"}:
-            with pytest.raises(RuntimeError):
-                derived_metrics = backend.log_get_derived_metrics(log_path=log_path,
-                                                  mod_name=mod_name)
-        else:
-            derived_metrics = backend.log_get_derived_metrics(log_path=log_path,
-                                              mod_name=mod_name)
-        # TODO: assert against values from i.e., perl reports
+    actual_str = backend.log_get_bytes_bandwidth(log_path=log_path,
+                                                 mod_name=mod_name)
+    assert actual_str == expected_str