From 3fc254b077ebded7c85fe1636e9b865823de5315 Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Thu, 27 Oct 2022 16:06:15 -0600 Subject: [PATCH 01/19] WIP: Python derived/accum interface * early draft of Python/CFFI interface to derived metrics/accumulators described in: - gh-642 - gh-677 * for now this definitely doesn't work, and feels like I'm basically reconstituting a C control flow in CFFI/Python instead of using a sensible exposure point between C and Python to pull out populated structs from a single entry point * perhaps folks can just help me sort out the current issues noted in the source changes rather than providing a convenient API, though once thorough regression tests are in place that might be something to consider in the future... (or even just maintaining it in `pandas`/Python someday if the perf is ~similar) --- .../pydarshan/darshan/backend/api_def_c.py | 33 +++++++++++++++++++ .../pydarshan/darshan/backend/cffi_backend.py | 25 ++++++++++++++ .../pydarshan/darshan/tests/test_cffi_misc.py | 12 +++++++ 3 files changed, 70 insertions(+) diff --git a/darshan-util/pydarshan/darshan/backend/api_def_c.py b/darshan-util/pydarshan/darshan/backend/api_def_c.py index 5fce60b5e..7adaf510e 100644 --- a/darshan-util/pydarshan/darshan/backend/api_def_c.py +++ b/darshan-util/pydarshan/darshan/backend/api_def_c.py @@ -8,6 +8,34 @@ header = """/* from darshan-logutils.h */ + +struct darshan_derived_metrics { + int64_t total_bytes; + double unique_io_total_time_by_slowest; + double unique_rw_only_time_by_slowest; + double unique_md_only_time_by_slowest; + int unique_io_slowest_rank; + double shared_io_total_time_by_slowest; + double agg_perf_by_slowest; + double agg_time_by_slowest; + struct darshan_file_category_counters; +}; + + + +struct darshan_accumulator { + int64_t module_id; + int64_t job_nprocs; + void* agg_record; + int num_records; + void *file_hash_table; + double shared_io_total_time_by_slowest; + int64_t total_bytes; + double *rank_cumul_io_total_time; + double *rank_cumul_rw_only_time; + double *rank_cumul_md_only_time; +}; + struct darshan_mnt_info { char mnt_type[3015]; @@ -23,6 +51,11 @@ int partial_flag; }; +int darshan_accumulator_emit(struct darshan_accumulator, struct darshan_derived_metrics*, void* aggregation_record); +int darshan_accumulator_destroy(struct darshan_accumulator); +int darshan_accumulator_create(enum darshan_module_id, int64_t, struct darshan_accumulator*); +int darshan_accumulator_inject(struct darshan_accumulator, void*, int); + /* from darshan-log-format.h */ typedef uint64_t darshan_record_id; diff --git a/darshan-util/pydarshan/darshan/backend/cffi_backend.py b/darshan-util/pydarshan/darshan/backend/cffi_backend.py index 982fb2e86..0d3711999 100644 --- a/darshan-util/pydarshan/darshan/backend/cffi_backend.py +++ b/darshan-util/pydarshan/darshan/backend/cffi_backend.py @@ -657,3 +657,28 @@ def _log_get_heatmap_record(log): libdutil.darshan_free(buf[0]) return rec + + +def log_get_accumulator(log, mod_name: str): + log = log_open(log) + jobrec = ffi.new("struct darshan_job *") + libdutil.darshan_log_get_job(log['handle'], jobrec) + modules = log_get_modules(log) + + if mod_name not in modules: + return None + mod_type = _structdefs[mod_name] + + darshan_accumulator = ffi.new("struct darshan_accumulator *") + buf = ffi.new("void **") + r = libdutil.darshan_log_get_record(log['handle'], modules[mod_name]['idx'], buf) + rbuf = ffi.cast(mod_type, buf) + + libdutil.darshan_accumulator_create(modules[mod_name]['idx'], + jobrec[0].nprocs, + 
darshan_accumulator) + + # TODO: fix the segfault on the inject call below + r = libdutil.darshan_accumulator_inject(darshan_accumulator[0], rbuf[0], 1) + # TODO: darshan_accumulator_emit and darshan_accumulator_destroy + return darshan_accumulator diff --git a/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py b/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py index 4c95d52f2..8500e8980 100644 --- a/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py +++ b/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py @@ -159,3 +159,15 @@ def test_log_get_generic_record(dtype): # make sure the returned key/column names agree assert actual_counter_names == expected_counter_names assert actual_fcounter_names == expected_fcounter_names + + +@pytest.mark.parametrize("log_path", [ + "imbalanced-io.darshan", +]) +def test_accumulator_emit(log_path): + log_path = get_log_path(log_path) + report = darshan.DarshanReport(log_path, read_all=True) + for mod_name in report.modules: + acc = backend.log_get_accumulator(log=log_path, + mod_name=mod_name) + # TODO: assert against values from i.e., perl reports From 6bd2c2c87a398d39c98506ca21006e9c3b396f86 Mon Sep 17 00:00:00 2001 From: Shane Snyder Date: Tue, 1 Nov 2022 11:30:59 -0500 Subject: [PATCH 02/19] fix pydarshan defs for darshan-util accumulators --- .../pydarshan/darshan/backend/api_def_c.py | 27 ++++++------------- .../pydarshan/darshan/backend/cffi_backend.py | 8 +++--- 2 files changed, 12 insertions(+), 23 deletions(-) diff --git a/darshan-util/pydarshan/darshan/backend/api_def_c.py b/darshan-util/pydarshan/darshan/backend/api_def_c.py index 7adaf510e..93834f7ec 100644 --- a/darshan-util/pydarshan/darshan/backend/api_def_c.py +++ b/darshan-util/pydarshan/darshan/backend/api_def_c.py @@ -21,21 +21,6 @@ struct darshan_file_category_counters; }; - - -struct darshan_accumulator { - int64_t module_id; - int64_t job_nprocs; - void* agg_record; - int num_records; - void *file_hash_table; - double shared_io_total_time_by_slowest; - int64_t total_bytes; - double *rank_cumul_io_total_time; - double *rank_cumul_rw_only_time; - double *rank_cumul_md_only_time; -}; - struct darshan_mnt_info { char mnt_type[3015]; @@ -51,10 +36,14 @@ int partial_flag; }; -int darshan_accumulator_emit(struct darshan_accumulator, struct darshan_derived_metrics*, void* aggregation_record); -int darshan_accumulator_destroy(struct darshan_accumulator); -int darshan_accumulator_create(enum darshan_module_id, int64_t, struct darshan_accumulator*); -int darshan_accumulator_inject(struct darshan_accumulator, void*, int); +/* opaque accumulator reference */ +struct darshan_accumulator_st; +typedef struct darshan_accumulator_st* darshan_accumulator; + +int darshan_accumulator_create(enum darshan_module_id, int64_t, darshan_accumulator*); +int darshan_accumulator_inject(darshan_accumulator, void*, int); +int darshan_accumulator_emit(darshan_accumulator, struct darshan_derived_metrics*, void* aggregation_record); +int darshan_accumulator_destroy(darshan_accumulator); /* from darshan-log-format.h */ typedef uint64_t darshan_record_id; diff --git a/darshan-util/pydarshan/darshan/backend/cffi_backend.py b/darshan-util/pydarshan/darshan/backend/cffi_backend.py index 0d3711999..0b5ce48e8 100644 --- a/darshan-util/pydarshan/darshan/backend/cffi_backend.py +++ b/darshan-util/pydarshan/darshan/backend/cffi_backend.py @@ -669,14 +669,14 @@ def log_get_accumulator(log, mod_name: str): return None mod_type = _structdefs[mod_name] - darshan_accumulator = ffi.new("struct darshan_accumulator 
*") buf = ffi.new("void **") r = libdutil.darshan_log_get_record(log['handle'], modules[mod_name]['idx'], buf) rbuf = ffi.cast(mod_type, buf) - libdutil.darshan_accumulator_create(modules[mod_name]['idx'], - jobrec[0].nprocs, - darshan_accumulator) + darshan_accumulator = ffi.new("darshan_accumulator *") + r = libdutil.darshan_accumulator_create(modules[mod_name]['idx'], + jobrec[0].nprocs, + darshan_accumulator) # TODO: fix the segfault on the inject call below r = libdutil.darshan_accumulator_inject(darshan_accumulator[0], rbuf[0], 1) From 664047270d49c239944c97eaf295c01a2e52334a Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Fri, 4 Nov 2022 12:56:42 -0600 Subject: [PATCH 03/19] MAINT: PR 839 revisions * needed to expose `darshan_file_category_counters` so that its size is available for usage in `darshan_derived_metrics` struct * some improvements to the renamed `log_get_derived_metrics()` function, including first draft of error handling; the renamed `test_derived_metrics_basic` test was also adjusted to expect an error for `LUSTRE` module --- .../pydarshan/darshan/backend/api_def_c.py | 11 ++++ .../pydarshan/darshan/backend/cffi_backend.py | 60 ++++++++++++++++--- .../pydarshan/darshan/tests/test_cffi_misc.py | 22 ++++++- 3 files changed, 81 insertions(+), 12 deletions(-) diff --git a/darshan-util/pydarshan/darshan/backend/api_def_c.py b/darshan-util/pydarshan/darshan/backend/api_def_c.py index 93834f7ec..dcdec66e5 100644 --- a/darshan-util/pydarshan/darshan/backend/api_def_c.py +++ b/darshan-util/pydarshan/darshan/backend/api_def_c.py @@ -9,6 +9,17 @@ header = """/* from darshan-logutils.h */ +struct darshan_file_category_counters { + int64_t count; /* number of files in this category */ + int64_t total_read_volume_bytes; /* total read traffic volume */ + int64_t total_write_volume_bytes;/* total write traffic volume */ + int64_t max_read_volume_bytes; /* maximum read traffic volume to 1 file */ + int64_t max_write_volume_bytes; /* maximum write traffic volume to 1 file */ + int64_t total_max_offset_bytes; /* summation of max_offsets */ + int64_t max_offset_bytes; /* largest max_offset */ + int64_t nprocs; /* how many procs accessed (-1 for "all") */ +}; + struct darshan_derived_metrics { int64_t total_bytes; double unique_io_total_time_by_slowest; diff --git a/darshan-util/pydarshan/darshan/backend/cffi_backend.py b/darshan-util/pydarshan/darshan/backend/cffi_backend.py index 0b5ce48e8..adae297d9 100644 --- a/darshan-util/pydarshan/darshan/backend/cffi_backend.py +++ b/darshan-util/pydarshan/darshan/backend/cffi_backend.py @@ -659,26 +659,68 @@ def _log_get_heatmap_record(log): return rec -def log_get_accumulator(log, mod_name: str): - log = log_open(log) +def log_get_derived_metrics(log_path: str, mod_name: str): + """ + Returns the darshan_derived_metrics struct from CFFI/C accumulator code. + + Parameters: + log_path: Path to the darshan log file + mod_name: The name of the module to retrieve derived metrics for + + Returns: + darshan_derived_metrics struct (cdata object) + """ + # TODO: eventually add support for i.e., a regex filter on the records + # the user wants to get derived metrics for--like filtering to records + # with a single filename involved before accumulating the data? 
+ log_handle = log_open(log_path) jobrec = ffi.new("struct darshan_job *") - libdutil.darshan_log_get_job(log['handle'], jobrec) - modules = log_get_modules(log) + libdutil.darshan_log_get_job(log_handle['handle'], jobrec) + modules = log_get_modules(log_handle) if mod_name not in modules: return None mod_type = _structdefs[mod_name] buf = ffi.new("void **") - r = libdutil.darshan_log_get_record(log['handle'], modules[mod_name]['idx'], buf) + r = libdutil.darshan_log_get_record(log_handle['handle'], modules[mod_name]['idx'], buf) rbuf = ffi.cast(mod_type, buf) darshan_accumulator = ffi.new("darshan_accumulator *") + print("before create") r = libdutil.darshan_accumulator_create(modules[mod_name]['idx'], jobrec[0].nprocs, darshan_accumulator) - - # TODO: fix the segfault on the inject call below + if r != 0: + raise RuntimeError("A nonzero exit code was received from " + "darshan_accumulator_create() at the C level. " + f"This could mean that the {mod_name} module does not " + "support derived metric calculation, or that " + "another kind of error occurred. It may be possible " + "to retrieve additional information from the stderr " + "stream.") + print("after create") + + print("before inject") r = libdutil.darshan_accumulator_inject(darshan_accumulator[0], rbuf[0], 1) - # TODO: darshan_accumulator_emit and darshan_accumulator_destroy - return darshan_accumulator + if r != 0: + raise RuntimeError("A nonzero exit code was received from " + "darshan_accumulator_inject() at the C level. " + "It may be possible " + "to retrieve additional information from the stderr " + "stream.") + print("after inject") + darshan_derived_metrics = ffi.new("struct darshan_derived_metrics *") + print("before emit") + r = libdutil.darshan_accumulator_emit(darshan_accumulator[0], + darshan_derived_metrics, + rbuf[0]) + if r != 0: + raise RuntimeError("A nonzero exit code was received from " + "darshan_accumulator_emit() at the C level. 
" + "It may be possible " + "to retrieve additional information from the stderr " + "stream.") + print("after emit") + #libdutil.darshan_accumulator_destroy(darshan_accumulator) + return darshan_derived_metrics diff --git a/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py b/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py index 8500e8980..76a64b555 100644 --- a/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py +++ b/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py @@ -164,10 +164,26 @@ def test_log_get_generic_record(dtype): @pytest.mark.parametrize("log_path", [ "imbalanced-io.darshan", ]) -def test_accumulator_emit(log_path): +def test_derived_metrics_basic(log_path): + # test the basic scenario of retrieving + # the derived metrics from all records for a given + # module; the situation where you'd like to + # retrieve derived metrics for a subset of records (i.e., + # a particular filename) is not tested here log_path = get_log_path(log_path) report = darshan.DarshanReport(log_path, read_all=True) for mod_name in report.modules: - acc = backend.log_get_accumulator(log=log_path, - mod_name=mod_name) + # if support is added for accumulator work on these + # modules later on, the test will fail to raise an error, + # causing the test to ultimately fail; that is good, it will + # force us to acknowledge that the support was added intentionally + # under the hood + print("testing mod_name:", mod_name) + if mod_name in {"LUSTRE"}: + with pytest.raises(RuntimeError): + derived_metrics = backend.log_get_derived_metrics(log_path=log_path, + mod_name=mod_name) + else: + derived_metrics = backend.log_get_derived_metrics(log_path=log_path, + mod_name=mod_name) # TODO: assert against values from i.e., perl reports From 6bd237c94f499ef3891364fc21080156fba53edc Mon Sep 17 00:00:00 2001 From: Phil Carns Date: Thu, 10 Nov 2022 13:58:40 -0500 Subject: [PATCH 04/19] bug fix: missing array in derived metrics struct --- darshan-util/pydarshan/darshan/backend/api_def_c.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/darshan-util/pydarshan/darshan/backend/api_def_c.py b/darshan-util/pydarshan/darshan/backend/api_def_c.py index dcdec66e5..2831dc69c 100644 --- a/darshan-util/pydarshan/darshan/backend/api_def_c.py +++ b/darshan-util/pydarshan/darshan/backend/api_def_c.py @@ -29,7 +29,7 @@ double shared_io_total_time_by_slowest; double agg_perf_by_slowest; double agg_time_by_slowest; - struct darshan_file_category_counters; + struct darshan_file_category_counters category_counters[7]; }; struct darshan_mnt_info From 774776460c378811bd24ab3fb28bceef8a37ff19 Mon Sep 17 00:00:00 2001 From: Phil Carns Date: Thu, 10 Nov 2022 14:12:57 -0500 Subject: [PATCH 05/19] silence warning: convert enum arg to int --- darshan-util/pydarshan/darshan/backend/api_def_c.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/darshan-util/pydarshan/darshan/backend/api_def_c.py b/darshan-util/pydarshan/darshan/backend/api_def_c.py index 2831dc69c..50ae9cae3 100644 --- a/darshan-util/pydarshan/darshan/backend/api_def_c.py +++ b/darshan-util/pydarshan/darshan/backend/api_def_c.py @@ -51,7 +51,12 @@ struct darshan_accumulator_st; typedef struct darshan_accumulator_st* darshan_accumulator; -int darshan_accumulator_create(enum darshan_module_id, int64_t, darshan_accumulator*); +/* NOTE: darshan_module_id is technically an enum in the C API, but we'll + * just use an int for now (equivalent type) to avoid warnings from cffi + * that we have not defined explicit enum values. 
We don't need that + * functionality. + */ +int darshan_accumulator_create(int darshan_module_id, int64_t, darshan_accumulator*); int darshan_accumulator_inject(darshan_accumulator, void*, int); int darshan_accumulator_emit(darshan_accumulator, struct darshan_derived_metrics*, void* aggregation_record); int darshan_accumulator_destroy(darshan_accumulator); From e306f8e62d25b36235731f93476a66f93986b889 Mon Sep 17 00:00:00 2001 From: Phil Carns Date: Thu, 10 Nov 2022 14:23:00 -0500 Subject: [PATCH 06/19] added some free wrapper calls - for record buffer in accumulator test - for error path in another test --- darshan-util/pydarshan/darshan/backend/cffi_backend.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/darshan-util/pydarshan/darshan/backend/cffi_backend.py b/darshan-util/pydarshan/darshan/backend/cffi_backend.py index adae297d9..200e281ea 100644 --- a/darshan-util/pydarshan/darshan/backend/cffi_backend.py +++ b/darshan-util/pydarshan/darshan/backend/cffi_backend.py @@ -633,6 +633,7 @@ def _log_get_heatmap_record(log): buf = ffi.new("void **") r = libdutil.darshan_log_get_record(log['handle'], modules[mod_name]['idx'], buf) if r < 1: + libdutil.darshan_free(buf[0]) return None filerec = ffi.cast(mod_type, buf) @@ -692,6 +693,7 @@ def log_get_derived_metrics(log_path: str, mod_name: str): jobrec[0].nprocs, darshan_accumulator) if r != 0: + libdutil.darshan_free(buf[0]) raise RuntimeError("A nonzero exit code was received from " "darshan_accumulator_create() at the C level. " f"This could mean that the {mod_name} module does not " @@ -704,6 +706,7 @@ def log_get_derived_metrics(log_path: str, mod_name: str): print("before inject") r = libdutil.darshan_accumulator_inject(darshan_accumulator[0], rbuf[0], 1) if r != 0: + libdutil.darshan_free(buf[0]) raise RuntimeError("A nonzero exit code was received from " "darshan_accumulator_inject() at the C level. " "It may be possible " @@ -716,6 +719,7 @@ def log_get_derived_metrics(log_path: str, mod_name: str): darshan_derived_metrics, rbuf[0]) if r != 0: + libdutil.darshan_free(buf[0]) raise RuntimeError("A nonzero exit code was received from " "darshan_accumulator_emit() at the C level. " "It may be possible " @@ -723,4 +727,5 @@ def log_get_derived_metrics(log_path: str, mod_name: str): "stream.") print("after emit") #libdutil.darshan_accumulator_destroy(darshan_accumulator) + libdutil.darshan_free(buf[0]) return darshan_derived_metrics From e3f77646d8403cb2cceef35f6282f90053bb71d1 Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Mon, 14 Nov 2022 12:08:03 -0700 Subject: [PATCH 07/19] MAINT: PR 839 revisions * `log_get_derived_metrics()` was adjusted to inject all the records for a given module, because this is our initial target to replicate the stats in the old perl summary report * a new `log_get_bytes_bandwidth()` function was drafted in as a convenience wrapper to get MiB (total bytes) and bandwidth (MiB/s) values printed out in the old perl report * renamed the regression test for this PR and adjusted it to compare against the bytes/bandwidth strings present in the perl reports; so far, only a small subset of the STDIO results are working properly (see the xfails in this test..) 
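* as a rough usage sketch, the new helper can be called directly from Python;
  the log name and expected output below are simply copied from one of the new
  test cases, and `get_log_path` is the same logs-repo helper the test suite
  already uses:

```python
from darshan.log_utils import get_log_path
from darshan.backend.cffi_backend import log_get_bytes_bandwidth

log_path = get_log_path("imbalanced-io.darshan")
# expected (per the new test case):
# "I/O performance estimate (at the STDIO layer): transferred 1.1 MiB at 0.01 MiB/s"
print(log_get_bytes_bandwidth(log_path=log_path, mod_name="STDIO"))
```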
--- .../pydarshan/darshan/backend/cffi_backend.py | 43 +++++++++++----- .../pydarshan/darshan/tests/test_cffi_misc.py | 49 ++++++++++--------- 2 files changed, 58 insertions(+), 34 deletions(-) diff --git a/darshan-util/pydarshan/darshan/backend/cffi_backend.py b/darshan-util/pydarshan/darshan/backend/cffi_backend.py index 200e281ea..a53536fb7 100644 --- a/darshan-util/pydarshan/darshan/backend/cffi_backend.py +++ b/darshan-util/pydarshan/darshan/backend/cffi_backend.py @@ -683,9 +683,6 @@ def log_get_derived_metrics(log_path: str, mod_name: str): return None mod_type = _structdefs[mod_name] - buf = ffi.new("void **") - r = libdutil.darshan_log_get_record(log_handle['handle'], modules[mod_name]['idx'], buf) - rbuf = ffi.cast(mod_type, buf) darshan_accumulator = ffi.new("darshan_accumulator *") print("before create") @@ -704,14 +701,21 @@ def log_get_derived_metrics(log_path: str, mod_name: str): print("after create") print("before inject") - r = libdutil.darshan_accumulator_inject(darshan_accumulator[0], rbuf[0], 1) - if r != 0: - libdutil.darshan_free(buf[0]) - raise RuntimeError("A nonzero exit code was received from " - "darshan_accumulator_inject() at the C level. " - "It may be possible " - "to retrieve additional information from the stderr " - "stream.") + buf = ffi.new("void **") + r = 1 + while r >= 1: + r = libdutil.darshan_log_get_record(log_handle['handle'], modules[mod_name]['idx'], buf) + if r < 1: + break + rbuf = ffi.cast(mod_type, buf) + r_i = libdutil.darshan_accumulator_inject(darshan_accumulator[0], rbuf[0], 1) + if r_i != 0: + libdutil.darshan_free(buf[0]) + raise RuntimeError("A nonzero exit code was received from " + "darshan_accumulator_inject() at the C level. " + "It may be possible " + "to retrieve additional information from the stderr " + "stream.") print("after inject") darshan_derived_metrics = ffi.new("struct darshan_derived_metrics *") print("before emit") @@ -729,3 +733,20 @@ def log_get_derived_metrics(log_path: str, mod_name: str): #libdutil.darshan_accumulator_destroy(darshan_accumulator) libdutil.darshan_free(buf[0]) return darshan_derived_metrics + + +def log_get_bytes_bandwidth(log_path: str, mod_name: str) -> str: + # get total bytes (in MiB) and bandwidth (in MiB/s) for + # a given module -- this information was commonly reported + # in the old perl-based summary reports + darshan_derived_metrics = log_get_derived_metrics(log_path=log_path, + mod_name=mod_name) + total_mib = darshan_derived_metrics.total_bytes / 2 ** 20 + report = darshan.DarshanReport(log_path, read_all=True) + fcounters_df = report.records[f"{mod_name}"].to_df()['fcounters'] + total_rw_time = (fcounters_df[f"{mod_name}_F_READ_TIME"].sum() + + fcounters_df[f"{mod_name}_F_WRITE_TIME"].sum() + + fcounters_df[f"{mod_name}_F_META_TIME"].sum()) + total_bw = total_mib / total_rw_time + ret_str = f"I/O performance estimate (at the {mod_name} layer): transferred {total_mib:.1f} MiB at {total_bw:.2f} MiB/s" + return ret_str diff --git a/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py b/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py index 76a64b555..adabf489a 100644 --- a/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py +++ b/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py @@ -161,29 +161,32 @@ def test_log_get_generic_record(dtype): assert actual_fcounter_names == expected_fcounter_names -@pytest.mark.parametrize("log_path", [ - "imbalanced-io.darshan", +@pytest.mark.parametrize("log_path, mod_name, expected_str", [ + # the expected bytes/bandwidth strings are 
pasted + # directly from the old perl summary reports + ("imbalanced-io.darshan", + "STDIO", + "I/O performance estimate (at the STDIO layer): transferred 1.1 MiB at 0.01 MiB/s"), + ("laytonjb_test1_id28730_6-7-43012-2131301613401632697_1.darshan", + "STDIO", + "I/O performance estimate (at the STDIO layer): transferred 0.0 MiB at 4.22 MiB/s"), + pytest.param("runtime_and_dxt_heatmaps_diagonal_write_only.darshan", + "POSIX", + "I/O performance estimate (at the POSIX layer): transferred 0.0 MiB at 0.02 MiB/s", + marks=pytest.mark.xfail(reason="Not sure why modules other than STDIO fail yet...")), + pytest.param("treddy_mpi-io-test_id4373053_6-2-60198-9815401321915095332_1.darshan", + "STDIO", + "I/O performance estimate (at the STDIO layer): transferred 0.0 MiB at 16.47 MiB/s", + marks=pytest.mark.xfail(reason="Something extra needed to account for MPI-IO?")), ]) -def test_derived_metrics_basic(log_path): +def test_derived_metrics_bytes_and_bandwidth(log_path, mod_name, expected_str): # test the basic scenario of retrieving - # the derived metrics from all records for a given - # module; the situation where you'd like to - # retrieve derived metrics for a subset of records (i.e., - # a particular filename) is not tested here + # the total data transferred and bandwidth + # for all records in a given module; the situation + # of accumulating drived metrics with filtering + # (i.e., for a single filename) is not tested here + log_path = get_log_path(log_path) - report = darshan.DarshanReport(log_path, read_all=True) - for mod_name in report.modules: - # if support is added for accumulator work on these - # modules later on, the test will fail to raise an error, - # causing the test to ultimately fail; that is good, it will - # force us to acknowledge that the support was added intentionally - # under the hood - print("testing mod_name:", mod_name) - if mod_name in {"LUSTRE"}: - with pytest.raises(RuntimeError): - derived_metrics = backend.log_get_derived_metrics(log_path=log_path, - mod_name=mod_name) - else: - derived_metrics = backend.log_get_derived_metrics(log_path=log_path, - mod_name=mod_name) - # TODO: assert against values from i.e., perl reports + actual_str = backend.log_get_bytes_bandwidth(log_path=log_path, + mod_name=mod_name) + assert actual_str == expected_str From f85add6d6bea9c847b2c658f3b7f64e4482de3b5 Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Sun, 20 Nov 2022 14:49:50 -0700 Subject: [PATCH 08/19] MAINT: PR 839 revisions * `log_get_bytes_bandwidth()` has been simplified substantially as it no longer needs to generate a new `report` object, and can instead use the `agg_perf_by_slowest` structure member * the `xfail` marks have all been removed from `test_derived_metrics_bytes_and_bandwidth()`--those cases all pass now thanks to the above changes * remove some debug prints and unused code from `log_get_derived_metrics()` --- .../pydarshan/darshan/backend/cffi_backend.py | 16 +--------------- .../pydarshan/darshan/tests/test_cffi_misc.py | 10 ++++------ 2 files changed, 5 insertions(+), 21 deletions(-) diff --git a/darshan-util/pydarshan/darshan/backend/cffi_backend.py b/darshan-util/pydarshan/darshan/backend/cffi_backend.py index a53536fb7..d009177b8 100644 --- a/darshan-util/pydarshan/darshan/backend/cffi_backend.py +++ b/darshan-util/pydarshan/darshan/backend/cffi_backend.py @@ -682,10 +682,7 @@ def log_get_derived_metrics(log_path: str, mod_name: str): if mod_name not in modules: return None mod_type = _structdefs[mod_name] - - darshan_accumulator = 
ffi.new("darshan_accumulator *") - print("before create") r = libdutil.darshan_accumulator_create(modules[mod_name]['idx'], jobrec[0].nprocs, darshan_accumulator) @@ -698,9 +695,7 @@ def log_get_derived_metrics(log_path: str, mod_name: str): "another kind of error occurred. It may be possible " "to retrieve additional information from the stderr " "stream.") - print("after create") - print("before inject") buf = ffi.new("void **") r = 1 while r >= 1: @@ -716,9 +711,7 @@ def log_get_derived_metrics(log_path: str, mod_name: str): "It may be possible " "to retrieve additional information from the stderr " "stream.") - print("after inject") darshan_derived_metrics = ffi.new("struct darshan_derived_metrics *") - print("before emit") r = libdutil.darshan_accumulator_emit(darshan_accumulator[0], darshan_derived_metrics, rbuf[0]) @@ -729,8 +722,6 @@ def log_get_derived_metrics(log_path: str, mod_name: str): "It may be possible " "to retrieve additional information from the stderr " "stream.") - print("after emit") - #libdutil.darshan_accumulator_destroy(darshan_accumulator) libdutil.darshan_free(buf[0]) return darshan_derived_metrics @@ -742,11 +733,6 @@ def log_get_bytes_bandwidth(log_path: str, mod_name: str) -> str: darshan_derived_metrics = log_get_derived_metrics(log_path=log_path, mod_name=mod_name) total_mib = darshan_derived_metrics.total_bytes / 2 ** 20 - report = darshan.DarshanReport(log_path, read_all=True) - fcounters_df = report.records[f"{mod_name}"].to_df()['fcounters'] - total_rw_time = (fcounters_df[f"{mod_name}_F_READ_TIME"].sum() + - fcounters_df[f"{mod_name}_F_WRITE_TIME"].sum() + - fcounters_df[f"{mod_name}_F_META_TIME"].sum()) - total_bw = total_mib / total_rw_time + total_bw = darshan_derived_metrics.agg_perf_by_slowest ret_str = f"I/O performance estimate (at the {mod_name} layer): transferred {total_mib:.1f} MiB at {total_bw:.2f} MiB/s" return ret_str diff --git a/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py b/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py index adabf489a..f654cedff 100644 --- a/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py +++ b/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py @@ -170,14 +170,12 @@ def test_log_get_generic_record(dtype): ("laytonjb_test1_id28730_6-7-43012-2131301613401632697_1.darshan", "STDIO", "I/O performance estimate (at the STDIO layer): transferred 0.0 MiB at 4.22 MiB/s"), - pytest.param("runtime_and_dxt_heatmaps_diagonal_write_only.darshan", + ("runtime_and_dxt_heatmaps_diagonal_write_only.darshan", "POSIX", - "I/O performance estimate (at the POSIX layer): transferred 0.0 MiB at 0.02 MiB/s", - marks=pytest.mark.xfail(reason="Not sure why modules other than STDIO fail yet...")), - pytest.param("treddy_mpi-io-test_id4373053_6-2-60198-9815401321915095332_1.darshan", + "I/O performance estimate (at the POSIX layer): transferred 0.0 MiB at 0.02 MiB/s"), + ("treddy_mpi-io-test_id4373053_6-2-60198-9815401321915095332_1.darshan", "STDIO", - "I/O performance estimate (at the STDIO layer): transferred 0.0 MiB at 16.47 MiB/s", - marks=pytest.mark.xfail(reason="Something extra needed to account for MPI-IO?")), + "I/O performance estimate (at the STDIO layer): transferred 0.0 MiB at 16.47 MiB/s"), ]) def test_derived_metrics_bytes_and_bandwidth(log_path, mod_name, expected_str): # test the basic scenario of retrieving From b95dbcb9003a414c2fca2e9455cfd50c1d462a01 Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Sun, 20 Nov 2022 15:28:53 -0700 Subject: [PATCH 09/19] MAINT: PR 839 revisions * a number of 
additional `MPI-IO` and `STDIO` test cases were added from the logs repo to `test_derived_metrics_bytes_and_bandwidth()` * for the `MPI-IO` cases to pass, special casing was added to `log_get_bytes_bandwidth()` such that `total_bytes` is actually extracted from `POSIX` --- .../pydarshan/darshan/backend/cffi_backend.py | 11 ++++++++++- .../pydarshan/darshan/tests/test_cffi_misc.py | 15 +++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/darshan-util/pydarshan/darshan/backend/cffi_backend.py b/darshan-util/pydarshan/darshan/backend/cffi_backend.py index d009177b8..12e9b227c 100644 --- a/darshan-util/pydarshan/darshan/backend/cffi_backend.py +++ b/darshan-util/pydarshan/darshan/backend/cffi_backend.py @@ -732,7 +732,16 @@ def log_get_bytes_bandwidth(log_path: str, mod_name: str) -> str: # in the old perl-based summary reports darshan_derived_metrics = log_get_derived_metrics(log_path=log_path, mod_name=mod_name) - total_mib = darshan_derived_metrics.total_bytes / 2 ** 20 + if mod_name == "MPI-IO": + # for whatever reason, this seems to require + # total_bytes reported from POSIX to match the + # old perl summary reports + darshan_derived_metrics_posix = log_get_derived_metrics(log_path=log_path, + mod_name="POSIX") + total_mib = darshan_derived_metrics_posix.total_bytes / 2 ** 20 + else: + total_mib = darshan_derived_metrics.total_bytes / 2 ** 20 + total_bw = darshan_derived_metrics.agg_perf_by_slowest ret_str = f"I/O performance estimate (at the {mod_name} layer): transferred {total_mib:.1f} MiB at {total_bw:.2f} MiB/s" return ret_str diff --git a/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py b/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py index f654cedff..3e92554c0 100644 --- a/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py +++ b/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py @@ -167,6 +167,9 @@ def test_log_get_generic_record(dtype): ("imbalanced-io.darshan", "STDIO", "I/O performance estimate (at the STDIO layer): transferred 1.1 MiB at 0.01 MiB/s"), + ("imbalanced-io.darshan", + "MPI-IO", + "I/O performance estimate (at the MPI-IO layer): transferred 101785.8 MiB at 101.58 MiB/s"), ("laytonjb_test1_id28730_6-7-43012-2131301613401632697_1.darshan", "STDIO", "I/O performance estimate (at the STDIO layer): transferred 0.0 MiB at 4.22 MiB/s"), @@ -176,6 +179,18 @@ def test_log_get_generic_record(dtype): ("treddy_mpi-io-test_id4373053_6-2-60198-9815401321915095332_1.darshan", "STDIO", "I/O performance estimate (at the STDIO layer): transferred 0.0 MiB at 16.47 MiB/s"), + ("e3sm_io_heatmap_only.darshan", + "STDIO", + "I/O performance estimate (at the STDIO layer): transferred 0.0 MiB at 3.26 MiB/s"), + ("e3sm_io_heatmap_only.darshan", + "MPI-IO", + "I/O performance estimate (at the MPI-IO layer): transferred 290574.1 MiB at 105.69 MiB/s"), + ("partial_data_stdio.darshan", + "MPI-IO", + "I/O performance estimate (at the MPI-IO layer): transferred 32.0 MiB at 2317.98 MiB/s"), + ("partial_data_stdio.darshan", + "STDIO", + "I/O performance estimate (at the STDIO layer): transferred 16336.0 MiB at 2999.14 MiB/s"), ]) def test_derived_metrics_bytes_and_bandwidth(log_path, mod_name, expected_str): # test the basic scenario of retrieving From e47d3cfa015c05f0000809b26886e0e619974152 Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Sun, 20 Nov 2022 15:47:26 -0700 Subject: [PATCH 10/19] MAINT: PR 839 revisions * removed an invalid `darshan_free()` from `log_get_derived_metrics()`-- the `buf` object didn't even exist at that point in the control 
flow * add a `LUSTRE` test case, which raises a `RuntimeError` as expected * add a tentatie `POSIX` test case, which reports a bandwidth string at the Python level, but is not included in the Perl summary reports... --- .../pydarshan/darshan/backend/cffi_backend.py | 1 - .../pydarshan/darshan/tests/test_cffi_misc.py | 32 ++++++++++++++++--- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/darshan-util/pydarshan/darshan/backend/cffi_backend.py b/darshan-util/pydarshan/darshan/backend/cffi_backend.py index 12e9b227c..5e2079e78 100644 --- a/darshan-util/pydarshan/darshan/backend/cffi_backend.py +++ b/darshan-util/pydarshan/darshan/backend/cffi_backend.py @@ -687,7 +687,6 @@ def log_get_derived_metrics(log_path: str, mod_name: str): jobrec[0].nprocs, darshan_accumulator) if r != 0: - libdutil.darshan_free(buf[0]) raise RuntimeError("A nonzero exit code was received from " "darshan_accumulator_create() at the C level. " f"This could mean that the {mod_name} module does not " diff --git a/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py b/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py index 3e92554c0..0e2ce6ce5 100644 --- a/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py +++ b/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py @@ -163,13 +163,32 @@ def test_log_get_generic_record(dtype): @pytest.mark.parametrize("log_path, mod_name, expected_str", [ # the expected bytes/bandwidth strings are pasted - # directly from the old perl summary reports + # directly from the old perl summary reports; + # exceptions noted below ("imbalanced-io.darshan", "STDIO", "I/O performance estimate (at the STDIO layer): transferred 1.1 MiB at 0.01 MiB/s"), ("imbalanced-io.darshan", "MPI-IO", "I/O performance estimate (at the MPI-IO layer): transferred 101785.8 MiB at 101.58 MiB/s"), + # imbalanced-io.darshan does have LUSTRE data, + # but it doesn't support derived metrics at time + # of writing + ("imbalanced-io.darshan", + "LUSTRE", + "RuntimeError"), + # imbalanced-io.darshan has POSIX data, but it is + # incomplete, and the Perl summary report opts to + # include the summary string only for STDIO and + # MPI-IO + # TODO: confirm with darshan team that we DO want + # to include POSIX reporting as below rather than + # raising an error (note that the Perl report DOES + # include reports for partial data modules as can + # be seen with partial_data_stdio.darshan below) + ("imbalanced-io.darshan", + "POSIX", + "I/O performance estimate (at the POSIX layer): transferred 101785.8 MiB at 164.99 MiB/s"), ("laytonjb_test1_id28730_6-7-43012-2131301613401632697_1.darshan", "STDIO", "I/O performance estimate (at the STDIO layer): transferred 0.0 MiB at 4.22 MiB/s"), @@ -200,6 +219,11 @@ def test_derived_metrics_bytes_and_bandwidth(log_path, mod_name, expected_str): # (i.e., for a single filename) is not tested here log_path = get_log_path(log_path) - actual_str = backend.log_get_bytes_bandwidth(log_path=log_path, - mod_name=mod_name) - assert actual_str == expected_str + if expected_str == "RuntimeError": + with pytest.raises(RuntimeError): + backend.log_get_bytes_bandwidth(log_path=log_path, + mod_name=mod_name) + else: + actual_str = backend.log_get_bytes_bandwidth(log_path=log_path, + mod_name=mod_name) + assert actual_str == expected_str From 2eeb136a69006d7e3d6d56de7bbb7cdf2893189d Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Sun, 20 Nov 2022 16:01:08 -0700 Subject: [PATCH 11/19] MAINT: PR 839 revisions * when `log_get_derived_metrics()` receives a module name that doesn't 
exist in the log file it received, it will now raise a `ValueError` for clarity of feedback * update `test_derived_metrics_bytes_and_bandwidth()` accordingly, and also start regex matching on expected error messages in this test --- .../pydarshan/darshan/backend/cffi_backend.py | 4 +++- .../pydarshan/darshan/tests/test_cffi_misc.py | 14 +++++++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/darshan-util/pydarshan/darshan/backend/cffi_backend.py b/darshan-util/pydarshan/darshan/backend/cffi_backend.py index 5e2079e78..71369a0d0 100644 --- a/darshan-util/pydarshan/darshan/backend/cffi_backend.py +++ b/darshan-util/pydarshan/darshan/backend/cffi_backend.py @@ -680,7 +680,9 @@ def log_get_derived_metrics(log_path: str, mod_name: str): modules = log_get_modules(log_handle) if mod_name not in modules: - return None + raise ValueError(f"{mod_name} is not in the available log file " + f"modules: {modules.keys()}") + mod_type = _structdefs[mod_name] darshan_accumulator = ffi.new("darshan_accumulator *") r = libdutil.darshan_accumulator_create(modules[mod_name]['idx'], diff --git a/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py b/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py index 0e2ce6ce5..d66efb7ed 100644 --- a/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py +++ b/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py @@ -210,6 +210,12 @@ def test_log_get_generic_record(dtype): ("partial_data_stdio.darshan", "STDIO", "I/O performance estimate (at the STDIO layer): transferred 16336.0 MiB at 2999.14 MiB/s"), + # the C derived metrics code can't distinguish + # between different kinds of errors at this time, + # but we can still intercept in some cases... + ("partial_data_stdio.darshan", + "GARBAGE", + "ValueError"), ]) def test_derived_metrics_bytes_and_bandwidth(log_path, mod_name, expected_str): # test the basic scenario of retrieving @@ -220,7 +226,13 @@ def test_derived_metrics_bytes_and_bandwidth(log_path, mod_name, expected_str): log_path = get_log_path(log_path) if expected_str == "RuntimeError": - with pytest.raises(RuntimeError): + with pytest.raises(RuntimeError, + match=f"{mod_name} module does not support derived"): + backend.log_get_bytes_bandwidth(log_path=log_path, + mod_name=mod_name) + elif expected_str == "ValueError": + with pytest.raises(ValueError, + match=f"{mod_name} is not in the available log"): backend.log_get_bytes_bandwidth(log_path=log_path, mod_name=mod_name) else: From a9b97657ae2c32d3219d70de7801d9fe1a714704 Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Sun, 20 Nov 2022 16:51:54 -0700 Subject: [PATCH 12/19] MAINT: PR 839 revisions * add the bandwidth summary string to the Python report proper, and include a test for the presence of this string in logs repo-based summary reports --- darshan-util/pydarshan/darshan/cli/summary.py | 24 +++++++++++++++++++ .../pydarshan/darshan/tests/test_summary.py | 5 ++++ 2 files changed, 29 insertions(+) diff --git a/darshan-util/pydarshan/darshan/cli/summary.py b/darshan-util/pydarshan/darshan/cli/summary.py index e7066227c..75199a38f 100644 --- a/darshan-util/pydarshan/darshan/cli/summary.py +++ b/darshan-util/pydarshan/darshan/cli/summary.py @@ -17,6 +17,7 @@ import darshan import darshan.cli +from darshan.backend.cffi_backend import log_get_bytes_bandwidth from darshan.experimental.plots import ( plot_dxt_heatmap, plot_io_cost, @@ -489,6 +490,29 @@ def register_figures(self): ) self.figures.append(opcount_fig) + try: + # this is really just some text + # so using ReportFigure 
feels awkward... + bandwidth_fig = ReportFigure( + section_title=sect_title, + fig_title="", + fig_func=None, + fig_args=None, + fig_description=log_get_bytes_bandwidth(log_path=self.log_path, + mod_name=mod)) + self.figures.append(bandwidth_fig) + except (RuntimeError, KeyError): + # the module probably doesn't support derived metrics + # calculations, but the C code doesn't distinguish other + # types of errors + + # the KeyError appears to be needed for a subset of logs + # for which _structdefs lacks APMPI or APXC entries; + # for example `e3sm_io_heatmap_only.darshan` in logs + # repo + pass + + ######################### # Data Access by Category if not {"POSIX", "STDIO"}.isdisjoint(set(self.report.modules)): diff --git a/darshan-util/pydarshan/darshan/tests/test_summary.py b/darshan-util/pydarshan/darshan/tests/test_summary.py index 282ab7d36..11adc31cc 100644 --- a/darshan-util/pydarshan/darshan/tests/test_summary.py +++ b/darshan-util/pydarshan/darshan/tests/test_summary.py @@ -236,6 +236,11 @@ def test_main_all_logs_repo_files(tmpdir, log_filepath): else: assert actual_runtime_heatmap_titles == 0 + # check for presence of bandwidth summary strings + # (more detailed per-module probes are present + # in test_derived_metrics_bytes_and_bandwidth()) + assert "I/O performance estimate" in report_str + class TestReportData: From d0704fdf27366c24ef6371a1a0cefbee4de735c2 Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Sun, 20 Nov 2022 16:58:08 -0700 Subject: [PATCH 13/19] MAINT: PR 839 revisions * add one of the tricky `APMPI` cases I discovered to `test_derived_metrics_bytes_and_bandwidth()`, pending discussion with team re: how I should handle this --- darshan-util/pydarshan/darshan/tests/test_cffi_misc.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py b/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py index d66efb7ed..683a1486c 100644 --- a/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py +++ b/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py @@ -216,6 +216,13 @@ def test_log_get_generic_record(dtype): ("partial_data_stdio.darshan", "GARBAGE", "ValueError"), + # TODO: determine if the lack of APMPI and + # any other "add-ons" in _structdefs is a bug + # in the control flow for `log_get_derived_metrics()`? 
+ pytest.param("e3sm_io_heatmap_only.darshan", + "APMPI", + "", + marks=pytest.mark.xfail(reason="APMPI and derived metrics control flow?")), ]) def test_derived_metrics_bytes_and_bandwidth(log_path, mod_name, expected_str): # test the basic scenario of retrieving From 029b7c96a401c861800613fd53e11d7921419d3f Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Fri, 25 Nov 2022 17:34:50 -0700 Subject: [PATCH 14/19] MAINT: PR 839 revisions * adjust tests to more closely match `darshan-parser` instead of the old Perl report in cases where MPI-IO and POSIX are both involved; this allows me to remove the weird MPI-IO shim in `log_get_bytes_bandwidth()` --- .../pydarshan/darshan/backend/cffi_backend.py | 11 +---------- .../pydarshan/darshan/tests/test_cffi_misc.py | 15 ++++----------- 2 files changed, 5 insertions(+), 21 deletions(-) diff --git a/darshan-util/pydarshan/darshan/backend/cffi_backend.py b/darshan-util/pydarshan/darshan/backend/cffi_backend.py index 71369a0d0..2bb7e3411 100644 --- a/darshan-util/pydarshan/darshan/backend/cffi_backend.py +++ b/darshan-util/pydarshan/darshan/backend/cffi_backend.py @@ -733,16 +733,7 @@ def log_get_bytes_bandwidth(log_path: str, mod_name: str) -> str: # in the old perl-based summary reports darshan_derived_metrics = log_get_derived_metrics(log_path=log_path, mod_name=mod_name) - if mod_name == "MPI-IO": - # for whatever reason, this seems to require - # total_bytes reported from POSIX to match the - # old perl summary reports - darshan_derived_metrics_posix = log_get_derived_metrics(log_path=log_path, - mod_name="POSIX") - total_mib = darshan_derived_metrics_posix.total_bytes / 2 ** 20 - else: - total_mib = darshan_derived_metrics.total_bytes / 2 ** 20 - + total_mib = darshan_derived_metrics.total_bytes / 2 ** 20 total_bw = darshan_derived_metrics.agg_perf_by_slowest ret_str = f"I/O performance estimate (at the {mod_name} layer): transferred {total_mib:.1f} MiB at {total_bw:.2f} MiB/s" return ret_str diff --git a/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py b/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py index 683a1486c..7badad54d 100644 --- a/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py +++ b/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py @@ -165,27 +165,20 @@ def test_log_get_generic_record(dtype): # the expected bytes/bandwidth strings are pasted # directly from the old perl summary reports; # exceptions noted below + # in some cases we defer to darshan-parser for the expected + # values; see discussion in gh-839 ("imbalanced-io.darshan", "STDIO", "I/O performance estimate (at the STDIO layer): transferred 1.1 MiB at 0.01 MiB/s"), ("imbalanced-io.darshan", "MPI-IO", - "I/O performance estimate (at the MPI-IO layer): transferred 101785.8 MiB at 101.58 MiB/s"), + "I/O performance estimate (at the MPI-IO layer): transferred 126326.8 MiB at 101.58 MiB/s"), # imbalanced-io.darshan does have LUSTRE data, # but it doesn't support derived metrics at time # of writing ("imbalanced-io.darshan", "LUSTRE", "RuntimeError"), - # imbalanced-io.darshan has POSIX data, but it is - # incomplete, and the Perl summary report opts to - # include the summary string only for STDIO and - # MPI-IO - # TODO: confirm with darshan team that we DO want - # to include POSIX reporting as below rather than - # raising an error (note that the Perl report DOES - # include reports for partial data modules as can - # be seen with partial_data_stdio.darshan below) ("imbalanced-io.darshan", "POSIX", "I/O performance estimate (at the POSIX layer): 
transferred 101785.8 MiB at 164.99 MiB/s"), @@ -203,7 +196,7 @@ def test_log_get_generic_record(dtype): "I/O performance estimate (at the STDIO layer): transferred 0.0 MiB at 3.26 MiB/s"), ("e3sm_io_heatmap_only.darshan", "MPI-IO", - "I/O performance estimate (at the MPI-IO layer): transferred 290574.1 MiB at 105.69 MiB/s"), + "I/O performance estimate (at the MPI-IO layer): transferred 73880.2 MiB at 105.69 MiB/s"), ("partial_data_stdio.darshan", "MPI-IO", "I/O performance estimate (at the MPI-IO layer): transferred 32.0 MiB at 2317.98 MiB/s"), From aa46cad544b5f4dec73a0e2a75f9a19aae82fb67 Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Sat, 26 Nov 2022 17:30:06 -0700 Subject: [PATCH 15/19] MAINT: PR 839 revisions * the bandwidth text in the Python summary report is now colored "blue," along with a regression test, based on reviewer feedback * added `skew-app.darshan` log to `test_derived_metrics_bytes_and_bandwidth()`--we get the same results as `darshan-parser` * replaced the `xfail` for `e3sm_io_heatmap_only.darshan` with an expected `KeyError` when handling `APMPI` (this should already be handled gracefully/ignored by the Python summary report) --- darshan-util/pydarshan/darshan/cli/base.html | 3 ++- darshan-util/pydarshan/darshan/cli/summary.py | 12 +++++++++++- .../pydarshan/darshan/tests/test_cffi_misc.py | 15 ++++++++++++--- .../pydarshan/darshan/tests/test_summary.py | 1 + 4 files changed, 26 insertions(+), 5 deletions(-) diff --git a/darshan-util/pydarshan/darshan/cli/base.html b/darshan-util/pydarshan/darshan/cli/base.html index 99382cb34..302858046 100644 --- a/darshan-util/pydarshan/darshan/cli/base.html +++ b/darshan-util/pydarshan/darshan/cli/base.html @@ -49,7 +49,8 @@

${fig_title}

${fig.fig_description}
             % else:
-                <p style="color: red">
+                <p
+                   style="color: ${fig.text_only_color}">
${fig.fig_description}
% endif diff --git a/darshan-util/pydarshan/darshan/cli/summary.py b/darshan-util/pydarshan/darshan/cli/summary.py index 75199a38f..8dd0b872a 100644 --- a/darshan-util/pydarshan/darshan/cli/summary.py +++ b/darshan-util/pydarshan/darshan/cli/summary.py @@ -57,6 +57,11 @@ def __init__( fig_args: dict, fig_description: str = "", fig_width: int = 500, + # when there is no HTML data generated + # for the figure (i.e., no image/plot), + # we have the option of changing the caption + # text color for a warning/important standalone text + text_only_color: str = "red", ): self.section_title = section_title if not fig_title: @@ -69,7 +74,11 @@ def __init__( # temporary handling for DXT disabled cases # so special error message can be passed # in place of an encoded image + # NOTE: this code path is now also + # being used for adding the bandwidth + # text, which doesn't really have an image... self.fig_html = None + self.text_only_color = text_only_color if self.fig_func: self.generate_fig() @@ -499,7 +508,8 @@ def register_figures(self): fig_func=None, fig_args=None, fig_description=log_get_bytes_bandwidth(log_path=self.log_path, - mod_name=mod)) + mod_name=mod), + text_only_color="blue") self.figures.append(bandwidth_fig) except (RuntimeError, KeyError): # the module probably doesn't support derived metrics diff --git a/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py b/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py index 7badad54d..578341946 100644 --- a/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py +++ b/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py @@ -212,10 +212,15 @@ def test_log_get_generic_record(dtype): # TODO: determine if the lack of APMPI and # any other "add-ons" in _structdefs is a bug # in the control flow for `log_get_derived_metrics()`? 
- pytest.param("e3sm_io_heatmap_only.darshan", + ("e3sm_io_heatmap_only.darshan", "APMPI", - "", - marks=pytest.mark.xfail(reason="APMPI and derived metrics control flow?")), + "KeyError"), + ("skew-app.darshan", + "POSIX", + "I/O performance estimate (at the POSIX layer): transferred 41615.8 MiB at 157.49 MiB/s"), + ("skew-app.darshan", + "MPI-IO", + "I/O performance estimate (at the MPI-IO layer): transferred 41615.8 MiB at 55.22 MiB/s"), ]) def test_derived_metrics_bytes_and_bandwidth(log_path, mod_name, expected_str): # test the basic scenario of retrieving @@ -235,6 +240,10 @@ def test_derived_metrics_bytes_and_bandwidth(log_path, mod_name, expected_str): match=f"{mod_name} is not in the available log"): backend.log_get_bytes_bandwidth(log_path=log_path, mod_name=mod_name) + elif expected_str == "KeyError": + with pytest.raises(KeyError, match=f"{mod_name}"): + backend.log_get_bytes_bandwidth(log_path=log_path, + mod_name=mod_name) else: actual_str = backend.log_get_bytes_bandwidth(log_path=log_path, mod_name=mod_name) diff --git a/darshan-util/pydarshan/darshan/tests/test_summary.py b/darshan-util/pydarshan/darshan/tests/test_summary.py index 11adc31cc..8edcce921 100644 --- a/darshan-util/pydarshan/darshan/tests/test_summary.py +++ b/darshan-util/pydarshan/darshan/tests/test_summary.py @@ -240,6 +240,7 @@ def test_main_all_logs_repo_files(tmpdir, log_filepath): # (more detailed per-module probes are present # in test_derived_metrics_bytes_and_bandwidth()) assert "I/O performance estimate" in report_str + assert "color: blue" in report_str class TestReportData: From f1bac18d6b18b7ed038aeb5520aeb799ecbf008c Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Wed, 30 Nov 2022 10:31:51 -0700 Subject: [PATCH 16/19] MAINT: PR 839 revisions * the testsuite now always uses `DarshanReport` with a context manager to avoid shenanigans with `__del__` and garbage collection/`pytest`/multiple threads * this appears to fix the problem with testsuite hangs described in gh-839 and gh-851 --- darshan-util/pydarshan/darshan/tests/test_summary.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/darshan-util/pydarshan/darshan/tests/test_summary.py b/darshan-util/pydarshan/darshan/tests/test_summary.py index 8edcce921..1c1acd2fa 100644 --- a/darshan-util/pydarshan/darshan/tests/test_summary.py +++ b/darshan-util/pydarshan/darshan/tests/test_summary.py @@ -236,11 +236,11 @@ def test_main_all_logs_repo_files(tmpdir, log_filepath): else: assert actual_runtime_heatmap_titles == 0 - # check for presence of bandwidth summary strings - # (more detailed per-module probes are present - # in test_derived_metrics_bytes_and_bandwidth()) - assert "I/O performance estimate" in report_str - assert "color: blue" in report_str + # check for presence of bandwidth summary strings + # (more detailed per-module probes are present + # in test_derived_metrics_bytes_and_bandwidth()) + assert "I/O performance estimate" in report_str + assert "color: blue" in report_str class TestReportData: From 96806fa2310e27f88d1fcfb24ba4733e9a1d6303 Mon Sep 17 00:00:00 2001 From: Tyler Reddy Date: Fri, 16 Dec 2022 09:11:00 -0700 Subject: [PATCH 17/19] MAINT: PR 839 revisions * `cffi_backend` module changes requested from PR review - remove a spurious `darshan_free` from `_log_get_heatmap_record()` - fix the scoping of the `darshan_free` of `buf` object used with `darshan_accumulator_inject` in `log_get_derived_metrics` - adding a missing `log_close()` to `log_get_derived_metrics` (maybe we can wrap in Python contexts 
in the future though) - use a separate buffer for `darshan_accumulator_emit()` inside `log_get_derived_metrics` * note that making the above CFFI/free-related changes caused a segfault in the testuite, so in the end I adjusted the location of the memory freeing as I saw fit to avoid segfaults--I'd say at this point please provide concrete evidence with a memory leak plot or failing test for additional adjustments there, or just push the change in * in the end, there is a slightly more concise usage of `darshan_free()` but no meaningful change in the free operations * I also reverted the suggested changed to `darshan_accumulator_emit()` usage--there was no testable evidence of an issue, and it was also causing segfaults.. * address many of the discussion points that came up in gh-868: - `log_get_derived_metrics()` now uses an LRU cache, which effectively means that we use memoization to return derived metrics data rather than doing another pass over the log file if the same log path and module name have already been accumulated from; we still need to pass over a given log twice in most cases--once at initial read-in and once for using `log_get_derived_metrics`; how we decide to add filtering of records prior to accumulation interface in Python is probably a deeper discussion/for later - `log_get_bytes_bandwidth()` and its associated testing have been migrated to modules not named after "CFFI", like the in the above PR, because I think we should only use the "CFFI" named modules for direct CFFI interaction/testing, and for other analyses we should probably use more distinct names. Also, to some extent everything depends on the CFFI layer, so trying to restrict "CFFI" modules to direct rather than direct interaction will help keep them manageably sized, especially given the proclivity for surprising memory issues/segfaults in those parts of the code. - add a proper docstring with examples for `log_get_bytes_bandwidth()` --- darshan-util/pydarshan/.gitignore | 1 - .../pydarshan/darshan/backend/cffi_backend.py | 18 +--- darshan-util/pydarshan/darshan/cli/summary.py | 2 +- darshan-util/pydarshan/darshan/lib/accum.py | 53 +++++++++++ .../pydarshan/darshan/tests/test_cffi_misc.py | 89 ------------------ .../pydarshan/darshan/tests/test_lib_accum.py | 93 +++++++++++++++++++ 6 files changed, 150 insertions(+), 106 deletions(-) create mode 100644 darshan-util/pydarshan/darshan/lib/accum.py create mode 100644 darshan-util/pydarshan/darshan/tests/test_lib_accum.py diff --git a/darshan-util/pydarshan/.gitignore b/darshan-util/pydarshan/.gitignore index 826150ea1..b2e538724 100644 --- a/darshan-util/pydarshan/.gitignore +++ b/darshan-util/pydarshan/.gitignore @@ -24,7 +24,6 @@ dist/ downloads/ eggs/ .eggs/ -lib/ lib64/ parts/ sdist/ diff --git a/darshan-util/pydarshan/darshan/backend/cffi_backend.py b/darshan-util/pydarshan/darshan/backend/cffi_backend.py index 2bb7e3411..6cc71d08a 100644 --- a/darshan-util/pydarshan/darshan/backend/cffi_backend.py +++ b/darshan-util/pydarshan/darshan/backend/cffi_backend.py @@ -633,7 +633,6 @@ def _log_get_heatmap_record(log): buf = ffi.new("void **") r = libdutil.darshan_log_get_record(log['handle'], modules[mod_name]['idx'], buf) if r < 1: - libdutil.darshan_free(buf[0]) return None filerec = ffi.cast(mod_type, buf) @@ -660,6 +659,7 @@ def _log_get_heatmap_record(log): return rec +@functools.lru_cache() def log_get_derived_metrics(log_path: str, mod_name: str): """ Returns the darshan_derived_metrics struct from CFFI/C accumulator code. 
@@ -716,24 +716,12 @@ def log_get_derived_metrics(log_path: str, mod_name: str): r = libdutil.darshan_accumulator_emit(darshan_accumulator[0], darshan_derived_metrics, rbuf[0]) + libdutil.darshan_free(buf[0]) if r != 0: - libdutil.darshan_free(buf[0]) raise RuntimeError("A nonzero exit code was received from " "darshan_accumulator_emit() at the C level. " "It may be possible " "to retrieve additional information from the stderr " "stream.") - libdutil.darshan_free(buf[0]) + log_close(log_handle) return darshan_derived_metrics - - -def log_get_bytes_bandwidth(log_path: str, mod_name: str) -> str: - # get total bytes (in MiB) and bandwidth (in MiB/s) for - # a given module -- this information was commonly reported - # in the old perl-based summary reports - darshan_derived_metrics = log_get_derived_metrics(log_path=log_path, - mod_name=mod_name) - total_mib = darshan_derived_metrics.total_bytes / 2 ** 20 - total_bw = darshan_derived_metrics.agg_perf_by_slowest - ret_str = f"I/O performance estimate (at the {mod_name} layer): transferred {total_mib:.1f} MiB at {total_bw:.2f} MiB/s" - return ret_str diff --git a/darshan-util/pydarshan/darshan/cli/summary.py b/darshan-util/pydarshan/darshan/cli/summary.py index 8dd0b872a..87e5055d1 100644 --- a/darshan-util/pydarshan/darshan/cli/summary.py +++ b/darshan-util/pydarshan/darshan/cli/summary.py @@ -17,7 +17,7 @@ import darshan import darshan.cli -from darshan.backend.cffi_backend import log_get_bytes_bandwidth +from darshan.lib.accum import log_get_bytes_bandwidth from darshan.experimental.plots import ( plot_dxt_heatmap, plot_io_cost, diff --git a/darshan-util/pydarshan/darshan/lib/accum.py b/darshan-util/pydarshan/darshan/lib/accum.py new file mode 100644 index 000000000..2bfe8898a --- /dev/null +++ b/darshan-util/pydarshan/darshan/lib/accum.py @@ -0,0 +1,53 @@ +from darshan.backend.cffi_backend import log_get_derived_metrics + + +def log_get_bytes_bandwidth(log_path: str, mod_name: str) -> str: + """ + Summarize I/O performance for a given darshan module. + + Parameters + ---------- + log_path : str + Path to the darshan binary log file. + mod_name : str + Name of the darshan module to summarize the I/O + performance for. + + Returns + ------- + out: str + A short string summarizing the performance of the given module + in the provided log file, including bandwidth and total data + transferred. + + Raises + ------ + RuntimeError + When a provided module name is not supported for the accumulator + interface for provision of the summary data, or for any other + error that occurs in the C/CFFI interface. + ValueError + When a provided module name does not exist in the log file. 
+ + Examples + -------- + + >>> from darshan.log_utils import get_log_path + >>> from darshan.lib.accum import log_get_bytes_bandwidth + + >>> log_path = get_log_path("imbalanced-io.darshan") + >>> log_get_bytes_bandwidth(log_path, "POSIX") + I/O performance estimate (at the POSIX layer): transferred 101785.8 MiB at 164.99 MiB/s + + >>> log_get_bytes_bandwidth(log_path, "MPI-IO") + I/O performance estimate (at the MPI-IO layer): transferred 126326.8 MiB at 101.58 MiB/s + """ + # get total bytes (in MiB) and bandwidth (in MiB/s) for + # a given module -- this information was commonly reported + # in the old perl-based summary reports + darshan_derived_metrics = log_get_derived_metrics(log_path=log_path, + mod_name=mod_name) + total_mib = darshan_derived_metrics.total_bytes / 2 ** 20 + total_bw = darshan_derived_metrics.agg_perf_by_slowest + ret_str = f"I/O performance estimate (at the {mod_name} layer): transferred {total_mib:.1f} MiB at {total_bw:.2f} MiB/s" + return ret_str diff --git a/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py b/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py index 578341946..4c95d52f2 100644 --- a/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py +++ b/darshan-util/pydarshan/darshan/tests/test_cffi_misc.py @@ -159,92 +159,3 @@ def test_log_get_generic_record(dtype): # make sure the returned key/column names agree assert actual_counter_names == expected_counter_names assert actual_fcounter_names == expected_fcounter_names - - -@pytest.mark.parametrize("log_path, mod_name, expected_str", [ - # the expected bytes/bandwidth strings are pasted - # directly from the old perl summary reports; - # exceptions noted below - # in some cases we defer to darshan-parser for the expected - # values; see discussion in gh-839 - ("imbalanced-io.darshan", - "STDIO", - "I/O performance estimate (at the STDIO layer): transferred 1.1 MiB at 0.01 MiB/s"), - ("imbalanced-io.darshan", - "MPI-IO", - "I/O performance estimate (at the MPI-IO layer): transferred 126326.8 MiB at 101.58 MiB/s"), - # imbalanced-io.darshan does have LUSTRE data, - # but it doesn't support derived metrics at time - # of writing - ("imbalanced-io.darshan", - "LUSTRE", - "RuntimeError"), - ("imbalanced-io.darshan", - "POSIX", - "I/O performance estimate (at the POSIX layer): transferred 101785.8 MiB at 164.99 MiB/s"), - ("laytonjb_test1_id28730_6-7-43012-2131301613401632697_1.darshan", - "STDIO", - "I/O performance estimate (at the STDIO layer): transferred 0.0 MiB at 4.22 MiB/s"), - ("runtime_and_dxt_heatmaps_diagonal_write_only.darshan", - "POSIX", - "I/O performance estimate (at the POSIX layer): transferred 0.0 MiB at 0.02 MiB/s"), - ("treddy_mpi-io-test_id4373053_6-2-60198-9815401321915095332_1.darshan", - "STDIO", - "I/O performance estimate (at the STDIO layer): transferred 0.0 MiB at 16.47 MiB/s"), - ("e3sm_io_heatmap_only.darshan", - "STDIO", - "I/O performance estimate (at the STDIO layer): transferred 0.0 MiB at 3.26 MiB/s"), - ("e3sm_io_heatmap_only.darshan", - "MPI-IO", - "I/O performance estimate (at the MPI-IO layer): transferred 73880.2 MiB at 105.69 MiB/s"), - ("partial_data_stdio.darshan", - "MPI-IO", - "I/O performance estimate (at the MPI-IO layer): transferred 32.0 MiB at 2317.98 MiB/s"), - ("partial_data_stdio.darshan", - "STDIO", - "I/O performance estimate (at the STDIO layer): transferred 16336.0 MiB at 2999.14 MiB/s"), - # the C derived metrics code can't distinguish - # between different kinds of errors at this time, - # but we can still intercept in some cases... 
- ("partial_data_stdio.darshan", - "GARBAGE", - "ValueError"), - # TODO: determine if the lack of APMPI and - # any other "add-ons" in _structdefs is a bug - # in the control flow for `log_get_derived_metrics()`? - ("e3sm_io_heatmap_only.darshan", - "APMPI", - "KeyError"), - ("skew-app.darshan", - "POSIX", - "I/O performance estimate (at the POSIX layer): transferred 41615.8 MiB at 157.49 MiB/s"), - ("skew-app.darshan", - "MPI-IO", - "I/O performance estimate (at the MPI-IO layer): transferred 41615.8 MiB at 55.22 MiB/s"), -]) -def test_derived_metrics_bytes_and_bandwidth(log_path, mod_name, expected_str): - # test the basic scenario of retrieving - # the total data transferred and bandwidth - # for all records in a given module; the situation - # of accumulating drived metrics with filtering - # (i.e., for a single filename) is not tested here - - log_path = get_log_path(log_path) - if expected_str == "RuntimeError": - with pytest.raises(RuntimeError, - match=f"{mod_name} module does not support derived"): - backend.log_get_bytes_bandwidth(log_path=log_path, - mod_name=mod_name) - elif expected_str == "ValueError": - with pytest.raises(ValueError, - match=f"{mod_name} is not in the available log"): - backend.log_get_bytes_bandwidth(log_path=log_path, - mod_name=mod_name) - elif expected_str == "KeyError": - with pytest.raises(KeyError, match=f"{mod_name}"): - backend.log_get_bytes_bandwidth(log_path=log_path, - mod_name=mod_name) - else: - actual_str = backend.log_get_bytes_bandwidth(log_path=log_path, - mod_name=mod_name) - assert actual_str == expected_str diff --git a/darshan-util/pydarshan/darshan/tests/test_lib_accum.py b/darshan-util/pydarshan/darshan/tests/test_lib_accum.py new file mode 100644 index 000000000..786796823 --- /dev/null +++ b/darshan-util/pydarshan/darshan/tests/test_lib_accum.py @@ -0,0 +1,93 @@ +from darshan.lib.accum import log_get_bytes_bandwidth +from darshan.log_utils import get_log_path + +import pytest + + +@pytest.mark.parametrize("log_path, mod_name, expected_str", [ + # the expected bytes/bandwidth strings are pasted + # directly from the old perl summary reports; + # exceptions noted below + # in some cases we defer to darshan-parser for the expected + # values; see discussion in gh-839 + ("imbalanced-io.darshan", + "STDIO", + "I/O performance estimate (at the STDIO layer): transferred 1.1 MiB at 0.01 MiB/s"), + ("imbalanced-io.darshan", + "MPI-IO", + "I/O performance estimate (at the MPI-IO layer): transferred 126326.8 MiB at 101.58 MiB/s"), + # imbalanced-io.darshan does have LUSTRE data, + # but it doesn't support derived metrics at time + # of writing + ("imbalanced-io.darshan", + "LUSTRE", + "RuntimeError"), + ("imbalanced-io.darshan", + "POSIX", + "I/O performance estimate (at the POSIX layer): transferred 101785.8 MiB at 164.99 MiB/s"), + ("laytonjb_test1_id28730_6-7-43012-2131301613401632697_1.darshan", + "STDIO", + "I/O performance estimate (at the STDIO layer): transferred 0.0 MiB at 4.22 MiB/s"), + ("runtime_and_dxt_heatmaps_diagonal_write_only.darshan", + "POSIX", + "I/O performance estimate (at the POSIX layer): transferred 0.0 MiB at 0.02 MiB/s"), + ("treddy_mpi-io-test_id4373053_6-2-60198-9815401321915095332_1.darshan", + "STDIO", + "I/O performance estimate (at the STDIO layer): transferred 0.0 MiB at 16.47 MiB/s"), + ("e3sm_io_heatmap_only.darshan", + "STDIO", + "I/O performance estimate (at the STDIO layer): transferred 0.0 MiB at 3.26 MiB/s"), + ("e3sm_io_heatmap_only.darshan", + "MPI-IO", + "I/O performance estimate (at the MPI-IO 
layer): transferred 73880.2 MiB at 105.69 MiB/s"), + ("partial_data_stdio.darshan", + "MPI-IO", + "I/O performance estimate (at the MPI-IO layer): transferred 32.0 MiB at 2317.98 MiB/s"), + ("partial_data_stdio.darshan", + "STDIO", + "I/O performance estimate (at the STDIO layer): transferred 16336.0 MiB at 2999.14 MiB/s"), + # the C derived metrics code can't distinguish + # between different kinds of errors at this time, + # but we can still intercept in some cases... + ("partial_data_stdio.darshan", + "GARBAGE", + "ValueError"), + # TODO: determine if the lack of APMPI and + # any other "add-ons" in _structdefs is a bug + # in the control flow for `log_get_derived_metrics()`? + ("e3sm_io_heatmap_only.darshan", + "APMPI", + "KeyError"), + ("skew-app.darshan", + "POSIX", + "I/O performance estimate (at the POSIX layer): transferred 41615.8 MiB at 157.49 MiB/s"), + ("skew-app.darshan", + "MPI-IO", + "I/O performance estimate (at the MPI-IO layer): transferred 41615.8 MiB at 55.22 MiB/s"), +]) +def test_derived_metrics_bytes_and_bandwidth(log_path, mod_name, expected_str): + # test the basic scenario of retrieving + # the total data transferred and bandwidth + # for all records in a given module; the situation + # of accumulating drived metrics with filtering + # (i.e., for a single filename) is not tested here + + log_path = get_log_path(log_path) + if expected_str == "RuntimeError": + with pytest.raises(RuntimeError, + match=f"{mod_name} module does not support derived"): + log_get_bytes_bandwidth(log_path=log_path, + mod_name=mod_name) + elif expected_str == "ValueError": + with pytest.raises(ValueError, + match=f"{mod_name} is not in the available log"): + log_get_bytes_bandwidth(log_path=log_path, + mod_name=mod_name) + elif expected_str == "KeyError": + with pytest.raises(KeyError, match=f"{mod_name}"): + log_get_bytes_bandwidth(log_path=log_path, + mod_name=mod_name) + else: + actual_str = log_get_bytes_bandwidth(log_path=log_path, + mod_name=mod_name) + assert actual_str == expected_str From 8f83274dad17664d763e9164ffc0f4efcbcd2d2e Mon Sep 17 00:00:00 2001 From: Shane Snyder Date: Fri, 27 Jan 2023 14:00:27 -0600 Subject: [PATCH 18/19] make sure to call accumulator_destroy() --- darshan-util/pydarshan/darshan/backend/cffi_backend.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/darshan-util/pydarshan/darshan/backend/cffi_backend.py b/darshan-util/pydarshan/darshan/backend/cffi_backend.py index 6cc71d08a..5f92c3e60 100644 --- a/darshan-util/pydarshan/darshan/backend/cffi_backend.py +++ b/darshan-util/pydarshan/darshan/backend/cffi_backend.py @@ -717,11 +717,12 @@ def log_get_derived_metrics(log_path: str, mod_name: str): darshan_derived_metrics, rbuf[0]) libdutil.darshan_free(buf[0]) + libdutil.darshan_accumulator_destroy(darshan_accumulator[0]) + log_close(log_handle) if r != 0: raise RuntimeError("A nonzero exit code was received from " "darshan_accumulator_emit() at the C level. 
" "It may be possible " "to retrieve additional information from the stderr " "stream.") - log_close(log_handle) return darshan_derived_metrics From 02bf33f091c202ce35173f0d4aec59ab6a0197ef Mon Sep 17 00:00:00 2001 From: Shane Snyder Date: Fri, 27 Jan 2023 17:27:46 -0600 Subject: [PATCH 19/19] sync gitignore [skip actions] --- darshan-util/pydarshan/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/darshan-util/pydarshan/.gitignore b/darshan-util/pydarshan/.gitignore index b2e538724..826150ea1 100644 --- a/darshan-util/pydarshan/.gitignore +++ b/darshan-util/pydarshan/.gitignore @@ -24,6 +24,7 @@ dist/ downloads/ eggs/ .eggs/ +lib/ lib64/ parts/ sdist/