-
Notifications
You must be signed in to change notification settings - Fork 31
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* `cffi_backend` module changes requested from PR review - remove a spurious `darshan_free` from `_log_get_heatmap_record()` - fix the scoping of the `darshan_free` of `buf` object used with `darshan_accumulator_inject` in `log_get_derived_metrics` - adding a missing `log_close()` to `log_get_derived_metrics` (maybe we can wrap in Python contexts in the future though) - use a separate buffer for `darshan_accumulator_emit()` inside `log_get_derived_metrics` * note that making the above CFFI/free-related changes caused a segfault in the testuite, so in the end I adjusted the location of the memory freeing as I saw fit to avoid segfaults--I'd say at this point please provide concrete evidence with a memory leak plot or failing test for additional adjustments there, or just push the change in * in the end, there is a slightly more concise usage of `darshan_free()` but no meaningful change in the free operations * I also reverted the suggested changed to `darshan_accumulator_emit()` usage--there was no testable evidence of an issue, and it was also causing segfaults.. * address many of the discussion points that came up in gh-868: - `log_get_derived_metrics()` now uses an LRU cache, which effectively means that we use memoization to return derived metrics data rather than doing another pass over the log file if the same log path and module name have already been accumulated from; we still need to pass over a given log twice in most cases--once at initial read-in and once for using `log_get_derived_metrics`; how we decide to add filtering of records prior to accumulation interface in Python is probably a deeper discussion/for later - `log_get_bytes_bandwidth()` and its associated testing have been migrated to modules not named after "CFFI", like the in the above PR, because I think we should only use the "CFFI" named modules for direct CFFI interaction/testing, and for other analyses we should probably use more distinct names. Also, to some extent everything depends on the CFFI layer, so trying to restrict "CFFI" modules to direct rather than direct interaction will help keep them manageably sized, especially given the proclivity for surprising memory issues/segfaults in those parts of the code. - add a proper docstring with examples for `log_get_bytes_bandwidth()`
- Loading branch information
1 parent
f1bac18
commit 96806fa
Showing
6 changed files
with
150 additions
and
106 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,7 +24,6 @@ dist/ | |
downloads/ | ||
eggs/ | ||
.eggs/ | ||
lib/ | ||
lib64/ | ||
parts/ | ||
sdist/ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
from darshan.backend.cffi_backend import log_get_derived_metrics | ||
|
||
|
||
def log_get_bytes_bandwidth(log_path: str, mod_name: str) -> str: | ||
""" | ||
Summarize I/O performance for a given darshan module. | ||
Parameters | ||
---------- | ||
log_path : str | ||
Path to the darshan binary log file. | ||
mod_name : str | ||
Name of the darshan module to summarize the I/O | ||
performance for. | ||
Returns | ||
------- | ||
out: str | ||
A short string summarizing the performance of the given module | ||
in the provided log file, including bandwidth and total data | ||
transferred. | ||
Raises | ||
------ | ||
RuntimeError | ||
When a provided module name is not supported for the accumulator | ||
interface for provision of the summary data, or for any other | ||
error that occurs in the C/CFFI interface. | ||
ValueError | ||
When a provided module name does not exist in the log file. | ||
Examples | ||
-------- | ||
>>> from darshan.log_utils import get_log_path | ||
>>> from darshan.lib.accum import log_get_bytes_bandwidth | ||
>>> log_path = get_log_path("imbalanced-io.darshan") | ||
>>> log_get_bytes_bandwidth(log_path, "POSIX") | ||
I/O performance estimate (at the POSIX layer): transferred 101785.8 MiB at 164.99 MiB/s | ||
>>> log_get_bytes_bandwidth(log_path, "MPI-IO") | ||
I/O performance estimate (at the MPI-IO layer): transferred 126326.8 MiB at 101.58 MiB/s | ||
""" | ||
# get total bytes (in MiB) and bandwidth (in MiB/s) for | ||
# a given module -- this information was commonly reported | ||
# in the old perl-based summary reports | ||
darshan_derived_metrics = log_get_derived_metrics(log_path=log_path, | ||
mod_name=mod_name) | ||
total_mib = darshan_derived_metrics.total_bytes / 2 ** 20 | ||
total_bw = darshan_derived_metrics.agg_perf_by_slowest | ||
ret_str = f"I/O performance estimate (at the {mod_name} layer): transferred {total_mib:.1f} MiB at {total_bw:.2f} MiB/s" | ||
return ret_str |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
from darshan.lib.accum import log_get_bytes_bandwidth | ||
from darshan.log_utils import get_log_path | ||
|
||
import pytest | ||
|
||
|
||
@pytest.mark.parametrize("log_path, mod_name, expected_str", [ | ||
# the expected bytes/bandwidth strings are pasted | ||
# directly from the old perl summary reports; | ||
# exceptions noted below | ||
# in some cases we defer to darshan-parser for the expected | ||
# values; see discussion in gh-839 | ||
("imbalanced-io.darshan", | ||
"STDIO", | ||
"I/O performance estimate (at the STDIO layer): transferred 1.1 MiB at 0.01 MiB/s"), | ||
("imbalanced-io.darshan", | ||
"MPI-IO", | ||
"I/O performance estimate (at the MPI-IO layer): transferred 126326.8 MiB at 101.58 MiB/s"), | ||
# imbalanced-io.darshan does have LUSTRE data, | ||
# but it doesn't support derived metrics at time | ||
# of writing | ||
("imbalanced-io.darshan", | ||
"LUSTRE", | ||
"RuntimeError"), | ||
("imbalanced-io.darshan", | ||
"POSIX", | ||
"I/O performance estimate (at the POSIX layer): transferred 101785.8 MiB at 164.99 MiB/s"), | ||
("laytonjb_test1_id28730_6-7-43012-2131301613401632697_1.darshan", | ||
"STDIO", | ||
"I/O performance estimate (at the STDIO layer): transferred 0.0 MiB at 4.22 MiB/s"), | ||
("runtime_and_dxt_heatmaps_diagonal_write_only.darshan", | ||
"POSIX", | ||
"I/O performance estimate (at the POSIX layer): transferred 0.0 MiB at 0.02 MiB/s"), | ||
("treddy_mpi-io-test_id4373053_6-2-60198-9815401321915095332_1.darshan", | ||
"STDIO", | ||
"I/O performance estimate (at the STDIO layer): transferred 0.0 MiB at 16.47 MiB/s"), | ||
("e3sm_io_heatmap_only.darshan", | ||
"STDIO", | ||
"I/O performance estimate (at the STDIO layer): transferred 0.0 MiB at 3.26 MiB/s"), | ||
("e3sm_io_heatmap_only.darshan", | ||
"MPI-IO", | ||
"I/O performance estimate (at the MPI-IO layer): transferred 73880.2 MiB at 105.69 MiB/s"), | ||
("partial_data_stdio.darshan", | ||
"MPI-IO", | ||
"I/O performance estimate (at the MPI-IO layer): transferred 32.0 MiB at 2317.98 MiB/s"), | ||
("partial_data_stdio.darshan", | ||
"STDIO", | ||
"I/O performance estimate (at the STDIO layer): transferred 16336.0 MiB at 2999.14 MiB/s"), | ||
# the C derived metrics code can't distinguish | ||
# between different kinds of errors at this time, | ||
# but we can still intercept in some cases... | ||
("partial_data_stdio.darshan", | ||
"GARBAGE", | ||
"ValueError"), | ||
# TODO: determine if the lack of APMPI and | ||
# any other "add-ons" in _structdefs is a bug | ||
# in the control flow for `log_get_derived_metrics()`? | ||
("e3sm_io_heatmap_only.darshan", | ||
"APMPI", | ||
"KeyError"), | ||
("skew-app.darshan", | ||
"POSIX", | ||
"I/O performance estimate (at the POSIX layer): transferred 41615.8 MiB at 157.49 MiB/s"), | ||
("skew-app.darshan", | ||
"MPI-IO", | ||
"I/O performance estimate (at the MPI-IO layer): transferred 41615.8 MiB at 55.22 MiB/s"), | ||
]) | ||
def test_derived_metrics_bytes_and_bandwidth(log_path, mod_name, expected_str): | ||
# test the basic scenario of retrieving | ||
# the total data transferred and bandwidth | ||
# for all records in a given module; the situation | ||
# of accumulating drived metrics with filtering | ||
# (i.e., for a single filename) is not tested here | ||
|
||
log_path = get_log_path(log_path) | ||
if expected_str == "RuntimeError": | ||
with pytest.raises(RuntimeError, | ||
match=f"{mod_name} module does not support derived"): | ||
log_get_bytes_bandwidth(log_path=log_path, | ||
mod_name=mod_name) | ||
elif expected_str == "ValueError": | ||
with pytest.raises(ValueError, | ||
match=f"{mod_name} is not in the available log"): | ||
log_get_bytes_bandwidth(log_path=log_path, | ||
mod_name=mod_name) | ||
elif expected_str == "KeyError": | ||
with pytest.raises(KeyError, match=f"{mod_name}"): | ||
log_get_bytes_bandwidth(log_path=log_path, | ||
mod_name=mod_name) | ||
else: | ||
actual_str = log_get_bytes_bandwidth(log_path=log_path, | ||
mod_name=mod_name) | ||
assert actual_str == expected_str |