From f3e499c9ca87093ac92bc1300d567e6d20b08318 Mon Sep 17 00:00:00 2001 From: Jiri Pavela Date: Tue, 13 Aug 2024 16:05:47 +0200 Subject: [PATCH 01/14] Polish the formatting of exit codes in showdiff --- perun/profile/imports.py | 31 ++++++++++++++++--------------- perun/utils/common/diff_kit.py | 18 ++++++++++++++++-- 2 files changed, 32 insertions(+), 17 deletions(-) diff --git a/perun/profile/imports.py b/perun/profile/imports.py index 6e07f56e..a8491e44 100755 --- a/perun/profile/imports.py +++ b/perun/profile/imports.py @@ -18,7 +18,7 @@ # Perun Imports from perun.collect.kperf import parser -from perun.profile import helpers as p_helpers +from perun.profile import helpers as profile_helpers from perun.logic import commands, index, pcs from perun.utils import log, streams from perun.utils.common import script_kit, common_kit @@ -55,12 +55,12 @@ class ImportedProfiles: def __init__(self, targets: list[str], import_dir: str | None, stats_info: str | None) -> None: self.import_dir: Path = Path(import_dir) if import_dir is not None else Path.cwd() # Parse the CLI stats if available - self.stats: list[p_helpers.ProfileStat] = [] + self.stats: list[profile_helpers.ProfileStat] = [] self.profiles: list[ImportProfileSpec] = [] if stats_info is not None: self.stats = [ - p_helpers.ProfileStat.from_string(*stat.split("|")) + profile_helpers.ProfileStat.from_string(*stat.split("|")) for stat in stats_info.split(",") ] @@ -78,12 +78,12 @@ def __iter__(self) -> Iterator[ImportProfileSpec]: def __len__(self) -> int: return len(self.profiles) - def get_exit_codes(self) -> str: - return ", ".join(str(p.exit_code) for p in self.profiles) + def get_exit_codes(self) -> list[int]: + return [p.exit_code for p in self.profiles] def aggregate_stats( self, agg: Callable[[list[float | int]], float] - ) -> Iterator[p_helpers.ProfileStat]: + ) -> Iterator[profile_helpers.ProfileStat]: stat_value_lists: list[list[float | int]] = [[] for _ in range(len(self.stats))] for profile in self.profiles: value_list: list[float | int] @@ -98,8 +98,8 @@ def _parse_import_csv(self, target: str) -> None: with open(self.import_dir / target, "r") as csvfile: csv_reader = csv.reader(csvfile, delimiter=",") header: list[str] = next(csv_reader) - stats: list[p_helpers.ProfileStat] = [ - p_helpers.ProfileStat.from_string(*stat_definition.split("|")) + stats: list[profile_helpers.ProfileStat] = [ + profile_helpers.ProfileStat.from_string(*stat_definition.split("|")) for stat_definition in header[2:] ] # Parse the CSV stat definition and check that they are not in conflict with the CLI @@ -122,13 +122,14 @@ def _add_imported_profile(self, target: list[str]) -> None: # Empty profile specification, warn log.warn("Empty import profile specification. 
Skipping.") else: - self.profiles.append( - ImportProfileSpec( - self.import_dir / target[0], - int(target[1]) if len(target) >= 2 else ImportProfileSpec.exit_code, - list(map(float, target[2:])), - ) + profile_info = ImportProfileSpec( + self.import_dir / target[0], + int(target[1]) if len(target) >= 2 else ImportProfileSpec.exit_code, + list(map(float, target[2:])), ) + if profile_info.exit_code != 0: + log.warn("Importing a profile with non-zero exit code.") + self.profiles.append(profile_info) def load_file(filepath: Path) -> str: @@ -226,7 +227,7 @@ def save_imported_profile(prof: Profile, save_to_index: bool, minor_version: Min :param minor_version: minor version corresponding to the imported profiles :param save_to_index: indication whether we should save the imported profiles to index """ - full_profile_name = p_helpers.generate_profile_name(prof) + full_profile_name = profile_helpers.generate_profile_name(prof) profile_directory = pcs.get_job_directory() full_profile_path = os.path.join(profile_directory, full_profile_name) diff --git a/perun/utils/common/diff_kit.py b/perun/utils/common/diff_kit.py index e3479a78..e4019cc6 100755 --- a/perun/utils/common/diff_kit.py +++ b/perun/utils/common/diff_kit.py @@ -79,7 +79,7 @@ def generate_header(profile: Profile) -> list[tuple[str, Any, str]]: :return: list of tuples (key and value) """ command = " ".join([profile["header"]["cmd"], profile["header"]["workload"]]).strip() - exitcode = profile["header"].get("exitcode", "?") + exitcode = _format_exit_codes(profile["header"].get("exitcode", "?")) machine_info = profile.get("machine", {}) return [ ( @@ -88,7 +88,7 @@ def generate_header(profile: Profile) -> list[tuple[str, Any, str]]: "The version control version, for which the profile was measured.", ), ("command", command, "The workload / command, for which the profile was measured."), - ("exitcode", exitcode, "The maximal exit code that was returned by underlying command."), + ("exitcode", exitcode, "The exit code that was returned by the underlying command."), ( "collector command", log.collector_to_command(profile.get("collector_info", {})), @@ -177,6 +177,20 @@ def _color_stat_value_diff( return lhs_value, rhs_value +def _format_exit_codes(exit_code: str | list[str] | list[int]) -> str: + # Unify the exit code types + exit_codes: list[str] = [] + if isinstance(exit_code, str): + exit_codes.append(exit_code) + else: + exit_codes = list(map(str, exit_code)) + # Color exit codes that are not zero + return ", ".join( + code if code == "0" else f'{code}' + for code in exit_codes + ) + + def generate_diff_of_stats( lhs_stats: list[helpers.ProfileStat], rhs_stats: list[helpers.ProfileStat] ) -> tuple[list[tuple[str, str, str]], list[tuple[str, str, str]]]: From f4754145ddfb5cc33de220fdab1a2b9b5edba311 Mon Sep 17 00:00:00 2001 From: Jiri Pavela Date: Tue, 13 Aug 2024 16:26:46 +0200 Subject: [PATCH 02/14] Rework the profile stats format and parsing The profile stats format now defines the ordering before the unit. Also, when parsing the stats and imported profiles, some leading and trailing whitespaces are now being stripped. 
--- perun/profile/helpers.py | 13 +++++++------ perun/profile/imports.py | 7 ++++--- perun/view_diff/report/run.py | 4 ++-- tests/sources/imports/import.csv | 4 ++-- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/perun/profile/helpers.py b/perun/profile/helpers.py index 062d605c..a422792e 100644 --- a/perun/profile/helpers.py +++ b/perun/profile/helpers.py @@ -620,23 +620,24 @@ class ProfileStat: } name: str - unit: str = "#" ordering: bool = True + unit: str = "#" tooltip: str = "" value: int | float = 0.0 @classmethod def from_string( cls, - name: str = "empty", - unit: str = "#", + name: str = "", ordering: str = "higher_is_better", + unit: str = "#", tooltip: str = "", *_: Any, ) -> ProfileStat: - if name == "empty": + if name == "": # Invalid stat specification, warn - perun_log.warn("Empty profile stat specification. Creating a dummy 'empty' stat.") + perun_log.warn("Empty profile stat specification. Creating a dummy '' stat.") + ordering = ordering.strip() if ordering not in cls.ALLOWED_ORDERING: # Invalid stat ordering, warn perun_log.warn( @@ -647,7 +648,7 @@ def from_string( ordering_bool = ProfileStat.ordering else: ordering_bool = cls.ALLOWED_ORDERING[ordering] - return cls(name, unit, ordering_bool, tooltip) + return cls(name, ordering_bool, unit, tooltip) def get_normalized_tooltip(self) -> str: # Find the string representation of the ordering to use in the tooltip diff --git a/perun/profile/imports.py b/perun/profile/imports.py index a8491e44..839cf18d 100755 --- a/perun/profile/imports.py +++ b/perun/profile/imports.py @@ -122,10 +122,11 @@ def _add_imported_profile(self, target: list[str]) -> None: # Empty profile specification, warn log.warn("Empty import profile specification. Skipping.") else: + # Make sure we strip the leading and trailing whitespaces in each column value profile_info = ImportProfileSpec( - self.import_dir / target[0], - int(target[1]) if len(target) >= 2 else ImportProfileSpec.exit_code, - list(map(float, target[2:])), + self.import_dir / target[0].strip(), + int(target[1].strip()) if len(target) >= 2 else ImportProfileSpec.exit_code, + list(map(lambda stat_value: float(stat_value.strip()), target[2:])), ) if profile_info.exit_code != 0: log.warn("Importing a profile with non-zero exit code.") diff --git a/perun/view_diff/report/run.py b/perun/view_diff/report/run.py index e9165cb0..9ceaf304 100755 --- a/perun/view_diff/report/run.py +++ b/perun/view_diff/report/run.py @@ -533,8 +533,8 @@ def process_traces( Config().profile_stats[profile_type].append( profile_helpers.ProfileStat( f"Overall {name}", - unit, False, + unit, f"The overall value of the {name} for the root value", max_samples[key], ) @@ -542,8 +542,8 @@ def process_traces( Config().profile_stats[profile_type].append( profile_helpers.ProfileStat( "Maximum Trace Length", - "#", False, + "#", "Maximum length of the trace in the profile", max_trace, ) diff --git a/tests/sources/imports/import.csv b/tests/sources/imports/import.csv index 800ae203..df52bf0c 100644 --- a/tests/sources/imports/import.csv +++ b/tests/sources/imports/import.csv @@ -1,3 +1,3 @@ -#Stackfile,Exit_code,bogo-ops-per-second-real-time|bogo-ops-per-second|higher_is_better,stat2|stat2_unit|stat2_direction -import.stack.gz,0,18511.379883,956.36 +#Stackfile,Exit_code,bogo-ops-per-second-real-time| higher_is_better|bogo-ops-per-second ,stat2|stat2_direction|stat2_unit +import.stack.gz, 0, 18511.379883,956.36 import.stack,0,17894.875698,897.01 \ No newline at end of file From 
a02bf246881889d16647414f8aaa9364c2d264cc Mon Sep 17 00:00:00 2001 From: Jiri Pavela Date: Wed, 4 Sep 2024 15:24:12 +0200 Subject: [PATCH 03/14] Add support for non-numeric profile stats --- perun/cli_groups/import_cli.py | 2 +- perun/profile/helpers.py | 52 +----- perun/profile/imports.py | 40 ++--- perun/profile/meson.build | 1 + perun/profile/stats.py | 284 +++++++++++++++++++++++++++++++++ perun/utils/common/diff_kit.py | 105 +++++++----- perun/view_diff/report/run.py | 20 +-- 7 files changed, 378 insertions(+), 126 deletions(-) create mode 100644 perun/profile/stats.py diff --git a/perun/cli_groups/import_cli.py b/perun/cli_groups/import_cli.py index de282661..79998062 100755 --- a/perun/cli_groups/import_cli.py +++ b/perun/cli_groups/import_cli.py @@ -41,7 +41,7 @@ "-t", nargs=1, default=None, - metavar="", + metavar="[STAT]", help="Describes the stats associated with the imported profiles. Please see the import " "documentation for details regarding the stat description format.", ) diff --git a/perun/profile/helpers.py b/perun/profile/helpers.py index a422792e..a9ffec05 100644 --- a/perun/profile/helpers.py +++ b/perun/profile/helpers.py @@ -17,13 +17,12 @@ from __future__ import annotations # Standard Imports -from typing import Any, TYPE_CHECKING, ClassVar import json import operator import os import re import time -from dataclasses import dataclass +from typing import Any, TYPE_CHECKING # Third-Party Imports @@ -610,52 +609,3 @@ def is_compatible_with_profile(self, profile: profiles.Profile) -> bool: "checksum", "source", ] - - -@dataclass -class ProfileStat: - ALLOWED_ORDERING: ClassVar[dict[str, bool]] = { - "higher_is_better": True, - "lower_is_better": False, - } - - name: str - ordering: bool = True - unit: str = "#" - tooltip: str = "" - value: int | float = 0.0 - - @classmethod - def from_string( - cls, - name: str = "", - ordering: str = "higher_is_better", - unit: str = "#", - tooltip: str = "", - *_: Any, - ) -> ProfileStat: - if name == "": - # Invalid stat specification, warn - perun_log.warn("Empty profile stat specification. Creating a dummy '' stat.") - ordering = ordering.strip() - if ordering not in cls.ALLOWED_ORDERING: - # Invalid stat ordering, warn - perun_log.warn( - f"Unknown stat ordering: {ordering}. Please choose one of " - f"({', '.join(cls.ALLOWED_ORDERING.keys())}). " - f"Using the default stat ordering value." 
- ) - ordering_bool = ProfileStat.ordering - else: - ordering_bool = cls.ALLOWED_ORDERING[ordering] - return cls(name, ordering_bool, unit, tooltip) - - def get_normalized_tooltip(self) -> str: - # Find the string representation of the ordering to use in the tooltip - ordering: str = "" - for str_desc, bool_repr in self.ALLOWED_ORDERING.items(): - if bool_repr == self.ordering: - ordering = str_desc.replace("_", " ") - if self.tooltip: - return f"{self.tooltip} ({ordering})" - return ordering diff --git a/perun/profile/imports.py b/perun/profile/imports.py index 839cf18d..6cff9a67 100755 --- a/perun/profile/imports.py +++ b/perun/profile/imports.py @@ -4,27 +4,26 @@ # Standard Imports from collections import defaultdict -from dataclasses import dataclass, field, asdict -from pathlib import Path -from typing import Any, Optional, Iterator, Callable import csv +from dataclasses import dataclass, field, asdict +import gzip import json import os -import statistics +from pathlib import Path import subprocess +from typing import Any, Optional, Iterator # Third-Party Imports -import gzip # Perun Imports from perun.collect.kperf import parser -from perun.profile import helpers as profile_helpers from perun.logic import commands, index, pcs +from perun.profile import helpers as profile_helpers, stats as profile_stats +from perun.profile.factory import Profile from perun.utils import log, streams from perun.utils.common import script_kit, common_kit from perun.utils.external import commands as external_commands, environment from perun.utils.structs import MinorVersion -from perun.profile.factory import Profile from perun.vcs import vcs_kit @@ -37,7 +36,7 @@ class ImportProfileSpec: path: Path exit_code: int = 0 - values: list[float] = field(default_factory=list) + values: list[float | str] = field(default_factory=list) class ImportedProfiles: @@ -55,12 +54,12 @@ class ImportedProfiles: def __init__(self, targets: list[str], import_dir: str | None, stats_info: str | None) -> None: self.import_dir: Path = Path(import_dir) if import_dir is not None else Path.cwd() # Parse the CLI stats if available - self.stats: list[profile_helpers.ProfileStat] = [] + self.stats: list[profile_stats.ProfileStat] = [] self.profiles: list[ImportProfileSpec] = [] if stats_info is not None: self.stats = [ - profile_helpers.ProfileStat.from_string(*stat.split("|")) + profile_stats.ProfileStat.from_string(*stat.split("|")) for stat in stats_info.split(",") ] @@ -81,25 +80,26 @@ def __len__(self) -> int: def get_exit_codes(self) -> list[int]: return [p.exit_code for p in self.profiles] - def aggregate_stats( - self, agg: Callable[[list[float | int]], float] - ) -> Iterator[profile_helpers.ProfileStat]: - stat_value_lists: list[list[float | int]] = [[] for _ in range(len(self.stats))] + def aggregate_stats(self) -> Iterator[profile_stats.ProfileStat]: + stat_value_lists: list[list[float | str]] = [[] for _ in range(len(self.stats))] for profile in self.profiles: - value_list: list[float | int] - stat_value: float | int + value_list: list[float | str] + stat_value: float | str for value_list, stat_value in zip(stat_value_lists, profile.values): value_list.append(stat_value) for value_list, stat_obj in zip(stat_value_lists, self.stats): - stat_obj.value = agg(value_list) + if len(value_list) == 1: + stat_obj.value = value_list[0] + else: + stat_obj.value = value_list yield stat_obj def _parse_import_csv(self, target: str) -> None: with open(self.import_dir / target, "r") as csvfile: csv_reader = csv.reader(csvfile, 
delimiter=",") header: list[str] = next(csv_reader) - stats: list[profile_helpers.ProfileStat] = [ - profile_helpers.ProfileStat.from_string(*stat_definition.split("|")) + stats: list[profile_stats.ProfileStat] = [ + profile_stats.ProfileStat.from_string(*stat_definition.split("|")) for stat_definition in header[2:] ] # Parse the CSV stat definition and check that they are not in conflict with the CLI @@ -192,7 +192,7 @@ def import_perf_profile( ) prof.update({"origin": minor_version.checksum}) prof.update({"machine": get_machine_info(machine_info)}) - prof.update({"stats": [asdict(stat) for stat in profiles.aggregate_stats(statistics.median)]}), + prof.update({"stats": [asdict(stat) for stat in profiles.aggregate_stats()]}), prof.update( { "header": { diff --git a/perun/profile/meson.build b/perun/profile/meson.build index e4fe1166..ba4baac8 100644 --- a/perun/profile/meson.build +++ b/perun/profile/meson.build @@ -7,6 +7,7 @@ perun_profile_files = files( 'helpers.py', 'imports.py', 'query.py', + 'stats.py', ) py3.install_sources( diff --git a/perun/profile/stats.py b/perun/profile/stats.py new file mode 100644 index 00000000..a7bca3d4 --- /dev/null +++ b/perun/profile/stats.py @@ -0,0 +1,284 @@ +from __future__ import annotations + +# Standard Imports +from collections import Counter +import dataclasses +import enum +import statistics +from typing import Any, Protocol, Iterable, ClassVar, Union, cast + +# Third-Party Imports + +# Perun Imports +from perun.utils import log as perun_log + + +class ProfileStatOrdering(str, enum.Enum): + # The class is derived from str so that ProfileStat serialization using asdict() works properly + AUTO = "auto" + HIGHER = "higher_is_better" + LOWER = "lower_is_better" + EQUALITY = "equality" + + @staticmethod + def supported() -> set[str]: + return {ordering.value for ordering in ProfileStatOrdering} + + @staticmethod + def default() -> ProfileStatOrdering: + return ProfileStatOrdering.AUTO + + +class StatComparisonResult(enum.Enum): + EQUAL = 1 + UNEQUAL = 2 + BASELINE_BETTER = 3 + TARGET_BETTER = 4 + INVALID = 5 + + +@dataclasses.dataclass +class ProfileStat: + # TODO: add the aggregate key + name: str + ordering: ProfileStatOrdering = ProfileStatOrdering.default() + unit: str = "#" + tooltip: str = "" + value: object = "" + + @classmethod + def from_string( + cls, + name: str = "", + ordering: str = "", + unit: str = "#", + tooltip: str = "", + *_: Any, + ) -> ProfileStat: + if name == "": + # Invalid stat specification, warn + perun_log.warn("Empty profile stat specification. Creating a dummy '' stat.") + ordering_enum = cls._convert_ordering(ordering) + return cls(name, ordering_enum, unit, tooltip) + + @classmethod + def from_profile(cls, stat: dict[str, Any]) -> ProfileStat: + stat["ordering"] = cls._convert_ordering(stat.get("ordering", "")) + return cls(**stat) + + @staticmethod + def _convert_ordering(ordering: str) -> ProfileStatOrdering: + if not ordering: + return ProfileStatOrdering.default() + try: + return ProfileStatOrdering(ordering.strip()) + except ValueError: + # Invalid stat ordering, warn + perun_log.warn( + f"Unknown stat ordering: {ordering}. Using the default stat ordering value instead." + f" Please choose one of ({', '.join(ProfileStatOrdering.supported())})." 
+ ) + return ProfileStatOrdering.default() + + +class ProfileStatAggregation(Protocol): + _SUPPORTED_KEYS: ClassVar[set[str]] = set() + _DEFAULT_KEY: ClassVar[str] = "" + + def get_value(self, key: str = _DEFAULT_KEY) -> Any: + if key not in self._SUPPORTED_KEYS: + perun_log.warn( + f"{self.__class__.__name__}: Unknown value key '{key}'. " + f"Using the default key '{self._DEFAULT_KEY}' instead." + ) + return getattr(self, self._DEFAULT_KEY) + return getattr(self, key) + + def infer_auto_ordering(self, ordering: ProfileStatOrdering) -> ProfileStatOrdering: ... + + # header, table + def as_table( + self, key: str = _DEFAULT_KEY + ) -> tuple[str | float | tuple[str, int], dict[str, Any]]: ... + + +@dataclasses.dataclass +class SingleValue(ProfileStatAggregation): + value: str | float = "" + + def get_value(self, _: str = "") -> str | float: + return self.value + + def infer_auto_ordering(self, ordering: ProfileStatOrdering) -> ProfileStatOrdering: + if ordering != ProfileStatOrdering.AUTO: + return ordering + if isinstance(self.value, str): + return ProfileStatOrdering.EQUALITY + return ProfileStatOrdering.HIGHER + + def as_table(self, _: str = "") -> tuple[str | float, dict[str, str | float]]: + # There are no details of a single value to generate into a table + return self.value, {} + + +@dataclasses.dataclass +class StatisticalSummary(ProfileStatAggregation): + _SUPPORTED_KEYS: ClassVar[set[str]] = { + "min", + "p10", + "p25", + "median", + "p75", + "p90", + "max", + "mean", + } + _DEFAULT_KEY: ClassVar[str] = "median" + + min: float = 0.0 + p10: float = 0.0 + p25: float = 0.0 + median: float = 0.0 + p75: float = 0.0 + p90: float = 0.0 + max: float = 0.0 + mean: float = 0.0 + + @classmethod + def from_values(cls, values: Iterable[float]) -> StatisticalSummary: + # We assume there aren't too many values so that multiple passes of the list don't matter + # too much. If this becomes a bottleneck, we can use pandas describe() instead. 
+ values = list(values) + quantiles = statistics.quantiles(values, n=20, method="inclusive") + return cls( + float(min(values)), + quantiles[2], # p10 + quantiles[5], # p25 + quantiles[10], # p50 + quantiles[15], # p75 + quantiles[18], # p90 + float(max(values)), + statistics.mean(values), + ) + + def infer_auto_ordering(self, ordering: ProfileStatOrdering) -> ProfileStatOrdering: + if ordering != ProfileStatOrdering.AUTO: + return ordering + return ProfileStatOrdering.HIGHER + + def as_table(self, key: str = _DEFAULT_KEY) -> tuple[float, dict[str, float]]: + return self.get_value(key), dataclasses.asdict(self) + + +@dataclasses.dataclass +class StringCollection(ProfileStatAggregation): + _SUPPORTED_KEYS: ClassVar[set[str]] = { + "total", + "unique", + "min_count", + "max_count", + "counts", + "sequence", + } + _DEFAULT_KEY: ClassVar[str] = "unique" + + sequence: list[str] = dataclasses.field(default_factory=list) + counts: Counter[str] = dataclasses.field(init=False) + + def __post_init__(self) -> None: + self.counts = Counter(self.sequence) + + @property + def unique(self) -> int: + return len(self.counts) + + @property + def total(self) -> int: + return len(self.sequence) + + @property + def min_count(self) -> tuple[str, int]: + return self.counts.most_common()[-1] + + @property + def max_count(self) -> tuple[str, int]: + return self.counts.most_common()[0] + + def infer_auto_ordering(self, ordering: ProfileStatOrdering) -> ProfileStatOrdering: + if ordering != ProfileStatOrdering.AUTO: + return ordering + return ProfileStatOrdering.EQUALITY + + def as_table( + self, key: str = _DEFAULT_KEY + ) -> tuple[int | str | tuple[str, int], dict[str, int] | dict[str, str]]: + header: str | int | tuple[str, int] + if key in ("counts", "sequence"): + # The Counter and list objects are not suitable for direct printing in a table. + header = f"<{key}>" + else: + # A little type hinting help. The list and Counter types have already been covered. + header = cast(Union[str, int, tuple[str, int]], self.get_value(key)) + if key == "sequence": + # The 'sequence' key table format is a bit different from the rest. 
+ return header, {f"{idx}.": value for idx, value in enumerate(self.sequence)} + return header, self.counts + + +def aggregate_stats(stat: ProfileStat) -> ProfileStatAggregation: + if isinstance(stat.value, (str, float, int)): + return SingleValue(stat.value) + if isinstance(stat.value, Iterable): + # Iterable types are converted to a list + values = list(stat.value) + if len(values) == 0: + perun_log.warn(f"ProfileStat aggregation: Missing value of stat '{stat.name}'") + return SingleValue() + elif len(values) == 1: + return SingleValue(values[0]) + elif all(isinstance(value, (int, float)) for value in values): + # All values are integers or floats + return StatisticalSummary.from_values(map(float, values)) + else: + # Even heterogeneous lists will be aggregated as lists of strings + return StringCollection(list(map(str, values))) + perun_log.warn( + f"ProfileStat aggregation: Unknown type '{type(stat.value)}' of stat '{stat.name}'" + ) + return SingleValue() + + +def compare_stats( + stat: ProfileStatAggregation, + other_stat: ProfileStatAggregation, + key: str, + ordering: ProfileStatOrdering, +) -> StatComparisonResult: + value, other_value = stat.get_value(key), other_stat.get_value(key) + # Handle auto ordering according to the aggregation type + ordering = stat.infer_auto_ordering(ordering) + if type(stat) is not type(other_stat): + # Invalid comparison attempt + perun_log.warn( + f"Invalid comparison of {stat.__class__.__name__} and {other_stat.__class__.__name__}." + ) + return StatComparisonResult.INVALID + if value == other_value: + # The values are the same, the result is the same regardless of the ordering used + return StatComparisonResult.EQUAL + if ordering == ProfileStatOrdering.EQUALITY: + # The values are different and we compare for equality + return StatComparisonResult.UNEQUAL + elif value > other_value: + return ( + StatComparisonResult.BASELINE_BETTER + if ordering == ProfileStatOrdering.HIGHER + else StatComparisonResult.TARGET_BETTER + ) + elif value < other_value: + return ( + StatComparisonResult.BASELINE_BETTER + if ordering == ProfileStatOrdering.LOWER + else StatComparisonResult.TARGET_BETTER + ) + return StatComparisonResult.UNEQUAL diff --git a/perun/utils/common/diff_kit.py b/perun/utils/common/diff_kit.py index e4019cc6..0c998721 100755 --- a/perun/utils/common/diff_kit.py +++ b/perun/utils/common/diff_kit.py @@ -2,18 +2,19 @@ from __future__ import annotations - # Standard Imports -from typing import Any, Optional, Iterable, Literal import difflib import os +from typing import Any, Optional, Iterable, Literal # Third-Party Imports # Perun Imports from perun.profile import helpers from perun.profile.factory import Profile +from perun.profile import stats as pstats from perun.utils import log +from perun.utils.common.common_kit import ColorChoiceType def save_diff_view( @@ -124,7 +125,7 @@ def diff_to_html(diff: list[str], start_tag: Literal["+", "-"]) -> str: :param diff: diff computed by difflib.ndiff :param start_tag: starting point of the tag """ - tag_to_color = { + tag_to_color: dict[str, ColorChoiceType] = { "+": "green", "-": "red", } @@ -133,15 +134,14 @@ def diff_to_html(diff: list[str], start_tag: Literal["+", "-"]) -> str: if chunk.startswith(" "): result.append(chunk[2:]) if chunk.startswith(start_tag): - result.append( - f'{chunk[2:]}' - ) + result.append(_emphasize(tag_to_color.get(start_tag, "grey"), chunk[2:])) return " ".join(result) def _color_stat_value_diff( - lhs_stat: helpers.ProfileStat, rhs_stat: helpers.ProfileStat + lhs_stat_agg: 
pstats.ProfileStatAggregation, + rhs_stat_agg: pstats.ProfileStatAggregation, + ordering: pstats.ProfileStatOrdering, ) -> tuple[str, str]: """Color the stat values on the LHS and RHS according to their difference. @@ -151,30 +151,25 @@ def _color_stat_value_diff( :param rhs_stat: a stat from the target :return: colored LHS and RHS stat values """ - # Map the colors based on the value ordering - color_map: dict[bool, str] = { - lhs_stat.ordering: "red", - not lhs_stat.ordering: "green", + comparison_result = pstats.compare_stats( + lhs_stat_agg, rhs_stat_agg, lhs_stat_agg._DEFAULT_KEY, ordering + ) + color_map: dict[pstats.StatComparisonResult, tuple[ColorChoiceType, ColorChoiceType]] = { + pstats.StatComparisonResult.INVALID: ("red", "red"), + pstats.StatComparisonResult.UNEQUAL: ("red", "red"), + pstats.StatComparisonResult.EQUAL: ("black", "black"), + pstats.StatComparisonResult.BASELINE_BETTER: ("green", "red"), + pstats.StatComparisonResult.TARGET_BETTER: ("red", "green"), } - lhs_value, rhs_value = str(lhs_stat.value), str(rhs_stat.value) - if lhs_stat.ordering != rhs_stat.ordering: - # Conflicting ordering in baseline and target, do not compare - log.warn( - f"Profile stats '{lhs_stat.name}' have conflicting ordering in baseline and target." - f" The stats will not be compared." - ) - elif lhs_value != rhs_value: - # Different stat values, color them - is_lhs_lower = lhs_stat.value < rhs_stat.value - lhs_value = ( - f'{lhs_value}' - ) - rhs_value = ( - f'{rhs_value}' + + baseline_color, target_color = color_map[comparison_result] + if comparison_result == pstats.StatComparisonResult.INVALID: + baseline_value, target_value = "", "" + else: + baseline_value, target_value = str(lhs_stat_agg.as_table()[0]), str( + rhs_stat_agg.as_table()[0] ) - return lhs_value, rhs_value + return _emphasize(baseline_color, baseline_value), _emphasize(target_color, target_value) def _format_exit_codes(exit_code: str | list[str] | list[int]) -> str: @@ -185,14 +180,11 @@ def _format_exit_codes(exit_code: str | list[str] | list[int]) -> str: else: exit_codes = list(map(str, exit_code)) # Color exit codes that are not zero - return ", ".join( - code if code == "0" else f'{code}' - for code in exit_codes - ) + return ", ".join(code if code == "0" else _emphasize("red", code) for code in exit_codes) def generate_diff_of_stats( - lhs_stats: list[helpers.ProfileStat], rhs_stats: list[helpers.ProfileStat] + lhs_stats: list[pstats.ProfileStat], rhs_stats: list[pstats.ProfileStat] ) -> tuple[list[tuple[str, str, str]], list[tuple[str, str, str]]]: """Re-generate the stats with CSS diff styles suitable for an output. 
@@ -203,32 +195,48 @@ def generate_diff_of_stats( """ # Get all the stats that occur in either lhs or rhs and match those that exist in both - stats_map: dict[str, dict[str, helpers.ProfileStat]] = {} + stats_map: dict[str, dict[str, pstats.ProfileStat]] = {} for stat_source, stat_list in [("lhs", lhs_stats), ("rhs", rhs_stats)]: for stat in stat_list: stats_map.setdefault(stat.name, {})[stat_source] = stat # Iterate the stats and format them according to their diffs lhs_diff, rhs_diff = [], [] for stat_key in sorted(stats_map.keys()): - lhs_stat: helpers.ProfileStat | None = stats_map[stat_key].get("lhs", None) - rhs_stat: helpers.ProfileStat | None = stats_map[stat_key].get("rhs", None) - lhs_tooltip = lhs_stat.get_normalized_tooltip() if lhs_stat is not None else "" - rhs_tooltip = rhs_stat.get_normalized_tooltip() if rhs_stat is not None else "" + lhs_stat: pstats.ProfileStat | None = stats_map[stat_key].get("lhs", None) + rhs_stat: pstats.ProfileStat | None = stats_map[stat_key].get("rhs", None) if rhs_stat and lhs_stat is None: # There is no matching stat on the LHS + rhs_stat_agg = pstats.aggregate_stats(rhs_stat) + rhs_tooltip = normalize_stat_tooltip( + rhs_stat.tooltip, rhs_stat_agg.infer_auto_ordering(rhs_stat.ordering) + ) lhs_diff.append((f"{rhs_stat.name} [{rhs_stat.unit}]", "-", rhs_tooltip)) rhs_diff.append( - (f"{rhs_stat.name} [{rhs_stat.unit}]", str(rhs_stat.value), rhs_tooltip) + (f"{rhs_stat.name} [{rhs_stat.unit}]", str(rhs_stat_agg.as_table()[0]), rhs_tooltip) ) elif lhs_stat and rhs_stat is None: # There is no matching stat on the RHS + lhs_stat_agg = pstats.aggregate_stats(lhs_stat) + lhs_tooltip = normalize_stat_tooltip( + lhs_stat.tooltip, lhs_stat_agg.infer_auto_ordering(lhs_stat.ordering) + ) lhs_diff.append( - (f"{lhs_stat.name} [{lhs_stat.unit}]", str(lhs_stat.value), lhs_tooltip) + (f"{lhs_stat.name} [{lhs_stat.unit}]", str(lhs_stat_agg.as_table()[0]), lhs_tooltip) ) rhs_diff.append((f"{lhs_stat.name} [{lhs_stat.unit}]", "-", lhs_tooltip)) elif lhs_stat and rhs_stat: # The stat is present on both LHS and RHS - lhs_value, rhs_value = _color_stat_value_diff(lhs_stat, rhs_stat) + lhs_stat_agg = pstats.aggregate_stats(lhs_stat) + rhs_stat_agg = pstats.aggregate_stats(rhs_stat) + rhs_tooltip = normalize_stat_tooltip( + rhs_stat.tooltip, rhs_stat_agg.infer_auto_ordering(rhs_stat.ordering) + ) + lhs_tooltip = normalize_stat_tooltip( + lhs_stat.tooltip, lhs_stat_agg.infer_auto_ordering(lhs_stat.ordering) + ) + lhs_value, rhs_value = _color_stat_value_diff( + lhs_stat_agg, rhs_stat_agg, lhs_stat.ordering + ) lhs_diff.append((f"{lhs_stat.name} [{lhs_stat.unit}]", lhs_value, lhs_tooltip)) rhs_diff.append((f"{rhs_stat.name} [{rhs_stat.unit}]", rhs_value, rhs_tooltip)) return lhs_diff, rhs_diff @@ -251,7 +259,7 @@ def generate_diff_of_headers( ) if lhs_value != rhs_value: diff = list(difflib.ndiff(str(lhs_value).split(), str(rhs_value).split())) - key = f'{lhs_key}' + key = _emphasize("red", lhs_key) lhs_diff.append((key, diff_to_html(diff, "-"), lhs_info)) rhs_diff.append((key, diff_to_html(diff, "+"), lhs_info)) else: @@ -290,3 +298,12 @@ def generate_metadata( [(k, v, "") for k, v in rhs_profile.get("metadata", {}).items()], key=lambda x: x[0] ) return generate_diff_of_headers(lhs_metadata, rhs_metadata) + + +def normalize_stat_tooltip(tooltip: str, ordering: pstats.ProfileStatOrdering) -> str: + ordering_str: str = f'[{ordering.value.replace("_", " ")}]' + return f"{tooltip} {ordering_str}" + + +def _emphasize(color: ColorChoiceType, value: str) -> str: + return 
f'{value}' diff --git a/perun/view_diff/report/run.py b/perun/view_diff/report/run.py index 9ceaf304..25391f80 100755 --- a/perun/view_diff/report/run.py +++ b/perun/view_diff/report/run.py @@ -15,19 +15,19 @@ from __future__ import annotations # Standard Imports -from operator import itemgetter -from typing import Any, Literal, Type, Callable +import array from collections import defaultdict from dataclasses import dataclass -import array +from operator import itemgetter import sys +from typing import Any, Literal, Type, Callable # Third-Party Imports import click # Perun Imports from perun.logic import config -from perun.profile import convert, helpers as profile_helpers +from perun.profile import convert, stats as profile_stats from perun.profile.factory import Profile from perun.templates import filters, factory as templates from perun.utils import log, mapping @@ -71,7 +71,7 @@ def __init__(self) -> None: self.max_seen_trace: int = 0 self.max_per_resource: dict[str, float] = defaultdict(float) self.minimize: bool = False - self.profile_stats: dict[str, list[profile_helpers.ProfileStat]] = { + self.profile_stats: dict[str, list[profile_stats.ProfileStat]] = { "baseline": [], "target": [], } @@ -531,18 +531,18 @@ def process_traces( name.rstrip() unit = unit.rsplit("]", maxsplit=1)[0] Config().profile_stats[profile_type].append( - profile_helpers.ProfileStat( + profile_stats.ProfileStat( f"Overall {name}", - False, + profile_stats.ProfileStatOrdering.LOWER, unit, f"The overall value of the {name} for the root value", max_samples[key], ) ) Config().profile_stats[profile_type].append( - profile_helpers.ProfileStat( + profile_stats.ProfileStat( "Maximum Trace Length", - False, + profile_stats.ProfileStatOrdering.LOWER, "#", "Maximum length of the trace in the profile", max_trace, @@ -550,7 +550,7 @@ def process_traces( ) Config().max_seen_trace = max(max_trace, Config().max_seen_trace) for stat in profile.get("stats", []): - Config().profile_stats[profile_type].append(profile_helpers.ProfileStat(**stat)) + Config().profile_stats[profile_type].append(profile_stats.ProfileStat.from_profile(stat)) def generate_trace_stats(graph: Graph) -> dict[str, list[TraceStat]]: From 91d769a3c369d9d7fbd612a885c5063f517186a5 Mon Sep 17 00:00:00 2001 From: Jiri Pavela Date: Sat, 7 Sep 2024 16:32:40 +0200 Subject: [PATCH 04/14] Add support for specifying ProfileStat aggregate key --- perun/profile/stats.py | 38 +++++---- perun/utils/common/diff_kit.py | 143 ++++++++++++++------------------- perun/view_diff/report/run.py | 8 +- 3 files changed, 88 insertions(+), 101 deletions(-) diff --git a/perun/profile/stats.py b/perun/profile/stats.py index a7bca3d4..408363fd 100644 --- a/perun/profile/stats.py +++ b/perun/profile/stats.py @@ -39,27 +39,28 @@ class StatComparisonResult(enum.Enum): @dataclasses.dataclass class ProfileStat: - # TODO: add the aggregate key name: str ordering: ProfileStatOrdering = ProfileStatOrdering.default() unit: str = "#" + aggregate_by: str = "" tooltip: str = "" value: object = "" @classmethod def from_string( cls, - name: str = "", + name: str = "[empty]", ordering: str = "", unit: str = "#", + aggregate_by: str = "", tooltip: str = "", *_: Any, ) -> ProfileStat: - if name == "": + if name == "[empty]": # Invalid stat specification, warn - perun_log.warn("Empty profile stat specification. Creating a dummy '' stat.") + perun_log.warn("Empty profile stat specification. 
Creating a dummy '[empty]' stat.") ordering_enum = cls._convert_ordering(ordering) - return cls(name, ordering_enum, unit, tooltip) + return cls(name, ordering_enum, unit, aggregate_by, tooltip) @classmethod def from_profile(cls, stat: dict[str, Any]) -> ProfileStat: @@ -85,14 +86,19 @@ class ProfileStatAggregation(Protocol): _SUPPORTED_KEYS: ClassVar[set[str]] = set() _DEFAULT_KEY: ClassVar[str] = "" - def get_value(self, key: str = _DEFAULT_KEY) -> Any: + def normalize_aggregate_key(self, key: str = _DEFAULT_KEY) -> str: if key not in self._SUPPORTED_KEYS: - perun_log.warn( - f"{self.__class__.__name__}: Unknown value key '{key}'. " - f"Using the default key '{self._DEFAULT_KEY}' instead." - ) - return getattr(self, self._DEFAULT_KEY) - return getattr(self, key) + if key: + # A key was provided, but it is an invalid one + perun_log.warn( + f"{self.__class__.__name__}: Invalid aggregate key '{key}'. " + f"Using the default key '{self._DEFAULT_KEY}' instead." + ) + key = self._DEFAULT_KEY + return key + + def get_value(self, key: str = _DEFAULT_KEY) -> Any: + return getattr(self, self.normalize_aggregate_key(key)) def infer_auto_ordering(self, ordering: ProfileStatOrdering) -> ProfileStatOrdering: ... @@ -104,10 +110,10 @@ def as_table( @dataclasses.dataclass class SingleValue(ProfileStatAggregation): - value: str | float = "" + _SUPPORTED_KEYS: ClassVar[set[str]] = {"value"} + _DEFAULT_KEY = "value" - def get_value(self, _: str = "") -> str | float: - return self.value + value: str | float = "[missing]" def infer_auto_ordering(self, ordering: ProfileStatOrdering) -> ProfileStatOrdering: if ordering != ProfileStatOrdering.AUTO: @@ -215,7 +221,7 @@ def as_table( header: str | int | tuple[str, int] if key in ("counts", "sequence"): # The Counter and list objects are not suitable for direct printing in a table. - header = f"<{key}>" + header = f"[{key}]" else: # A little type hinting help. The list and Counter types have already been covered. header = cast(Union[str, int, tuple[str, int]], self.get_value(key)) diff --git a/perun/utils/common/diff_kit.py b/perun/utils/common/diff_kit.py index 0c998721..2aad25a4 100755 --- a/perun/utils/common/diff_kit.py +++ b/perun/utils/common/diff_kit.py @@ -138,51 +138,6 @@ def diff_to_html(diff: list[str], start_tag: Literal["+", "-"]) -> str: return " ".join(result) -def _color_stat_value_diff( - lhs_stat_agg: pstats.ProfileStatAggregation, - rhs_stat_agg: pstats.ProfileStatAggregation, - ordering: pstats.ProfileStatOrdering, -) -> tuple[str, str]: - """Color the stat values on the LHS and RHS according to their difference. - - The color is determined by the stat ordering and the result of the stat values comparison. 
- - :param lhs_stat: a stat from the baseline - :param rhs_stat: a stat from the target - :return: colored LHS and RHS stat values - """ - comparison_result = pstats.compare_stats( - lhs_stat_agg, rhs_stat_agg, lhs_stat_agg._DEFAULT_KEY, ordering - ) - color_map: dict[pstats.StatComparisonResult, tuple[ColorChoiceType, ColorChoiceType]] = { - pstats.StatComparisonResult.INVALID: ("red", "red"), - pstats.StatComparisonResult.UNEQUAL: ("red", "red"), - pstats.StatComparisonResult.EQUAL: ("black", "black"), - pstats.StatComparisonResult.BASELINE_BETTER: ("green", "red"), - pstats.StatComparisonResult.TARGET_BETTER: ("red", "green"), - } - - baseline_color, target_color = color_map[comparison_result] - if comparison_result == pstats.StatComparisonResult.INVALID: - baseline_value, target_value = "", "" - else: - baseline_value, target_value = str(lhs_stat_agg.as_table()[0]), str( - rhs_stat_agg.as_table()[0] - ) - return _emphasize(baseline_color, baseline_value), _emphasize(target_color, target_value) - - -def _format_exit_codes(exit_code: str | list[str] | list[int]) -> str: - # Unify the exit code types - exit_codes: list[str] = [] - if isinstance(exit_code, str): - exit_codes.append(exit_code) - else: - exit_codes = list(map(str, exit_code)) - # Color exit codes that are not zero - return ", ".join(code if code == "0" else _emphasize("red", code) for code in exit_codes) - - def generate_diff_of_stats( lhs_stats: list[pstats.ProfileStat], rhs_stats: list[pstats.ProfileStat] ) -> tuple[list[tuple[str, str, str]], list[tuple[str, str, str]]]: @@ -193,7 +148,6 @@ def generate_diff_of_stats( :return: collection of LHS and RHS stats (stat-name [stat-unit], stat-value, stat-tooltip) with CSS styles that reflect the stat diffs. """ - # Get all the stats that occur in either lhs or rhs and match those that exist in both stats_map: dict[str, dict[str, pstats.ProfileStat]] = {} for stat_source, stat_list in [("lhs", lhs_stats), ("rhs", rhs_stats)]: @@ -204,41 +158,8 @@ def generate_diff_of_stats( for stat_key in sorted(stats_map.keys()): lhs_stat: pstats.ProfileStat | None = stats_map[stat_key].get("lhs", None) rhs_stat: pstats.ProfileStat | None = stats_map[stat_key].get("rhs", None) - if rhs_stat and lhs_stat is None: - # There is no matching stat on the LHS - rhs_stat_agg = pstats.aggregate_stats(rhs_stat) - rhs_tooltip = normalize_stat_tooltip( - rhs_stat.tooltip, rhs_stat_agg.infer_auto_ordering(rhs_stat.ordering) - ) - lhs_diff.append((f"{rhs_stat.name} [{rhs_stat.unit}]", "-", rhs_tooltip)) - rhs_diff.append( - (f"{rhs_stat.name} [{rhs_stat.unit}]", str(rhs_stat_agg.as_table()[0]), rhs_tooltip) - ) - elif lhs_stat and rhs_stat is None: - # There is no matching stat on the RHS - lhs_stat_agg = pstats.aggregate_stats(lhs_stat) - lhs_tooltip = normalize_stat_tooltip( - lhs_stat.tooltip, lhs_stat_agg.infer_auto_ordering(lhs_stat.ordering) - ) - lhs_diff.append( - (f"{lhs_stat.name} [{lhs_stat.unit}]", str(lhs_stat_agg.as_table()[0]), lhs_tooltip) - ) - rhs_diff.append((f"{lhs_stat.name} [{lhs_stat.unit}]", "-", lhs_tooltip)) - elif lhs_stat and rhs_stat: - # The stat is present on both LHS and RHS - lhs_stat_agg = pstats.aggregate_stats(lhs_stat) - rhs_stat_agg = pstats.aggregate_stats(rhs_stat) - rhs_tooltip = normalize_stat_tooltip( - rhs_stat.tooltip, rhs_stat_agg.infer_auto_ordering(rhs_stat.ordering) - ) - lhs_tooltip = normalize_stat_tooltip( - lhs_stat.tooltip, lhs_stat_agg.infer_auto_ordering(lhs_stat.ordering) - ) - lhs_value, rhs_value = _color_stat_value_diff( - lhs_stat_agg, 
rhs_stat_agg, lhs_stat.ordering - ) - lhs_diff.append((f"{lhs_stat.name} [{lhs_stat.unit}]", lhs_value, lhs_tooltip)) - rhs_diff.append((f"{rhs_stat.name} [{rhs_stat.unit}]", rhs_value, rhs_tooltip)) + lhs_diff.append(_generate_stat_diff_record(lhs_stat, rhs_stat)) + rhs_diff.append(_generate_stat_diff_record(rhs_stat, lhs_stat)) return lhs_diff, rhs_diff @@ -307,3 +228,63 @@ def normalize_stat_tooltip(tooltip: str, ordering: pstats.ProfileStatOrdering) - def _emphasize(color: ColorChoiceType, value: str) -> str: return f'{value}' + + +def _format_exit_codes(exit_code: str | list[str] | list[int]) -> str: + # Unify the exit code types + exit_codes: list[str] = [] + if isinstance(exit_code, str): + exit_codes.append(exit_code) + else: + exit_codes = list(map(str, exit_code)) + # Color exit codes that are not zero + return ", ".join(code if code == "0" else _emphasize("red", code) for code in exit_codes) + + +def _generate_stat_diff_record( + stat: pstats.ProfileStat | None, other_stat: pstats.ProfileStat | None +) -> tuple[str, str, str]: + if stat is None: + # The stat is missing, use some info from the other stat + assert other_stat is not None + return f"{other_stat.name} [{other_stat.unit}]", "-", "missing stat info" + else: + stat_agg = pstats.aggregate_stats(stat) + tooltip = normalize_stat_tooltip(stat.tooltip, stat_agg.infer_auto_ordering(stat.ordering)) + return ( + f"{stat.name} [{stat.unit}] ({stat_agg.normalize_aggregate_key(stat.aggregate_by)})", + str(stat_agg.as_table()[0]), + tooltip, + ) + + +def _color_stat_value_diff( + lhs_stat_agg: pstats.ProfileStatAggregation, + rhs_stat_agg: pstats.ProfileStatAggregation, + aggregate_key: str, + ordering: pstats.ProfileStatOrdering, +) -> tuple[str, str]: + """Color the stat values on the LHS and RHS according to their difference. + + The color is determined by the stat ordering and the result of the stat values comparison. 
+ + :param lhs_stat: a stat from the baseline + :param rhs_stat: a stat from the target + :return: colored LHS and RHS stat values + """ + comparison_result = pstats.compare_stats(lhs_stat_agg, rhs_stat_agg, aggregate_key, ordering) + color_map: dict[pstats.StatComparisonResult, tuple[ColorChoiceType, ColorChoiceType]] = { + pstats.StatComparisonResult.INVALID: ("red", "red"), + pstats.StatComparisonResult.UNEQUAL: ("red", "red"), + pstats.StatComparisonResult.EQUAL: ("black", "black"), + pstats.StatComparisonResult.BASELINE_BETTER: ("green", "red"), + pstats.StatComparisonResult.TARGET_BETTER: ("red", "green"), + } + + baseline_color, target_color = color_map[comparison_result] + if comparison_result == pstats.StatComparisonResult.INVALID: + baseline_value, target_value = "", "" + else: + baseline_value = str(lhs_stat_agg.as_table()[0]) + target_value = str(rhs_stat_agg.as_table()[0]) + return _emphasize(baseline_color, baseline_value), _emphasize(target_color, target_value) diff --git a/perun/view_diff/report/run.py b/perun/view_diff/report/run.py index 25391f80..19b94907 100755 --- a/perun/view_diff/report/run.py +++ b/perun/view_diff/report/run.py @@ -535,8 +535,8 @@ def process_traces( f"Overall {name}", profile_stats.ProfileStatOrdering.LOWER, unit, - f"The overall value of the {name} for the root value", - max_samples[key], + tooltip=f"The overall value of the {name} for the root value", + value=max_samples[key], ) ) Config().profile_stats[profile_type].append( @@ -544,8 +544,8 @@ def process_traces( "Maximum Trace Length", profile_stats.ProfileStatOrdering.LOWER, "#", - "Maximum length of the trace in the profile", - max_trace, + tooltip="Maximum length of the trace in the profile", + value=max_trace, ) ) Config().max_seen_trace = max(max_trace, Config().max_seen_trace) From 8996a7e5cd4460d523a56371e24e57863fcdfbec Mon Sep 17 00:00:00 2001 From: Jiri Pavela Date: Wed, 11 Sep 2024 10:25:10 +0200 Subject: [PATCH 05/14] Add support for profile metadata for imports Profile metadata may be specified in the import CLI now. A proper internal representation of profile metadata has been implemented so that metadata may have a tooltip information associated with them. --- perun/cli_groups/import_cli.py | 11 +++++ perun/profile/helpers.py | 42 +++++++++++++++++++ perun/profile/imports.py | 13 +++++- .../diff_view_flamegraph.html.jinja2 | 2 +- perun/templates/diff_view_report.html.jinja2 | 2 +- perun/utils/common/cli_kit.py | 22 ++++++++++ perun/utils/common/diff_kit.py | 17 ++++++-- 7 files changed, 102 insertions(+), 7 deletions(-) diff --git a/perun/cli_groups/import_cli.py b/perun/cli_groups/import_cli.py index 79998062..d106ec90 100755 --- a/perun/cli_groups/import_cli.py +++ b/perun/cli_groups/import_cli.py @@ -11,6 +11,7 @@ # Perun Imports from perun.logic import commands from perun.profile import imports +from perun.utils.common import cli_kit @click.group("import") @@ -45,6 +46,16 @@ help="Describes the stats associated with the imported profiles. 
Please see the import " "documentation for details regarding the stat description format.", ) +@click.option( + "--metadata", + "-md", + multiple=True, + default=None, + metavar="[KEY|VALUE|[TOOLTIP]]", + callback=cli_kit.process_metadata, + help="Describes the metadata associated with the imported profiles as key: value pairs with " + "an optional tooltip information.", +) @click.option( "--cmd", "-c", diff --git a/perun/profile/helpers.py b/perun/profile/helpers.py index a9ffec05..ad033df3 100644 --- a/perun/profile/helpers.py +++ b/perun/profile/helpers.py @@ -17,6 +17,7 @@ from __future__ import annotations # Standard Imports +import dataclasses import json import operator import os @@ -609,3 +610,44 @@ def is_compatible_with_profile(self, profile: profiles.Profile) -> bool: "checksum", "source", ] + + +@dataclasses.dataclass +class ProfileMetadata: + """A representation of a single profile metadata entry. + + :ivar name: the name (key) of the metadata entry + :ivar value: the value of the metadata entry + :ivar tooltip: detailed description of the metadata entry + """ + + name: str + value: str | float + tooltip: str = "" + + @classmethod + def from_string(cls, metadata: str) -> ProfileMetadata: + """Constructs a ProfileMetadata object from a string representation. + + :param metadata: the string representation of a metadata entry + + :return: the constructed ProfileMetadata object + """ + return cls(*metadata.split("|")) + + @classmethod + def from_profile(cls, metadata: dict[str, Any]) -> ProfileMetadata: + """Constructs a ProfileMetadata object from a dictionary representation used in Profile. + + :param metadata: the dictionary representation of a metadata entry + + :return: the constructed ProfileMetadata object + """ + return cls(**metadata) + + def as_tuple(self) -> tuple[str, str | float, str]: + """Converts the metadata object into a tuple. 
+ + :return: the tuple representation of a metadata entry + """ + return self.name, self.value, self.tooltip diff --git a/perun/profile/imports.py b/perun/profile/imports.py index 6cff9a67..141545c5 100755 --- a/perun/profile/imports.py +++ b/perun/profile/imports.py @@ -168,6 +168,7 @@ def import_perf_profile( resources: list[dict[str, Any]], minor_version: MinorVersion, machine_info: Optional[str] = None, + metadata: list[profile_helpers.ProfileMetadata] | None = None, with_sudo: bool = False, save_to_index: bool = False, **kwargs: Any, @@ -178,9 +179,10 @@ def import_perf_profile( :param resources: list of parsed resources :param minor_version: minor version corresponding to the imported profiles :param machine_info: additional dictionary with machine specification + :param metadata: additional metadata to associate with the profile :param with_sudo: indication whether the data were collected with sudo :param save_to_index: indication whether we should save the imported profiles to index - :param kwargs: rest of the paramters + :param kwargs: rest of the parameters """ prof = Profile( { @@ -192,6 +194,7 @@ def import_perf_profile( ) prof.update({"origin": minor_version.checksum}) prof.update({"machine": get_machine_info(machine_info)}) + prof.update({"metadata": [asdict(data) for data in metadata] if metadata is not None else []}), prof.update({"stats": [asdict(stat) for stat in profiles.aggregate_stats()]}), prof.update( { @@ -409,7 +412,13 @@ def import_elk_profile( } ) prof.update({"origin": minor_version.checksum}) - prof.update({"metadata": metadata}) + prof.update( + { + "metadata": [ + profile_helpers.ProfileMetadata(key, value) for key, value in metadata.items() + ] + } + ) prof.update({"machine": extract_machine_info_from_metadata(metadata)}) prof.update( { diff --git a/perun/templates/diff_view_flamegraph.html.jinja2 b/perun/templates/diff_view_flamegraph.html.jinja2 index a3e3540f..94340f6f 100755 --- a/perun/templates/diff_view_flamegraph.html.jinja2 +++ b/perun/templates/diff_view_flamegraph.html.jinja2 @@ -116,7 +116,7 @@ {{ profile_overview.overview_table('toggleRightProfileCollapse', 'right-profile-info', rhs_stats, lhs_stats, "Profile Stats") }}
 <br>
{%- if rhs_metadata%} - {{ profile_overview.overview_table('toggleRightMetadata', 'right-metadata-info', rhs_metadata, lhs_metadata, "Profile Metadata") }} + {{ profile_overview.overview_table('toggleRightMetadataCollapse', 'right-metadata-info', rhs_metadata, lhs_metadata, "Profile Metadata") }}
 <br>
{%- endif %} diff --git a/perun/templates/diff_view_report.html.jinja2 b/perun/templates/diff_view_report.html.jinja2 index 43f018ed..c597fbad 100755 --- a/perun/templates/diff_view_report.html.jinja2 +++ b/perun/templates/diff_view_report.html.jinja2 @@ -192,7 +192,7 @@ {{ profile_overview.overview_table('toggleRightProfileCollapse', 'right-profile-info', rhs_stats, lhs_stats, "Profile Stats") }}
 <br>
{%- if rhs_metadata%} - {{ profile_overview.overview_table('toggleRightMetadata', 'right-metadata-info', rhs_metadata, lhs_metadata, "Profile Metadata") }} + {{ profile_overview.overview_table('toggleRightMetadataCollapse', 'right-metadata-info', rhs_metadata, lhs_metadata, "Profile Metadata") }}
 <br>
{%- endif %} diff --git a/perun/utils/common/cli_kit.py b/perun/utils/common/cli_kit.py index a45a81b2..ec4cedeb 100644 --- a/perun/utils/common/cli_kit.py +++ b/perun/utils/common/cli_kit.py @@ -906,6 +906,28 @@ def configure_metrics(_: click.Context, __: click.Option, value: tuple[str, str] metrics.Metrics.configure(value[0], value[1]) +def process_metadata( + _: click.Context, __: click.Option, value: tuple[str] | None +) -> list[profile_helpers.ProfileMetadata]: + """Parse the metadata strings from CLI and convert them to our internal representation. + + :param _: click context + :param __: the click parameter + :param value: metadata strings + + :return: a collection of parsed and converted metadata objects + """ + profile_metadata: list[profile_helpers.ProfileMetadata] = [] + if value is None: + return profile_metadata + for metadata_str in value: + try: + profile_metadata.append(profile_helpers.ProfileMetadata.from_string(metadata_str)) + except TypeError: + log.warn(f"Ignoring invalid profile metadata string '{metadata_str}'.") + return profile_metadata + + def get_supported_module_names(package: str) -> list[str]: """Obtains list of supported modules supported by the package. diff --git a/perun/utils/common/diff_kit.py b/perun/utils/common/diff_kit.py index 2aad25a4..5c7de9ec 100755 --- a/perun/utils/common/diff_kit.py +++ b/perun/utils/common/diff_kit.py @@ -3,9 +3,10 @@ from __future__ import annotations # Standard Imports +import dataclasses import difflib import os -from typing import Any, Optional, Iterable, Literal +from typing import Any, Optional, Iterable, Literal, cast # Third-Party Imports @@ -212,11 +213,21 @@ def generate_metadata( :param rhs_profile: profile for target :return: pair of metadata for lhs (baseline) and rhs (target) """ + data: dict[str, Any] + lhs_metadata = sorted( - [(k, v, "") for k, v in lhs_profile.get("metadata", {}).items()], key=lambda x: x[0] + [ + helpers.ProfileMetadata.from_profile(data).as_tuple() + for data in lhs_profile.get("metadata", []) + ], + key=lambda x: x[0], ) rhs_metadata = sorted( - [(k, v, "") for k, v in rhs_profile.get("metadata", {}).items()], key=lambda x: x[0] + [ + helpers.ProfileMetadata.from_profile(data).as_tuple() + for data in rhs_profile.get("metadata", []) + ], + key=lambda x: x[0], ) return generate_diff_of_headers(lhs_metadata, rhs_metadata) From 93a6b07fd0c32ca6b15df36c45c0e8909a898a06 Mon Sep 17 00:00:00 2001 From: Jiri Pavela Date: Wed, 11 Sep 2024 11:12:35 +0200 Subject: [PATCH 06/14] Rework the path handling in perun import Relative paths (to csv files, machine info file, profiles, ...) are now always prepended with the import-dir path, if provided. Absolute paths ignore the import-dir path and are kept as is. --- perun/cli_groups/import_cli.py | 2 +- perun/profile/imports.py | 88 +++++++++++++++++++++++----------- 2 files changed, 60 insertions(+), 30 deletions(-) diff --git a/perun/cli_groups/import_cli.py b/perun/cli_groups/import_cli.py index d106ec90..642aa8ee 100755 --- a/perun/cli_groups/import_cli.py +++ b/perun/cli_groups/import_cli.py @@ -18,7 +18,7 @@ @click.option( "--machine-info", "-i", - type=click.Path(resolve_path=True, readable=True), + type=click.Path(), help="Imports machine info from file in JSON format (by default, machine info is loaded from " "the current host). 
You can use `utils/generate_machine_info.sh` script to generate the " "machine info file.", diff --git a/perun/profile/imports.py b/perun/profile/imports.py index 141545c5..743795e8 100755 --- a/perun/profile/imports.py +++ b/perun/profile/imports.py @@ -30,6 +30,7 @@ # TODO: add documentation # TODO: fix stats in other types of diffviews # TODO: refactor the perf import type commands: there is a lot of code duplication +# TODO: unify handling of path open success / fail @dataclass @@ -52,7 +53,7 @@ class ImportedProfiles: __slots__ = "import_dir", "stats", "profiles" def __init__(self, targets: list[str], import_dir: str | None, stats_info: str | None) -> None: - self.import_dir: Path = Path(import_dir) if import_dir is not None else Path.cwd() + self.import_dir: Path = Path(import_dir.strip()) if import_dir is not None else Path.cwd() # Parse the CLI stats if available self.stats: list[profile_stats.ProfileStat] = [] self.profiles: list[ImportProfileSpec] = [] @@ -95,27 +96,33 @@ def aggregate_stats(self) -> Iterator[profile_stats.ProfileStat]: yield stat_obj def _parse_import_csv(self, target: str) -> None: - with open(self.import_dir / target, "r") as csvfile: - csv_reader = csv.reader(csvfile, delimiter=",") - header: list[str] = next(csv_reader) - stats: list[profile_stats.ProfileStat] = [ - profile_stats.ProfileStat.from_string(*stat_definition.split("|")) - for stat_definition in header[2:] - ] - # Parse the CSV stat definition and check that they are not in conflict with the CLI - # stat definitions, if any - for idx, stat in enumerate(stats): - if idx >= len(self.stats): - self.stats.append(stat) - elif stat != self.stats[idx]: - log.warn( - f"Mismatching profile stat definition from CLI and CSV: " - f"cli.{self.stats[idx].name} != csv.{stat.name}. " - f"Using the CLI stat definition." - ) - # Parse the remaining rows that should represent profile specifications - for row in csv_reader: - self._add_imported_profile(row) + csv_path = _massage_import_path(self.import_dir, target) + try: + with open(csv_path, "r") as csvfile: + log.minor_success(log.path_style(str(csv_path)), "found") + csv_reader = csv.reader(csvfile, delimiter=",") + header: list[str] = next(csv_reader) + stats: list[profile_stats.ProfileStat] = [ + profile_stats.ProfileStat.from_string(*stat_definition.split("|")) + for stat_definition in header[2:] + ] + # Parse the CSV stat definition and check that they are not in conflict with the CLI + # stat definitions, if any + for idx, stat in enumerate(stats): + if idx >= len(self.stats): + self.stats.append(stat) + elif stat != self.stats[idx]: + log.warn( + f"Mismatching profile stat definition from CLI and CSV: " + f"cli.{self.stats[idx].name} != csv.{stat.name}. " + f"Using the CLI stat definition." 
+ ) + # Parse the remaining rows that should represent profile specifications + for row in csv_reader: + self._add_imported_profile(row) + except OSError as exc: + log.minor_fail(log.path_style(str(csv_path)), "not found") + log.error(str(exc), exc) def _add_imported_profile(self, target: list[str]) -> None: if len(target) == 0: @@ -124,7 +131,7 @@ def _add_imported_profile(self, target: list[str]) -> None: else: # Make sure we strip the leading and trailing whitespaces in each column value profile_info = ImportProfileSpec( - self.import_dir / target[0].strip(), + _massage_import_path(self.import_dir, target[0]), int(target[1].strip()) if len(target) >= 2 else ImportProfileSpec.exit_code, list(map(lambda stat_value: float(stat_value.strip()), target[2:])), ) @@ -150,17 +157,22 @@ def load_file(filepath: Path) -> str: return imported_handle.read() -def get_machine_info(machine_info: Optional[str] = None) -> dict[str, Any]: +def get_machine_info(import_dir: Path, machine_info: Optional[str] = None) -> dict[str, Any]: """Returns machine info either from input file or constructs it from environment + :param import_dir: path to a directory where the machine info will be looked up :param machine_info: file in json format, which contains machine specification :return: parsed dictionary format of machine specification """ if machine_info is not None: - with open(machine_info, "r") as machine_handle: - return json.load(machine_handle) - else: - return environment.get_machine_specification() + info_path = _massage_import_path(import_dir, machine_info) + try: + with open(info_path, "r") as machine_handle: + log.minor_success(log.path_style(str(info_path)), "found") + return json.load(machine_handle) + except OSError: + log.minor_fail(log.path_style(str(info_path)), "not found, generating info") + return environment.get_machine_specification() def import_perf_profile( @@ -193,7 +205,7 @@ def import_perf_profile( } ) prof.update({"origin": minor_version.checksum}) - prof.update({"machine": get_machine_info(machine_info)}) + prof.update({"machine": get_machine_info(profiles.import_dir, machine_info)}) prof.update({"metadata": [asdict(data) for data in metadata] if metadata is not None else []}), prof.update({"stats": [asdict(stat) for stat in profiles.aggregate_stats()]}), prof.update( @@ -513,3 +525,21 @@ def import_elk_from_json( metadata.update(m) log.minor_success(log.path_style(str(imported_file)), "imported") import_elk_profile(resources, metadata, minor_version_info, **kwargs) + + +def _massage_import_path(import_dir: Path, path_str: str) -> Path: + """Massages path strings into unified path format. + + First, the path string is stripped of leading and trailing whitespaces. + Next, absolute paths are kept as is, while relative paths are prepended with the + provided import directory. 
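+    For example (illustrative paths): with an import directory of '/tmp/imports', the
+    relative path 'data/prof.json' becomes '/tmp/imports/data/prof.json', whereas the
+    absolute path '/var/prof.json' is kept unchanged.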
+ + :param import_dir: the import directory to use for relative paths + :param path_str: the path string to massage + + :return: massaged path + """ + path: Path = Path(path_str.strip()) + if path.is_absolute(): + return path + return import_dir / path From 4fb5e4b824fe36b99a169b597eab227d137790b6 Mon Sep 17 00:00:00 2001 From: Jiri Pavela Date: Wed, 11 Sep 2024 13:06:43 +0200 Subject: [PATCH 07/14] Fix stat generation for showdiff --- perun/profile/imports.py | 10 +++++- perun/utils/common/diff_kit.py | 63 +++++++++++++++++++++++++--------- 2 files changed, 55 insertions(+), 18 deletions(-) diff --git a/perun/profile/imports.py b/perun/profile/imports.py index 743795e8..e96705df 100755 --- a/perun/profile/imports.py +++ b/perun/profile/imports.py @@ -133,12 +133,20 @@ def _add_imported_profile(self, target: list[str]) -> None: profile_info = ImportProfileSpec( _massage_import_path(self.import_dir, target[0]), int(target[1].strip()) if len(target) >= 2 else ImportProfileSpec.exit_code, - list(map(lambda stat_value: float(stat_value.strip()), target[2:])), + list(map(ImportedProfiles._massage_stat_value, target[2:])), ) if profile_info.exit_code != 0: log.warn("Importing a profile with non-zero exit code.") self.profiles.append(profile_info) + @staticmethod + def _massage_stat_value(stat_value: str) -> str | float: + stat_value = stat_value.strip() + try: + return float(stat_value) + except ValueError: + return stat_value + def load_file(filepath: Path) -> str: """Tests if the file is packed by gzip and unpacks it, otherwise reads it as a text file diff --git a/perun/utils/common/diff_kit.py b/perun/utils/common/diff_kit.py index 5c7de9ec..0e8a2b9c 100755 --- a/perun/utils/common/diff_kit.py +++ b/perun/utils/common/diff_kit.py @@ -3,10 +3,9 @@ from __future__ import annotations # Standard Imports -import dataclasses import difflib import os -from typing import Any, Optional, Iterable, Literal, cast +from typing import Any, Optional, Iterable, Literal # Third-Party Imports @@ -159,8 +158,9 @@ def generate_diff_of_stats( for stat_key in sorted(stats_map.keys()): lhs_stat: pstats.ProfileStat | None = stats_map[stat_key].get("lhs", None) rhs_stat: pstats.ProfileStat | None = stats_map[stat_key].get("rhs", None) - lhs_diff.append(_generate_stat_diff_record(lhs_stat, rhs_stat)) - rhs_diff.append(_generate_stat_diff_record(rhs_stat, lhs_stat)) + lhs_info, rhs_info = _generate_stat_diff_record(lhs_stat, rhs_stat) + lhs_diff.append(lhs_info) + rhs_diff.append(rhs_info) return lhs_diff, rhs_diff @@ -253,20 +253,49 @@ def _format_exit_codes(exit_code: str | list[str] | list[int]) -> str: def _generate_stat_diff_record( - stat: pstats.ProfileStat | None, other_stat: pstats.ProfileStat | None -) -> tuple[str, str, str]: - if stat is None: - # The stat is missing, use some info from the other stat - assert other_stat is not None - return f"{other_stat.name} [{other_stat.unit}]", "-", "missing stat info" - else: - stat_agg = pstats.aggregate_stats(stat) - tooltip = normalize_stat_tooltip(stat.tooltip, stat_agg.infer_auto_ordering(stat.ordering)) - return ( - f"{stat.name} [{stat.unit}] ({stat_agg.normalize_aggregate_key(stat.aggregate_by)})", - str(stat_agg.as_table()[0]), - tooltip, + lhs_stat: pstats.ProfileStat | None, rhs_stat: pstats.ProfileStat | None +) -> tuple[tuple[str, str, str], tuple[str, str, str]]: + lhs_stat_agg: pstats.ProfileStatAggregation | None = None + lhs_name: str = "" + lhs_value: str = "-" + lhs_tooltip: str = "missing stat info" + rhs_stat_agg: pstats.ProfileStatAggregation | 
None = None + rhs_name: str = "" + rhs_value: str = "-" + rhs_tooltip: str = "missing stat info" + if lhs_stat is not None: + lhs_stat_agg = pstats.aggregate_stats(lhs_stat) + unit = f" [{lhs_stat.unit}]" if lhs_stat.unit else "" + lhs_name = ( + f"{lhs_stat.name}{unit} " + f"({lhs_stat_agg.normalize_aggregate_key(lhs_stat.aggregate_by)})" + ) + rhs_name = f"{lhs_stat.name}{unit}" + lhs_value = str(lhs_stat_agg.as_table()[0]) + lhs_tooltip = normalize_stat_tooltip( + lhs_stat.tooltip, lhs_stat_agg.infer_auto_ordering(lhs_stat.ordering) + ) + if rhs_stat is not None: + rhs_stat_agg = pstats.aggregate_stats(rhs_stat) + unit = f" [{rhs_stat.unit}]" if rhs_stat.unit else "" + rhs_name = ( + f"{rhs_stat.name}{unit} " + f"({rhs_stat_agg.normalize_aggregate_key(rhs_stat.aggregate_by)})" + ) + lhs_name = lhs_name if lhs_name else f"{rhs_stat.name}{unit}" + rhs_value = str(rhs_stat_agg.as_table()[0]) + rhs_tooltip = normalize_stat_tooltip( + rhs_stat.tooltip, rhs_stat_agg.infer_auto_ordering(rhs_stat.ordering) + ) + if lhs_stat_agg is not None and rhs_stat_agg is not None: + assert lhs_stat is not None + lhs_value, rhs_value = _color_stat_value_diff( + lhs_stat_agg, + rhs_stat_agg, + lhs_stat.aggregate_by, + lhs_stat_agg.infer_auto_ordering(lhs_stat.ordering), ) + return (lhs_name, lhs_value, lhs_tooltip), (rhs_name, rhs_value, rhs_tooltip) def _color_stat_value_diff( From cf02833763489ca28daa3b62c0f82d2d1a08a7aa Mon Sep 17 00:00:00 2001 From: Jiri Pavela Date: Mon, 16 Sep 2024 19:37:14 +0200 Subject: [PATCH 08/14] Finish metadata and stats loading and output --- perun/cli_groups/import_cli.py | 15 +- perun/profile/factory.py | 18 +- perun/profile/imports.py | 127 ++++++---- perun/utils/common/cli_kit.py | 22 -- perun/utils/common/common_kit.py | 32 ++- perun/utils/common/diff_kit.py | 387 +++++++++++++++++++----------- perun/view_diff/flamegraph/run.py | 87 ++++--- perun/view_diff/report/run.py | 46 ++-- 8 files changed, 454 insertions(+), 280 deletions(-) diff --git a/perun/cli_groups/import_cli.py b/perun/cli_groups/import_cli.py index 642aa8ee..d895e596 100755 --- a/perun/cli_groups/import_cli.py +++ b/perun/cli_groups/import_cli.py @@ -11,7 +11,6 @@ # Perun Imports from perun.logic import commands from perun.profile import imports -from perun.utils.common import cli_kit @click.group("import") @@ -42,19 +41,19 @@ "-t", nargs=1, default=None, - metavar="[STAT]", - help="Describes the stats associated with the imported profiles. Please see the import " - "documentation for details regarding the stat description format.", + metavar="[STAT_HEADER+]", + help="Describes the stats headers associated with the imported profiles. Each stats header is " + "in the form of 'NAME[|ORDERING[|UNIT[|AGGREGATE_BY[|TOOLTIP]]]]'.", ) @click.option( "--metadata", "-md", multiple=True, default=None, - metavar="[KEY|VALUE|[TOOLTIP]]", - callback=cli_kit.process_metadata, - help="Describes the metadata associated with the imported profiles as key: value pairs with " - "an optional tooltip information.", + metavar="['KEY|VALUE|[TOOLTIP]'] or [FILE.json]", + help="Describes a single metadata entry associated with the imported profiles as a " + "'key|value[|tooltip]' string or multiple entries in a JSON file that will be flattened if" + "needed. 
The --metadata option may be specified multiple times.", ) @click.option( "--cmd", diff --git a/perun/profile/factory.py b/perun/profile/factory.py index d16f125a..52d2dcee 100644 --- a/perun/profile/factory.py +++ b/perun/profile/factory.py @@ -21,7 +21,7 @@ # Perun Imports from perun.logic import config from perun.postprocess.regression_analysis import regression_models -from perun.profile import convert, query +from perun.profile import convert, query, stats, helpers from perun.utils import log from perun.utils.common import common_kit import perun.check.detection_kit as detection @@ -455,6 +455,22 @@ def all_snapshots(self) -> Iterable[tuple[int, list[dict[str, Any]]]]: for i in range(0, maximal_snapshot + 1): yield i, snapshot_map[i] + def all_stats(self) -> Iterable[stats.ProfileStat]: + """Iterates through all the stats records in the profile. + + :return: iterable of all stats records + """ + for stat in self._storage.get("stats", {}): + yield stats.ProfileStat.from_profile(stat) + + def all_metadata(self) -> Iterable[helpers.ProfileMetadata]: + """Iterates through all the metadata records in the profile. + + :return: iterable of all metadata records + """ + for entry in self._storage.get("metadata", {}): + yield helpers.ProfileMetadata.from_profile(entry) + # TODO: discuss the intent of __len__ and possibly merge? def resources_size(self) -> int: """Returns the number of resources stored in the internal storage. diff --git a/perun/profile/imports.py b/perun/profile/imports.py index e96705df..72a12015 100755 --- a/perun/profile/imports.py +++ b/perun/profile/imports.py @@ -18,7 +18,7 @@ # Perun Imports from perun.collect.kperf import parser from perun.logic import commands, index, pcs -from perun.profile import helpers as profile_helpers, stats as profile_stats +from perun.profile import query, helpers as profile_helpers, stats as profile_stats from perun.profile.factory import Profile from perun.utils import log, streams from perun.utils.common import script_kit, common_kit @@ -188,7 +188,6 @@ def import_perf_profile( resources: list[dict[str, Any]], minor_version: MinorVersion, machine_info: Optional[str] = None, - metadata: list[profile_helpers.ProfileMetadata] | None = None, with_sudo: bool = False, save_to_index: bool = False, **kwargs: Any, @@ -199,7 +198,6 @@ def import_perf_profile( :param resources: list of parsed resources :param minor_version: minor version corresponding to the imported profiles :param machine_info: additional dictionary with machine specification - :param metadata: additional metadata to associate with the profile :param with_sudo: indication whether the data were collected with sudo :param save_to_index: indication whether we should save the imported profiles to index :param kwargs: rest of the parameters @@ -209,26 +207,21 @@ def import_perf_profile( "global": { "time": "???", "resources": resources, - } - } - ) - prof.update({"origin": minor_version.checksum}) - prof.update({"machine": get_machine_info(profiles.import_dir, machine_info)}) - prof.update({"metadata": [asdict(data) for data in metadata] if metadata is not None else []}), - prof.update({"stats": [asdict(stat) for stat in profiles.aggregate_stats()]}), - prof.update( - { + }, + "origin": minor_version.checksum, + "machine": get_machine_info(profiles.import_dir, machine_info), + "metadata": [ + asdict(data) + for data in _import_metadata(profiles.import_dir, kwargs.get("metadata", tuple())) + ], + "stats": [asdict(stat) for stat in profiles.aggregate_stats()], "header": { "type": "time", 
"cmd": kwargs.get("cmd", ""), "exitcode": profiles.get_exit_codes(), "workload": kwargs.get("workload", ""), "units": {"time": "sample"}, - } - } - ) - prof.update( - { + }, "collector_info": { "name": "kperf", "params": { @@ -236,10 +229,10 @@ def import_perf_profile( "warmup": kwargs.get("warmup", 0), "repeat": len(profiles), }, - } + }, + "postprocessors": [], } ) - prof.update({"postprocessors": []}) save_imported_profile(prof, save_to_index, minor_version) @@ -286,7 +279,7 @@ def import_perf_from_record( :param stats_info: additional statistics collected for the profile (i.e. non-resource types) :param minor_version: minor version corresponding to the imported profiles :param with_sudo: indication whether the data were collected with sudo - :param kwargs: rest of the paramters + :param kwargs: rest of the parameters """ parse_script = script_kit.get_script("stackcollapse-perf.pl") minor_version_info = pcs.vcs().get_minor_version_info(minor_version) @@ -330,7 +323,7 @@ def import_perf_from_script( :param import_dir: different directory for importing the profiles :param stats_info: additional statistics collected for the profile (i.e. non-resource types) :param minor_version: minor version corresponding to the imported profiles - :param kwargs: rest of the paramters + :param kwargs: rest of the parameters """ parse_script = script_kit.get_script("stackcollapse-perf.pl") minor_version_info = pcs.vcs().get_minor_version_info(minor_version) @@ -363,7 +356,7 @@ def import_perf_from_stack( :param import_dir: different directory for importing the profiles :param stats_info: additional statistics collected for the profile (i.e. non-resource types) :param minor_version: minor version corresponding to the imported profiles - :param kwargs: rest of the paramters + :param kwargs: rest of the parameters """ minor_version_info = pcs.vcs().get_minor_version_info(minor_version) profiles = ImportedProfiles(imported, import_dir, stats_info) @@ -380,7 +373,7 @@ def import_perf_from_stack( def extract_machine_info_from_metadata(metadata: dict[str, Any]) -> dict[str, Any]: """Extracts the parts of the profile, that corresponds to machine info - Note that not many is collected from the ELK formats and it can vary greatly, + Note that not many is collected from the ELK formats, and it can vary greatly, hence, most of the machine specification and environment should be in metadata instead. :param metadata: metadata extracted from the ELK profiles @@ -400,11 +393,11 @@ def extract_machine_info_from_metadata(metadata: dict[str, Any]) -> dict[str, An "total_ram": metadata.get("machine.ram", "?"), "swap": "?", }, + "boot_info": "?", + "mem_details": {}, + "cpu_details": [], } - machine_info["boot_info"] = "?" 
- machine_info["mem_details"] = {} - machine_info["cpu_details"] = [] return machine_info @@ -421,46 +414,33 @@ def import_elk_profile( :param metadata: parts of the profiles that will be stored as metadata in the profile :param minor_version: minor version corresponding to the imported profiles :param save_to_index: indication whether we should save the imported profiles to index - :param kwargs: rest of the paramters + :param kwargs: rest of the parameters """ prof = Profile( { "global": { "time": "???", "resources": resources, - } - } - ) - prof.update({"origin": minor_version.checksum}) - prof.update( - { + }, + "origin": minor_version.checksum, "metadata": [ profile_helpers.ProfileMetadata(key, value) for key, value in metadata.items() - ] - } - ) - prof.update({"machine": extract_machine_info_from_metadata(metadata)}) - prof.update( - { + ], + "machine": extract_machine_info_from_metadata(metadata), "header": { "type": "time", "cmd": kwargs.get("cmd", ""), "exitcode": "?", "workload": kwargs.get("workload", ""), "units": {"time": "sample"}, - } - } - ) - prof.update( - { + }, "collector_info": { "name": "???", "params": {}, - } + }, + "postprocessors": [], } ) - prof.update({"postprocessors": []}) - save_imported_profile(prof, save_to_index, minor_version) @@ -469,9 +449,9 @@ def extract_from_elk( ) -> tuple[list[dict[str, Any]], dict[str, Any]]: """For the given elk query, extracts resources and metadata. - For metadata we consider any key that has only single value through the profile, + For metadata, we consider any key that has only single value through the profile, and is not linked to keywords `metric` or `benchmarking`. - For resources we consider anything that is not identified as metadata + For resources, we consider anything that is not identified as metadata :param elk_query: query from the elk in form of list of resource :return: list of resources and metadata @@ -516,7 +496,7 @@ def import_elk_from_json( :param imported: list of filenames with elk data. :param minor_version: minor version corresponding to the imported profiles - :param kwargs: rest of the paramters + :param kwargs: rest of the parameters """ minor_version_info = pcs.vcs().get_minor_version_info(minor_version) @@ -535,6 +515,53 @@ def import_elk_from_json( import_elk_profile(resources, metadata, minor_version_info, **kwargs) +def _import_metadata( + import_dir: Path, metadata: tuple[str, ...] | None +) -> list[profile_helpers.ProfileMetadata]: + """Parse the metadata strings from CLI and convert them to our internal representation. 
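+
+    Each metadata entry is either a single 'key|value[|tooltip]' string (e.g., the
+    illustrative 'os.name|Ubuntu') or a path to a JSON file whose contents are flattened
+    into individual entries.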
+ + :param import_dir: the import directory to use for relative metadata file paths + :param metadata: a collection of metadata strings or files + + :return: a collection of parsed and converted metadata objects + """ + p_metadata: list[profile_helpers.ProfileMetadata] = [] + if metadata is None: + return [] + # Normalize the metadata string for parsing and/or opening the file + for metadata_str in map(str.strip, metadata): + if metadata_str.lower().endswith(".json"): + # Update the metadata collection with entries from the json file + p_metadata.extend(_parse_metadata_json(_massage_import_path(import_dir, metadata_str))) + else: + # Add a single metadata entry parsed from its string representation + try: + p_metadata.append(profile_helpers.ProfileMetadata.from_string(metadata_str)) + except TypeError: + log.warn(f"Ignoring invalid profile metadata string '{metadata_str}'.") + return p_metadata + + +def _parse_metadata_json(metadata_path: Path) -> list[profile_helpers.ProfileMetadata]: + """Parse a metadata JSON file into the metadata objects. + + :param metadata_path: the path to the metadata JSON + + :return: a collection of parsed metadata objects + """ + try: + with open(metadata_path, "r") as metadata_handle: + log.minor_success(log.path_style(str(metadata_path)), "found") + metadata_dict: dict[str, Any] = json.load(metadata_handle) + # Make sure we flatten the input + return [ + profile_helpers.ProfileMetadata(k, v) for k, v in query.all_items_of(metadata_dict) + ] + except OSError: + log.minor_fail(log.path_style(str(metadata_path)), "not found, skipping") + return [] + + def _massage_import_path(import_dir: Path, path_str: str) -> Path: """Massages path strings into unified path format. diff --git a/perun/utils/common/cli_kit.py b/perun/utils/common/cli_kit.py index ec4cedeb..a45a81b2 100644 --- a/perun/utils/common/cli_kit.py +++ b/perun/utils/common/cli_kit.py @@ -906,28 +906,6 @@ def configure_metrics(_: click.Context, __: click.Option, value: tuple[str, str] metrics.Metrics.configure(value[0], value[1]) -def process_metadata( - _: click.Context, __: click.Option, value: tuple[str] | None -) -> list[profile_helpers.ProfileMetadata]: - """Parse the metadata strings from CLI and convert them to our internal representation. - - :param _: click context - :param __: the click parameter - :param value: metadata strings - - :return: a collection of parsed and converted metadata objects - """ - profile_metadata: list[profile_helpers.ProfileMetadata] = [] - if value is None: - return profile_metadata - for metadata_str in value: - try: - profile_metadata.append(profile_helpers.ProfileMetadata.from_string(metadata_str)) - except TypeError: - log.warn(f"Ignoring invalid profile metadata string '{metadata_str}'.") - return profile_metadata - - def get_supported_module_names(package: str) -> list[str]: """Obtains list of supported modules supported by the package. 
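
A minimal standalone sketch of the path-handling rule introduced above (illustrative and
not part of the patch; the helper name and the example paths are assumptions):

    from pathlib import Path

    def massage_import_path(import_dir: Path, path_str: str) -> Path:
        # Whitespace around the path string is stripped first
        path = Path(path_str.strip())
        # Absolute paths are kept as is; relative paths are anchored at the import directory
        return path if path.is_absolute() else import_dir / path

    assert massage_import_path(Path("/imports"), " data/prof.json ") == Path("/imports/data/prof.json")
    assert massage_import_path(Path("/imports"), "/abs/prof.json") == Path("/abs/prof.json")
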
diff --git a/perun/utils/common/common_kit.py b/perun/utils/common/common_kit.py index d6353276..9785590a 100644 --- a/perun/utils/common/common_kit.py +++ b/perun/utils/common/common_kit.py @@ -4,14 +4,14 @@ # Standard Imports from typing import ( - Optional, Any, - Iterable, Callable, + Iterable, Literal, - TYPE_CHECKING, + Optional, Type, TypeVar, + TYPE_CHECKING, ) import array import contextlib @@ -29,8 +29,8 @@ # Perun Imports from perun.postprocess.regression_analysis import tools from perun.utils.exceptions import ( - SignalReceivedException, NotPerunRepositoryException, + SignalReceivedException, SuppressedExceptions, ) @@ -38,6 +38,8 @@ import types T = TypeVar("T") +KT = TypeVar("KT") +VT = TypeVar("VT") # Types ColorChoiceType = Literal[ @@ -458,6 +460,28 @@ def merge_dictionaries(*args: dict[Any, Any]) -> dict[Any, Any]: return res +def match_dicts_by_keys( + lhs: dict[KT, VT], rhs: dict[KT, VT] +) -> dict[KT, tuple[VT, VT | None] | tuple[VT | None, VT]]: + """Match and merge the keys and values from two dictionaries into a single one. + + Keys that are in both dictionaries will store a tuple of the lhs and rhs values. Keys that are + in either but not both dictionaries will store a tuple of the lhs or rhs value and a None. + + :param lhs: the first dictionary + :param rhs: the second dictionary + + :return: the merged dictionary with matched keys and values + """ + match_map: dict[KT, tuple[VT, VT | None] | tuple[VT | None, VT]] = {} + for lhs_key, lhs_item in lhs.items(): + match_map[lhs_key] = (lhs_item, rhs.get(lhs_key, None)) + for rhs_key, rhs_item in rhs.items(): + if rhs_key not in match_map: + match_map[rhs_key] = (None, rhs_item) + return match_map + + def partition_list( input_list: Iterable[Any], condition: Callable[[Any], bool] ) -> tuple[list[Any], list[Any]]: diff --git a/perun/utils/common/diff_kit.py b/perun/utils/common/diff_kit.py index 0e8a2b9c..f13bc35a 100755 --- a/perun/utils/common/diff_kit.py +++ b/perun/utils/common/diff_kit.py @@ -3,7 +3,9 @@ from __future__ import annotations # Standard Imports +import dataclasses import difflib +import enum import os from typing import Any, Optional, Iterable, Literal @@ -14,9 +16,33 @@ from perun.profile.factory import Profile from perun.profile import stats as pstats from perun.utils import log +from perun.utils.common import common_kit from perun.utils.common.common_kit import ColorChoiceType +class MetadataDisplayStyle(enum.Enum): + """Supported styles of displaying metadata.""" + + FULL = "full" + DIFF = "diff" + + @staticmethod + def supported() -> list[str]: + """Obtain the collection of supported metadata display styles. + + :return: the collection of valid display styles + """ + return [style.value for style in MetadataDisplayStyle] + + @staticmethod + def default() -> str: + """Provide the default metadata display style. 
+ + :return: the default display style + """ + return MetadataDisplayStyle.FULL.value + + def save_diff_view( output_file: Optional[str], content: str, @@ -119,129 +145,206 @@ def generate_header(profile: Profile) -> list[tuple[str, Any, str]]: ] -def diff_to_html(diff: list[str], start_tag: Literal["+", "-"]) -> str: - """Create a html tag with differences for either + or - changes +def generate_headers( + lhs_profile: Profile, rhs_profile: Profile +) -> tuple[list[tuple[str, Any, str]], list[tuple[str, Any, str]]]: + """Generates headers for lhs and rhs profile - :param diff: diff computed by difflib.ndiff - :param start_tag: starting point of the tag + :param lhs_profile: profile for baseline + :param rhs_profile: profile for target + :return: pair of headers for lhs (baseline) and rhs (target) """ - tag_to_color: dict[str, ColorChoiceType] = { - "+": "green", - "-": "red", - } - result = [] - for chunk in diff: - if chunk.startswith(" "): - result.append(chunk[2:]) - if chunk.startswith(start_tag): - result.append(_emphasize(tag_to_color.get(start_tag, "grey"), chunk[2:])) - return " ".join(result) + lhs_header = generate_header(lhs_profile) + rhs_header = generate_header(rhs_profile) + return generate_diff_of_headers(lhs_header, rhs_header) + + +def generate_diff_of_headers( + lhs_header: list[tuple[str, Any, str]], rhs_header: list[tuple[str, Any, str]] +) -> tuple[list[tuple[str, Any, str]], list[tuple[str, Any, str]]]: + """Regenerates header entries with HTML diff styles suitable for an output. + + :param lhs_header: header for baseline + :param rhs_header: header for target + + :return: pair of header entries for lhs (baseline) and rhs (target) + """ + lhs_diff, rhs_diff = [], [] + for lhs_record, rhs_record in zip(lhs_header, rhs_header): + _, lhs_diff_record, rhs_diff_record = generate_diff_of_header_record(lhs_record, rhs_record) + lhs_diff.append(lhs_diff_record) + rhs_diff.append(rhs_diff_record) + return lhs_diff, rhs_diff + + +def generate_diff_of_metadata( + lhs_metadata: Iterable[helpers.ProfileMetadata], + rhs_metadata: Iterable[helpers.ProfileMetadata], + display_style: MetadataDisplayStyle, +) -> tuple[list[tuple[str, Any, str]], list[tuple[str, Any, str]]]: + """Generates metadata entries with HTML diff styles suitable for an output. 
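+
+    For example (illustrative), with display_style=DIFF an 'os' entry that is identical in
+    both profiles is omitted from the output, while a 'kernel' entry that differs is kept
+    and highlighted.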
+ + :param lhs_metadata: a collection of metadata entries for baseline + :param rhs_metadata: a collection of metadata entries for target + :param display_style: the metadata display style; DIFF produces only entries that have diffs + + :return: pair of metadata entries for lhs (baseline) and rhs (target) + """ + metadata_map = common_kit.match_dicts_by_keys( + {data.name: data for data in lhs_metadata}, {data.name: data for data in rhs_metadata} + ) + lhs_list, rhs_list = [], [] + for metadata_key in sorted(metadata_map.keys()): + lhs_data: helpers.ProfileMetadata | None + rhs_data: helpers.ProfileMetadata | None + lhs_data, rhs_data = metadata_map[metadata_key] + lhs_tuple = ( + lhs_data.as_tuple() + if lhs_data is not None + else (metadata_key, "-", "missing metadata info") + ) + rhs_tuple = ( + rhs_data.as_tuple() + if rhs_data is not None + else (metadata_key, "-", "missing metadata info") + ) + if lhs_data is not None and rhs_data is not None: + is_diff, lhs_tuple, rhs_tuple = generate_diff_of_header_record(lhs_tuple, rhs_tuple) + if display_style == MetadataDisplayStyle.DIFF and not is_diff: + # We wish to display only differing metadata, skip this one + continue + lhs_list.append(lhs_tuple) + rhs_list.append(rhs_tuple) + return lhs_list, rhs_list def generate_diff_of_stats( - lhs_stats: list[pstats.ProfileStat], rhs_stats: list[pstats.ProfileStat] + lhs_stats: Iterable[pstats.ProfileStat], rhs_stats: Iterable[pstats.ProfileStat] ) -> tuple[list[tuple[str, str, str]], list[tuple[str, str, str]]]: - """Re-generate the stats with CSS diff styles suitable for an output. + """Generates the profile stats with HTML diff styles suitable for an output. :param lhs_stats: stats from the baseline :param rhs_stats: stats from the target - :return: collection of LHS and RHS stats (stat-name [stat-unit], stat-value, stat-tooltip) - with CSS styles that reflect the stat diffs. + :return: collection of LHS and RHS stats (stat-description, stat-value, stat-tooltip) + with HTML styles that reflect the stat diffs. 
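+        E.g., a stat present only in the target profile yields a baseline stub entry of
+        ('<name> [<unit>]', '-', 'missing stat info') (illustrative rendering).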
""" # Get all the stats that occur in either lhs or rhs and match those that exist in both - stats_map: dict[str, dict[str, pstats.ProfileStat]] = {} - for stat_source, stat_list in [("lhs", lhs_stats), ("rhs", rhs_stats)]: - for stat in stat_list: - stats_map.setdefault(stat.name, {})[stat_source] = stat + stats_map = common_kit.match_dicts_by_keys( + {stats.name: stats for stats in lhs_stats}, {stats.name: stats for stats in rhs_stats} + ) # Iterate the stats and format them according to their diffs lhs_diff, rhs_diff = [], [] for stat_key in sorted(stats_map.keys()): - lhs_stat: pstats.ProfileStat | None = stats_map[stat_key].get("lhs", None) - rhs_stat: pstats.ProfileStat | None = stats_map[stat_key].get("rhs", None) - lhs_info, rhs_info = _generate_stat_diff_record(lhs_stat, rhs_stat) + lhs_stat: pstats.ProfileStat | None + rhs_stat: pstats.ProfileStat | None + lhs_stat, rhs_stat = stats_map[stat_key] + lhs_info, rhs_info = generate_diff_of_stats_record(lhs_stat, rhs_stat) lhs_diff.append(lhs_info) rhs_diff.append(rhs_info) return lhs_diff, rhs_diff -def generate_diff_of_headers( - lhs_header: list[tuple[str, Any, str]], rhs_header: list[tuple[str, Any, str]] -) -> tuple[list[tuple[str, Any, str]], list[tuple[str, Any, str]]]: - """Regenerates header with differences between individual parts of the info +def generate_diff_of_header_record( + lhs_record: tuple[str, Any, str], rhs_record: tuple[str, Any, str] +) -> tuple[bool, tuple[str, Any, str], tuple[str, Any, str]]: + """Generates a single diffed LHS and RHS header entry. - :param lhs_header: header for baseline - :param rhs_header: header for target + :param lhs_record: the LHS (baseline) entry + :param rhs_record: the RHS (target) entry + + :return: pair of a single diffed LHS (baseline) and RHS (target) header entry """ - lhs_diff, rhs_diff = [], [] - for (lhs_key, lhs_value, lhs_info), (rhs_key, rhs_value, _) in zip(lhs_header, rhs_header): - assert ( - lhs_key == rhs_key - and f"Configuration keys in headers are wrongly ordered (expected {lhs_key}; " - f"got {rhs_key})" + (lhs_key, lhs_value, lhs_info), (rhs_key, rhs_value, _) = lhs_record, rhs_record + assert ( + lhs_key == rhs_key + and f"Configuration keys in headers are wrongly ordered (expected {lhs_key}; got {rhs_key})" + ) + if lhs_value != rhs_value: + diff = list(difflib.ndiff(str(lhs_value).split(), str(rhs_value).split())) + key = _emphasize(lhs_key, "red") + return ( + True, + (key, diff_to_html(diff, "-"), lhs_info), + (key, diff_to_html(diff, "+"), lhs_info), ) - if lhs_value != rhs_value: - diff = list(difflib.ndiff(str(lhs_value).split(), str(rhs_value).split())) - key = _emphasize("red", lhs_key) - lhs_diff.append((key, diff_to_html(diff, "-"), lhs_info)) - rhs_diff.append((key, diff_to_html(diff, "+"), lhs_info)) - else: - lhs_diff.append((lhs_key, lhs_value, lhs_info)) - rhs_diff.append((rhs_key, rhs_value, lhs_info)) - return lhs_diff, rhs_diff + else: + return False, (lhs_key, lhs_value, lhs_info), (rhs_key, rhs_value, lhs_info) -def generate_headers( - lhs_profile: Profile, rhs_profile: Profile -) -> tuple[list[tuple[str, Any, str]], list[tuple[str, Any, str]]]: - """Generates headers for lhs and rhs profile +def generate_diff_of_stats_record( + lhs_stat: pstats.ProfileStat | None, rhs_stat: pstats.ProfileStat | None +) -> tuple[tuple[str, str, str], tuple[str, str, str]]: + """Generates a single diffed LHS and RHS profile stats entry. 
- :param lhs_profile: profile for baseline - :param rhs_profile: profile for target - :return: pair of headers for lhs (baseline) and rhs (target) + :param lhs_stat: the LHS (baseline) stats entry + :param rhs_stat: the RHS (target) stats entry + + :return: pair of a single diffed LHS (baseline) and RHS (target) stats entry """ - lhs_header = generate_header(lhs_profile) - rhs_header = generate_header(rhs_profile) - return generate_diff_of_headers(lhs_header, rhs_header) + lhs_diff = _StatsDiffRecord.from_stat(lhs_stat, rhs_stat) + rhs_diff = _StatsDiffRecord.from_stat(rhs_stat, lhs_stat) + if lhs_diff.stat_agg is not None and rhs_diff.stat_agg is not None: + assert lhs_stat is not None + lhs_diff.value, rhs_diff.value = _color_stat_record_diff( + lhs_diff.stat_agg, + rhs_diff.stat_agg, + lhs_stat.aggregate_by, + lhs_diff.stat_agg.infer_auto_ordering(lhs_stat.ordering), + ) + return lhs_diff.to_tuple(), rhs_diff.to_tuple() -def generate_metadata( - lhs_profile: Profile, rhs_profile: Profile -) -> tuple[list[tuple[str, Any, str]], list[tuple[str, Any, str]]]: - """Generates metadata for lhs and rhs profile +def diff_to_html(diff: list[str], start_tag: Literal["+", "-"]) -> str: + """Create a html tag with differences for either + or - changes - :param lhs_profile: profile for baseline - :param rhs_profile: profile for target - :return: pair of metadata for lhs (baseline) and rhs (target) + :param diff: diff computed by difflib.ndiff + :param start_tag: starting point of the tag """ - data: dict[str, Any] - - lhs_metadata = sorted( - [ - helpers.ProfileMetadata.from_profile(data).as_tuple() - for data in lhs_profile.get("metadata", []) - ], - key=lambda x: x[0], - ) - rhs_metadata = sorted( - [ - helpers.ProfileMetadata.from_profile(data).as_tuple() - for data in rhs_profile.get("metadata", []) - ], - key=lambda x: x[0], - ) - return generate_diff_of_headers(lhs_metadata, rhs_metadata) + tag_to_color: dict[str, ColorChoiceType] = { + "+": "green", + "-": "red", + } + result = [] + for chunk in diff: + if chunk.startswith(" "): + result.append(chunk[2:]) + if chunk.startswith(start_tag): + result.append(_emphasize(chunk[2:], tag_to_color.get(start_tag, "grey"))) + return " ".join(result) def normalize_stat_tooltip(tooltip: str, ordering: pstats.ProfileStatOrdering) -> str: + """Normalize a stat tooltip by including the ordering type as well. + + :param tooltip: the original stat tooltip + :param ordering: the ordering type of the stat + + :return: the normalized tooltip + """ ordering_str: str = f'[{ordering.value.replace("_", " ")}]' return f"{tooltip} {ordering_str}" -def _emphasize(color: ColorChoiceType, value: str) -> str: +def _emphasize(value: str, color: ColorChoiceType) -> str: + """Emphasize a string with a HTML color. + + :param value: the string to emphasize + :param color: the color to use + + :return: the emphasized string + """ return f'{value}' def _format_exit_codes(exit_code: str | list[str] | list[int]) -> str: + """Format (a collection of) exit code(s) for HTML output. + + Exit codes that are non-zero will be emphasized with a color. 
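+    For example (illustrative), the list [0, 1, 0] is rendered as '0, 1, 0' with the
+    middle '1' wrapped in red emphasis markup.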
+ + :param exit_code: the exit code(s) + + :return: the formatted exit codes + """ # Unify the exit code types exit_codes: list[str] = [] if isinstance(exit_code, str): @@ -249,70 +352,27 @@ def _format_exit_codes(exit_code: str | list[str] | list[int]) -> str: else: exit_codes = list(map(str, exit_code)) # Color exit codes that are not zero - return ", ".join(code if code == "0" else _emphasize("red", code) for code in exit_codes) + return ", ".join(code if code == "0" else _emphasize(code, "red") for code in exit_codes) -def _generate_stat_diff_record( - lhs_stat: pstats.ProfileStat | None, rhs_stat: pstats.ProfileStat | None -) -> tuple[tuple[str, str, str], tuple[str, str, str]]: - lhs_stat_agg: pstats.ProfileStatAggregation | None = None - lhs_name: str = "" - lhs_value: str = "-" - lhs_tooltip: str = "missing stat info" - rhs_stat_agg: pstats.ProfileStatAggregation | None = None - rhs_name: str = "" - rhs_value: str = "-" - rhs_tooltip: str = "missing stat info" - if lhs_stat is not None: - lhs_stat_agg = pstats.aggregate_stats(lhs_stat) - unit = f" [{lhs_stat.unit}]" if lhs_stat.unit else "" - lhs_name = ( - f"{lhs_stat.name}{unit} " - f"({lhs_stat_agg.normalize_aggregate_key(lhs_stat.aggregate_by)})" - ) - rhs_name = f"{lhs_stat.name}{unit}" - lhs_value = str(lhs_stat_agg.as_table()[0]) - lhs_tooltip = normalize_stat_tooltip( - lhs_stat.tooltip, lhs_stat_agg.infer_auto_ordering(lhs_stat.ordering) - ) - if rhs_stat is not None: - rhs_stat_agg = pstats.aggregate_stats(rhs_stat) - unit = f" [{rhs_stat.unit}]" if rhs_stat.unit else "" - rhs_name = ( - f"{rhs_stat.name}{unit} " - f"({rhs_stat_agg.normalize_aggregate_key(rhs_stat.aggregate_by)})" - ) - lhs_name = lhs_name if lhs_name else f"{rhs_stat.name}{unit}" - rhs_value = str(rhs_stat_agg.as_table()[0]) - rhs_tooltip = normalize_stat_tooltip( - rhs_stat.tooltip, rhs_stat_agg.infer_auto_ordering(rhs_stat.ordering) - ) - if lhs_stat_agg is not None and rhs_stat_agg is not None: - assert lhs_stat is not None - lhs_value, rhs_value = _color_stat_value_diff( - lhs_stat_agg, - rhs_stat_agg, - lhs_stat.aggregate_by, - lhs_stat_agg.infer_auto_ordering(lhs_stat.ordering), - ) - return (lhs_name, lhs_value, lhs_tooltip), (rhs_name, rhs_value, rhs_tooltip) - - -def _color_stat_value_diff( +def _color_stat_record_diff( lhs_stat_agg: pstats.ProfileStatAggregation, rhs_stat_agg: pstats.ProfileStatAggregation, - aggregate_key: str, + compare_key: str, ordering: pstats.ProfileStatOrdering, ) -> tuple[str, str]: - """Color the stat values on the LHS and RHS according to their difference. + """Color the stats values on the LHS and RHS according to their difference. The color is determined by the stat ordering and the result of the stat values comparison. 
- :param lhs_stat: a stat from the baseline - :param rhs_stat: a stat from the target + :param lhs_stat_agg: a baseline stat aggregation + :param rhs_stat_agg: a target stat aggregation + :param compare_key: the key by which to compare the stats + :param ordering: the ordering type of the stat + :return: colored LHS and RHS stat values """ - comparison_result = pstats.compare_stats(lhs_stat_agg, rhs_stat_agg, aggregate_key, ordering) + # Build a color map for different comparison results color_map: dict[pstats.StatComparisonResult, tuple[ColorChoiceType, ColorChoiceType]] = { pstats.StatComparisonResult.INVALID: ("red", "red"), pstats.StatComparisonResult.UNEQUAL: ("red", "red"), @@ -320,11 +380,62 @@ def _color_stat_value_diff( pstats.StatComparisonResult.BASELINE_BETTER: ("green", "red"), pstats.StatComparisonResult.TARGET_BETTER: ("red", "green"), } - + # Compare and color the stat entry + comparison_result = pstats.compare_stats(lhs_stat_agg, rhs_stat_agg, compare_key, ordering) baseline_color, target_color = color_map[comparison_result] if comparison_result == pstats.StatComparisonResult.INVALID: - baseline_value, target_value = "", "" + baseline_value, target_value = "invalid comparison", "invalid comparison" else: baseline_value = str(lhs_stat_agg.as_table()[0]) target_value = str(rhs_stat_agg.as_table()[0]) - return _emphasize(baseline_color, baseline_value), _emphasize(target_color, target_value) + return _emphasize(baseline_value, baseline_color), _emphasize(target_value, target_color) + + +@dataclasses.dataclass +class _StatsDiffRecord: + """A helper struct for storing a difference entry of baseline/target stats. + + :ivar name: name of the stats entry + :ivar value: the value of the stats entry + :ivar tooltip: the tooltip of the stats entry + :ivar stat_agg: the stats aggregation + """ + + name: str = "" + value: str = "-" + tooltip: str = "missing stat info" + stat_agg: pstats.ProfileStatAggregation | None = None + + @classmethod + def from_stat( + cls, stat: pstats.ProfileStat | None, other_stat: pstats.ProfileStat | None + ) -> _StatsDiffRecord: + """Construct a difference record from a baseline/target profile stat. + + If the baseline/target profile stat is not available, we use the other + (i.e., target/baseline) stat to construct a stub entry to indicate a missing stat. + + :param stat: the baseline/target profile stat to construct from, if available + :param other_stat: the other profile stat to use for construction as a fallback + + :return: the constructed difference record + """ + if stat is None: + # Fallback construction from the other stat + assert other_stat is not None + unit = f" [{other_stat.unit}]" if other_stat.unit else "" + return cls(f"{other_stat.name}{unit}") + # The standard construction + stat_agg = pstats.aggregate_stats(stat) + unit = f" [{stat.unit}]" if stat.unit else "" + name = f"{stat.name}{unit} " f"({stat_agg.normalize_aggregate_key(stat.aggregate_by)})" + value = str(stat_agg.as_table()[0]) + tooltip = normalize_stat_tooltip(stat.tooltip, stat_agg.infer_auto_ordering(stat.ordering)) + return cls(name, value, tooltip, stat_agg) + + def to_tuple(self) -> tuple[str, str, str]: + """Convert the difference record to a tuple. 
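+
+        E.g., a fully populated record may convert to ('runtime [s] (mean)', '3.2',
+        'run duration [lower is better]') (illustrative values).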
+ + :return: the tuple representation of the difference record + """ + return self.name, self.value, self.tooltip diff --git a/perun/view_diff/flamegraph/run.py b/perun/view_diff/flamegraph/run.py index bd98cc7e..3e0e8a3b 100755 --- a/perun/view_diff/flamegraph/run.py +++ b/perun/view_diff/flamegraph/run.py @@ -14,9 +14,9 @@ # Perun Imports from perun.templates import factory as templates from perun.utils import log, mapping -from perun.utils.common import diff_kit, common_kit +from perun.utils.common import common_kit, diff_kit +from perun.profile import convert, stats as profile_stats from perun.profile.factory import Profile -from perun.profile import convert from perun.view.flamegraph import flamegraph as flamegraph_factory from perun.view_diff.short import run as table_run @@ -124,11 +124,12 @@ def generate_flamegraphs( :param lhs_profile: baseline profile :param rhs_profile: target profile + :param data_types: list of data types (resources) + :param width: width of the flame graph + :param skip_diff: whether the flamegraph diff should be skipped or not :param minimize: whether the flamegraph should be minimized or not :param max_trace: maximal size of the trace :param max_per_resource: maximal values for each resource - :param data_types: list of data types (resources) - :param width: width of the flame graph """ flamegraphs = [] for i, dtype in log.progress(enumerate(data_types), description="Generating Flamegraphs"): @@ -177,27 +178,16 @@ def generate_flamegraphs( return flamegraphs -def generate_profile_stats(stats: dict[str, float]) -> list[tuple[str, Any, str]]: - """Generates stats for baseline or target profile - - :param profile_type: type of the profile - :return: list of tuples containing stats as tuples key, value and tooltip - """ - profile_stats = [] - for key, value in stats.items(): - stat_key, stat_tooltip = key.split(";") - profile_stats.append((stat_key, value, stat_tooltip)) - return profile_stats - - def process_maxima( - maxima_per_resources: dict[str, float], stats: dict[str, float], profile: Profile -) -> None: + maxima_per_resources: dict[str, float], stats: list[profile_stats.ProfileStat], profile: Profile +) -> int: """Processes maxima for each profile :param maxima_per_resources: dictionary that maps resources to their maxima + :param stats: list of profile stats to extend :param profile: input profile - :return: maximal trace + + :return: the length of the maximum trace """ is_inclusive = profile.get("collector_info", {}).get("name") == "kperf" counts: dict[str, float] = defaultdict(float) @@ -214,8 +204,23 @@ def process_maxima( counts[key] += amount for key in counts.keys(): maxima_per_resources[key] = max(maxima_per_resources[key], counts[key]) - stats[f"Overall {key};The overall value of the {key} for the root value"] = counts[key] - stats["Maximal Trace Length;Maximal lenght of the trace in the profile"] = max_trace + stats.append( + profile_stats.ProfileStat( + f"Overall {key}", + profile_stats.ProfileStatOrdering.LOWER, + tooltip=f"The overall value of the {key} for the root value", + value=counts[key], + ) + ) + stats.append( + profile_stats.ProfileStat( + "Maximum Trace Length", + profile_stats.ProfileStatOrdering.LOWER, + tooltip="Maximum length of the trace in the profile", + value=max_trace, + ) + ) + return max_trace def generate_flamegraph_difference( @@ -228,17 +233,17 @@ def generate_flamegraph_difference( :param kwargs: additional arguments """ maxima_per_resource: dict[str, float] = defaultdict(float) - lhs_stats: dict[str, float] = 
defaultdict(float) - rhs_stats: dict[str, float] = defaultdict(float) + lhs_stats: list[profile_stats.ProfileStat] = [] + rhs_stats: list[profile_stats.ProfileStat] = [] lhs_types = list(lhs_profile.all_resource_fields()) rhs_types = list(rhs_profile.all_resource_fields()) data_types = diff_kit.get_candidate_keys(set(lhs_types).union(set(rhs_types))) data_type = list(data_types)[0] - process_maxima(maxima_per_resource, lhs_stats, lhs_profile) - process_maxima(maxima_per_resource, rhs_stats, rhs_profile) - lhs_final_stats, rhs_final_stats = diff_kit.generate_diff_of_headers( - generate_profile_stats(lhs_stats), generate_profile_stats(rhs_stats) - ) + lhs_max_trace = process_maxima(maxima_per_resource, lhs_stats, lhs_profile) + rhs_max_trace = process_maxima(maxima_per_resource, rhs_stats, rhs_profile) + lhs_stats += list(lhs_profile.all_stats()) + rhs_stats += list(rhs_profile.all_stats()) + lhs_final_stats, rhs_final_stats = diff_kit.generate_diff_of_stats(lhs_stats, rhs_stats) log.major_info("Generating Flamegraph Difference") flamegraphs = generate_flamegraphs( @@ -246,15 +251,12 @@ def generate_flamegraph_difference( rhs_profile, data_types, max_per_resource=maxima_per_resource, - max_trace=int( - max( - lhs_stats["Maximal Trace Length;Maximal lenght of the trace in the profile"], - rhs_stats["Maximal Trace Length;Maximal lenght of the trace in the profile"], - ) - ), + max_trace=max(lhs_max_trace, rhs_max_trace), ) lhs_header, rhs_header = diff_kit.generate_headers(lhs_profile, rhs_profile) - lhs_meta, rhs_meta = diff_kit.generate_metadata(lhs_profile, rhs_profile) + lhs_meta, rhs_meta = diff_kit.generate_diff_of_metadata( + lhs_profile.all_metadata(), rhs_profile.all_metadata(), kwargs["metadata_display"] + ) template = templates.get_template("diff_view_flamegraph.html.jinja2") content = template.render( @@ -284,13 +286,22 @@ def generate_flamegraph_difference( @click.command() @click.pass_context @click.option( - "-w", "--width", + "-w", type=click.INT, default=DEFAULT_WIDTH, help="Sets the width of the flamegraph (default=600px).", ) -@click.option("-o", "--output-file", help="Sets the output file (default=automatically generated).") +@click.option("--output-file", "-o", help="Sets the output file (default=automatically generated).") +@click.option( + "--metadata-display", + type=click.Choice(diff_kit.MetadataDisplayStyle.supported()), + default=diff_kit.MetadataDisplayStyle.default(), + callback=lambda _, __, ds: diff_kit.MetadataDisplayStyle(ds), + help="Selects the display style of profile metadata. 
The 'full' option displays all provided " + "metadata, while the 'diff' option shows only metadata with different values " + f"(default={diff_kit.MetadataDisplayStyle.default()}).", +) def flamegraph(ctx: click.Context, *_: Any, **kwargs: Any) -> None: """ """ assert ctx.parent is not None and f"impossible happened: {ctx} has no parent" diff --git a/perun/view_diff/report/run.py b/perun/view_diff/report/run.py index 19b94907..10f0bea7 100755 --- a/perun/view_diff/report/run.py +++ b/perun/view_diff/report/run.py @@ -321,6 +321,7 @@ class Node: :ivar uid: unique identifier of the node (the label) :ivar callees: map of positions to edge relation for callees :ivar callers: map of positions to edge relation for callers + :ivar stats: statistics for the given node """ __slots__ = ["uid", "callees", "callers", "stats"] @@ -370,7 +371,7 @@ class Link: class Stats: - """Statistics for a given edge + """Statistics for a given edge or node :ivar baseline: baseline stats :ivar target: target stats @@ -444,8 +445,7 @@ def process_node( :param graph: sankey graph :param profile_type: type of the profile :param resource: consumed resources - :param src: callee - :param tgt: caller + :param uid: the node uid """ node = graph.get_node(uid) for key in resource: @@ -549,8 +549,6 @@ def process_traces( ) ) Config().max_seen_trace = max(max_trace, Config().max_seen_trace) - for stat in profile.get("stats", []): - Config().profile_stats[profile_type].append(profile_stats.ProfileStat.from_profile(stat)) def generate_trace_stats(graph: Graph) -> dict[str, list[TraceStat]]: @@ -725,6 +723,8 @@ def generate_report(lhs_profile: Profile, rhs_profile: Profile, **kwargs: Any) - process_traces(lhs_profile, "baseline", graph) process_traces(rhs_profile, "target", graph) + lhs_stats = Config().profile_stats["baseline"] + list(lhs_profile.all_stats()) + rhs_stats = Config().profile_stats["target"] + list(rhs_profile.all_stats()) trace_stats = generate_trace_stats(graph) selection_table = generate_selection(graph, trace_stats) @@ -739,11 +739,10 @@ def generate_report(lhs_profile: Profile, rhs_profile: Profile, **kwargs: Any) - ) log.minor_success("Sankey graphs", "generated") lhs_header, rhs_header = diff_kit.generate_headers(lhs_profile, rhs_profile) - lhs_stats, rhs_stats = diff_kit.generate_diff_of_stats( - Config().profile_stats["baseline"], - Config().profile_stats["target"], + lhs_diff_stats, rhs_diff_stats = diff_kit.generate_diff_of_stats(lhs_stats, rhs_stats) + lhs_meta, rhs_meta = diff_kit.generate_diff_of_metadata( + lhs_profile.all_metadata(), rhs_profile.all_metadata(), kwargs["metadata_display"] ) - lhs_meta, rhs_meta = diff_kit.generate_metadata(lhs_profile, rhs_profile) env_filters = {"sanitize_variable_name": filters.sanitize_variable_name} template = templates.get_template("diff_view_report.html.jinja2", filters=env_filters) @@ -751,11 +750,11 @@ def generate_report(lhs_profile: Profile, rhs_profile: Profile, **kwargs: Any) - title="Differences of profiles (with sankey)", lhs_tag="Baseline (base)", lhs_header=lhs_header, - lhs_stats=lhs_stats, + lhs_stats=lhs_diff_stats, lhs_metadata=lhs_meta, rhs_tag="Target (tgt)", rhs_header=rhs_header, - rhs_stats=rhs_stats, + rhs_stats=rhs_diff_stats, rhs_metadata=rhs_meta, palette=WebColorPalette, callee_graph=graph.to_jinja_string("callees"), @@ -789,29 +788,38 @@ def generate_report(lhs_profile: Profile, rhs_profile: Profile, **kwargs: Any) - @click.command() -@click.option("-o", "--output-file", help="Sets the output file (default=automatically generated).") 
+@click.option("--output-file", "-o", help="Sets the output file (default=automatically generated).") @click.option( - "-fr", "--filter-by-relative", + "-fr", nargs=1, - help="Filters records based on the relative increase wrt the target. " - f"It filters values that are lesser or equal than [FLOAT] (default={Config().DefaultRelativeThreshold}).", type=click.FLOAT, default=Config().DefaultRelativeThreshold, + help="Filters records based on the relative increase wrt the target. It filters values that " + f"are lesser or equal than [FLOAT] (default={Config().DefaultRelativeThreshold}).", ) @click.option( - "-tn", "--top-n", + "-tn", nargs=1, - help=f"Filters how many top traces will be recorded per uid (default={Config().DefaultTopN}). ", type=click.INT, default=Config().DefaultTopN, + help=f"Filters how many top traces will be recorded per uid (default={Config().DefaultTopN}). ", ) @click.option( - "-m", "--minimize", + "-m", is_flag=True, - help="Minimizes the traces, folds the recursive calls, hids the generic types.", + help="Minimizes the traces, folds the recursive calls, hides the generic types.", +) +@click.option( + "--metadata-display", + type=click.Choice(diff_kit.MetadataDisplayStyle.supported()), + default=diff_kit.MetadataDisplayStyle.default(), + callback=lambda _, __, ds: diff_kit.MetadataDisplayStyle(ds), + help="Selects the display style of profile metadata. The 'full' option displays all provided " + "metadata, while the 'diff' option shows only metadata with different values " + f"(default={diff_kit.MetadataDisplayStyle.default()}).", ) @click.pass_context def report(ctx: click.Context, *_: Any, **kwargs: Any) -> None: From 8fa8f43e02174f26e2bb64a87ff0890e02c5de63 Mon Sep 17 00:00:00 2001 From: Jiri Pavela Date: Mon, 16 Sep 2024 21:13:51 +0200 Subject: [PATCH 09/14] Refine some names and add missing docstrings --- perun/cli_groups/import_cli.py | 6 +- perun/profile/helpers.py | 6 +- perun/profile/stats.py | 269 +++++++++++++++++++++++++----- perun/utils/common/diff_kit.py | 30 ++-- perun/view_diff/flamegraph/run.py | 8 +- perun/view_diff/report/run.py | 8 +- 6 files changed, 256 insertions(+), 71 deletions(-) diff --git a/perun/cli_groups/import_cli.py b/perun/cli_groups/import_cli.py index d895e596..7e6f102d 100755 --- a/perun/cli_groups/import_cli.py +++ b/perun/cli_groups/import_cli.py @@ -43,16 +43,16 @@ default=None, metavar="[STAT_HEADER+]", help="Describes the stats headers associated with the imported profiles. Each stats header is " - "in the form of 'NAME[|ORDERING[|UNIT[|AGGREGATE_BY[|TOOLTIP]]]]'.", + "in the form of 'NAME[|COMPARISON_TYPE[|UNIT[|AGGREGATE_BY[|DESCRIPTION]]]]'.", ) @click.option( "--metadata", "-md", multiple=True, default=None, - metavar="['KEY|VALUE|[TOOLTIP]'] or [FILE.json]", + metavar="['KEY|VALUE|[DESCRIPTION]'] or [FILE.json]", help="Describes a single metadata entry associated with the imported profiles as a " - "'key|value[|tooltip]' string or multiple entries in a JSON file that will be flattened if" + "'key|value[|description]' string or multiple entries in a JSON file that will be flattened if" "needed. 
The --metadata option may be specified multiple times.",
 )
 @click.option(
     "--cmd",
diff --git a/perun/profile/helpers.py b/perun/profile/helpers.py
index ad033df3..d2dc67b4 100644
--- a/perun/profile/helpers.py
+++ b/perun/profile/helpers.py
@@ -618,12 +618,12 @@ class ProfileMetadata:
 
     :ivar name: the name (key) of the metadata entry
     :ivar value: the value of the metadata entry
-    :ivar tooltip: detailed description of the metadata entry
+    :ivar description: detailed description of the metadata entry
     """
 
     name: str
     value: str | float
-    tooltip: str = ""
+    description: str = ""
 
     @classmethod
     def from_string(cls, metadata: str) -> ProfileMetadata:
@@ -650,4 +650,4 @@ def as_tuple(self) -> tuple[str, str | float, str]:
 
         :return: the tuple representation of a metadata entry
         """
-        return self.name, self.value, self.tooltip
+        return self.name, self.value, self.description
diff --git a/perun/profile/stats.py b/perun/profile/stats.py
index 408363fd..1417ceeb 100644
--- a/perun/profile/stats.py
+++ b/perun/profile/stats.py
@@ -1,3 +1,20 @@
+"""Profile Stats are additional metrics or statistics associated with a Profile.
+
+Profile stats are identified by a name and include additional information about the metrics unit,
+description and value(s). When the stat value is a collection, the values may be aggregated into
+a single representative value and a collection of other descriptive values or statistics.
+
+For example, if a stat contains a collection of float values, the aggregation creates a statistical
+description of the values (i.e., min, max, mean, median, first and last decile, and first and third
+quartile) and selects a representative value out of these using the aggregate-by key. A collection
+of strings is aggregated into a histogram, where each value is a bin.
+
+When comparing the representative value, the stat value comparison type defines the type of
+comparison operator to use (e.g., lower than, higher than, equal, etc.) on the representative value.
+Note that using equality for float values will not work properly. The AUTO comparison type
+automatically selects a sane default comparison operator based on the aggregation type.
+"""
+
 from __future__ import annotations
 
 # Standard Imports
@@ -13,8 +30,15 @@
 from perun.utils import log as perun_log
 
 
-class ProfileStatOrdering(str, enum.Enum):
-    # The class is derived from str so that ProfileStat serialization using asdict() works properly
+class ProfileStatComparison(str, enum.Enum):
+    """The profile stat comparison types.
+
+    The auto comparison type selects a sane default comparison operator based on the aggregation
+    type.
+
+    Note: the enum derives from str so that ProfileStat serialization using asdict() works properly.
+    """
+
     AUTO = "auto"
     HIGHER = "higher_is_better"
     LOWER = "lower_is_better"
@@ -22,14 +46,28 @@
 
     @staticmethod
     def supported() -> set[str]:
-        return {ordering.value for ordering in ProfileStatOrdering}
+        """Provides the set of supported comparison types.
+
+        :return: The set of supported comparison types.
+        """
+        return {comparison.value for comparison in ProfileStatComparison}
 
     @staticmethod
-    def default() -> ProfileStatOrdering:
-        return ProfileStatOrdering.AUTO
+    def default() -> ProfileStatComparison:
+        """Provides the default comparison type.
+
+        :return: The default comparison type.
+        """
+        return ProfileStatComparison.AUTO
 
 
 class StatComparisonResult(enum.Enum):
+    """The result of stat representative value comparison.
+ + Since the comparison is determined by the comparison operator and the type of the representative + key, there is a number of valid comparison results that need to be represented. + """ + EQUAL = 1 UNEQUAL = 2 BASELINE_BETTER = 3 @@ -39,54 +77,107 @@ class StatComparisonResult(enum.Enum): @dataclasses.dataclass class ProfileStat: + """An internal representation of a profile stat. + + :ivar name: The name of the stat. + :ivar cmp: The comparison type of the stat values. + :ivar unit: The unit of the stat value(s). + :ivar aggregate_by: The aggregation (representative value) key. + :ivar description: A detailed description of the stat. + :ivar value: The value of the stat. + """ + name: str - ordering: ProfileStatOrdering = ProfileStatOrdering.default() + cmp: ProfileStatComparison = ProfileStatComparison.default() unit: str = "#" aggregate_by: str = "" - tooltip: str = "" + description: str = "" value: object = "" @classmethod def from_string( cls, name: str = "[empty]", - ordering: str = "", + cmp: str = "", unit: str = "#", aggregate_by: str = "", - tooltip: str = "", + description: str = "", *_: Any, ) -> ProfileStat: + """Constructs a ProfileStat object from a string describing a stat header. + + The value of the stat is ignored when parsing from a string, as string representation is + used solely for specifying the stat header. + + :param name: The name of the stat. + :param cmp: The comparison type of the stat values. + :param unit: The unit of the stat value(s). + :param aggregate_by: The aggregation (representative value) key. + :param description: A detailed description of the stat. + + :return: A constructed ProfileStat object. + """ if name == "[empty]": # Invalid stat specification, warn perun_log.warn("Empty profile stat specification. Creating a dummy '[empty]' stat.") - ordering_enum = cls._convert_ordering(ordering) - return cls(name, ordering_enum, unit, aggregate_by, tooltip) + comparison_enum = cls._convert_comparison(cmp) + return cls(name, comparison_enum, unit, aggregate_by, description) @classmethod def from_profile(cls, stat: dict[str, Any]) -> ProfileStat: - stat["ordering"] = cls._convert_ordering(stat.get("ordering", "")) + """Constructs a ProfileStat object from a Perun profile. + + :param stat: The stat dictionary from a Perun profile. + + :return: A constructed ProfileStat object. + """ + stat["cmp"] = cls._convert_comparison(stat.get("cmp", "")) return cls(**stat) @staticmethod - def _convert_ordering(ordering: str) -> ProfileStatOrdering: - if not ordering: - return ProfileStatOrdering.default() + def _convert_comparison(comparison: str) -> ProfileStatComparison: + """Convert a comparison type string into a ProfileStatComparison enum value. + + If an invalid comparison type is provided, the default type will be used. + + :param comparison: The comparison type as a string. + + :return: The comparison type as an enum value. + """ + if not comparison: + return ProfileStatComparison.default() try: - return ProfileStatOrdering(ordering.strip()) + return ProfileStatComparison(comparison.strip()) except ValueError: - # Invalid stat ordering, warn + # Invalid stat comparison, warn perun_log.warn( - f"Unknown stat ordering: {ordering}. Using the default stat ordering value instead." - f" Please choose one of ({', '.join(ProfileStatOrdering.supported())})." + f"Unknown stat comparison: {comparison}. Using the default stat comparison value " + f"instead. Please choose one of ({', '.join(ProfileStatComparison.supported())})." 
) - return ProfileStatOrdering.default() + return ProfileStatComparison.default() class ProfileStatAggregation(Protocol): + """A protocol for profile stat aggregation objects. + + Since individual aggregation types may differ in a lot of ways (e.g., the supported + representative/aggregation keys, table representation, auto comparison type, ...), we provide + an abstract protocol for all aggregation objects. + """ + _SUPPORTED_KEYS: ClassVar[set[str]] = set() _DEFAULT_KEY: ClassVar[str] = "" def normalize_aggregate_key(self, key: str = _DEFAULT_KEY) -> str: + """Check and normalize the aggregation/representative key. + + If no key is provided, or the key is invalid or unsupported by the aggregation type, the + default key is used instead. + + :param key: The key to check. + + :return: The checked (and possibly normalized) key. + """ if key not in self._SUPPORTED_KEYS: if key: # A key was provided, but it is an invalid one @@ -98,29 +189,60 @@ def normalize_aggregate_key(self, key: str = _DEFAULT_KEY) -> str: return key def get_value(self, key: str = _DEFAULT_KEY) -> Any: + """Obtain a value associated with the key from the aggregation / statistic description. + + If no key is provided, or the key is invalid, the value associated with the default key is + returned. + + :param key: The key of the value to obtain. + + :return: The value associated with the key. + """ return getattr(self, self.normalize_aggregate_key(key)) - def infer_auto_ordering(self, ordering: ProfileStatOrdering) -> ProfileStatOrdering: ... + def infer_auto_comparison(self, comparison: ProfileStatComparison) -> ProfileStatComparison: + """Selects the correct auto comparison type for the aggregation type. + + :param comparison: May be auto or any other valid comparison type. For the auto comparison + type, another non-auto comparison type is returned. For the other comparison types, + the method works as an identity function. + + :return: A non-auto comparison type. + """ - # header, table def as_table( self, key: str = _DEFAULT_KEY - ) -> tuple[str | float | tuple[str, int], dict[str, Any]]: ... + ) -> tuple[str | float | tuple[str, int], dict[str, Any]]: + """Transforms the aggregation object into the representative value and a table of the + aggregation / statistic description values. + + :param key: The key of the aggregation / statistic description. + + :return: The representative value and a table representation of the aggregation. + """ @dataclasses.dataclass class SingleValue(ProfileStatAggregation): + """A single value "aggregation". + + Used for single value profile stats that need to adhere to the same interface as the "proper" + aggregations. + + :ivar value: The value of the stat. 
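As a brief illustration of the protocol in action, a minimal sketch (hypothetical values) of how a single-value stat resolves the AUTO comparison type, following the rules implemented below: string values fall back to equality, numeric values to higher-is-better.

    from perun.profile.stats import ProfileStatComparison, SingleValue

    # A string value can only be meaningfully compared for equality.
    text_stat = SingleValue("linux-6.8")
    assert text_stat.infer_auto_comparison(ProfileStatComparison.AUTO) \
        is ProfileStatComparison.EQUALITY
    # A numeric value defaults to higher-is-better.
    num_stat = SingleValue(42.0)
    assert num_stat.infer_auto_comparison(ProfileStatComparison.AUTO) \
        is ProfileStatComparison.HIGHER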
+ """ + _SUPPORTED_KEYS: ClassVar[set[str]] = {"value"} _DEFAULT_KEY = "value" value: str | float = "[missing]" - def infer_auto_ordering(self, ordering: ProfileStatOrdering) -> ProfileStatOrdering: - if ordering != ProfileStatOrdering.AUTO: - return ordering + def infer_auto_comparison(self, comparison: ProfileStatComparison) -> ProfileStatComparison: + if comparison != ProfileStatComparison.AUTO: + return comparison if isinstance(self.value, str): - return ProfileStatOrdering.EQUALITY - return ProfileStatOrdering.HIGHER + return ProfileStatComparison.EQUALITY + return ProfileStatComparison.HIGHER def as_table(self, _: str = "") -> tuple[str | float, dict[str, str | float]]: # There are no details of a single value to generate into a table @@ -129,6 +251,20 @@ def as_table(self, _: str = "") -> tuple[str | float, dict[str, str | float]]: @dataclasses.dataclass class StatisticalSummary(ProfileStatAggregation): + """A statistical description / summary aggregation type. + + Used for collections of floats. + + :ivar min: The minimum value in the collection. + :ivar p10: The first decile value. + :ivar p25: The first quartile value. + :ivar median: The median value of the entire collection. + :ivar p75: The third quartile value. + :ivar p90: The last decile value. + :ivar max: The maximum value in the collection. + :ivar mean: The mean value of the entire collection. + """ + _SUPPORTED_KEYS: ClassVar[set[str]] = { "min", "p10", @@ -152,6 +288,12 @@ class StatisticalSummary(ProfileStatAggregation): @classmethod def from_values(cls, values: Iterable[float]) -> StatisticalSummary: + """Constructs a StatisticalSummary object from a collection of values. + + :param values: The collection of values to construct from. + + :return: The constructed StatisticalSummary object. + """ # We assume there aren't too many values so that multiple passes of the list don't matter # too much. If this becomes a bottleneck, we can use pandas describe() instead. values = list(values) @@ -167,10 +309,10 @@ def from_values(cls, values: Iterable[float]) -> StatisticalSummary: statistics.mean(values), ) - def infer_auto_ordering(self, ordering: ProfileStatOrdering) -> ProfileStatOrdering: - if ordering != ProfileStatOrdering.AUTO: - return ordering - return ProfileStatOrdering.HIGHER + def infer_auto_comparison(self, comparison: ProfileStatComparison) -> ProfileStatComparison: + if comparison != ProfileStatComparison.AUTO: + return comparison + return ProfileStatComparison.HIGHER def as_table(self, key: str = _DEFAULT_KEY) -> tuple[float, dict[str, float]]: return self.get_value(key), dataclasses.asdict(self) @@ -178,6 +320,15 @@ def as_table(self, key: str = _DEFAULT_KEY) -> tuple[float, dict[str, float]]: @dataclasses.dataclass class StringCollection(ProfileStatAggregation): + """An aggregation type for a collection of strings. + + Supports numerous keys that attempt to aggregate and describe the string values. Also allows + to compare the entire sequence of values for equality if needed. + + :ivar sequence: The sequence of strings. + :ivar counts: A histogram where each string has a separate bin. + """ + _SUPPORTED_KEYS: ClassVar[set[str]] = { "total", "unique", @@ -192,28 +343,45 @@ class StringCollection(ProfileStatAggregation): counts: Counter[str] = dataclasses.field(init=False) def __post_init__(self) -> None: + """Computes the histogram from the sequence.""" self.counts = Counter(self.sequence) @property def unique(self) -> int: + """Get the number of unique strings in the collection. 
+
+        :return: The number of unique strings.
+        """
         return len(self.counts)
 
     @property
     def total(self) -> int:
+        """Get the total number of strings in the collection.
+
+        :return: The total number of strings.
+        """
         return len(self.sequence)
 
     @property
     def min_count(self) -> tuple[str, int]:
+        """Get the string with the fewest occurrences in the collection.
+
+        :return: The least frequent string and its count.
+        """
         return self.counts.most_common()[-1]
 
     @property
     def max_count(self) -> tuple[str, int]:
+        """Get the string with the most occurrences in the collection.
+
+        :return: The most frequent string and its count.
+        """
         return self.counts.most_common()[0]
 
-    def infer_auto_ordering(self, ordering: ProfileStatOrdering) -> ProfileStatOrdering:
-        if ordering != ProfileStatOrdering.AUTO:
-            return ordering
-        return ProfileStatOrdering.EQUALITY
+    def infer_auto_comparison(self, comparison: ProfileStatComparison) -> ProfileStatComparison:
+        if comparison != ProfileStatComparison.AUTO:
+            return comparison
+        return ProfileStatComparison.EQUALITY
 
     def as_table(
         self, key: str = _DEFAULT_KEY
@@ -232,6 +400,12 @@
 
 
 def aggregate_stats(stat: ProfileStat) -> ProfileStatAggregation:
+    """A factory that constructs the proper aggregation object based on the stat value(s) type.
+
+    :param stat: The stat to create the aggregate object from.
+
+    :return: The constructed aggregation object.
+    """
     if isinstance(stat.value, (str, float, int)):
         return SingleValue(stat.value)
     if isinstance(stat.value, Iterable):
@@ -258,11 +432,20 @@
     stat: ProfileStatAggregation,
     other_stat: ProfileStatAggregation,
     key: str,
-    ordering: ProfileStatOrdering,
+    comparison: ProfileStatComparison,
 ) -> StatComparisonResult:
+    """Compares two aggregated stats using the representative key and comparison type.
+
+    :param stat: The first aggregate stat to compare.
+    :param other_stat: The second aggregate stat to compare.
+    :param key: The representative key from the aggregates to compare.
+    :param comparison: The comparison type.
+
+    :return: The comparison result.
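To make the comparison flow concrete, a small usage sketch (hypothetical stat values), assuming the factory wraps float collections in a StatisticalSummary as described in the module docstring:

    from perun.profile import stats as pstats

    baseline = pstats.aggregate_stats(pstats.ProfileStat("runtime", value=[1.2, 1.4, 1.3]))
    target = pstats.aggregate_stats(pstats.ProfileStat("runtime", value=[1.0, 1.1, 1.05]))
    # Compare the medians under lower-is-better semantics.
    result = pstats.compare_stats(
        baseline, target, key="median", comparison=pstats.ProfileStatComparison.LOWER
    )
    assert result is pstats.StatComparisonResult.TARGET_BETTER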
+    """
     value, other_value = stat.get_value(key), other_stat.get_value(key)
-    # Handle auto ordering according to the aggregation type
-    ordering = stat.infer_auto_ordering(ordering)
+    # Handle auto comparison according to the aggregation type
+    comparison = stat.infer_auto_comparison(comparison)
     if type(stat) is not type(other_stat):
         # Invalid comparison attempt
         perun_log.warn(
@@ -270,21 +453,21 @@
         )
         return StatComparisonResult.INVALID
     if value == other_value:
-        # The values are the same, the result is the same regardless of the ordering used
+        # The values are the same, the result is the same regardless of the comparison used
         return StatComparisonResult.EQUAL
-    if ordering == ProfileStatOrdering.EQUALITY:
+    if comparison == ProfileStatComparison.EQUALITY:
         # The values are different and we compare for equality
         return StatComparisonResult.UNEQUAL
     elif value > other_value:
         return (
             StatComparisonResult.BASELINE_BETTER
-            if ordering == ProfileStatOrdering.HIGHER
+            if comparison == ProfileStatComparison.HIGHER
             else StatComparisonResult.TARGET_BETTER
         )
     elif value < other_value:
         return (
             StatComparisonResult.BASELINE_BETTER
-            if ordering == ProfileStatOrdering.LOWER
+            if comparison == ProfileStatComparison.LOWER
             else StatComparisonResult.TARGET_BETTER
         )
     return StatComparisonResult.UNEQUAL
diff --git a/perun/utils/common/diff_kit.py b/perun/utils/common/diff_kit.py
index f13bc35a..79f8e83e 100755
--- a/perun/utils/common/diff_kit.py
+++ b/perun/utils/common/diff_kit.py
@@ -225,7 +225,7 @@ def generate_diff_of_stats(
 
     :param lhs_stats: stats from the baseline
     :param rhs_stats: stats from the target
-    :return: collection of LHS and RHS stats (stat-description, stat-value, stat-tooltip)
+    :return: collection of LHS and RHS stats (stat-name, stat-value, stat-tooltip)
         with HTML styles that reflect the stat diffs.
     """
     # Get all the stats that occur in either lhs or rhs and match those that exist in both
@@ -289,7 +289,7 @@ def generate_diff_of_stats_record(
             lhs_diff.stat_agg,
             rhs_diff.stat_agg,
             lhs_stat.aggregate_by,
-            lhs_diff.stat_agg.infer_auto_ordering(lhs_stat.ordering),
+            lhs_diff.stat_agg.infer_auto_comparison(lhs_stat.cmp),
         )
     return lhs_diff.to_tuple(), rhs_diff.to_tuple()
@@ -313,16 +313,16 @@ def diff_to_html(diff: list[str], start_tag: Literal["+", "-"]) -> str:
     return " ".join(result)
 
 
-def normalize_stat_tooltip(tooltip: str, ordering: pstats.ProfileStatOrdering) -> str:
-    """Normalize a stat tooltip by including the ordering type as well.
+def stat_description_to_tooltip(description: str, comparison: pstats.ProfileStatComparison) -> str:
+    """Transform a stat description into a tooltip by including the comparison type as well.

-    :param tooltip: the original stat tooltip
-    :param ordering: the ordering type of the stat
+    :param description: the original stat description
+    :param comparison: the comparison type of the stat
 
-    :return: the normalized tooltip
+    :return: the generated tooltip
     """
-    ordering_str: str = f'[{ordering.value.replace("_", " ")}]'
-    return f"{tooltip} {ordering_str}"
+    comparison_str: str = f'[{comparison.value.replace("_", " ")}]'
+    return f"{description} {comparison_str}"
 
 
 def _emphasize(value: str, color: ColorChoiceType) -> str:
@@ -359,16 +359,16 @@
     lhs_stat_agg: pstats.ProfileStatAggregation,
     rhs_stat_agg: pstats.ProfileStatAggregation,
     compare_key: str,
-    ordering: pstats.ProfileStatOrdering,
+    comparison: pstats.ProfileStatComparison,
 ) -> tuple[str, str]:
     """Color the stats values on the LHS and RHS according to their difference.
 
-    The color is determined by the stat ordering and the result of the stat values comparison.
+    The color is determined by the stat comparison type and the comparison result.
 
     :param lhs_stat_agg: a baseline stat aggregation
     :param rhs_stat_agg: a target stat aggregation
     :param compare_key: the key by which to compare the stats
-    :param ordering: the ordering type of the stat
+    :param comparison: the comparison type of the stat
 
     :return: colored LHS and RHS stat values
     """
@@ -381,7 +381,7 @@
         pstats.StatComparisonResult.TARGET_BETTER: ("red", "green"),
     }
     # Compare and color the stat entry
-    comparison_result = pstats.compare_stats(lhs_stat_agg, rhs_stat_agg, compare_key, ordering)
+    comparison_result = pstats.compare_stats(lhs_stat_agg, rhs_stat_agg, compare_key, comparison)
     baseline_color, target_color = color_map[comparison_result]
     if comparison_result == pstats.StatComparisonResult.INVALID:
         baseline_value, target_value = "invalid comparison", "invalid comparison"
@@ -430,7 +430,9 @@ def from_stat(
         unit = f" [{stat.unit}]" if stat.unit else ""
         name = f"{stat.name}{unit} " f"({stat_agg.normalize_aggregate_key(stat.aggregate_by)})"
         value = str(stat_agg.as_table()[0])
-        tooltip = normalize_stat_tooltip(stat.tooltip, stat_agg.infer_auto_ordering(stat.ordering))
+        tooltip = stat_description_to_tooltip(
+            stat.description, stat_agg.infer_auto_comparison(stat.cmp)
+        )
         return cls(name, value, tooltip, stat_agg)
 
     def to_tuple(self) -> tuple[str, str, str]:
diff --git a/perun/view_diff/flamegraph/run.py b/perun/view_diff/flamegraph/run.py
index 3e0e8a3b..333b9067 100755
--- a/perun/view_diff/flamegraph/run.py
+++ b/perun/view_diff/flamegraph/run.py
@@ -207,16 +207,16 @@ def process_maxima(
         stats.append(
             profile_stats.ProfileStat(
                 f"Overall {key}",
-                profile_stats.ProfileStatOrdering.LOWER,
-                tooltip=f"The overall value of the {key} for the root value",
+                profile_stats.ProfileStatComparison.LOWER,
+                description=f"The overall value of the {key} for the root value",
                 value=counts[key],
             )
         )
         stats.append(
             profile_stats.ProfileStat(
                 "Maximum Trace Length",
-                profile_stats.ProfileStatOrdering.LOWER,
-                tooltip="Maximum length of the trace in the profile",
+                profile_stats.ProfileStatComparison.LOWER,
+                description="Maximum length of the trace in the profile",
                 value=max_trace,
             )
         )
diff --git a/perun/view_diff/report/run.py b/perun/view_diff/report/run.py
index 10f0bea7..363e0170 100755
--- a/perun/view_diff/report/run.py
+++ b/perun/view_diff/report/run.py
@@ -533,18 +533,18 @@ def process_traces(
         Config().profile_stats[profile_type].append(
             profile_stats.ProfileStat(
                 f"Overall {name}",
-                profile_stats.ProfileStatOrdering.LOWER,
+                profile_stats.ProfileStatComparison.LOWER,
                 unit,
-                tooltip=f"The overall value of the {name} for the root value",
+                description=f"The overall value of the {name} for the root value",
                 value=max_samples[key],
             )
         )
         Config().profile_stats[profile_type].append(
             profile_stats.ProfileStat(
                 "Maximum Trace Length",
-                profile_stats.ProfileStatOrdering.LOWER,
+                profile_stats.ProfileStatComparison.LOWER,
                 "#",
-                tooltip="Maximum length of the trace in the profile",
+                description="Maximum length of the trace in the profile",
                 value=max_trace,
             )
         )

From a6367fde35542171345ae4289158a6842a0fd243 Mon Sep 17 00:00:00 2001
From: Jiri Pavela
Date: Sun, 22 Sep 2024 15:48:37 +0200
Subject: [PATCH 10/14] Refactor the imports.py implementation

---
 perun/cli_groups/import_cli.py    | 132 +++--
 perun/profile/imports.py          | 788 ++++++++++++++++--------------
 perun/profile/stats.py            | 103 ++--
 perun/utils/streams.py            |  96 +++-
 perun/view_diff/flamegraph/run.py |   4 +-
 perun/view_diff/report/run.py     |   4 +-
 6 files changed, 679 insertions(+), 448 deletions(-)

diff --git a/perun/cli_groups/import_cli.py b/perun/cli_groups/import_cli.py
index 7e6f102d..4221a190 100755
--- a/perun/cli_groups/import_cli.py
+++ b/perun/cli_groups/import_cli.py
@@ -9,8 +9,9 @@
 import click
 
 # Perun Imports
-from perun.logic import commands
+from perun.logic import commands, config
 from perun.profile import imports
+from perun.utils.common import cli_kit
 
 
 @click.group("import")
@@ -18,6 +19,7 @@
     "--machine-info",
     "-i",
     type=click.Path(),
+    default="",
     help="Imports machine info from file in JSON format (by default, machine info is loaded from "
     "the current host). You can use `utils/generate_machine_info.sh` script to generate the "
     "machine info file.",
@@ -26,7 +28,9 @@
     "--import-dir",
     "-d",
     type=click.Path(resolve_path=True, readable=True),
-    help="Specifies the directory to import profiles from.",
+    callback=cli_kit.set_config_option_from_flag(config.runtime, "import.dir"),
+    help="Specifies the directory from which to import profiles and other files (e.g., stats, "
+    "machine info, ...) that are provided as relative paths (default = ./).",
 )
 @click.option(
     "--minor-version",
@@ -37,13 +41,13 @@
     help="Specifies the head minor version, for which the profiles will be imported.",
 )
 @click.option(
-    "--stats-info",
+    "--stats-headers",
     "-t",
     nargs=1,
-    default=None,
+    default="",
     metavar="[STAT_HEADER+]",
-    help="Describes the stats headers associated with the imported profiles. Each stats header is "
-    "in the form of 'NAME[|COMPARISON_TYPE[|UNIT[|AGGREGATE_BY[|DESCRIPTION]]]]'.",
+    help="Describes the stats headers associated with imported profiles specified directly in CLI. "
+    "A stats header has the form of 'NAME[|COMPARISON_TYPE[|UNIT[|AGGREGATE_BY[|DESCRIPTION]]]]'.",
 )
 @click.option(
     "--metadata",
@@ -52,18 +56,16 @@
     default=None,
     metavar="['KEY|VALUE|[DESCRIPTION]'] or [FILE.json]",
     help="Describes a single metadata entry associated with the imported profiles as a "
-    "'key|value[|description]' string or multiple entries in a JSON file that will be flattened if "
-    "needed. The --metadata option may be specified multiple times.",
+    "'key|value[|description]' string, or a JSON file that may contain multiple metadata entries "
+    "that will have its keys flattened. The --metadata option may be specified multiple times.",
 )
 @click.option(
     "--cmd",
     "-c",
     nargs=1,
     default="",
-    help=(
-        "Command that was being profiled. Either corresponds to some"
-        " script, binary or command, e.g. ``./mybin`` or ``perun``."
-    ),
+    help="Command that was being profiled. Either corresponds to some script, binary or command, "
+    "e.g. ``./mybin`` or ``perun``.",
 )
 @click.option(
     "--workload",
@@ -76,12 +78,17 @@
     "--save-to-index",
     "-s",
     is_flag=True,
-    help="Saves the imported profile to index.",
     default=False,
+    help="Saves the imported profile to index.",
 )
 @click.pass_context
 def import_group(ctx: click.Context, **kwargs: Any) -> None:
-    """Imports Perun profiles from different formats"""
+    """Imports Perun profiles from different formats.
+
+    If the --import-dir parameter is specified, relative file paths will be prefixed with the
+    import directory path (with the default value being the current working directory).
+    Absolute file paths ignore the import directory.
+    """
     commands.try_init()
     ctx.obj = kwargs
 
@@ -99,15 +106,16 @@ def perf_group(ctx: click.Context, **kwargs: Any) -> None:
 
     This supports either profiles collected in:
 
-    1. Binary format: e.g., `collected.data` files, that are results of `perf record`
-    2. Text format: result of `perf script` that parses the binary into user-friendly and
-       parsing-friendly text format
+    1. Binary format: e.g., `collected.data` files, that are results of `perf record`
+
+    2. Text format: result of `perf script` that parses the binary into user-friendly and
+       parsing-friendly text format
     """
     ctx.obj.update(kwargs)
 
 
 @perf_group.command("record")
-@click.argument("imported", nargs=-1, required=True)
+@click.argument("import_entries", nargs=-1, required=True)
 @click.pass_context
 @click.option(
     "--with-sudo",
@@ -116,30 +124,83 @@ def perf_group(ctx: click.Context, **kwargs: Any) -> None:
     help="Runs the conversion of the data in sudo mode.",
     default=False,
 )
-def from_binary(ctx: click.Context, imported: list[str], **kwargs: Any) -> None:
-    """Imports Perun profiles from binary generated by `perf record` command"""
+def from_binary(ctx: click.Context, import_entries: list[str], **kwargs: Any) -> None:
+    """Imports Perun profiles from binary generated by `perf record` command.
+
+    Multiple import entries may be specified; an import entry is either a profile entry
+
+    'profile_path[,<exit-code>[,<stat-value>]+]'
+
+    where each stat value corresponds to a stats header specified in the --stats-headers option,
+    or a CSV file entry
+
+    'file.csv'
+
+    where the CSV file is in the format
+
+    #Profile,Exit_code[,stat-header1]+
+    profile_path[,<exit-code>[,<stat-value>]+]
+    ...
+
+    that combines the --stats-headers option and profile entries.
+    """
     kwargs.update(ctx.obj)
-    imports.import_perf_from_record(imported, **kwargs)
+    imports.import_perf_from_record(import_entries, **kwargs)
 
 
 @perf_group.command("script")
-@click.argument("imported", type=str, nargs=-1, required=True)
+@click.argument("import_entries", type=str, nargs=-1, required=True)
 @click.pass_context
-def from_text(ctx: click.Context, imported: list[str], **kwargs: Any) -> None:
-    """Import Perun profiles from output generated by `perf script` command"""
+def from_text(ctx: click.Context, import_entries: list[str], **kwargs: Any) -> None:
    """Import Perun profiles from output generated by `perf script` command.
+
+    Multiple import entries may be specified; an import entry is either a profile entry
+
+    'profile_path[,<exit-code>[,<stat-value>]+]'
+
+    where each stat value corresponds to a stats header specified in the --stats-headers option,
+    or a CSV file entry
+
+    'file.csv'
+
+    where the CSV file is in the format
+
+    #Profile,Exit_code[,stat-header1]+
+    profile_path[,<exit-code>[,<stat-value>]+]
+    ...
+
+    that combines the --stats-headers option and profile entries.
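For illustration (hypothetical file names and stat values), a stats header with two profile entries may be imported directly

    perun import -t 'runtime|lower_is_better|s' perf script prof1.txt,0,1.23 prof2.txt,1,1.52

or equivalently through a CSV entry file

    #Profile,Exit_code,runtime|lower_is_better|s
    prof1.txt,0,1.23
    prof2.txt,1,1.52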
+    """
     kwargs.update(ctx.obj)
-    imports.import_perf_from_script(imported, **kwargs)
+    imports.import_perf_from_script(import_entries, **kwargs)
 
 
 @perf_group.command("stack")
-@click.argument("imported", type=str, nargs=-1, required=True)
+@click.argument("import_entries", type=str, nargs=-1, required=True)
 @click.pass_context
-def from_stacks(ctx: click.Context, imported: list[str], **kwargs: Any) -> None:
+def from_stacks(ctx: click.Context, import_entries: list[str], **kwargs: Any) -> None:
     """Import Perun profiles from output generated by `perf script | stackcollapse-perf.pl`
-    command
+    command.
+
+    Multiple import entries may be specified; an import entry is either a profile entry
+
+    'profile_path[,<exit-code>[,<stat-value>]+]'
+
+    where each stat value corresponds to a stats header specified in the --stats-headers option,
+    or a CSV file entry
+
+    'file_path.csv'
+
+    where the CSV file is in the format
+
+    #Profile,Exit_code[,stat-header1]+
+    profile_path[,<exit-code>[,<stat-value>]+]
+    ...
+
+    that combines the --stats-headers option and profile entries.
+    """
     kwargs.update(ctx.obj)
-    imports.import_perf_from_stack(imported, **kwargs)
+    imports.import_perf_from_stack(import_entries, **kwargs)
 
 
 @import_group.group("elk")
@@ -155,15 +216,18 @@ def elk_group(ctx: click.Context, **kwargs: Any) -> None:
 
     The command supports profiles collected in:
 
-    1. JSON format: files, that are extracted from ELK or are stored using format compatible with ELK.
+    1. JSON format: files extracted from ELK or stored using format compatible with ELK.
     """
     ctx.obj.update(kwargs)
 
 
 @elk_group.command("json")
-@click.argument("imported", nargs=-1, required=True)
+@click.argument("import_entries", nargs=-1, required=True)
 @click.pass_context
-def from_json(ctx: click.Context, imported: list[str], **kwargs: Any) -> None:
-    """Imports Perun profiles from json compatible with elk infrastructure"""
+def from_json(ctx: click.Context, import_entries: list[str], **kwargs: Any) -> None:
+    """Imports Perun profiles from JSON compatible with elk infrastructure.
+
+    Each import entry may specify a JSON path 'file_path.json'.
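A minimal accepted input file (hypothetical keys and values), following the `{'queries': []}` layout that the ELK loader expects:

    {
      "queries": [
        {"metric.runtime": 1.23, "machine.arch": "x86_64", "machine.os": "linux"}
      ]
    }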
+ """ kwargs.update(ctx.obj) - imports.import_elk_from_json(imported, **kwargs) + imports.import_elk_from_json(import_entries, **kwargs) diff --git a/perun/profile/imports.py b/perun/profile/imports.py index 72a12015..fda8a44a 100755 --- a/perun/profile/imports.py +++ b/perun/profile/imports.py @@ -5,19 +5,19 @@ # Standard Imports from collections import defaultdict import csv -from dataclasses import dataclass, field, asdict +from dataclasses import asdict, dataclass import gzip import json import os from pathlib import Path import subprocess -from typing import Any, Optional, Iterator +from typing import Any # Third-Party Imports # Perun Imports from perun.collect.kperf import parser -from perun.logic import commands, index, pcs +from perun.logic import commands, config, index, pcs from perun.profile import query, helpers as profile_helpers, stats as profile_stats from perun.profile.factory import Profile from perun.utils import log, streams @@ -27,266 +27,41 @@ from perun.vcs import vcs_kit -# TODO: add documentation -# TODO: fix stats in other types of diffviews -# TODO: refactor the perf import type commands: there is a lot of code duplication -# TODO: unify handling of path open success / fail - - @dataclass -class ImportProfileSpec: - path: Path - exit_code: int = 0 - values: list[float | str] = field(default_factory=list) - - -class ImportedProfiles: - """ - Note: I would reconsider this class or refactor it, removing the logical elements, it obfuscates the logic a little - and makes the functions less readable (there are not streams/pipes as is most of the logic/perun); I for one am - rather "fan" of generic functions that takes structures and returns structure than classes with methods/logic. - TODO: the import-dir could be removed by extracting this functionality to command-line callback and massage - the paths during the CLI parsing; hence assuming that the paths are correct when importing. I think the parameter - only complicates the code. 
- """ - - __slots__ = "import_dir", "stats", "profiles" - - def __init__(self, targets: list[str], import_dir: str | None, stats_info: str | None) -> None: - self.import_dir: Path = Path(import_dir.strip()) if import_dir is not None else Path.cwd() - # Parse the CLI stats if available - self.stats: list[profile_stats.ProfileStat] = [] - self.profiles: list[ImportProfileSpec] = [] - - if stats_info is not None: - self.stats = [ - profile_stats.ProfileStat.from_string(*stat.split("|")) - for stat in stats_info.split(",") - ] - - for target in targets: - if target.lower().endswith(".csv"): - # The input is a csv file - self._parse_import_csv(target) - else: - # The input is a file path - self._add_imported_profile(target.split(",")) - - def __iter__(self) -> Iterator[ImportProfileSpec]: - return iter(self.profiles) - - def __len__(self) -> int: - return len(self.profiles) - - def get_exit_codes(self) -> list[int]: - return [p.exit_code for p in self.profiles] - - def aggregate_stats(self) -> Iterator[profile_stats.ProfileStat]: - stat_value_lists: list[list[float | str]] = [[] for _ in range(len(self.stats))] - for profile in self.profiles: - value_list: list[float | str] - stat_value: float | str - for value_list, stat_value in zip(stat_value_lists, profile.values): - value_list.append(stat_value) - for value_list, stat_obj in zip(stat_value_lists, self.stats): - if len(value_list) == 1: - stat_obj.value = value_list[0] - else: - stat_obj.value = value_list - yield stat_obj - - def _parse_import_csv(self, target: str) -> None: - csv_path = _massage_import_path(self.import_dir, target) - try: - with open(csv_path, "r") as csvfile: - log.minor_success(log.path_style(str(csv_path)), "found") - csv_reader = csv.reader(csvfile, delimiter=",") - header: list[str] = next(csv_reader) - stats: list[profile_stats.ProfileStat] = [ - profile_stats.ProfileStat.from_string(*stat_definition.split("|")) - for stat_definition in header[2:] - ] - # Parse the CSV stat definition and check that they are not in conflict with the CLI - # stat definitions, if any - for idx, stat in enumerate(stats): - if idx >= len(self.stats): - self.stats.append(stat) - elif stat != self.stats[idx]: - log.warn( - f"Mismatching profile stat definition from CLI and CSV: " - f"cli.{self.stats[idx].name} != csv.{stat.name}. " - f"Using the CLI stat definition." - ) - # Parse the remaining rows that should represent profile specifications - for row in csv_reader: - self._add_imported_profile(row) - except OSError as exc: - log.minor_fail(log.path_style(str(csv_path)), "not found") - log.error(str(exc), exc) - - def _add_imported_profile(self, target: list[str]) -> None: - if len(target) == 0: - # Empty profile specification, warn - log.warn("Empty import profile specification. Skipping.") - else: - # Make sure we strip the leading and trailing whitespaces in each column value - profile_info = ImportProfileSpec( - _massage_import_path(self.import_dir, target[0]), - int(target[1].strip()) if len(target) >= 2 else ImportProfileSpec.exit_code, - list(map(ImportedProfiles._massage_stat_value, target[2:])), - ) - if profile_info.exit_code != 0: - log.warn("Importing a profile with non-zero exit code.") - self.profiles.append(profile_info) - - @staticmethod - def _massage_stat_value(stat_value: str) -> str | float: - stat_value = stat_value.strip() - try: - return float(stat_value) - except ValueError: - return stat_value +class _PerfProfileSpec: + """A representation of a perf profile record to import. 
- -def load_file(filepath: Path) -> str: - """Tests if the file is packed by gzip and unpacks it, otherwise reads it as a text file - - :param filepath: path with source file - :return: the content of the file - """ - if filepath.suffix.lower() == ".gz": - with open(filepath, "rb") as f: - header = f.read(2) - f.seek(0) - assert header == b"\x1f\x8b" - with gzip.GzipFile(fileobj=f) as gz: - return gz.read().decode("utf-8") - with open(filepath, "r", encoding="utf-8") as imported_handle: - return imported_handle.read() - - -def get_machine_info(import_dir: Path, machine_info: Optional[str] = None) -> dict[str, Any]: - """Returns machine info either from input file or constructs it from environment - - :param import_dir: path to a directory where the machine info will be looked up - :param machine_info: file in json format, which contains machine specification - :return: parsed dictionary format of machine specification - """ - if machine_info is not None: - info_path = _massage_import_path(import_dir, machine_info) - try: - with open(info_path, "r") as machine_handle: - log.minor_success(log.path_style(str(info_path)), "found") - return json.load(machine_handle) - except OSError: - log.minor_fail(log.path_style(str(info_path)), "not found, generating info") - return environment.get_machine_specification() - - -def import_perf_profile( - profiles: ImportedProfiles, - resources: list[dict[str, Any]], - minor_version: MinorVersion, - machine_info: Optional[str] = None, - with_sudo: bool = False, - save_to_index: bool = False, - **kwargs: Any, -) -> None: - """Constructs the profile for perf-collected data and saves them to jobs or index - - :param profiles: list of to-be-imported profiles - :param resources: list of parsed resources - :param minor_version: minor version corresponding to the imported profiles - :param machine_info: additional dictionary with machine specification - :param with_sudo: indication whether the data were collected with sudo - :param save_to_index: indication whether we should save the imported profiles to index - :param kwargs: rest of the parameters + :ivar path: the absolute path to the perf profile. + :ivar exit_code: the exit code of the profile collection process. 
""" - prof = Profile( - { - "global": { - "time": "???", - "resources": resources, - }, - "origin": minor_version.checksum, - "machine": get_machine_info(profiles.import_dir, machine_info), - "metadata": [ - asdict(data) - for data in _import_metadata(profiles.import_dir, kwargs.get("metadata", tuple())) - ], - "stats": [asdict(stat) for stat in profiles.aggregate_stats()], - "header": { - "type": "time", - "cmd": kwargs.get("cmd", ""), - "exitcode": profiles.get_exit_codes(), - "workload": kwargs.get("workload", ""), - "units": {"time": "sample"}, - }, - "collector_info": { - "name": "kperf", - "params": { - "with_sudo": with_sudo, - "warmup": kwargs.get("warmup", 0), - "repeat": len(profiles), - }, - }, - "postprocessors": [], - } - ) - save_imported_profile(prof, save_to_index, minor_version) - - -def save_imported_profile(prof: Profile, save_to_index: bool, minor_version: MinorVersion) -> None: - """Saves the imported profile either to index or to pending jobs - - :param prof: imported profile - :param minor_version: minor version corresponding to the imported profiles - :param save_to_index: indication whether we should save the imported profiles to index - """ - full_profile_name = profile_helpers.generate_profile_name(prof) - profile_directory = pcs.get_job_directory() - full_profile_path = os.path.join(profile_directory, full_profile_name) - - streams.store_json(prof.serialize(), full_profile_path) - log.minor_status( - "stored generated profile ", - status=f"{log.path_style(os.path.relpath(full_profile_path))}", - ) - if save_to_index: - commands.add([full_profile_path], minor_version.checksum, keep_profile=False) - else: - # Else we register the profile in pending index - index.register_in_pending_index(full_profile_path, prof) + path: Path + exit_code: int = 0 @vcs_kit.lookup_minor_version def import_perf_from_record( - imported: list[str], - import_dir: str | None, - stats_info: str | None, + import_entries: list[str], + stats_headers: str, minor_version: str, with_sudo: bool = False, **kwargs: Any, ) -> None: - """Imports profile collected by `perf record` + """Imports profiles collected by `perf record` command. - It does some black magic in ImportedProfiles probably, then for each filename it runs the - perf script + parser script to generate the profile. + First, the function parses all the perf import entries and stats headers, and then it runs + the perf script + parser script for each entry to generate the profile. - :param imported: list of files with imported data - :param import_dir: different directory for importing the profiles - :param stats_info: additional statistics collected for the profile (i.e. non-resource types) - :param minor_version: minor version corresponding to the imported profiles - :param with_sudo: indication whether the data were collected with sudo - :param kwargs: rest of the parameters + :param import_entries: a collection of import entries (profiles or CSV files). + :param stats_headers: CLI-specified stats headers. + :param minor_version: minor version corresponding to the imported profiles. + :param with_sudo: indication whether the data were collected with sudo. + :param kwargs: rest of the parameters. 
""" parse_script = script_kit.get_script("stackcollapse-perf.pl") - minor_version_info = pcs.vcs().get_minor_version_info(minor_version) - - profiles = ImportedProfiles(imported, import_dir, stats_info) - + profiles, stats = _parse_perf_import_entries(import_entries, stats_headers) resources = [] + for imported_file in profiles: perf_script_command = ( f"{'sudo ' if with_sudo else ''}perf script -i {imported_file.path} | {parse_script}" @@ -303,102 +78,158 @@ def import_perf_from_record( log.error(f"Cannot load data due to: {err}") resources.extend(parser.parse_events(out.decode("utf-8").split("\n"))) log.minor_success(log.path_style(str(imported_file.path)), "imported") - import_perf_profile(profiles, resources, minor_version_info, with_sudo=with_sudo, **kwargs) + minor_version_info = pcs.vcs().get_minor_version_info(minor_version) + import_perf_profile( + profiles, stats, resources, minor_version_info, with_sudo=with_sudo, **kwargs + ) @vcs_kit.lookup_minor_version def import_perf_from_script( - imported: list[str], - import_dir: str | None, - stats_info: str | None, + import_entries: list[str], + stats_headers: str, minor_version: str, **kwargs: Any, ) -> None: - """Imports profile collected by `perf record | perf script` + """Imports profiles collected by `perf record | perf script` command. - It does some black magic in ImportedProfiles probably, then for each filename it runs the - parser script to generate the profile. + First, the function parses all the perf import entries and stats headers, and then it runs + the parser script for each entry to generate the profile. - :param imported: list of files with imported data - :param import_dir: different directory for importing the profiles - :param stats_info: additional statistics collected for the profile (i.e. non-resource types) - :param minor_version: minor version corresponding to the imported profiles - :param kwargs: rest of the parameters + :param import_entries: a collection of import entries (profiles or CSV files). + :param stats_headers: CLI-specified stats headers. + :param minor_version: minor version corresponding to the imported profiles. + :param kwargs: rest of the parameters. """ parse_script = script_kit.get_script("stackcollapse-perf.pl") - minor_version_info = pcs.vcs().get_minor_version_info(minor_version) - - profiles = ImportedProfiles(imported, import_dir, stats_info) - + profiles, stats = _parse_perf_import_entries(import_entries, stats_headers) resources = [] + for imported_file in profiles: perf_script_command = f"cat {imported_file.path} | {parse_script}" out, _ = external_commands.run_safely_external_command(perf_script_command) log.minor_success(f"Raw data from {log.path_style(str(imported_file.path))}", "collected") resources.extend(parser.parse_events(out.decode("utf-8").split("\n"))) log.minor_success(log.path_style(str(imported_file.path)), "imported") - import_perf_profile(profiles, resources, minor_version_info, **kwargs) + minor_version_info = pcs.vcs().get_minor_version_info(minor_version) + import_perf_profile(profiles, stats, resources, minor_version_info, **kwargs) @vcs_kit.lookup_minor_version def import_perf_from_stack( - imported: list[str], - import_dir: str | None, - stats_info: str | None, + import_entries: list[str], + stats_headers: str, minor_version: str, **kwargs: Any, ) -> None: - """Imports profile collected by `perf record | perf script` + """Imports profiles collected by `perf record | perf script | stackcollapse-perf.pl` command. 
- It does some black magic in ImportedProfiles probably, then for each filename parses the files. + First, the function parses all the perf import entries and stats headers, and then it parses + each entry to generate the profile. - :param imported: list of files with imported data - :param import_dir: different directory for importing the profiles - :param stats_info: additional statistics collected for the profile (i.e. non-resource types) - :param minor_version: minor version corresponding to the imported profiles - :param kwargs: rest of the parameters + :param import_entries: a collection of import entries (profiles or CSV files). + :param stats_headers: CLI-specified stats headers. + :param minor_version: minor version corresponding to the imported profiles. + :param kwargs: rest of the parameters. """ - minor_version_info = pcs.vcs().get_minor_version_info(minor_version) - profiles = ImportedProfiles(imported, import_dir, stats_info) - + profiles, stats = _parse_perf_import_entries(import_entries, stats_headers) resources = [] for imported_profile in profiles: - out = load_file(imported_profile.path) + out = load_perf_file(imported_profile.path) resources.extend(parser.parse_events(out.split("\n"))) log.minor_success(log.path_style(str(imported_profile.path)), "imported") - import_perf_profile(profiles, resources, minor_version_info, **kwargs) + minor_version_info = pcs.vcs().get_minor_version_info(minor_version) + import_perf_profile(profiles, stats, resources, minor_version_info, **kwargs) -def extract_machine_info_from_metadata(metadata: dict[str, Any]) -> dict[str, Any]: - """Extracts the parts of the profile, that corresponds to machine info +@vcs_kit.lookup_minor_version +def import_elk_from_json( + import_entries: list[str], + metadata: tuple[str, ...] | None, + minor_version: str, + **kwargs: Any, +) -> None: + """Imports the ELK stored data from the json data. - Note that not many is collected from the ELK formats, and it can vary greatly, - hence, most of the machine specification and environment should be in metadata instead. + The loading expects the json files to be in form of `{'queries': []}`. - :param metadata: metadata extracted from the ELK profiles - :return: machine info extracted from the profiles + :param import_entries: list of filenames with elk data. + :param metadata: CLI-supplied additional metadata. Metadata specified in JSON take precedence. + :param minor_version: minor version corresponding to the imported profiles. + :param kwargs: rest of the parameters. 
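The precedence rule boils down to an ordinary dictionary update keyed by the metadata name; a minimal sketch with hypothetical entries:

    merged = {"machine.os": cli_supplied_entry}  # seeded from --metadata
    merged.update({"machine.os": elk_extracted_entry})  # the ELK-extracted value wins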
""" - machine_info = { - "architecture": metadata.get("machine.arch", "?"), - "system": metadata.get("machine.os", "?").capitalize(), - "release": metadata.get("extra.machine.platform", "?"), - "host": metadata.get("machine.hostname", "?"), - "cpu": { - "physical": "?", - "total": metadata.get("machine.cpu-cores", "?"), - "frequency": "?", - }, - "memory": { - "total_ram": metadata.get("machine.ram", "?"), - "swap": "?", - }, - "boot_info": "?", - "mem_details": {}, - "cpu_details": [], + import_dir = Path(config.lookup_key_recursively("import.dir", os.getcwd())) + resources: list[dict[str, Any]] = [] + # Load the CLI-supplied metadata, if any + elk_metadata: dict[str, profile_helpers.ProfileMetadata] = { + data.name: data for data in _import_metadata(metadata, import_dir) } - return machine_info + for elk_file in import_entries: + elk_file_path = _massage_import_path(elk_file, import_dir) + with streams.safely_open_and_log(elk_file_path, "r", fatal_fail=True) as elk_handle: + imported_json = json.load(elk_handle) + assert ( + "queries" in imported_json.keys() + ), "expected the JSON to contain list of dictionaries in 'queries' key" + r, m = extract_from_elk(imported_json["queries"]) + resources.extend(r) + # Possibly overwrite CLI-supplied metadata when identical keys are found + elk_metadata.update(m) + log.minor_success(log.path_style(str(elk_file_path)), "imported") + minor_version_info = pcs.vcs().get_minor_version_info(minor_version) + import_elk_profile(resources, elk_metadata, minor_version_info, **kwargs) + + +def import_perf_profile( + profiles: list[_PerfProfileSpec], + stats: list[profile_stats.ProfileStat], + resources: list[dict[str, Any]], + minor_version: MinorVersion, + **kwargs: Any, +) -> None: + """Constructs the profile for perf-collected data and saves them to jobs or index. + + :param profiles: a collection of specifications of the profiles that are being imported. + :param stats: a collection of profiles statistics that should be associated with the profile. + :param resources: a collection of parsed resources. + :param minor_version: minor version corresponding to the imported profiles. + :param kwargs: rest of the parameters. + """ + import_dir = Path(config.lookup_key_recursively("import.dir", os.getcwd())) + prof = Profile( + { + "global": { + "time": "???", + "resources": resources, + }, + "origin": minor_version.checksum, + "machine": get_machine_info(kwargs.get("machine_info", ""), import_dir), + "metadata": [ + asdict(data) + for data in _import_metadata(kwargs.get("metadata", tuple()), import_dir) + ], + "stats": [asdict(stat) for stat in stats], + "header": { + "type": "time", + "cmd": kwargs.get("cmd", ""), + "exitcode": [profile.exit_code for profile in profiles], + "workload": kwargs.get("workload", ""), + "units": {"time": "sample"}, + }, + "collector_info": { + "name": "kperf", + "params": { + "with_sudo": kwargs.get("with_sudo", False), + "warmup": kwargs.get("warmup", 0), + "repeat": len(profiles), + }, + }, + "postprocessors": [], + } + ) + save_imported_profile(prof, kwargs.get("save_to_index", False), minor_version) def import_elk_profile( @@ -408,13 +239,13 @@ def import_elk_profile( save_to_index: bool = False, **kwargs: Any, ) -> None: - """Constructs the profile for elk-stored data and saves them to jobs or index + """Constructs the profile for elk-stored data and saves them to jobs or index. 
- :param resources: list of parsed resources - :param metadata: parts of the profiles that will be stored as metadata in the profile - :param minor_version: minor version corresponding to the imported profiles - :param save_to_index: indication whether we should save the imported profiles to index - :param kwargs: rest of the parameters + :param resources: list of parsed resources. + :param metadata: parts of the profiles that will be stored as metadata in the profile. + :param minor_version: minor version corresponding to the imported profiles. + :param save_to_index: indication whether we should save the imported profiles to index. + :param kwargs: rest of the parameters. """ prof = Profile( { @@ -423,10 +254,8 @@ def import_elk_profile( "resources": resources, }, "origin": minor_version.checksum, - "metadata": [ - profile_helpers.ProfileMetadata(key, value) for key, value in metadata.items() - ], - "machine": extract_machine_info_from_metadata(metadata), + "metadata": [asdict(data) for data in metadata.values()], + "machine": extract_machine_info_from_elk_metadata(metadata), "header": { "type": "time", "cmd": kwargs.get("cmd", ""), @@ -444,17 +273,61 @@ def import_elk_profile( save_imported_profile(prof, save_to_index, minor_version) +def save_imported_profile(prof: Profile, save_to_index: bool, minor_version: MinorVersion) -> None: + """Saves the imported profile either to index or to pending jobs. + + :param prof: imported profile + :param minor_version: minor version corresponding to the imported profiles. + :param save_to_index: indication whether we should save the imported profiles to index. + """ + full_profile_name = profile_helpers.generate_profile_name(prof) + profile_directory = pcs.get_job_directory() + full_profile_path = os.path.join(profile_directory, full_profile_name) + + streams.store_json(prof.serialize(), full_profile_path) + log.minor_status( + "stored generated profile ", + status=f"{log.path_style(os.path.relpath(full_profile_path))}", + ) + if save_to_index: + commands.add([full_profile_path], minor_version.checksum, keep_profile=False) + else: + # Else we register the profile in pending index + index.register_in_pending_index(full_profile_path, prof) + + +def load_perf_file(filepath: Path) -> str: + """Tests if the file is packed by gzip and unpacks it, otherwise reads it as a text file. + + :param filepath: path to the perf file. + + :return: the content of the file. + """ + if filepath.suffix.lower() == ".gz": + with streams.safely_open_and_log(filepath, "rb", fatal_fail=True) as gz_handle: + header = gz_handle.read(2) + gz_handle.seek(0) + assert header == b"\x1f\x8b" + with gzip.GzipFile(fileobj=gz_handle) as gz: + return gz.read().decode("utf-8") + with streams.safely_open_and_log( + filepath, "r", fatal_fail=True, encoding="utf-8" + ) as txt_handle: + return txt_handle.read() + + def extract_from_elk( elk_query: list[dict[str, Any]] -) -> tuple[list[dict[str, Any]], dict[str, Any]]: +) -> tuple[list[dict[str, Any]], dict[str, profile_helpers.ProfileMetadata]]: """For the given elk query, extracts resources and metadata. For metadata, we consider any key that has only single value through the profile, and is not linked to keywords `metric` or `benchmarking`. - For resources, we consider anything that is not identified as metadata + For resources, we consider anything that is not identified as metadata. + + :param elk_query: query from the elk in form of list of resource. 

-    :return: list of resources and metadata
+    :return: list of resources and metadata.
     """
     res_counter = defaultdict(set)
     for res in elk_query:
@@ -466,7 +339,7 @@ def extract_from_elk(
         if not k.startswith("metric") and not k.startswith("benchmarking") and len(v) == 1
     }
 
-    metadata = {k: res_counter[k].pop() for k in metadata_keys}
+    metadata = {k: profile_helpers.ProfileMetadata(k, res_counter[k].pop()) for k in metadata_keys}
     resources = [
         {
             k: common_kit.try_convert(v, [int, float, str])
@@ -484,55 +357,81 @@ def extract_from_elk(
     return resources, metadata
 
 
-@vcs_kit.lookup_minor_version
-def import_elk_from_json(
-    imported: list[str],
-    minor_version: str,
-    **kwargs: Any,
-) -> None:
-    """Imports the ELK stored data from the json data.
+def get_machine_info(machine_info: str, import_dir: Path) -> dict[str, Any]:
+    """Returns machine info either from an input file or constructs it from the environment.
 
-    The loading expects the json files to be in form of `{'queries': []}`.
+    :param machine_info: relative or absolute path to machine specification JSON file. In case of
+        an empty string, the machine info will be constructed from the environment.
+    :param import_dir: import directory where to look for the machine info file if the provided
+        path is relative.
 
-    :param imported: list of filenames with elk data.
-    :param minor_version: minor version corresponding to the imported profiles
-    :param kwargs: rest of the parameters
+    :return: parsed or constructed machine specification.
     """
-    minor_version_info = pcs.vcs().get_minor_version_info(minor_version)
+    if machine_info:
+        # Some machine info path has been provided.
+        info_path = _massage_import_path(machine_info, import_dir)
+        with streams.safely_open_and_log(
+            info_path, "r", fail_msg="not found, generating info from environment instead"
+        ) as info_handle:
+            if info_handle is not None:
+                json_data = json.load(info_handle)
+                log.minor_success(log.path_style(str(info_path)), "parsed")
+                return json_data
+    # No machine info file might have been provided, or an invalid path was specified.
+    # Construct the machine info from the current machine.
+    return environment.get_machine_specification()
 
-    resources: list[dict[str, Any]] = []
-    metadata: dict[str, Any] = {}
-    for imported_file in imported:
-        with open(imported_file, "r") as imported_handle:
-            imported_json = json.load(imported_handle)
-            assert (
-                "queries" in imported_json.keys()
-            ), "expected the JSON to contain list of dictionaries in 'queries' key"
-            r, m = extract_from_elk(imported_json["queries"])
-        resources.extend(r)
-        metadata.update(m)
-        log.minor_success(log.path_style(str(imported_file)), "imported")
-    import_elk_profile(resources, metadata, minor_version_info, **kwargs)
+
+def extract_machine_info_from_elk_metadata(metadata: dict[str, Any]) -> dict[str, Any]:
+    """Extracts the parts of the profile that correspond to machine info.
+
+    Note that not much information is collected from the ELK formats, and it can vary greatly;
+    hence, most of the machine specification and environment should be in metadata instead.
+
+    :param metadata: metadata extracted from the ELK profiles.
+
+    :return: machine info extracted from the profiles.
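For instance (hypothetical values), flattened ELK metadata such as

    {"machine.arch": "x86_64", "machine.os": "linux", "machine.ram": "32GB"}

is mapped onto the corresponding machine info fields below, with "?" filled in for anything the ELK data does not provide.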
+    """
+    machine_info = {
+        "architecture": metadata.get("machine.arch", "?"),
+        "system": metadata.get("machine.os", "?").capitalize(),
+        "release": metadata.get("extra.machine.platform", "?"),
+        "host": metadata.get("machine.hostname", "?"),
+        "cpu": {
+            "physical": "?",
+            "total": metadata.get("machine.cpu-cores", "?"),
+            "frequency": "?",
+        },
+        "memory": {
+            "total_ram": metadata.get("machine.ram", "?"),
+            "swap": "?",
+        },
+        "boot_info": "?",
+        "mem_details": {},
+        "cpu_details": [],
+    }
+
+    return machine_info
 
 
 def _import_metadata(
-    import_dir: Path, metadata: tuple[str, ...] | None
+    metadata: tuple[str, ...] | None, import_dir: Path
 ) -> list[profile_helpers.ProfileMetadata]:
-    """Parse the metadata strings from CLI and convert them to our internal representation.
+    """Parse the metadata entries from CLI and convert them to our internal representation.
 
-    :param import_dir: the import directory to use for relative metadata file paths
-    :param metadata: a collection of metadata strings or files
+    :param metadata: a collection of metadata entries or JSON files.
+    :param import_dir: the import directory to use for relative metadata file paths.
 
     :return: a collection of parsed and converted metadata objects
     """
     p_metadata: list[profile_helpers.ProfileMetadata] = []
     if metadata is None:
-        return []
+        return p_metadata
     # Normalize the metadata string for parsing and/or opening the file
     for metadata_str in map(str.strip, metadata):
         if metadata_str.lower().endswith(".json"):
             # Update the metadata collection with entries from the json file
-            p_metadata.extend(_parse_metadata_json(_massage_import_path(import_dir, metadata_str)))
+            p_metadata.extend(_parse_metadata_json(_massage_import_path(metadata_str, import_dir)))
         else:
             # Add a single metadata entry parsed from its string representation
             try:
@@ -545,34 +444,193 @@
 def _parse_metadata_json(metadata_path: Path) -> list[profile_helpers.ProfileMetadata]:
     """Parse a metadata JSON file into the metadata objects.
 
-    :param metadata_path: the path to the metadata JSON
+    If the JSON file contains nested dictionaries, the hierarchical keys will be flattened.
+
+    :param metadata_path: the path to the metadata JSON.
+
+    :return: a collection of parsed metadata objects.
+    """
+    with streams.safely_open_and_log(
+        metadata_path, "r", fail_msg="not found, skipping"
+    ) as metadata_handle:
+        if metadata_handle is None:
+            return []
+        # Make sure we flatten the input
+        metadata_list = [
+            profile_helpers.ProfileMetadata(k, v)
+            for k, v in query.all_items_of(json.load(metadata_handle))
+        ]
+        log.minor_success(log.path_style(str(metadata_path)), "parsed")
+        return metadata_list
+
+
+def _parse_perf_import_entries(
+    import_entries: list[str], cli_stats_headers: str
+) -> tuple[list[_PerfProfileSpec], list[profile_stats.ProfileStat]]:
+    """Parses perf import entries and stats.
+
+    An import entry is either a profile entry
+
+    'profile_path[,<exit-code>[,<stat-value>]+]'
+
+    where each stat value corresponds to a stats header specified in the cli_stats_headers, or
+    a CSV file entry
+
+    'file_path.csv'
+
+    where the CSV file is in the format
+
+    #Profile,Exit_code[,stat-header1]+
+    profile_path[,<exit-code>[,<stat-value>]+]
+    ...
+
+    that combines the --stats-headers option and profile entries. Stats specified in a CSV file
+    apply only to profile entries in the CSV file. Similarly, CLI-specified stats apply only to
+    profile entries specified directly in CLI.
+
+    :param import_entries: the perf import entries to parse.
+    :param cli_stats_headers: the stats headers specified in CLI.
+
+
+def _parse_perf_import_csv(
+    csv_file: str,
+    import_dir: Path,
+    profiles: list[_PerfProfileSpec],
+    stats: list[profile_stats.ProfileStat],
+) -> None:
+    """Parse stats headers and perf import entries in a CSV file.
+
+    :param csv_file: the CSV file to parse.
+    :param import_dir: the import directory to use for relative profile file paths.
+    :param profiles: profile specifications that will be extended with the parsed profiles.
+    :param stats: profile stats that will be merged with the CSV stats.
+    """
+    csv_path = _massage_import_path(csv_file, import_dir)
+    with streams.safely_open_and_log(csv_path, "r", fatal_fail=True) as csvfile:
+        csv_reader = csv.reader(csvfile, delimiter=",")
+        try:
+            header: list[str] = next(csv_reader)
+        except StopIteration:
+            # Empty CSV file, skip
+            log.warn(f"Empty import file {csv_path}. Skipping.")
+            return
+        # Parse the stats headers
+        csv_stats: list[profile_stats.ProfileStat] = [
+            profile_stats.ProfileStat.from_string(*stat_definition.split("|"))
+            for stat_definition in header[2:]
+        ]
+        # Parse the remaining rows that represent profile specifications and filter invalid ones
+        profiles.extend(
+            record
+            for row in csv_reader
+            if (record := _parse_perf_entry(row, import_dir, csv_stats)) is not None
+        )
+    # Merge CSV stats with the other stats
+    for csv_stat in csv_stats:
+        _merge_stats(csv_stat, stats)
+    log.minor_success(log.path_style(str(csv_path)), "parsed")
+
+
+def _parse_perf_entry(
+    entry: list[str], import_dir: Path, stats: list[profile_stats.ProfileStat]
+) -> _PerfProfileSpec | None:
+    """Parse a single perf profile import entry.
+
+    :param entry: the perf import entry to parse.
+    :param import_dir: the import directory to use for relative profile file paths.
+    :param stats: the profile stats associated with this profile.
+
+    :return: the parsed profile, or None if the import entry is invalid.
+    """
+    if len(entry) == 0:
+        # Empty profile specification, warn
+        log.warn("Empty import profile specification. Skipping.")
+        return None
+    # Parse the profile specification
+    profile_info = _PerfProfileSpec(
+        _massage_import_path(entry[0], import_dir),
+        int(entry[1].strip()) if len(entry) >= 2 else _PerfProfileSpec.exit_code,
+    )
+    # Parse the stat values and add them to respective stats
+    for stat_value, stat_obj in zip(map(_massage_stat_value, entry[2:]), stats):
+        stat_obj.value.append(stat_value)
+    if len(entry[2:]) > len(stats):
+        log.warn(
+            f"Imported profile {profile_info.path} specifies more stats values than stats headers."
+            " Ignoring additional stats."
+        )
+    if profile_info.exit_code != 0:
+        log.warn("Importing a profile with non-zero exit code.")
+    return profile_info
+
+
+def _merge_stats(
+    new_stat: profile_stats.ProfileStat, into_stats: list[profile_stats.ProfileStat]
+) -> None:
+    """Merge a new profile stat's values into the current profile stats.
+
+    If a stat with the same name already exists, the values of both stats are merged. If no such
+    stat is found, the new stat is added to the collection of current stats.
+
+    :param new_stat: the new profile stat to merge.
+    :param into_stats: the current collection of profile stats.
+    """
+    for stat in into_stats:
+        if new_stat.name == stat.name:
+            # We found a stat with a matching name, merge
+            stat.merge_with(new_stat)
+            return
+    # There is no stat to merge with, extend the current collection of stats
+    into_stats.append(new_stat)
+
+
+def _massage_stat_value(stat_value: str) -> str | float:
+    """Massages a stat value read from a string, converting it to a float if it is numerical.
+
+    :param stat_value: the stat value to massage.
 
-    :return: a collection of parsed metadata objects
+    :return: a massaged stat value.
     """
+    stat_value = stat_value.strip()
     try:
-        with open(metadata_path, "r") as metadata_handle:
-            log.minor_success(log.path_style(str(metadata_path)), "found")
-            metadata_dict: dict[str, Any] = json.load(metadata_handle)
-            # Make sure we flatten the input
-            return [
-                profile_helpers.ProfileMetadata(k, v) for k, v in query.all_items_of(metadata_dict)
-            ]
-    except OSError:
-        log.minor_fail(log.path_style(str(metadata_path)), "not found, skipping")
-        return []
+        return float(stat_value)
+    except ValueError:
+        return stat_value
 
 
-def _massage_import_path(import_dir: Path, path_str: str) -> Path:
-    """Massages path strings into unified path format.
+def _massage_import_path(path_str: str, import_dir: Path) -> Path:
+    """Massages path strings into a unified path format.
 
     First, the path string is stripped of leading and trailing whitespaces. Next, absolute
     paths are kept as is, while relative paths are prepended with the provided import
     directory.
 
-    :param import_dir: the import directory to use for relative paths
-    :param path_str: the path string to massage
+    :param path_str: the path string to massage.
+    :param import_dir: the import directory to use for relative paths.
 
-    :return: massaged path
+    :return: the massaged path.
     """
     path: Path = Path(path_str.strip())
     if path.is_absolute():
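
To make the behavior of the two massaging helpers concrete, a few illustrative calls (the
paths and values are made up; the results follow directly from the docstrings above):

    _massage_stat_value(" 12.5 ")  # -> 12.5 (numerical strings become floats)
    _massage_stat_value("rbtree")  # -> "rbtree" (everything else stays a string)
    _massage_import_path("prof.perf", Path("/tmp/imports"))  # -> Path("/tmp/imports/prof.perf")
    _massage_import_path(" /abs/prof.perf ", Path("/tmp/imports"))  # -> Path("/abs/prof.perf")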
     :ivar aggregate_by: The aggregation (representative value) key.
     :ivar description: A detailed description of the stat.
-    :ivar value: The value of the stat.
+    :ivar value: The value(s) of the stat.
     """
 
     name: str
@@ -92,7 +114,7 @@ class ProfileStat:
     unit: str = "#"
     aggregate_by: str = ""
     description: str = ""
-    value: object = ""
+    value: list[str | float] = dataclasses.field(default_factory=list)
 
     @classmethod
     def from_string(
@@ -120,7 +142,7 @@ def from_string(
         if name == "[empty]":
             # Invalid stat specification, warn
             perun_log.warn("Empty profile stat specification. Creating a dummy '[empty]' stat.")
-        comparison_enum = cls._convert_comparison(cmp)
+        comparison_enum = ProfileStatComparison.str_to_comparison(cmp)
         return cls(name, comparison_enum, unit, aggregate_by, description)
 
     @classmethod
@@ -131,30 +153,32 @@ def from_profile(cls, stat: dict[str, Any]) -> ProfileStat:
 
         :return: A constructed ProfileStat object.
         """
-        stat["cmp"] = cls._convert_comparison(stat.get("cmp", ""))
+        stat["cmp"] = ProfileStatComparison.str_to_comparison(stat.get("cmp", ""))
         return cls(**stat)
 
-    @staticmethod
-    def _convert_comparison(comparison: str) -> ProfileStatComparison:
-        """Convert a comparison type string into a ProfileStatComparison enum value.
+    def merge_with(self, other: ProfileStat) -> ProfileStat:
+        """Merges the value(s) of another ProfileStat object into this one.
 
-        If an invalid comparison type is provided, the default type will be used.
+        In case of mismatching headers, this ProfileStat's header takes precedence over the
+        other one.
 
-        :param comparison: The comparison type as a string.
+        :param other: The other ProfileStat object to merge with.
 
-        :return: The comparison type as an enum value.
+        :return: This ProfileStat object with merged values.
         """
-        if not comparison:
-            return ProfileStatComparison.default()
-        try:
-            return ProfileStatComparison(comparison.strip())
-        except ValueError:
-            # Invalid stat comparison, warn
+        if self.get_header() != other.get_header():
             perun_log.warn(
-                f"Unknown stat comparison: {comparison}. Using the default stat comparison value "
-                f"instead. Please choose one of ({', '.join(ProfileStatComparison.supported())})."
+                f"Merged ProfileStats '{self.name}' have mismatching headers, using the current "
+                f"header {self.get_header()}"
             )
-            return ProfileStatComparison.default()
+        self.value += other.value
+        return self
+
+    def get_header(self) -> tuple[str, str, str, str, str]:
+        """Obtains the ProfileStat header, i.e., all attributes except the values.
+
+        :return: the ProfileStat header.
+        """
+        return self.name, self.cmp, self.unit, self.aggregate_by, self.description
 
 
 class ProfileStatAggregation(Protocol):
@@ -386,17 +410,17 @@ def infer_auto_comparison(self, comparison: ProfileStatComparison) -> ProfileSta
     def as_table(
         self, key: str = _DEFAULT_KEY
     ) -> tuple[int | str | tuple[str, int], dict[str, int] | dict[str, str]]:
-        header: str | int | tuple[str, int]
+        representative_val: str | int | tuple[str, int]
         if key in ("counts", "sequence"):
             # The Counter and list objects are not suitable for direct printing in a table.
-            header = f"[{key}]"
+            representative_val = f"[{key}]"
        else:
             # A little type hinting help. The list and Counter types have already been covered.
-            header = cast(Union[str, int, tuple[str, int]], self.get_value(key))
+            representative_val = cast(Union[str, int, tuple[str, int]], self.get_value(key))
         if key == "sequence":
             # The 'sequence' key table format is a bit different from the rest.
-            return header, {f"{idx}.": value for idx, value in enumerate(self.sequence)}
-    return header, self.counts
+            return representative_val, {f"{idx}.": value for idx, value in enumerate(self.sequence)}
+    return representative_val, self.counts
 
 
 def aggregate_stats(stat: ProfileStat) -> ProfileStatAggregation:
@@ -406,26 +430,17 @@ def aggregate_stats(stat: ProfileStat) -> ProfileStatAggregation:
 
     :return: The constructed aggregation object.
     """
-    if isinstance(stat.value, (str, float, int)):
-        return SingleValue(stat.value)
-    if isinstance(stat.value, Iterable):
-        # Iterable types are converted to a list
-        values = list(stat.value)
-        if len(values) == 0:
-            perun_log.warn(f"ProfileStat aggregation: Missing value of stat '{stat.name}'")
-            return SingleValue()
-        elif len(values) == 1:
-            return SingleValue(values[0])
-        elif all(isinstance(value, (int, float)) for value in values):
-            # All values are integers or floats
-            return StatisticalSummary.from_values(map(float, values))
-        else:
-            # Even heterogeneous lists will be aggregated as lists of strings
-            return StringCollection(list(map(str, values)))
-    perun_log.warn(
-        f"ProfileStat aggregation: Unknown type '{type(stat.value)}' of stat '{stat.name}'"
-    )
-    return SingleValue()
+    if len(stat.value) == 0:
+        perun_log.warn(f"ProfileStat aggregation: Missing value of stat '{stat.name}'")
+        return SingleValue()
+    elif len(stat.value) == 1:
+        return SingleValue(stat.value[0])
+    elif all(isinstance(value, (int, float)) for value in stat.value):
+        # All values are integers or floats
+        return StatisticalSummary.from_values(map(float, stat.value))
+    else:
+        # Even heterogeneous lists will be aggregated as lists of strings
+        return StringCollection(list(map(str, stat.value)))
 
 
 def compare_stats(
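
A small sketch of how the reworked list-valued stats compose, assuming the remaining
dataclass fields keep their defaults (the names and numbers are illustrative):

    lhs = ProfileStat("runtime", unit="s", value=[1.2, 1.3])
    rhs = ProfileStat("runtime", unit="s", value=[1.1])
    lhs.merge_with(rhs)  # lhs.value == [1.2, 1.3, 1.1]

    aggregate_stats(lhs)  # all values numeric -> StatisticalSummary
    aggregate_stats(ProfileStat("opt", value=["O2", "O3"]))  # strings -> StringCollection
    aggregate_stats(ProfileStat("cores", value=[8]))  # single value -> SingleValue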
diff --git a/perun/utils/streams.py b/perun/utils/streams.py
index 0b2411bd..50b48e7c 100644
--- a/perun/utils/streams.py
+++ b/perun/utils/streams.py
@@ -7,11 +7,16 @@
 from __future__ import annotations
 
 # Standard Imports
-from typing import TextIO, Any
+import contextlib
 import io
 import json
 import os
+from pathlib import Path
 import re
+from typing import Any, BinaryIO, Iterator, IO, Literal, TextIO, TYPE_CHECKING, overload
+
+if TYPE_CHECKING:
+    from _typeshed import OpenBinaryMode, OpenTextMode
 
 # Third-Party Imports
 from ruamel.yaml import YAML
@@ -97,3 +102,92 @@ def safely_load_file(filename: str) -> list[str]:
     except UnicodeDecodeError as ude:
         log.warn(f"Could not decode '{filename}': {ude}")
     return []
+
+
+@overload
+@contextlib.contextmanager
+def safely_open_and_log(
+    file_path: Path,
+    mode: OpenTextMode,
+    fatal_fail: Literal[False] = ...,
+    success_msg: str = ...,
+    fail_msg: str = ...,
+    **open_args: Any,
+) -> Iterator[TextIO | None]: ...
+
+
+@overload
+@contextlib.contextmanager
+def safely_open_and_log(
+    file_path: Path,
+    mode: OpenBinaryMode,
+    fatal_fail: Literal[False] = ...,
+    success_msg: str = ...,
+    fail_msg: str = ...,
+    **open_args: Any,
+) -> Iterator[BinaryIO | None]: ...
+
+
+@overload
+@contextlib.contextmanager
+def safely_open_and_log(
+    file_path: Path,
+    mode: OpenTextMode,
+    *,
+    fatal_fail: Literal[True],
+    success_msg: str = ...,
+    fail_msg: str = ...,
+    **open_args: Any,
+) -> Iterator[TextIO]: ...
+
+
+@overload
+@contextlib.contextmanager
+def safely_open_and_log(
+    file_path: Path,
+    mode: OpenBinaryMode,
+    *,
+    fatal_fail: Literal[True],
+    success_msg: str = ...,
+    fail_msg: str = ...,
+    **open_args: Any,
+) -> Iterator[BinaryIO]: ...
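
A minimal usage sketch of these overloads (the file name is hypothetical): the non-fatal
variants may yield None and must be checked, while fatal_fail=True, which has to be passed
as a keyword, lets mypy narrow the yielded handle:

    with safely_open_and_log(Path("stats.csv"), "r") as handle:
        if handle is not None:  # the non-fatal variant may yield None
            print(handle.readline())

    with safely_open_and_log(Path("stats.csv"), "r", fatal_fail=True) as handle:
        print(handle.readline())  # handle is narrowed to TextIO here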
+
+
+@contextlib.contextmanager
+def safely_open_and_log(
+    file_path: Path,
+    mode: str,
+    fatal_fail: bool = False,
+    success_msg: str = "found",
+    fail_msg: str = "not found",
+    **open_args: Any,
+) -> Iterator[IO[Any] | None]:
+    """Attempt to safely open a file and log a success or failure message.
+
+    If fatal_fail is specified as True, the function will either yield a valid file handle or
+    terminate the program; a None value is never yielded when fatal_fail is True.
+
+    When provided, the fatal_fail parameter needs to be passed as a keyword argument, e.g.,
+    safely_open_and_log(path, mode, fatal_fail=True), to conform to the expected call signature
+    given how mypy currently handles overloads for parameters with default values.
+    See this mypy issue for more details: https://github.com/python/mypy/issues/7333
+
+    :param file_path: path to the file to open.
+    :param mode: file opening mode.
+    :param fatal_fail: specifies whether failing to open a file should terminate the program.
+    :param success_msg: a log message when the file has been successfully opened.
+    :param fail_msg: a log message when the file could not be opened.
+    :param open_args: additional arguments to pass to the open function.
+
+    :return: a file handle or None, depending on the success of opening the file.
+    """
+    try:
+        with open(file_path, mode, **open_args) as f_handle:
+            log.minor_success(log.path_style(str(file_path)), success_msg)
+            yield f_handle
+    except OSError as exc:
+        log.minor_fail(log.path_style(str(file_path)), fail_msg)
+        if fatal_fail:
+            log.error(str(exc), exc)
+        yield None
diff --git a/perun/view_diff/flamegraph/run.py b/perun/view_diff/flamegraph/run.py
index 333b9067..e38ac7ae 100755
--- a/perun/view_diff/flamegraph/run.py
+++ b/perun/view_diff/flamegraph/run.py
@@ -209,7 +209,7 @@ def process_maxima(
                 f"Overall {key}",
                 profile_stats.ProfileStatComparison.LOWER,
                 description=f"The overall value of the {key} for the root value",
-                value=counts[key],
+                value=[counts[key]],
             )
         )
         stats.append(
@@ -217,7 +217,7 @@ def process_maxima(
                 "Maximum Trace Length",
                 profile_stats.ProfileStatComparison.LOWER,
                 description="Maximum length of the trace in the profile",
-                value=max_trace,
+                value=[max_trace],
             )
         )
     return max_trace
diff --git a/perun/view_diff/report/run.py b/perun/view_diff/report/run.py
index 363e0170..56e55b17 100755
--- a/perun/view_diff/report/run.py
+++ b/perun/view_diff/report/run.py
@@ -536,7 +536,7 @@ def process_traces(
                 profile_stats.ProfileStatComparison.LOWER,
                 unit,
                 description=f"The overall value of the {name} for the root value",
-                value=max_samples[key],
+                value=[max_samples[key]],
             )
         )
         Config().profile_stats[profile_type].append(
@@ -545,7 +545,7 @@ def process_traces(
                 profile_stats.ProfileStatComparison.LOWER,
                 "#",
                 description="Maximum length of the trace in the profile",
-                value=max_trace,
+                value=[max_trace],
             )
         )
     Config().max_seen_trace = max(max_trace, Config().max_seen_trace)

From 226b699bddb04b32bba954bb367cc4796c8b172d Mon Sep 17 00:00:00 2001
From: Jiri Pavela
Date: Sun, 22 Sep 2024 17:01:08 +0200
Subject: [PATCH 11/14] Make diff view info tables collapse both LHS and RHS

Profile specification, stats and metadata now collapse / show both LHS
and RHS at the same time.
--- .../diff_view_datatables.html.jinja2 | 7 +++---- .../diff_view_flamegraph.html.jinja2 | 21 ++++++++----------- perun/templates/diff_view_report.html.jinja2 | 21 ++++++++----------- perun/templates/diff_view_sankey.html.jinja2 | 8 +++---- .../macros_profile_overview.html.jinja2 | 11 +++++++--- 5 files changed, 33 insertions(+), 35 deletions(-) diff --git a/perun/templates/diff_view_datatables.html.jinja2 b/perun/templates/diff_view_datatables.html.jinja2 index efec46c5..f330ef25 100755 --- a/perun/templates/diff_view_datatables.html.jinja2 +++ b/perun/templates/diff_view_datatables.html.jinja2 @@ -85,14 +85,14 @@

{{ lhs_tag }}

- {{ profile_overview.overview_table('toggleLeftCollapse', 'left-info', lhs_header, rhs_header, "Profile Specification") }} + {{ profile_overview.overview_table('toggleSpecificationCollapse', 'left-specification-info', lhs_header, rhs_header, "Profile Specification") }}
 

{{ rhs_tag }}

- {{ profile_overview.overview_table('toggleRightCollapse', 'right-info', rhs_header, lhs_header, "Profile Specification") }} + {{ profile_overview.overview_table('toggleSpecificationCollapse', 'right-specification-info', rhs_header, lhs_header, "Profile Specification") }}
 
@@ -191,8 +191,7 @@ {% include 'jquery-3.6.0.min.js' %} {% include 'dataTables.min.js' %} {%- endif %} - {{ profile_overview.toggle_script('toggleLeftCollapse', 'left-info') }} - {{ profile_overview.toggle_script('toggleRightCollapse', 'right-info') }} + {{ profile_overview.toggle_script('toggleSpecificationCollapse', 'left-specification-info', 'right-specification-info') }} $(document).ready( function () { var lhs = $("#table1").DataTable({ data: lhs_data.data, diff --git a/perun/templates/diff_view_flamegraph.html.jinja2 b/perun/templates/diff_view_flamegraph.html.jinja2 index 94340f6f..351c0408 100755 --- a/perun/templates/diff_view_flamegraph.html.jinja2 +++ b/perun/templates/diff_view_flamegraph.html.jinja2 @@ -99,24 +99,24 @@

{{ lhs_tag }}

- {{ profile_overview.overview_table('toggleLeftCollapse', 'left-info', lhs_header, rhs_header, "Profile Specification") }} + {{ profile_overview.overview_table('toggleSpecificationCollapse', 'left-specification-info', lhs_header, rhs_header, "Profile Specification") }}
 
- {{ profile_overview.overview_table('toggleLeftProfileCollapse', 'left-profile-info', lhs_stats, rhs_stats, "Profile Stats") }} + {{ profile_overview.overview_table('toggleStatsCollapse', 'left-stats-info', lhs_stats, rhs_stats, "Profile Stats") }}
 
{%- if rhs_metadata%} - {{ profile_overview.overview_table('toggleLeftMetadataCollapse', 'left-metadata-info', lhs_metadata, rhs_metadata, "Profile Metadata") }} + {{ profile_overview.overview_table('toggleMetadataCollapse', 'left-metadata-info', lhs_metadata, rhs_metadata, "Profile Metadata") }}
 
{%- endif %}

{{ rhs_tag }}

- {{ profile_overview.overview_table('toggleRightCollapse', 'right-info', rhs_header, lhs_header, "Profile Specification") }} + {{ profile_overview.overview_table('toggleSpecificationCollapse', 'right-specification-info', rhs_header, lhs_header, "Profile Specification") }}
 
- {{ profile_overview.overview_table('toggleRightProfileCollapse', 'right-profile-info', rhs_stats, lhs_stats, "Profile Stats") }} + {{ profile_overview.overview_table('toggleStatsCollapse', 'right-stats-info', rhs_stats, lhs_stats, "Profile Stats") }}
 
{%- if rhs_metadata%} - {{ profile_overview.overview_table('toggleRightMetadataCollapse', 'right-metadata-info', rhs_metadata, lhs_metadata, "Profile Metadata") }} + {{ profile_overview.overview_table('toggleMetadataCollapse', 'right-metadata-info', rhs_metadata, lhs_metadata, "Profile Metadata") }}
 
{%- endif %}
@@ -173,12 +173,9 @@