Skip to content

Commit

Permalink
Merge pull request #185 from Perfexionists/extending-traces
Browse files Browse the repository at this point in the history
Extending the trace toolkit
  • Loading branch information
tfiedor authored Mar 21, 2024
2 parents 41c2685 + 0985eb6 commit f4dd6bb
Show file tree
Hide file tree
Showing 4 changed files with 536 additions and 88 deletions.
82 changes: 0 additions & 82 deletions perun/utils/common/common_kit.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

# Standard Imports
from typing import Optional, Any, Iterable, Callable, Literal, TYPE_CHECKING
import functools
import importlib
import itertools
import operator
Expand Down Expand Up @@ -439,84 +438,3 @@ def get_module(module_name: str) -> types.ModuleType:


MODULE_CACHE: dict[str, types.ModuleType] = {}


@functools.cache
def split_to_words(identifier: str) -> set[str]:
    """Breaks an identifier into the set of its underscore-separated words

    Only snake case is handled: the identifier is cut at every underscore,
    so a camel case identifier stays a single word.

    The result is memoized per identifier, hence repeated calls with the same
    identifier return the very same set object; callers should not mutate it.

    :param identifier: identifier of function or other primitive, that consists of words
    :return: set of distinct words found in the identifier
    """
    words = identifier.split("_")
    return set(words)


def switch_cost(lhs_identifier: str, rhs_identifier: str) -> float:
    """Computes how expensive it is to substitute lhs_identifier with rhs_identifier

    Both identifiers are split to words and the cost is:

        1 - 2 * |common words| / (|words in LHS| + |words in RHS|)

    so identifiers made of the same words cost 0 and identifiers that share
    no word cost 1.

    :param lhs_identifier: left hand side identifier (function)
    :param rhs_identifier: right hand side identifier (function)
    :return: float cost of switching lhs with rhs
    """
    left_words = split_to_words(lhs_identifier)
    right_words = split_to_words(rhs_identifier)
    shared_count = len(left_words & right_words)
    total_count = len(left_words) + len(right_words)
    return 1 - (2 * shared_count / total_count)


# Memoized distances keyed by the pair (names of LHS trace, names of RHS trace).
# Tuples are used instead of a joined string, so names that contain separator
# characters (e.g. "," or ";" in C++ signatures) can never collide.
DISTANCE_CACHE: dict[tuple[tuple[str, ...], tuple[str, ...]], float] = {}


def compute_distance(
    lhs_trace: list[dict[str, Any]],
    rhs_trace: list[dict[str, Any]],
    trace_key: str = "func",
) -> float:
    """Computes the distance between two traces

    The distance is computed as least number of applications of following operations:

      1. Match (cost = 0): matching parts of the traces, i.e. the same functions;
      2. Insert/Delete (cost = 1): adding or deleting part of the trace, so the traces match
      3. Substitution (cost = variable): switching part of the trace with another;
         the cost is given by ``switch_cost`` of the two names.

    This is based on [ISCSME'21] paper called:
    Performance debugging in the large via mining millions of stack traces

    We assume, that the inputs are in form of list which contains the dictionaries
    with key "func" that corresponds to the name of the ids. One can change it using
    the parameter "trace_key".

    The result is stored in DISTANCE_CACHE, so repeated queries for the same pair
    of name sequences are answered without recomputation. The computation itself
    is iterative (a table over suffixes of both traces), hence it is not limited
    by the recursion limit of the interpreter for long traces.

    :param lhs_trace: lhs trace of function names
    :param rhs_trace: rhs trace of function names
    :param trace_key: key that is used for retrieving the trace names
    :return: distance between two traces
    """
    lhs_names = tuple(frame[trace_key] for frame in lhs_trace)
    rhs_names = tuple(frame[trace_key] for frame in rhs_trace)
    key = (lhs_names, rhs_names)

    cached = DISTANCE_CACHE.get(key)
    if cached is not None:
        return cached

    lhs_len, rhs_len = len(lhs_names), len(rhs_names)
    # Invariant: below[j] is the distance between lhs_names[i + 1:] and rhs_names[j:].
    # We start with exhausted LHS: everything remaining in RHS has to be inserted.
    below = [float(rhs_len - j) for j in range(rhs_len + 1)]
    for i in range(lhs_len - 1, -1, -1):
        # Last column corresponds to exhausted RHS: the rest of LHS has to be deleted.
        current = [0.0] * rhs_len + [float(lhs_len - i)]
        for j in range(rhs_len - 1, -1, -1):
            if lhs_names[i] == rhs_names[j]:
                # 1. Heads match, so the cost is the cost of matching the rest
                current[j] = below[j + 1]
            else:
                current[j] = min(
                    # 2. Insertion/Deletion of the current function on either side
                    below[j] + 1,
                    current[j + 1] + 1,
                    # 3. Switch of the current two functions
                    below[j + 1] + switch_cost(lhs_names[i], rhs_names[j]),
                )
        below = current

    DISTANCE_CACHE[key] = below[0]
    return below[0]
1 change: 1 addition & 0 deletions perun/utils/common/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ perun_common_files = files(
'common_kit.py',
'diff_kit.py',
'script_kit.py',
'traces_kit.py',
'view_kit.py',
)

Expand Down
Loading

0 comments on commit f4dd6bb

Please sign in to comment.