Skip to content


Fix prefix finding bug
Browse files Browse the repository at this point in the history
  • Loading branch information
ayakayorihiro committed Nov 18, 2024
1 parent 43ce140 commit df7d24f
Showing 1 changed file with 67 additions and 94 deletions.
161 changes: 67 additions & 94 deletions tools/profiler/
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@

INVISIBLE = "gray"

def remove_size_from_name(name: str) -> str:
    """Return a signal name with its bit-range suffix removed.

    Everything from the first ``[`` onward is dropped, so ``"state[2:0]"``
    becomes ``"state"``. A name that contains no ``[`` is returned unchanged.

    Args:
        name: Signal name, optionally followed by a ``[hi:lo]`` style suffix.

    Returns:
        The portion of ``name`` that precedes the first ``[``.
    """
    # partition yields the whole string as the head when "[" is absent,
    # which matches the behaviour of split('[')[0].
    return name.partition("[")[0]

class ProfilingInfo:
def __init__(self, name, callsite=None, component=None, is_cell=False): = name
Expand All @@ -33,27 +33,11 @@ def flame_repr(self):

def __repr__ (self):
if self.is_cell:
header = f"[Cell] {}" # FIXME: fix this later
header = f"[Cell][{self.callsite}] {}" # FIXME: fix this later
header = f"[{self.component}][{self.callsite}] {}"
return header

def nice_repr (self):
segments_str = ""
for segment in self.closed_segments:
if (segments_str != ""):
segments_str += ", "
segments_str += f"[{segment['start']}, {segment['end']})"
if self.is_cell:
header = f"[Cell] {}\n" # FIXME: fix this later
header = f"[{self.component}][{self.callsite}] {}\n"
return (header +
f"\tTotal cycles: {self.total_cycles}\n" +
f"\t# of times active: {len(self.closed_segments)}\n" +
f"\tSegments: {segments_str}\n"

def start_new_segment(self, curr_clock_cycle):
if self.current_segment is None:
self.current_segment = {"start": curr_clock_cycle, "end": -1, "callsite": self.callsite} # NOTE: see if this backfires
Expand Down Expand Up @@ -112,7 +96,6 @@ def enddefinitions(self, vcd, signals, cur_sig_vals):

# get go and done for cells (the signals are exactly {cell}.go and {cell}.done)
for cell in self.cells:
# FIXME: check if anything here is different when we go over multicomponent programs
cell_go = cell + ".go"
cell_done = cell + ".done"
if cell_go not in vcd.references_to_ids:
Expand Down Expand Up @@ -198,7 +181,7 @@ def postprocess(self):
started = started or [x for x in events if x["signal"] == f"{self.main_component}.go" and x["value"] == 1]
if not started: # only start counting when main component is on.
# checking whether the timestamp has a rising edge (hacky)
# checking whether the timestamp has a rising edge
if {"signal": clock_name, "value": 1} in events:
clock_cycles += 1
for event in events:
Expand Down Expand Up @@ -227,42 +210,37 @@ def postprocess(self):
child_group_component = encoded_info_split[2]
group_id = child_group_name + DELIMITER + child_group_component
se_currently_active.add((group_id, parent))
elif "se_probe_out" in signal_name and value == 0:
encoded_info_split = signal_name.split("_se_probe_out")[0].split("__")
child_group_name = encoded_info_split[0]
parent = encoded_info_split[1]
child_group_component = encoded_info_split[2]
group_id = child_group_name + DELIMITER + child_group_component
se_currently_active.remove((group_id, parent))
elif "cell_probe_out" in signal_name and value == 1:
encoded_info_split = signal_name.split("_cell_probe_out")[0].split("__")
cell_name = encoded_info_split[0]
parent = encoded_info_split[1]
parent_component = encoded_info_split[2]
caller_id = parent + DELIMITER + parent_component
# cell_id = cell_name + DELIMITER + parent_component
# self.cell_invoke_probe_info[cell_id][parent].start_new_segment(clock_cycles)
ci_currently_active.add((caller_id, cell_name))
elif "cell_probe_out" in signal_name and value == 0:
encoded_info_split = signal_name.split("_cell_probe_out")[0].split("__")
cell_name = encoded_info_split[0]
parent = encoded_info_split[1]
parent_component = encoded_info_split[2]
caller_id = parent + DELIMITER + parent_component
# cell_id = cell_name + DELIMITER + parent_component
# self.cell_invoke_probe_info[cell_id][parent].end_current_segment(clock_cycles)
ci_currently_active.remove((caller_id, cell_name))
for active in currently_active: # end any group/cell activitations that are still around...
# FIXME: pretty sure the next two blocks fail because both stack infos are nested dictionaries lmao
for active in se_currently_active: # end any structural enables that are still around...
for active in ci_currently_active:
for (group_id, parent) in se_currently_active: # end any structural enables that are still around...
for (caller_id, cell_name) in ci_currently_active:

self.clock_cycles = clock_cycles

Expand All @@ -282,7 +260,7 @@ def build_components_to_cells(prefix, curr_component, cells_to_components, compo
def read_component_cell_names_json(json_file):
cell_json = json.load(open(json_file))
# For each component, contains a map from each cell name to its corresponding component
# component name --> { cell name --> component name}
# component name --> { cell name --> component name }
cells_to_components = {}
main_component = ""
for curr_component_entry in cell_json:
Expand All @@ -295,7 +273,6 @@ def read_component_cell_names_json(json_file):
full_main_component = f"TOP.toplevel.{main_component}"
components_to_cells = {main_component : [full_main_component]} # come up with a better name for this
build_components_to_cells(full_main_component, main_component, cells_to_components, components_to_cells)
# FIXME: extreme hack. Find a better way to do this...
full_cell_names_to_components = {}
for component in components_to_cells:
for cell in components_to_cells[component]:
Expand All @@ -315,16 +292,16 @@ def create_traces(active_element_probes_info, call_stack_probes_info, cell_calle
new_timeline_map = {i : [] for i in range(total_cycles)}
# now, we need to figure out the sets of traces
for i in timeline_map:
parents = set()
parents = set() # keeping track of entities that are parents of other entities
i_mapping = {} # each unique group inv mapping to its stack. the "group" should be the last item on each stack
i_mapping[main_component] = ["main"] # [main_component]
i_mapping[main_component] = [main_component.split(".")[-1]]

cell_worklist = [main_component] # FIXME: maybe remove the hardcoding?
while len(cell_worklist) > 0:
current_cell = cell_worklist.pop()
current_component = cells_to_components[current_cell]
covered_units_in_component = set() # collect all of the units we've covered.
# this is so silly... but catch all active units that are groups in this component.
# catch all active units that are groups in this component.
units_to_cover = set(filter(lambda unit: not unit.is_cell and unit.component == current_component, timeline_map[i]))
# find all enables from control. these are all units that either (1) don't have any maps in call_stack_probes_info, or (2) have no active parent calls in call_stack_probes_info
for active_unit in units_to_cover:
Expand Down Expand Up @@ -372,6 +349,7 @@ def create_traces(active_element_probes_info, call_stack_probes_info, cell_calle
i_mapping[] = i_mapping[f"{current_cell}.{cell_invoker}"] + [f"{cell_active_probe.shortname} [{cell_component}]"]

# Only retain paths that lead to leaf nodes.
for elem in i_mapping:
if elem not in parents:
Expand All @@ -383,42 +361,48 @@ def create_traces(active_element_probes_info, call_stack_probes_info, cell_calle

return new_timeline_map

# Creates a tree that encapsulates all stacks that occur within the program.
def create_tree(timeline_map):
# ugliest implementation of a tree
node_id_acc = 0
tree_dict = {} # node id --> node name
path_dict = {} # stack list string --> list of node ids
path_prefixes_dict = {} # stack list string --> list of node ids
stack_list = []
for sl in timeline_map.values():
for s in sl:
if s not in stack_list:
for stack in stack_list:
stack_string = ";".join(stack)
if stack_string not in path_dict:
id_path_list = []
prefix = ""
# check if we have any prefixes. start from the longest
for other_stack_string in sorted(path_dict, key=len, reverse=True):
if other_stack_string in stack_string:
# prefix found!
prefix = other_stack_string
id_path_list = list(path_dict[other_stack_string])
# create nodes
if prefix != "":
new_nodes = stack_string.split(f"{prefix};")[1].split(";")
stack_len = len(stack)
id_path_list = []
prefix = ""
for i in range(1, stack_len+1):
# start from reverse
attempted_prefix = ";".join(stack[0:stack_len-i])
if attempted_prefix in path_prefixes_dict:
prefix = attempted_prefix
id_path_list = list(path_prefixes_dict[prefix])
# create nodes
if prefix != "":
new_nodes = stack[i:]
new_prefix = prefix
new_nodes = stack
new_prefix = ""
for elem in new_nodes:
if new_prefix == "":
new_prefix = elem
new_nodes = stack
for elem in new_nodes:
tree_dict[node_id_acc] = elem
node_id_acc += 1
path_dict[stack_string] = id_path_list

new_prefix += f";{elem}"
tree_dict[node_id_acc] = elem
path_prefixes_dict[new_prefix] = list(id_path_list)
node_id_acc += 1
path_dict[new_prefix] = id_path_list

return tree_dict, path_dict

Expand All @@ -437,11 +421,28 @@ def create_path_dot_str_dict(path_dict):

def create_output(timeline_map, out_dir):


# make flame graph folded file
stacks = {} # stack to number of cycles
for i in timeline_map:
for stack_list in timeline_map[i]:
stack_id = ";".join(stack_list)
if stack_id not in stacks:
stacks[stack_id] = 1
stacks[stack_id] += 1

with open(os.path.join(out_dir, "flame.folded"), "w") as flame_out:
for stack in stacks:
flame_out.write(f"{stack} {stacks[stack]}\n")

if len(timeline_map) > TREE_PICTURE_LIMIT:
print(f"Simulation exceeds {TREE_PICTURE_LIMIT} cycles, skipping trees...")
tree_dict, path_dict = create_tree(timeline_map)
path_to_dot_str = create_path_dot_str_dict(path_dict)
all_paths_ordered = sorted(path_dict.keys())

for i in timeline_map:
used_paths = set()
used_nodes = set()
Expand All @@ -454,16 +455,15 @@ def create_output(timeline_map, out_dir):

fpath = os.path.join(out_dir, f"cycle{i}.dot")
# really lazy rn but I should actually use a library for this
with open(fpath, "w") as f:
f.write("digraph cycle" + str(i) + " {\n")
# declare nodes.
# used nodes should simply be declared
for used_node in used_nodes:
f.write(f'\t{used_node} [label={tree_dict[used_node]}];\n')
f.write(f'\t{used_node} [label="{tree_dict[used_node]}"];\n')
# unused nodes should be declared with gray
for unused_node in all_nodes.difference(used_nodes):
f.write(f'\t{unused_node} [label={tree_dict[unused_node]},color="{INVISIBLE}",fontcolor="{INVISIBLE}"];\n')
f.write(f'\t{unused_node} [label="{tree_dict[unused_node]}",color="{INVISIBLE}",fontcolor="{INVISIBLE}"];\n')
# write all paths.
for path_id in all_paths_ordered:
if ";" not in path_id or path_id in used_paths:
Expand All @@ -472,41 +472,14 @@ def create_output(timeline_map, out_dir):
f.write(f'\t{path_to_dot_str[path_id]} [color="{INVISIBLE}"];\n')

# make flame graph folded file
stacks = {} # stack to number of cycles
for i in timeline_map:
for stack_list in timeline_map[i]:
# stack_str = ";".join(map(lambda x : x.flame_repr(), stack_list))
stack_id = ";".join(stack_list)
if stack_id not in stacks:
stacks[stack_id] = 1
stacks[stack_id] += 1

with open(os.path.join(out_dir, "flame.folded"), "w") as flame_out:
for stack in stacks:
flame_out.write(f"{stack} {stacks[stack]}\n")

def main(vcd_filename, cells_json_file, out_dir):
# FIXME: will support multicomponent programs later. There's maybe something wrong here.
main_component, cells_to_components = read_component_cell_names_json(cells_json_file)
converter = VCDConverter(main_component, cells_to_components)
vcdvcd.VCDVCD(vcd_filename, callbacks=converter)

print("Active groups info: " + str(converter.active_elements_info.keys()))
print("Call stack info: " + str(converter.call_stack_probe_info))
print("Cell stack info: " + str(converter.cell_invoke_caller_probe_info))

# NOTE: for a more robust implementation, we can even skip the part where we store active
# cycles per group.
new_timeline_map = create_traces(converter.active_elements_info, converter.call_stack_probe_info, converter.cell_invoke_caller_probe_info, converter.clock_cycles, cells_to_components, main_component)


create_output(new_timeline_map, out_dir)

Expand Down

0 comments on commit df7d24f

Please sign in to comment.