From df7d24f2eb0f2921f887ece7c8fb0c84184ebd36 Mon Sep 17 00:00:00 2001 From: Ayaka Yorihiro Date: Mon, 18 Nov 2024 15:19:34 -0500 Subject: [PATCH] Fix prefix finding bug --- tools/profiler/new-parse-vcd.py | 161 +++++++++++++------------------- 1 file changed, 67 insertions(+), 94 deletions(-) diff --git a/tools/profiler/new-parse-vcd.py b/tools/profiler/new-parse-vcd.py index acc29d055..77396a3c3 100644 --- a/tools/profiler/new-parse-vcd.py +++ b/tools/profiler/new-parse-vcd.py @@ -8,12 +8,12 @@ DELIMITER = "__" INVISIBLE = "gray" +TREE_PICTURE_LIMIT=100 def remove_size_from_name(name: str) -> str: """ changes e.g. "state[2:0]" to "state" """ return name.split('[')[0] - class ProfilingInfo: def __init__(self, name, callsite=None, component=None, is_cell=False): self.name = name @@ -33,27 +33,11 @@ def flame_repr(self): def __repr__ (self): if self.is_cell: - header = f"[Cell] {self.name}" # FIXME: fix this later + header = f"[Cell][{self.callsite}] {self.name}" # FIXME: fix this later else: header = f"[{self.component}][{self.callsite}] {self.name}" return header - def nice_repr (self): - segments_str = "" - for segment in self.closed_segments: - if (segments_str != ""): - segments_str += ", " - segments_str += f"[{segment['start']}, {segment['end']})" - if self.is_cell: - header = f"[Cell] {self.name}\n" # FIXME: fix this later - else: - header = f"[{self.component}][{self.callsite}] {self.name}\n" - return (header + - f"\tTotal cycles: {self.total_cycles}\n" + - f"\t# of times active: {len(self.closed_segments)}\n" + - f"\tSegments: {segments_str}\n" - ) - def start_new_segment(self, curr_clock_cycle): if self.current_segment is None: self.current_segment = {"start": curr_clock_cycle, "end": -1, "callsite": self.callsite} # NOTE: see if this backfires @@ -112,7 +96,6 @@ def enddefinitions(self, vcd, signals, cur_sig_vals): # get go and done for cells (the signals are exactly {cell}.go and {cell}.done) for cell in self.cells: - # FIXME: check if anything here is different when we go over multicomponent programs cell_go = cell + ".go" cell_done = cell + ".done" if cell_go not in vcd.references_to_ids: @@ -198,7 +181,7 @@ def postprocess(self): started = started or [x for x in events if x["signal"] == f"{self.main_component}.go" and x["value"] == 1] if not started: # only start counting when main component is on. continue - # checking whether the timestamp has a rising edge (hacky) + # checking whether the timestamp has a rising edge if {"signal": clock_name, "value": 1} in events: clock_cycles += 1 for event in events: @@ -227,7 +210,7 @@ def postprocess(self): child_group_component = encoded_info_split[2] group_id = child_group_name + DELIMITER + child_group_component self.call_stack_probe_info[group_id][parent].start_new_segment(clock_cycles) - se_currently_active.add(group_id) + se_currently_active.add((group_id, parent)) elif "se_probe_out" in signal_name and value == 0: encoded_info_split = signal_name.split("_se_probe_out")[0].split("__") child_group_name = encoded_info_split[0] @@ -235,7 +218,7 @@ def postprocess(self): child_group_component = encoded_info_split[2] group_id = child_group_name + DELIMITER + child_group_component self.call_stack_probe_info[group_id][parent].end_current_segment(clock_cycles) - se_currently_active.remove(group_id) + se_currently_active.remove((group_id, parent)) elif "cell_probe_out" in signal_name and value == 1: encoded_info_split = signal_name.split("_cell_probe_out")[0].split("__") cell_name = encoded_info_split[0] @@ -243,9 +226,7 @@ def postprocess(self): parent_component = encoded_info_split[2] caller_id = parent + DELIMITER + parent_component self.cell_invoke_caller_probe_info[caller_id][cell_name].start_new_segment(clock_cycles) - ci_currently_active.add(caller_id) - # cell_id = cell_name + DELIMITER + parent_component - # self.cell_invoke_probe_info[cell_id][parent].start_new_segment(clock_cycles) + ci_currently_active.add((caller_id, cell_name)) elif "cell_probe_out" in signal_name and value == 0: encoded_info_split = signal_name.split("_cell_probe_out")[0].split("__") cell_name = encoded_info_split[0] @@ -253,16 +234,13 @@ def postprocess(self): parent_component = encoded_info_split[2] caller_id = parent + DELIMITER + parent_component self.cell_invoke_caller_probe_info[caller_id][cell_name].end_current_segment(clock_cycles) - ci_currently_active.remove(caller_id) - # cell_id = cell_name + DELIMITER + parent_component - # self.cell_invoke_probe_info[cell_id][parent].end_current_segment(clock_cycles) + ci_currently_active.remove((caller_id, cell_name)) for active in currently_active: # end any group/cell activitations that are still around... self.active_elements_info[active].end_current_segment(clock_cycles) - # FIXME: pretty sure the next two blocks fail because both stack infos are nested dictionaries lmao - for active in se_currently_active: # end any structural enables that are still around... - self.call_stack_probe_info[active].end_current_segment(clock_cycles) - for active in ci_currently_active: - self.cell_invoke_caller_probe_info[active].end_current_segment(clock_cycles) + for (group_id, parent) in se_currently_active: # end any structural enables that are still around... + self.call_stack_probe_info[group_id][parent].end_current_segment(clock_cycles) + for (caller_id, cell_name) in ci_currently_active: + self.cell_invoke_caller_probe_info[caller_id][cell_name].end_current_segment(clock_cycles) self.clock_cycles = clock_cycles @@ -282,7 +260,7 @@ def build_components_to_cells(prefix, curr_component, cells_to_components, compo def read_component_cell_names_json(json_file): cell_json = json.load(open(json_file)) # For each component, contains a map from each cell name to its corresponding component - # component name --> { cell name --> component name} + # component name --> { cell name --> component name } cells_to_components = {} main_component = "" for curr_component_entry in cell_json: @@ -295,7 +273,6 @@ def read_component_cell_names_json(json_file): full_main_component = f"TOP.toplevel.{main_component}" components_to_cells = {main_component : [full_main_component]} # come up with a better name for this build_components_to_cells(full_main_component, main_component, cells_to_components, components_to_cells) - # FIXME: extreme hack. Find a better way to do this... full_cell_names_to_components = {} for component in components_to_cells: for cell in components_to_cells[component]: @@ -315,16 +292,16 @@ def create_traces(active_element_probes_info, call_stack_probes_info, cell_calle new_timeline_map = {i : [] for i in range(total_cycles)} # now, we need to figure out the sets of traces for i in timeline_map: - parents = set() + parents = set() # keeping track of entities that are parents of other entities i_mapping = {} # each unique group inv mapping to its stack. the "group" should be the last item on each stack - i_mapping[main_component] = ["main"] # [main_component] + i_mapping[main_component] = [main_component.split(".")[-1]] cell_worklist = [main_component] # FIXME: maybe remove the hardcoding? while len(cell_worklist) > 0: current_cell = cell_worklist.pop() current_component = cells_to_components[current_cell] covered_units_in_component = set() # collect all of the units we've covered. - # this is so silly... but catch all active units that are groups in this component. + # catch all active units that are groups in this component. units_to_cover = set(filter(lambda unit: not unit.is_cell and unit.component == current_component, timeline_map[i])) # find all enables from control. these are all units that either (1) don't have any maps in call_stack_probes_info, or (2) have no active parent calls in call_stack_probes_info for active_unit in units_to_cover: @@ -372,6 +349,7 @@ def create_traces(active_element_probes_info, call_stack_probes_info, cell_calle i_mapping[cell_active_probe.name] = i_mapping[f"{current_cell}.{cell_invoker}"] + [f"{cell_active_probe.shortname} [{cell_component}]"] parents.add(f"{current_cell}.{cell_invoker}") + # Only retain paths that lead to leaf nodes. for elem in i_mapping: if elem not in parents: new_timeline_map[i].append(i_mapping[elem]) @@ -383,11 +361,14 @@ def create_traces(active_element_probes_info, call_stack_probes_info, cell_calle return new_timeline_map +""" +Creates a tree that encapsulates all stacks that occur within the program. +""" def create_tree(timeline_map): - # ugliest implementation of a tree node_id_acc = 0 tree_dict = {} # node id --> node name path_dict = {} # stack list string --> list of node ids + path_prefixes_dict = {} # stack list string --> list of node ids stack_list = [] for sl in timeline_map.values(): for s in sl: @@ -395,30 +376,33 @@ def create_tree(timeline_map): stack_list.append(s) stack_list.sort(key=len) for stack in stack_list: - stack_string = ";".join(stack) - if stack_string not in path_dict: - id_path_list = [] - prefix = "" - # check if we have any prefixes. start from the longest - for other_stack_string in sorted(path_dict, key=len, reverse=True): - if other_stack_string in stack_string: - # prefix found! - prefix = other_stack_string - id_path_list = list(path_dict[other_stack_string]) - break - # create nodes - if prefix != "": - new_nodes = stack_string.split(f"{prefix};")[1].split(";") + stack_len = len(stack) + id_path_list = [] + prefix = "" + for i in range(1, stack_len+1): + # start from reverse + attempted_prefix = ";".join(stack[0:stack_len-i]) + if attempted_prefix in path_prefixes_dict: + prefix = attempted_prefix + id_path_list = list(path_prefixes_dict[prefix]) + break + # create nodes + if prefix != "": + new_nodes = stack[i:] + new_prefix = prefix + else: + new_nodes = stack + new_prefix = "" + for elem in new_nodes: + if new_prefix == "": + new_prefix = elem else: - new_nodes = stack - for elem in new_nodes: - tree_dict[node_id_acc] = elem - id_path_list.append(node_id_acc) - node_id_acc += 1 - path_dict[stack_string] = id_path_list - - print(tree_dict) - print(path_dict) + new_prefix += f";{elem}" + tree_dict[node_id_acc] = elem + id_path_list.append(node_id_acc) + path_prefixes_dict[new_prefix] = list(id_path_list) + node_id_acc += 1 + path_dict[new_prefix] = id_path_list return tree_dict, path_dict @@ -437,11 +421,28 @@ def create_path_dot_str_dict(path_dict): def create_output(timeline_map, out_dir): + os.mkdir(out_dir) + + # make flame graph folded file + stacks = {} # stack to number of cycles + for i in timeline_map: + for stack_list in timeline_map[i]: + stack_id = ";".join(stack_list) + if stack_id not in stacks: + stacks[stack_id] = 1 + else: + stacks[stack_id] += 1 + + with open(os.path.join(out_dir, "flame.folded"), "w") as flame_out: + for stack in stacks: + flame_out.write(f"{stack} {stacks[stack]}\n") + + if len(timeline_map) > TREE_PICTURE_LIMIT: + print(f"Simulation exceeds {TREE_PICTURE_LIMIT} cycles, skipping trees...") + return tree_dict, path_dict = create_tree(timeline_map) path_to_dot_str = create_path_dot_str_dict(path_dict) all_paths_ordered = sorted(path_dict.keys()) - - os.mkdir(out_dir) for i in timeline_map: used_paths = set() used_nodes = set() @@ -454,16 +455,15 @@ def create_output(timeline_map, out_dir): used_nodes.add(node_id) fpath = os.path.join(out_dir, f"cycle{i}.dot") - # really lazy rn but I should actually use a library for this with open(fpath, "w") as f: f.write("digraph cycle" + str(i) + " {\n") # declare nodes. # used nodes should simply be declared for used_node in used_nodes: - f.write(f'\t{used_node} [label={tree_dict[used_node]}];\n') + f.write(f'\t{used_node} [label="{tree_dict[used_node]}"];\n') # unused nodes should be declared with gray for unused_node in all_nodes.difference(used_nodes): - f.write(f'\t{unused_node} [label={tree_dict[unused_node]},color="{INVISIBLE}",fontcolor="{INVISIBLE}"];\n') + f.write(f'\t{unused_node} [label="{tree_dict[unused_node]}",color="{INVISIBLE}",fontcolor="{INVISIBLE}"];\n') # write all paths. for path_id in all_paths_ordered: if ";" not in path_id or path_id in used_paths: @@ -472,41 +472,14 @@ def create_output(timeline_map, out_dir): f.write(f'\t{path_to_dot_str[path_id]} [color="{INVISIBLE}"];\n') f.write("}") - # make flame graph folded file - stacks = {} # stack to number of cycles - for i in timeline_map: - for stack_list in timeline_map[i]: - # stack_str = ";".join(map(lambda x : x.flame_repr(), stack_list)) - stack_id = ";".join(stack_list) - if stack_id not in stacks: - stacks[stack_id] = 1 - else: - stacks[stack_id] += 1 - - with open(os.path.join(out_dir, "flame.folded"), "w") as flame_out: - for stack in stacks: - flame_out.write(f"{stack} {stacks[stack]}\n") - def main(vcd_filename, cells_json_file, out_dir): - # FIXME: will support multicomponent programs later. There's maybe something wrong here. main_component, cells_to_components = read_component_cell_names_json(cells_json_file) converter = VCDConverter(main_component, cells_to_components) vcdvcd.VCDVCD(vcd_filename, callbacks=converter) converter.postprocess() - print("Active groups info: " + str(converter.active_elements_info.keys())) - print() - print("Call stack info: " + str(converter.call_stack_probe_info)) - print() - print("Cell stack info: " + str(converter.cell_invoke_caller_probe_info)) - print() - - # NOTE: for a more robust implementation, we can even skip the part where we store active - # cycles per group. new_timeline_map = create_traces(converter.active_elements_info, converter.call_stack_probe_info, converter.cell_invoke_caller_probe_info, converter.clock_cycles, cells_to_components, main_component) - create_tree(new_timeline_map) - create_output(new_timeline_map, out_dir)