From 745b7cc04d299db459500e41cc640d26b2b63916 Mon Sep 17 00:00:00 2001 From: KeenanRileyFaulkner Date: Wed, 30 Oct 2024 16:40:33 -0600 Subject: [PATCH 01/22] added dataset processing on per-graph basis to bfasst --- bfasst/flows/analyze_dataset.py | 35 +++++ bfasst/flows/flow_descriptions.yaml | 8 +- bfasst/paths.py | 2 + bfasst/tools/dataset_metrics/graph_metrics.py | 44 ++++++ .../process_graph_build.ninja.mustache | 2 + .../process_graph_rules.ninja.mustache | 4 + bfasst/utils/process_graph.py | 148 ++++++++++++++++++ 7 files changed, 242 insertions(+), 1 deletion(-) create mode 100644 bfasst/flows/analyze_dataset.py create mode 100644 bfasst/tools/dataset_metrics/graph_metrics.py create mode 100644 bfasst/tools/dataset_metrics/process_graph_build.ninja.mustache create mode 100644 bfasst/tools/dataset_metrics/process_graph_rules.ninja.mustache create mode 100644 bfasst/utils/process_graph.py diff --git a/bfasst/flows/analyze_dataset.py b/bfasst/flows/analyze_dataset.py new file mode 100644 index 00000000..a07e5ccd --- /dev/null +++ b/bfasst/flows/analyze_dataset.py @@ -0,0 +1,35 @@ +"""Analyze dataset metrics.""" + +from pathlib import Path +from bfasst.flows.flow import Flow +from bfasst.paths import FLOWS_PATH +from bfasst.tools.dataset_metrics.graph_metrics import GraphMetrics + + +class AnalyzeDataset(Flow): + """Analyze dataset metrics.""" + + def __init__(self, design, dataset): + # pylint: disable=duplicate-code + super().__init__(design) + self.design = design + self.dataset = Path(dataset) + + self.graph_metrics_default_tool = GraphMetrics( + self, design, None, None + ) # only used for configuring ninja + # pylint: enable=duplicate-code + + def create_build_snippets(self): + # get the size of the dataset + directories = [x for x in self.dataset.iterdir() if x.is_dir()] + iterations = len(directories) + + for i in range(1, iterations + 1): + graph_metrics_tool = GraphMetrics( + self, self.design, directories[i - 1] / f"{directories[i-1].name}.dump", i + ) + graph_metrics_tool.create_build_snippets() + + def get_top_level_flow_path(self) -> str: + return FLOWS_PATH / "analyze_dataset.py" diff --git a/bfasst/flows/flow_descriptions.yaml b/bfasst/flows/flow_descriptions.yaml index 53282896..a10cb3ad 100644 --- a/bfasst/flows/flow_descriptions.yaml +++ b/bfasst/flows/flow_descriptions.yaml @@ -156,4 +156,10 @@ flows: class: OpenTitan external_tools: - vivado - - opentitan \ No newline at end of file + - opentitan + +- name: AnalyzeDataset + description: Compute Metrics on an FPGA Circuit dataset for GNNs. 
+ module: analyze_dataset + class: AnalyzeDataset + \ No newline at end of file diff --git a/bfasst/paths.py b/bfasst/paths.py index 9dbeb27f..db5fe500 100644 --- a/bfasst/paths.py +++ b/bfasst/paths.py @@ -20,6 +20,8 @@ COMMON_TOOLS_PATH = TOOLS_PATH / "common" +DATASET_METRICS_TOOLS_PATH = TOOLS_PATH / "dataset_metrics" + REV_BIT_TOOLS_PATH = TOOLS_PATH / "rev_bit" NINJA_TRANSFORM_TOOLS_PATH = TOOLS_PATH / "transform" diff --git a/bfasst/tools/dataset_metrics/graph_metrics.py b/bfasst/tools/dataset_metrics/graph_metrics.py new file mode 100644 index 00000000..8768fa3c --- /dev/null +++ b/bfasst/tools/dataset_metrics/graph_metrics.py @@ -0,0 +1,44 @@ +"""Create the rule and build snippets for computing gnn dataset metrics.""" + +import chevron + +from bfasst.tools.tool import Tool +from bfasst.paths import NINJA_BUILD_PATH, DATASET_METRICS_TOOLS_PATH, BFASST_UTILS_PATH + + +class GraphMetrics(Tool): + """Create the rule and build snippets for computing gnn dataset metrics + .""" + + def __init__( + self, + flow, + design, + graph, + num, + ): + super().__init__(flow, design) + self.graph = graph + self.num = num + self.build_path = self.design_build_path / "dataset_metrics" + self.metrics_path = self.build_path / f"metrics_{num}.log" + + self._init_outputs() + self.rule_snippet_path = DATASET_METRICS_TOOLS_PATH / "process_graph_rules.ninja.mustache" + + def create_build_snippets(self): + with open(DATASET_METRICS_TOOLS_PATH / "process_graph_build.ninja.mustache", "r") as f: + build = chevron.render( + f, + {"output": self.metrics_path, "graph": self.graph}, + ) + + with open(NINJA_BUILD_PATH, "a") as f: + f.write(build) + + def _init_outputs(self): + self.outputs["metrics_path"] = self.metrics_path + + def add_ninja_deps(self, deps): + self._add_ninja_deps_default(deps, __file__) + deps.append(BFASST_UTILS_PATH / "process_graph.py") diff --git a/bfasst/tools/dataset_metrics/process_graph_build.ninja.mustache b/bfasst/tools/dataset_metrics/process_graph_build.ninja.mustache new file mode 100644 index 00000000..92cf4887 --- /dev/null +++ b/bfasst/tools/dataset_metrics/process_graph_build.ninja.mustache @@ -0,0 +1,2 @@ +build {{ output }}: process_graph {{ graph }} + diff --git a/bfasst/tools/dataset_metrics/process_graph_rules.ninja.mustache b/bfasst/tools/dataset_metrics/process_graph_rules.ninja.mustache new file mode 100644 index 00000000..7bde2576 --- /dev/null +++ b/bfasst/tools/dataset_metrics/process_graph_rules.ninja.mustache @@ -0,0 +1,4 @@ +rule process_graph + command = python {{ bfasst_path }}/bfasst/utils/process_graph.py $in -o $out + description = compute metrics on $in and save them to $out + diff --git a/bfasst/utils/process_graph.py b/bfasst/utils/process_graph.py new file mode 100644 index 00000000..79453634 --- /dev/null +++ b/bfasst/utils/process_graph.py @@ -0,0 +1,148 @@ +"""Compute metrics on a single graph in a dataset.""" + +import argparse +from collections import defaultdict +import logging +import os +import json + +logger = logging.getLogger(__name__) + + +def main(): + """Load the graph, convert to adj_list, and compute metrics.""" + # ArgParse + parser = argparse.ArgumentParser(description="Compute metrics on a graph.") + parser.add_argument("graph", help="The graph to compute metrics on.") + parser.add_argument( + "-v", "--verbose", action="store_true", help="Enable debug logging." 
+ ) + parser.add_argument("-o", help="The name of the output file to create") + args = parser.parse_args() + + # Logging (for debug, don't use in parallel) + logging.basicConfig( + level=logging.DEBUG if args.verbose else logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", + ) + + component_nodes, component_edges = load_graph(args.graph) + + adj_lists = convert_to_adj_list(component_nodes, component_edges) + + # Compute metrics for each component + metrics_per_ip = compute_metrics_per_ip(adj_lists, args) + + # write metrics to a file + output = args.o if args.o else "metrics.log" + with open(os.path.abspath(output), "w") as f: + f.write(json.dumps(metrics_per_ip)) + + +def load_graph(graph): + """Load a graph from a file.""" + graph_path = os.path.abspath(graph) + + component_nodes = defaultdict(list) # {ip_inst: [node1, node2, ...]} + component_edges = defaultdict(list) # {ip_inst: [(node1, node2), ...]} + section = None # track the section: nodes or edges + + with open(graph_path, "r") as f: + for line in f: + line = line.strip() + + # Detect the beginning of a section + if line.startswith("(("): + if section is None: + section = "nodes" + else: + section = "edges" + line = line[1:].strip() # Remove the opening '(' + + # Detect the end of a section + if line == ")": + continue + + if not line: + continue # Skip empty lines + + if section == "nodes": + parts = line.replace('"', "").split() + node_id, label = parts[0], parts[2] + node_id = node_id.replace("(", "") + if "ip" not in label: + label = "fabric" + component_nodes[label].append(node_id) + + elif section == "edges": + node1, node2 = line.replace('"', "").replace("(", "").replace(")", "").split() + # get the label for both nodes + node1_label = find_label(node1, component_nodes) + node2_label = find_label(node2, component_nodes) + if node1_label == node2_label: + component_edges[node1_label].append((node1, node2)) + + return component_nodes, component_edges + + +def find_label(node, component_nodes): + """Find the label for a node.""" + for label, nodes in component_nodes.items(): + if node in nodes: + return label + return None + + +def convert_to_adj_list(component_nodes, component_edges): + """Convert the graph to adjacency lists.""" + adj_lists = {} + for label, nodes in component_nodes.items(): + adj_lists[label] = {} + for node in nodes: + adj_lists[label][node] = [] + + for label, edges in component_edges.items(): + for node1, node2 in edges: + adj_lists[label][node1].append(node2) + adj_lists[label][node2].append(node1) + + return adj_lists + + +def compute_metrics_per_ip(adj_lists, args): + metrics_per_ip = {} + for label, adj_list in adj_lists.items(): + + # set up default entries + ip = get_ip_name_from_label(label) + if ip not in metrics_per_ip: + metrics_per_ip[ip] = {"order": [], "size": []} + + # Order + metrics_per_ip[ip]["order"].append(len(adj_list)) + + # Size + edge_count = 0 + for node in adj_list: + for neighbor in adj_list[node]: + edge_count += 1 + edge_count = edge_count // 2 + metrics_per_ip[ip]["size"].append(edge_count) + + # Debug (verbose flag only) + logger.debug(f"IP: {ip}") + logger.debug(f"Component: {label}") + logger.debug(f"Nodes: {len(adj_list)}") + logger.debug(f"Edges: {edge_count}") + logger.debug("") + + return metrics_per_ip + + +def get_ip_name_from_label(label): + ip_name = ("_").join(label.split("_")[2:]) + return ip_name if ip_name else label + + +if __name__ == "__main__": + main() From c7876bb0165d120d2d9181fb540bb5e82d456873 Mon Sep 17 00:00:00 2001 From: 
KeenanRileyFaulkner Date: Wed, 30 Oct 2024 16:47:49 -0600 Subject: [PATCH 02/22] minor format fix --- bfasst/flows/analyze_dataset.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bfasst/flows/analyze_dataset.py b/bfasst/flows/analyze_dataset.py index a07e5ccd..5a78696f 100644 --- a/bfasst/flows/analyze_dataset.py +++ b/bfasst/flows/analyze_dataset.py @@ -26,8 +26,9 @@ def create_build_snippets(self): iterations = len(directories) for i in range(1, iterations + 1): + num = int(directories[i - 1].name.split("_")[-1]) graph_metrics_tool = GraphMetrics( - self, self.design, directories[i - 1] / f"{directories[i-1].name}.dump", i + self, self.design, directories[i - 1] / f"{directories[i - 1].name}.dump", num ) graph_metrics_tool.create_build_snippets() From fc704b116d03d3379d51d644dd7d28ba7f79809c Mon Sep 17 00:00:00 2001 From: KeenanRileyFaulkner Date: Wed, 30 Oct 2024 17:20:11 -0600 Subject: [PATCH 03/22] Added basics for accumulation of graph metrics --- bfasst/utils/accumulate_metrics.py | 100 +++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 bfasst/utils/accumulate_metrics.py diff --git a/bfasst/utils/accumulate_metrics.py b/bfasst/utils/accumulate_metrics.py new file mode 100644 index 00000000..2613623b --- /dev/null +++ b/bfasst/utils/accumulate_metrics.py @@ -0,0 +1,100 @@ +"""Accumulate metrics from graphs in a dataset after computing them for all graphs""" + +import argparse +import logging +import json +from pathlib import Path +import statistics + +logger = logging.getLogger(__name__) + + +def main(): + """Load the graph, convert to adj_list, and compute metrics.""" + # ArgParse + parser = argparse.ArgumentParser(description="Compute metrics on a graph.") + parser.add_argument( + "analysis_dir", help="The path to the folder containing all analysis files for all graphs." 
+ ) + parser.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging.") + parser.add_argument("-o", help="The name of the output file to create") + args = parser.parse_args() + + # Logging (for debug, don't use in parallel) + logging.basicConfig( + level=logging.DEBUG if args.verbose else logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", + ) + + # Initialize the master dictionary + master_metrics = {} + + # Iterate through the files in the analysis directory + for file in Path(args.analysis_dir).iterdir(): + if file.is_dir(): + continue + + with open(file, "r") as f: + graph_metrics = json.loads(f.readline()) + + for ip, metrics in graph_metrics.items(): + # Initialize the IP entry in the master dictionary if it doesn't exist + if ip not in master_metrics: + master_metrics[ip] = {} + + for metric, values in metrics.items(): + # Initialize the metric entry if it doesn't exist + if metric not in master_metrics[ip]: + master_metrics[ip][metric] = [] + + # Concatenate the lists + master_metrics[ip][metric].extend(values) + + # sort the values for each metric after merging + for ip in master_metrics: + for metric in master_metrics[ip]: + master_metrics[ip][metric] = sorted(master_metrics[ip][metric]) + + # Compute the stats for each metric + stats_summary = {} + for ip, metrics in master_metrics.items(): + for metric, values in metrics.items(): + # Calculate statistics + if values: # Check if the list is not empty + min_val, Q1, median, Q3, max_val = five_number_summary(values) + mean = sum(values) / len(values) + stddev = statistics.stdev(values) if len(values) > 1 else 0.0 + + # Prepare the summary dictionary + if ip not in stats_summary: + stats_summary[ip] = {} + + stats_summary[ip][metric] = { + "min": min_val, + "Q1": Q1, + "median": median, + "Q3": Q3, + "max": max_val, + "mean": mean, + "stddev": stddev, + } + + for k, v in master_metrics.items(): + logger.debug(k + ": " + str(v)) + + for k, v in stats_summary.items(): + logger.debug(k + ": " + str(v)) + + +def five_number_summary(data): + n = len(data) + min_val = data[0] + max_val = data[-1] + Q1 = data[n // 4] + median = data[n // 2] + Q3 = data[(3 * n) // 4] + return min_val, Q1, median, Q3, max_val + + +if __name__ == "__main__": + main() From 75883bd727dd134548368e323f5eeff93443cb6d Mon Sep 17 00:00:00 2001 From: KeenanRileyFaulkner Date: Wed, 30 Oct 2024 17:27:27 -0600 Subject: [PATCH 04/22] updated accumulation script to write to file --- bfasst/utils/accumulate_metrics.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/bfasst/utils/accumulate_metrics.py b/bfasst/utils/accumulate_metrics.py index 2613623b..c1f63c53 100644 --- a/bfasst/utils/accumulate_metrics.py +++ b/bfasst/utils/accumulate_metrics.py @@ -17,7 +17,10 @@ def main(): "analysis_dir", help="The path to the folder containing all analysis files for all graphs." 
) parser.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging.") - parser.add_argument("-o", help="The name of the output file to create") + parser.add_argument("-m", help="The name of the metrics file to create") + parser.add_argument( + "-s", help="The name of the stats (5-num summary, mean, stddev) file to create" + ) args = parser.parse_args() # Logging (for debug, don't use in parallel) @@ -79,11 +82,14 @@ def main(): "stddev": stddev, } - for k, v in master_metrics.items(): - logger.debug(k + ": " + str(v)) + # write master_metrics to a file + output = args.m if args.m else "master_metrics.log" + with open(output, "w") as f: + f.write(json.dumps(master_metrics, indent=4)) - for k, v in stats_summary.items(): - logger.debug(k + ": " + str(v)) + output = args.s if args.s else "summary_statistics.log" + with open(output, "w") as f: + f.write(json.dumps(stats_summary, indent=4)) def five_number_summary(data): From aff18972878f8c8b3cb832369d90e998e5bb0ff7 Mon Sep 17 00:00:00 2001 From: KeenanRileyFaulkner Date: Thu, 31 Oct 2024 13:43:51 -0600 Subject: [PATCH 05/22] Added accumulation of metrics --- bfasst/flows/analyze_dataset.py | 11 +++-- .../dataset_metrics/accumulate_metrics.py | 45 +++++++++++++++++++ .../accumulate_metrics_build.ninja.mustache | 4 ++ .../accumulate_metrics_rules.ninja.mustache | 4 ++ bfasst/tools/dataset_metrics/graph_metrics.py | 3 +- 5 files changed, 62 insertions(+), 5 deletions(-) create mode 100644 bfasst/tools/dataset_metrics/accumulate_metrics.py create mode 100644 bfasst/tools/dataset_metrics/accumulate_metrics_build.ninja.mustache create mode 100644 bfasst/tools/dataset_metrics/accumulate_metrics_rules.ninja.mustache diff --git a/bfasst/flows/analyze_dataset.py b/bfasst/flows/analyze_dataset.py index 5a78696f..a4402695 100644 --- a/bfasst/flows/analyze_dataset.py +++ b/bfasst/flows/analyze_dataset.py @@ -3,6 +3,7 @@ from pathlib import Path from bfasst.flows.flow import Flow from bfasst.paths import FLOWS_PATH +from bfasst.tools.dataset_metrics.accumulate_metrics import AccumulateMetrics from bfasst.tools.dataset_metrics.graph_metrics import GraphMetrics @@ -15,22 +16,26 @@ def __init__(self, design, dataset): self.design = design self.dataset = Path(dataset) - self.graph_metrics_default_tool = GraphMetrics( - self, design, None, None - ) # only used for configuring ninja + # only used for configuring ninja rule snippets + self.graph_metrics_default_tool = GraphMetrics(self, design, None, None) + self.accumulate_metrics_tool = AccumulateMetrics(self, design, None) # pylint: enable=duplicate-code def create_build_snippets(self): # get the size of the dataset directories = [x for x in self.dataset.iterdir() if x.is_dir()] iterations = len(directories) + pieces = [] for i in range(1, iterations + 1): num = int(directories[i - 1].name.split("_")[-1]) graph_metrics_tool = GraphMetrics( self, self.design, directories[i - 1] / f"{directories[i - 1].name}.dump", num ) + pieces.append(graph_metrics_tool.metrics_path) graph_metrics_tool.create_build_snippets() + AccumulateMetrics(self, self.design, pieces).create_build_snippets() + def get_top_level_flow_path(self) -> str: return FLOWS_PATH / "analyze_dataset.py" diff --git a/bfasst/tools/dataset_metrics/accumulate_metrics.py b/bfasst/tools/dataset_metrics/accumulate_metrics.py new file mode 100644 index 00000000..713e8424 --- /dev/null +++ b/bfasst/tools/dataset_metrics/accumulate_metrics.py @@ -0,0 +1,45 @@ +"""Accumulate metrics from the graph_metrics tool.""" + +import chevron + +from 
bfasst.tools.tool import Tool +from bfasst.paths import NINJA_BUILD_PATH, DATASET_METRICS_TOOLS_PATH, BFASST_UTILS_PATH + + +class AccumulateMetrics(Tool): + """Accumulate metrics from the graph_metrics tool.""" + + def __init__(self, flow, design, pieces): + super().__init__(flow, design) + self.pieces = pieces + self.build_path = self.design_build_path / "dataset_metrics" + self.metrics_path = self.build_path / "master_metrics.log" + self.summary_stats = self.build_path / "summary_stats.log" + + self._init_outputs() + self.rule_snippet_path = ( + DATASET_METRICS_TOOLS_PATH / "accumulate_metrics_rules.ninja.mustache" + ) + + def create_build_snippets(self): + with open(DATASET_METRICS_TOOLS_PATH / "accumulate_metrics_build.ninja.mustache", "r") as f: + build = chevron.render( + f, + { + "metrics_file": self.metrics_path, + "summary_stats": self.summary_stats, + "aggregation_dir": self.build_path, + "pieces": self.pieces, + }, + ) + + with open(NINJA_BUILD_PATH, "a") as f: + f.write(build) + + def _init_outputs(self): + self.outputs["metrics_path"] = self.metrics_path + self.outputs["summary_stats"] = self.summary_stats + + def add_ninja_deps(self, deps): + self._add_ninja_deps_default(deps, __file__) + deps.append(BFASST_UTILS_PATH / "accumulate_metrics.py") diff --git a/bfasst/tools/dataset_metrics/accumulate_metrics_build.ninja.mustache b/bfasst/tools/dataset_metrics/accumulate_metrics_build.ninja.mustache new file mode 100644 index 00000000..2299f729 --- /dev/null +++ b/bfasst/tools/dataset_metrics/accumulate_metrics_build.ninja.mustache @@ -0,0 +1,4 @@ +build {{ metrics_file }} {{ summary_stats }}: accumulate_metrics {{ aggregation_dir }} | {{#pieces}}{{.}} {{/pieces}} + metrics_file = {{ metrics_file }} + summary_stats = {{ summary_stats }} + diff --git a/bfasst/tools/dataset_metrics/accumulate_metrics_rules.ninja.mustache b/bfasst/tools/dataset_metrics/accumulate_metrics_rules.ninja.mustache new file mode 100644 index 00000000..2454a7eb --- /dev/null +++ b/bfasst/tools/dataset_metrics/accumulate_metrics_rules.ninja.mustache @@ -0,0 +1,4 @@ +rule accumulate_metrics + command = python {{ bfasst_path }}/bfasst/utils/accumulate_metrics.py $in -m $metrics_file -s $summary_stats + description = accumulate metrics from $in to produce master_metrics and summary_stats files + diff --git a/bfasst/tools/dataset_metrics/graph_metrics.py b/bfasst/tools/dataset_metrics/graph_metrics.py index 8768fa3c..5a8311f7 100644 --- a/bfasst/tools/dataset_metrics/graph_metrics.py +++ b/bfasst/tools/dataset_metrics/graph_metrics.py @@ -7,8 +7,7 @@ class GraphMetrics(Tool): - """Create the rule and build snippets for computing gnn dataset metrics - .""" + """Create the rule and build snippets for computing gnn dataset metrics.""" def __init__( self, From 8155cfcfbaee1eac09abe8ddd102a416657e747e Mon Sep 17 00:00:00 2001 From: KeenanRileyFaulkner Date: Thu, 31 Oct 2024 13:52:57 -0600 Subject: [PATCH 06/22] refactored to use FlowNoDesign --- bfasst/flows/analyze_dataset.py | 30 ++++++++++++------- .../dataset_metrics/accumulate_metrics.py | 12 ++++---- bfasst/tools/dataset_metrics/graph_metrics.py | 11 ++++--- 3 files changed, 30 insertions(+), 23 deletions(-) diff --git a/bfasst/flows/analyze_dataset.py b/bfasst/flows/analyze_dataset.py index a4402695..399630c9 100644 --- a/bfasst/flows/analyze_dataset.py +++ b/bfasst/flows/analyze_dataset.py @@ -1,24 +1,24 @@ """Analyze dataset metrics.""" from pathlib import Path -from bfasst.flows.flow import Flow +import pathlib +from bfasst.flows.flow import FlowNoDesign 
from bfasst.paths import FLOWS_PATH from bfasst.tools.dataset_metrics.accumulate_metrics import AccumulateMetrics from bfasst.tools.dataset_metrics.graph_metrics import GraphMetrics -class AnalyzeDataset(Flow): +class AnalyzeDataset(FlowNoDesign): """Analyze dataset metrics.""" - def __init__(self, design, dataset): + def __init__(self, dataset): # pylint: disable=duplicate-code - super().__init__(design) - self.design = design + super().__init__() self.dataset = Path(dataset) # only used for configuring ninja rule snippets - self.graph_metrics_default_tool = GraphMetrics(self, design, None, None) - self.accumulate_metrics_tool = AccumulateMetrics(self, design, None) + self.graph_metrics_default_tool = GraphMetrics(self, None, None) + self.accumulate_metrics_tool = AccumulateMetrics(self, None) # pylint: enable=duplicate-code def create_build_snippets(self): @@ -30,12 +30,20 @@ def create_build_snippets(self): for i in range(1, iterations + 1): num = int(directories[i - 1].name.split("_")[-1]) graph_metrics_tool = GraphMetrics( - self, self.design, directories[i - 1] / f"{directories[i - 1].name}.dump", num + self, directories[i - 1] / f"{directories[i - 1].name}.dump", num ) pieces.append(graph_metrics_tool.metrics_path) graph_metrics_tool.create_build_snippets() - AccumulateMetrics(self, self.design, pieces).create_build_snippets() + AccumulateMetrics(self, pieces).create_build_snippets() - def get_top_level_flow_path(self) -> str: - return FLOWS_PATH / "analyze_dataset.py" + @classmethod + def flow_build_dir_name(cls) -> str: + """Get the name of the build directory for this flow""" + return "dataset_metrics" + + def add_ninja_deps(self, deps): + super().add_ninja_deps(deps) + + def get_top_level_flow_path(self): + return pathlib.Path(__file__).resolve() diff --git a/bfasst/tools/dataset_metrics/accumulate_metrics.py b/bfasst/tools/dataset_metrics/accumulate_metrics.py index 713e8424..74473b62 100644 --- a/bfasst/tools/dataset_metrics/accumulate_metrics.py +++ b/bfasst/tools/dataset_metrics/accumulate_metrics.py @@ -2,17 +2,17 @@ import chevron -from bfasst.tools.tool import Tool -from bfasst.paths import NINJA_BUILD_PATH, DATASET_METRICS_TOOLS_PATH, BFASST_UTILS_PATH +from bfasst.tools.tool import ToolBase +from bfasst.paths import BUILD_PATH, NINJA_BUILD_PATH, DATASET_METRICS_TOOLS_PATH, BFASST_UTILS_PATH -class AccumulateMetrics(Tool): +class AccumulateMetrics(ToolBase): """Accumulate metrics from the graph_metrics tool.""" - def __init__(self, flow, design, pieces): - super().__init__(flow, design) + def __init__(self, flow, pieces): + super().__init__(flow) self.pieces = pieces - self.build_path = self.design_build_path / "dataset_metrics" + self.build_path = BUILD_PATH / "dataset_metrics" self.metrics_path = self.build_path / "master_metrics.log" self.summary_stats = self.build_path / "summary_stats.log" diff --git a/bfasst/tools/dataset_metrics/graph_metrics.py b/bfasst/tools/dataset_metrics/graph_metrics.py index 5a8311f7..8e4917c4 100644 --- a/bfasst/tools/dataset_metrics/graph_metrics.py +++ b/bfasst/tools/dataset_metrics/graph_metrics.py @@ -2,24 +2,23 @@ import chevron -from bfasst.tools.tool import Tool -from bfasst.paths import NINJA_BUILD_PATH, DATASET_METRICS_TOOLS_PATH, BFASST_UTILS_PATH +from bfasst.tools.tool import ToolBase +from bfasst.paths import BUILD_PATH, NINJA_BUILD_PATH, DATASET_METRICS_TOOLS_PATH, BFASST_UTILS_PATH -class GraphMetrics(Tool): +class GraphMetrics(ToolBase): """Create the rule and build snippets for computing gnn dataset metrics.""" def 
__init__( self, flow, - design, graph, num, ): - super().__init__(flow, design) + super().__init__(flow) self.graph = graph self.num = num - self.build_path = self.design_build_path / "dataset_metrics" + self.build_path = BUILD_PATH / "dataset_metrics" self.metrics_path = self.build_path / f"metrics_{num}.log" self._init_outputs() From 604ac18dc7388c68333bf088db8f9048c1b7b121 Mon Sep 17 00:00:00 2001 From: KeenanRileyFaulkner Date: Thu, 31 Oct 2024 16:51:51 -0600 Subject: [PATCH 07/22] added diameter --- bfasst/utils/process_graph.py | 117 ++++++++++++++++++++++++++++++---- 1 file changed, 106 insertions(+), 11 deletions(-) diff --git a/bfasst/utils/process_graph.py b/bfasst/utils/process_graph.py index 79453634..818189a8 100644 --- a/bfasst/utils/process_graph.py +++ b/bfasst/utils/process_graph.py @@ -14,9 +14,7 @@ def main(): # ArgParse parser = argparse.ArgumentParser(description="Compute metrics on a graph.") parser.add_argument("graph", help="The graph to compute metrics on.") - parser.add_argument( - "-v", "--verbose", action="store_true", help="Enable debug logging." - ) + parser.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging.") parser.add_argument("-o", help="The name of the output file to create") args = parser.parse_args() @@ -112,23 +110,26 @@ def convert_to_adj_list(component_nodes, component_edges): def compute_metrics_per_ip(adj_lists, args): metrics_per_ip = {} for label, adj_list in adj_lists.items(): - + # set up default entries ip = get_ip_name_from_label(label) if ip not in metrics_per_ip: - metrics_per_ip[ip] = {"order": [], "size": []} - + metrics_per_ip[ip] = {"order": [], "size": [], "degree": [], "diameter": []} + # Order metrics_per_ip[ip]["order"].append(len(adj_list)) # Size - edge_count = 0 - for node in adj_list: - for neighbor in adj_list[node]: - edge_count += 1 - edge_count = edge_count // 2 + edge_count = compute_size(adj_list) metrics_per_ip[ip]["size"].append(edge_count) + # Degree + avg_desgree = compute_average_degree(adj_list) + + # Diameter + avg_diameter = compute_average_diameter(adj_list) + metrics_per_ip[ip]["diameter"].append(avg_diameter) + # Debug (verbose flag only) logger.debug(f"IP: {ip}") logger.debug(f"Component: {label}") @@ -139,6 +140,100 @@ def compute_metrics_per_ip(adj_lists, args): return metrics_per_ip +def compute_size(adj_list): + edge_count = 0 + for node in adj_list: + for neighbor in adj_list[node]: + edge_count += 1 + return edge_count // 2 + + +def compute_average_diameter(adj_list): + uf = UnionFind() + + for u in adj_list: + for v in adj_list[u]: + uf.union(u, v) + + components = {} + for node in adj_list: + root = uf.find(node) + if root not in components: + components[root] = set() + components[root].add(node) + + diameters = [] + + for component in components.values(): + node = next(iter(component)) + u, _ = bfs_farthest(adj_list, node) + _, diameter = bfs_farthest(adj_list, u) + diameters.append(diameter) + + return sum(diameters) / len(diameters) if diameters else 0 + + +def compute_average_degree(adj_list): + degrees = [] + for node in adj_list: + degrees.append(len(adj_list[node])) + return sum(degrees) / len(degrees) if degrees else 0 + + +class UnionFind: + def __init__(self): + self.parent = {} + self.rank = {} + + def add(self, u): + if u not in self.parent: + self.parent[u] = u + self.rank[u] = 0 + + def find(self, u): + # Ensure u is in the union find + self.add(u) + + # Path compression + if self.parent[u] != u: + self.parent[u] = self.find(self.parent[u]) + return 
self.parent[u] + + def union(self, u, v): + self.add(u) + self.add(v) + pu, pv = self.find(u), self.find(v) + + if pv != pu: + if self.rank[pu] > self.rank[pv]: + self.parent[pv] = pu + elif self.rank[pv] > self.rank[pu]: + self.parent[pu] = pv + else: + self.parent[pv] = pu + self.rank[pu] += 1 + + +def bfs_farthest(adj_list, start_node): + queue = [(start_node, 0)] + visited = {start_node} + farthest_node = start_node + max_distance = 0 + + while queue: + node, distance = queue.pop(0) + if distance > max_distance: + max_distance = distance + farthest_node = node + + for neighbor in adj_list[node]: + if neighbor not in visited: + queue.append((neighbor, distance + 1)) + visited.add(neighbor) + + return farthest_node, max_distance + + def get_ip_name_from_label(label): ip_name = ("_").join(label.split("_")[2:]) return ip_name if ip_name else label From 1657e9c3be52b1c61b9207d27a12fa987dac9f93 Mon Sep 17 00:00:00 2001 From: KeenanRileyFaulkner Date: Thu, 31 Oct 2024 16:56:55 -0600 Subject: [PATCH 08/22] added degree --- bfasst/utils/process_graph.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bfasst/utils/process_graph.py b/bfasst/utils/process_graph.py index 818189a8..50268c9e 100644 --- a/bfasst/utils/process_graph.py +++ b/bfasst/utils/process_graph.py @@ -125,6 +125,7 @@ def compute_metrics_per_ip(adj_lists, args): # Degree avg_desgree = compute_average_degree(adj_list) + metrics_per_ip[ip]["degree"].append(avg_desgree) # Diameter avg_diameter = compute_average_diameter(adj_list) From f28c449ef33141414a8e98eca7c007c8ab0ac374 Mon Sep 17 00:00:00 2001 From: KeenanRileyFaulkner Date: Thu, 31 Oct 2024 17:25:05 -0600 Subject: [PATCH 09/22] added kcore and global/local clustering coefficients --- bfasst/utils/process_graph.py | 109 +++++++++++++++++++++++++++++++++- 1 file changed, 107 insertions(+), 2 deletions(-) diff --git a/bfasst/utils/process_graph.py b/bfasst/utils/process_graph.py index 50268c9e..165a14c3 100644 --- a/bfasst/utils/process_graph.py +++ b/bfasst/utils/process_graph.py @@ -1,7 +1,7 @@ """Compute metrics on a single graph in a dataset.""" import argparse -from collections import defaultdict +from collections import defaultdict, deque import logging import os import json @@ -114,7 +114,15 @@ def compute_metrics_per_ip(adj_lists, args): # set up default entries ip = get_ip_name_from_label(label) if ip not in metrics_per_ip: - metrics_per_ip[ip] = {"order": [], "size": [], "degree": [], "diameter": []} + metrics_per_ip[ip] = { + "order": [], + "size": [], + "degree": [], + "diameter": [], + "kcore": [], + "clustering": [], + "local_clustering": [], + } # Order metrics_per_ip[ip]["order"].append(len(adj_list)) @@ -131,6 +139,18 @@ def compute_metrics_per_ip(adj_lists, args): avg_diameter = compute_average_diameter(adj_list) metrics_per_ip[ip]["diameter"].append(avg_diameter) + # K-core + max_k, _ = compute_k_core(adj_list) + metrics_per_ip[ip]["kcore"].append(max_k) + + # Global Clustering Coefficient + global_clustering = compute_global_clustering(adj_list) + metrics_per_ip[ip]["clustering"].append(global_clustering) + + # Local Clustering Coefficient + local_clustering = compute_local_clustering(adj_list) + metrics_per_ip[ip]["local_clustering"].append(local_clustering) + # Debug (verbose flag only) logger.debug(f"IP: {ip}") logger.debug(f"Component: {label}") @@ -235,6 +255,91 @@ def bfs_farthest(adj_list, start_node): return farthest_node, max_distance +def compute_k_core(adj_list): + degree = {node: len(neighbors) for node, neighbors in adj_list.items()} + max_k = 
0 + k_core_subgraph = {} + + k = 1 + while True: + queue = deque(node for node, d in degree.items() if d <= k) + + while queue: + node = queue.popleft() + for neighbor in adj_list[node]: + if degree[neighbor] >= k: + degree[neighbor] -= 1 + if degree[neighbor] < k: + queue.append(neighbor) + degree[node] = 0 + + k_core = { + node: {neighbor for neighbor in neighbors if degree[neighbor] >= k} + for node, neighbors in adj_list.items() + if degree[node] >= k + } + + if k_core: + k_core_subgraph = k_core + max_k = k + else: + break + + k += 1 + + return max_k, k_core_subgraph + + +def compute_global_clustering(adj_list): + closed_triplets = 0 + total_triplets = 0 + visited_pairs = set() + + for node in adj_list: + neighbors = set(adj_list[node]) + degree = len(neighbors) + + total_triplets += degree * (degree - 1) // 2 + + for neighbor in neighbors: + if (node, neighbor) in visited_pairs or (neighbor, node) in visited_pairs: + continue + + common_neighbors = neighbors.intersection(set(adj_list[neighbor])) + closed_triplets += len(common_neighbors) + visited_pairs.add((node, neighbor)) + + return (3 * closed_triplets) / total_triplets if total_triplets else 0 + + +def compute_local_clustering(adj_list): + local_clustering_coefficients = [] + + for node in adj_list: + neighbors = set(adj_list[node]) + degree = len(neighbors) + + if degree < 2: + local_clustering_coefficients.append(0) + continue + + closed_triplets = 0 + + for neighbor in neighbors: + common_neighbors = neighbors.intersection(set(adj_list[neighbor])) + closed_triplets += len(common_neighbors) + + local_clustering_coefficients.append( + (closed_triplets) / (degree * (degree - 1)) if degree > 1 else 0 + ) + + return ( + sum(local_clustering_coefficients) / len(local_clustering_coefficients) + if local_clustering_coefficients + else 0 + ) + + def get_ip_name_from_label(label): ip_name = ("_").join(label.split("_")[2:]) return ip_name if ip_name else label From d01031fbe2feb5b1772ce2aa457287f847d6c00d Mon Sep 17 00:00:00 2001 From: KeenanRileyFaulkner Date: Thu, 31 Oct 2024 17:26:37 -0600 Subject: [PATCH 10/22] updated names for clustering coefficients --- bfasst/utils/process_graph.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bfasst/utils/process_graph.py b/bfasst/utils/process_graph.py index 165a14c3..4a4df071 100644 --- a/bfasst/utils/process_graph.py +++ b/bfasst/utils/process_graph.py @@ -120,8 +120,8 @@ def compute_metrics_per_ip(adj_lists, args): "degree": [], "diameter": [], "kcore": [], - "clustering": [], - "local_clustering": [], + "global_clustering_coeff": [], + "local_clustering_coeff": [], } # Order @@ -145,11 +145,11 @@ def compute_metrics_per_ip(adj_lists, args): # Global Clustering Coefficient global_clustering = compute_global_clustering(adj_list) - metrics_per_ip[ip]["clustering"].append(global_clustering) + metrics_per_ip[ip]["global_clustering_coeff"].append(global_clustering) # Local Clustering Coefficient local_clustering = compute_local_clustering(adj_list) - metrics_per_ip[ip]["local_clustering"].append(local_clustering) + metrics_per_ip[ip]["local_clustering_coeff"].append(local_clustering) # Debug (verbose flag only) logger.debug(f"IP: {ip}") From 0c4a6657bb4d2fc2e3fed853f5381a81ead28456 Mon Sep 17 00:00:00 2001 From: KeenanRileyFaulkner Date: Thu, 31 Oct 2024 17:35:06 -0600 Subject: [PATCH 11/22] added options on each metric so they can be turned off/on --- bfasst/utils/process_graph.py | 55 ++++++++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 13 
deletions(-) diff --git a/bfasst/utils/process_graph.py b/bfasst/utils/process_graph.py index 4a4df071..5afea97d 100644 --- a/bfasst/utils/process_graph.py +++ b/bfasst/utils/process_graph.py @@ -16,6 +16,28 @@ def main(): parser.add_argument("graph", help="The graph to compute metrics on.") parser.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging.") parser.add_argument("-o", help="The name of the output file to create") + + parser.add_argument("--order", action="store_true", help="Compute the order of the graph.") + parser.add_argument("--size", action="store_true", help="Compute the size of the graph.") + parser.add_argument( + "--degree", action="store_true", help="Compute the average degree of the graph." + ) + parser.add_argument( + "--diameter", action="store_true", help="Compute the average diameter of the graph." + ) + parser.add_argument("--kcore", action="store_true", help="Compute the k-core of the graph.") + parser.add_argument( + "--global_clustering_coeff", + action="store_true", + help="Compute the global clustering coefficient of the graph.", + ) + parser.add_argument( + "--local_clustering_coeff", + action="store_true", + help="Compute the local clustering coefficient of the graph.", + ) + parser.add_argument("--all", action="store_true", help="Compute all metrics.", default=True) + args = parser.parse_args() # Logging (for debug, don't use in parallel) @@ -125,31 +147,38 @@ def compute_metrics_per_ip(adj_lists, args): } # Order - metrics_per_ip[ip]["order"].append(len(adj_list)) + if args.all or args.order: + metrics_per_ip[ip]["order"].append(len(adj_list)) # Size - edge_count = compute_size(adj_list) - metrics_per_ip[ip]["size"].append(edge_count) + if args.all or args.size: + edge_count = compute_size(adj_list) + metrics_per_ip[ip]["size"].append(edge_count) # Degree - avg_desgree = compute_average_degree(adj_list) - metrics_per_ip[ip]["degree"].append(avg_desgree) + if args.all or args.degree: + avg_desgree = compute_average_degree(adj_list) + metrics_per_ip[ip]["degree"].append(avg_desgree) # Diameter - avg_diameter = compute_average_diameter(adj_list) - metrics_per_ip[ip]["diameter"].append(avg_diameter) + if args.all or args.diameter: + avg_diameter = compute_average_diameter(adj_list) + metrics_per_ip[ip]["diameter"].append(avg_diameter) # K-core - max_k, _ = compute_k_core(adj_list) - metrics_per_ip[ip]["kcore"].append(max_k) + if args.all or args.kcore: + max_k, _ = compute_k_core(adj_list) + metrics_per_ip[ip]["kcore"].append(max_k) # Global Clustering Coefficient - global_clustering = compute_global_clustering(adj_list) - metrics_per_ip[ip]["global_clustering_coeff"].append(global_clustering) + if args.all or args.global_clustering_coeff: + global_clustering = compute_global_clustering(adj_list) + metrics_per_ip[ip]["global_clustering_coeff"].append(global_clustering) # Local Clustering Coefficient - local_clustering = compute_local_clustering(adj_list) - metrics_per_ip[ip]["local_clustering_coeff"].append(local_clustering) + if args.all or args.local_clustering_coeff: + local_clustering = compute_local_clustering(adj_list) + metrics_per_ip[ip]["local_clustering_coeff"].append(local_clustering) # Debug (verbose flag only) logger.debug(f"IP: {ip}") From 80e3edaa6a186b134e4755f72d85d8a3142619ad Mon Sep 17 00:00:00 2001 From: KeenanRileyFaulkner Date: Thu, 31 Oct 2024 17:45:18 -0600 Subject: [PATCH 12/22] do not iterate over the summary or master metrics logs --- bfasst/utils/accumulate_metrics.py | 17 +++++++++++++---- 1 file 
changed, 13 insertions(+), 4 deletions(-) diff --git a/bfasst/utils/accumulate_metrics.py b/bfasst/utils/accumulate_metrics.py index c1f63c53..055fa13e 100644 --- a/bfasst/utils/accumulate_metrics.py +++ b/bfasst/utils/accumulate_metrics.py @@ -31,12 +31,23 @@ def main(): # Initialize the master dictionary master_metrics = {} + master_metrics_output = args.m if args.m else "master_metrics.log" + stats_summary_output = args.s if args.s else "summary_statistics.log" # Iterate through the files in the analysis directory for file in Path(args.analysis_dir).iterdir(): if file.is_dir(): continue + if ( + file.name == master_metrics_output + or file.name == stats_summary_output + # if these exist, don't read them even if master_metrics_output and stats_summary_output are different + or file.name == "master_metrics.log" + or file.name == "summary_statistics.log" + ): + continue + with open(file, "r") as f: graph_metrics = json.loads(f.readline()) @@ -83,12 +94,10 @@ def main(): } # write master_metrics to a file - output = args.m if args.m else "master_metrics.log" - with open(output, "w") as f: + with open(master_metrics_output, "w") as f: f.write(json.dumps(master_metrics, indent=4)) - output = args.s if args.s else "summary_statistics.log" - with open(output, "w") as f: + with open(stats_summary_output, "w") as f: f.write(json.dumps(stats_summary, indent=4)) From 705feda77d15152639b0d3841bab98c6f15cc141 Mon Sep 17 00:00:00 2001 From: KeenanRileyFaulkner Date: Thu, 31 Oct 2024 17:59:38 -0600 Subject: [PATCH 13/22] pylint --- bfasst/flows/analyze_dataset.py | 1 - bfasst/utils/accumulate_metrics.py | 109 ++++++++++++++++++----------- bfasst/utils/process_graph.py | 11 ++- 3 files changed, 78 insertions(+), 43 deletions(-) diff --git a/bfasst/flows/analyze_dataset.py b/bfasst/flows/analyze_dataset.py index 399630c9..b92bc258 100644 --- a/bfasst/flows/analyze_dataset.py +++ b/bfasst/flows/analyze_dataset.py @@ -3,7 +3,6 @@ from pathlib import Path import pathlib from bfasst.flows.flow import FlowNoDesign -from bfasst.paths import FLOWS_PATH from bfasst.tools.dataset_metrics.accumulate_metrics import AccumulateMetrics from bfasst.tools.dataset_metrics.graph_metrics import GraphMetrics diff --git a/bfasst/utils/accumulate_metrics.py b/bfasst/utils/accumulate_metrics.py index 055fa13e..0d457652 100644 --- a/bfasst/utils/accumulate_metrics.py +++ b/bfasst/utils/accumulate_metrics.py @@ -12,16 +12,7 @@ def main(): """Load the graph, convert to adj_list, and compute metrics.""" # ArgParse - parser = argparse.ArgumentParser(description="Compute metrics on a graph.") - parser.add_argument( - "analysis_dir", help="The path to the folder containing all analysis files for all graphs." 
- ) - parser.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging.") - parser.add_argument("-m", help="The name of the metrics file to create") - parser.add_argument( - "-s", help="The name of the stats (5-num summary, mean, stddev) file to create" - ) - args = parser.parse_args() + args = get_args() # Logging (for debug, don't use in parallel) logging.basicConfig( @@ -30,21 +21,55 @@ def main(): ) # Initialize the master dictionary - master_metrics = {} master_metrics_output = args.m if args.m else "master_metrics.log" stats_summary_output = args.s if args.s else "summary_statistics.log" # Iterate through the files in the analysis directory - for file in Path(args.analysis_dir).iterdir(): + master_metrics = compute_master_metrics( + args.analysis_dir, master_metrics_output, stats_summary_output + ) + + # sort the values for each metric after merging + master_metrics = sort_metrics(master_metrics) + + # Compute the stats for each metric + stats_summary = get_stats_summary(master_metrics) + + # write master_metrics to a file + with open(master_metrics_output, "w") as f: + f.write(json.dumps(master_metrics, indent=4)) + + with open(stats_summary_output, "w") as f: + f.write(json.dumps(stats_summary, indent=4)) + + +def get_args(): + parser = argparse.ArgumentParser(description="Compute metrics on a graph.") + parser.add_argument( + "analysis_dir", help="The path to the folder containing all analysis files for all graphs." + ) + parser.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging.") + parser.add_argument("-m", help="The name of the metrics file to create") + parser.add_argument( + "-s", help="The name of the stats (5-num summary, mean, stddev) file to create" + ) + return parser.parse_args() + + +def compute_master_metrics(analysis_dir, master_metrics_output, stats_summary_output): + master_metrics = {} + for file in Path(analysis_dir).iterdir(): if file.is_dir(): continue - if ( - file.name == master_metrics_output - or file.name == stats_summary_output - # if these exist, don't read them even if master_metrics_output and stats_summary_output are different - or file.name == "master_metrics.log" - or file.name == "summary_statistics.log" + if file.name in ( + master_metrics_output, + stats_summary_output, + # Skip the master_metrics and stats_summary files + # Even if the user has specified different names + # for this run + "master_metrics.log", + "summary_statistics.log", ): continue @@ -64,51 +89,53 @@ def main(): # Concatenate the lists master_metrics[ip][metric].extend(values) - # sort the values for each metric after merging - for ip in master_metrics: - for metric in master_metrics[ip]: - master_metrics[ip][metric] = sorted(master_metrics[ip][metric]) + return master_metrics - # Compute the stats for each metric - stats_summary = {} - for ip, metrics in master_metrics.items(): + +def sort_metrics(metrics): + """Sort the values for each metric in the dictionary.""" + for ip, _ in metrics.items(): + for metric in metrics[ip]: + metrics[ip][metric] = sorted(metrics[ip][metric]) + return metrics + + +def get_stats_summary(metrics): + summary = {} + for ip, metrics in metrics.items(): for metric, values in metrics.items(): # Calculate statistics if values: # Check if the list is not empty - min_val, Q1, median, Q3, max_val = five_number_summary(values) + min_val, first_quartile, median, third_quartile, max_val = five_number_summary( + values + ) mean = sum(values) / len(values) stddev = statistics.stdev(values) if len(values) > 1 
else 0.0 # Prepare the summary dictionary - if ip not in stats_summary: - stats_summary[ip] = {} + if ip not in summary: + summary[ip] = {} - stats_summary[ip][metric] = { + summary[ip][metric] = { "min": min_val, - "Q1": Q1, + "Q1": first_quartile, "median": median, - "Q3": Q3, + "Q3": third_quartile, "max": max_val, "mean": mean, "stddev": stddev, } - - # write master_metrics to a file - with open(master_metrics_output, "w") as f: - f.write(json.dumps(master_metrics, indent=4)) - - with open(stats_summary_output, "w") as f: - f.write(json.dumps(stats_summary, indent=4)) + return summary def five_number_summary(data): n = len(data) min_val = data[0] max_val = data[-1] - Q1 = data[n // 4] + first_quartile = data[n // 4] median = data[n // 2] - Q3 = data[(3 * n) // 4] - return min_val, Q1, median, Q3, max_val + third_quartile = data[(3 * n) // 4] + return min_val, first_quartile, median, third_quartile, max_val if __name__ == "__main__": diff --git a/bfasst/utils/process_graph.py b/bfasst/utils/process_graph.py index 5afea97d..4b55df46 100644 --- a/bfasst/utils/process_graph.py +++ b/bfasst/utils/process_graph.py @@ -130,6 +130,7 @@ def convert_to_adj_list(component_nodes, component_edges): def compute_metrics_per_ip(adj_lists, args): + """Compute metrics for each IP in the graph.""" metrics_per_ip = {} for label, adj_list in adj_lists.items(): @@ -193,7 +194,7 @@ def compute_metrics_per_ip(adj_lists, args): def compute_size(adj_list): edge_count = 0 for node in adj_list: - for neighbor in adj_list[node]: + for _ in adj_list[node]: edge_count += 1 return edge_count // 2 @@ -231,6 +232,8 @@ def compute_average_degree(adj_list): class UnionFind: + """Union-find data structure.""" + def __init__(self): self.parent = {} self.rank = {} @@ -241,6 +244,7 @@ def add(self, u): self.rank[u] = 0 def find(self, u): + """Find the parent of a node.""" # Ensure u is in the union find self.add(u) @@ -250,6 +254,7 @@ def find(self, u): return self.parent[u] def union(self, u, v): + """Union two nodes.""" self.add(u) self.add(v) pu, pv = self.find(u), self.find(v) @@ -265,6 +270,7 @@ def union(self, u, v): def bfs_farthest(adj_list, start_node): + """Breadth-first search to find the farthest node from a starting node.""" queue = [(start_node, 0)] visited = {start_node} farthest_node = start_node @@ -285,6 +291,7 @@ def bfs_farthest(adj_list, start_node): def compute_k_core(adj_list): + """Compute the k-core of a graph.""" degree = {node: len(neighbors) for node, neighbors in adj_list.items()} max_k = 0 k_core_subgraph = {} @@ -320,6 +327,7 @@ def compute_k_core(adj_list): def compute_global_clustering(adj_list): + """Compute the global clustering coefficient of a graph.""" closed_triplets = 0 total_triplets = 0 visited_pairs = set() @@ -342,6 +350,7 @@ def compute_global_clustering(adj_list): def compute_local_clustering(adj_list): + """Compute the local clustering coefficient of a graph.""" local_clustering_coefficients = [] for node in adj_list: From 2f77374ca485581c490991b5ea22f14486ad8103 Mon Sep 17 00:00:00 2001 From: KeenanRileyFaulkner Date: Thu, 31 Oct 2024 18:06:29 -0600 Subject: [PATCH 14/22] pylint --- bfasst/utils/accumulate_metrics.py | 8 ++++++-- bfasst/utils/process_graph.py | 9 +++++---- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/bfasst/utils/accumulate_metrics.py b/bfasst/utils/accumulate_metrics.py index 0d457652..3b163f00 100644 --- a/bfasst/utils/accumulate_metrics.py +++ b/bfasst/utils/accumulate_metrics.py @@ -44,6 +44,7 @@ def main(): def get_args(): + """Get the 
command line arguments.""" parser = argparse.ArgumentParser(description="Compute metrics on a graph.") parser.add_argument( "analysis_dir", help="The path to the folder containing all analysis files for all graphs." @@ -57,6 +58,7 @@ def get_args(): def compute_master_metrics(analysis_dir, master_metrics_output, stats_summary_output): + """Compute the master metrics from the analysis directory.""" master_metrics = {} for file in Path(analysis_dir).iterdir(): if file.is_dir(): @@ -100,9 +102,10 @@ def sort_metrics(metrics): return metrics -def get_stats_summary(metrics): +def get_stats_summary(master_metrics): + """Compute the 5-number summary, mean, and standard deviation for each metric.""" summary = {} - for ip, metrics in metrics.items(): + for ip, metrics in master_metrics.items(): for metric, values in metrics.items(): # Calculate statistics if values: # Check if the list is not empty @@ -129,6 +132,7 @@ def get_stats_summary(metrics): def five_number_summary(data): + """Compute the 5-number summary for the given data.""" n = len(data) min_val = data[0] max_val = data[-1] diff --git a/bfasst/utils/process_graph.py b/bfasst/utils/process_graph.py index 4b55df46..1170788d 100644 --- a/bfasst/utils/process_graph.py +++ b/bfasst/utils/process_graph.py @@ -182,10 +182,10 @@ def compute_metrics_per_ip(adj_lists, args): metrics_per_ip[ip]["local_clustering_coeff"].append(local_clustering) # Debug (verbose flag only) - logger.debug(f"IP: {ip}") - logger.debug(f"Component: {label}") - logger.debug(f"Nodes: {len(adj_list)}") - logger.debug(f"Edges: {edge_count}") + logger.debug("IP: %s", ip) + logger.debug("Component: %s", label) + logger.debug("Nodes: %s", len(adj_list)) + logger.debug("Edges: %s", edge_count) logger.debug("") return metrics_per_ip @@ -200,6 +200,7 @@ def compute_size(adj_list): def compute_average_diameter(adj_list): + """Compute the average diameter of a graph.""" uf = UnionFind() for u in adj_list: From 76dbad651dfb3e8e4291aae58893956145646807 Mon Sep 17 00:00:00 2001 From: KeenanRileyFaulkner Date: Mon, 4 Nov 2024 14:35:53 -0700 Subject: [PATCH 15/22] removed kcore and local clustering --- bfasst/utils/process_graph.py | 83 ----------------------------------- 1 file changed, 83 deletions(-) diff --git a/bfasst/utils/process_graph.py b/bfasst/utils/process_graph.py index 1170788d..ee1853ee 100644 --- a/bfasst/utils/process_graph.py +++ b/bfasst/utils/process_graph.py @@ -25,17 +25,11 @@ def main(): parser.add_argument( "--diameter", action="store_true", help="Compute the average diameter of the graph." 
) - parser.add_argument("--kcore", action="store_true", help="Compute the k-core of the graph.") parser.add_argument( "--global_clustering_coeff", action="store_true", help="Compute the global clustering coefficient of the graph.", ) - parser.add_argument( - "--local_clustering_coeff", - action="store_true", - help="Compute the local clustering coefficient of the graph.", - ) parser.add_argument("--all", action="store_true", help="Compute all metrics.", default=True) args = parser.parse_args() @@ -142,9 +136,7 @@ def compute_metrics_per_ip(adj_lists, args): "size": [], "degree": [], "diameter": [], - "kcore": [], "global_clustering_coeff": [], - "local_clustering_coeff": [], } # Order @@ -166,21 +158,11 @@ def compute_metrics_per_ip(adj_lists, args): avg_diameter = compute_average_diameter(adj_list) metrics_per_ip[ip]["diameter"].append(avg_diameter) - # K-core - if args.all or args.kcore: - max_k, _ = compute_k_core(adj_list) - metrics_per_ip[ip]["kcore"].append(max_k) - # Global Clustering Coefficient if args.all or args.global_clustering_coeff: global_clustering = compute_global_clustering(adj_list) metrics_per_ip[ip]["global_clustering_coeff"].append(global_clustering) - # Local Clustering Coefficient - if args.all or args.local_clustering_coeff: - local_clustering = compute_local_clustering(adj_list) - metrics_per_ip[ip]["local_clustering_coeff"].append(local_clustering) - # Debug (verbose flag only) logger.debug("IP: %s", ip) logger.debug("Component: %s", label) @@ -291,42 +273,6 @@ def bfs_farthest(adj_list, start_node): return farthest_node, max_distance -def compute_k_core(adj_list): - """Compute the k-core of a graph.""" - degree = {node: len(neighbors) for node, neighbors in adj_list.items()} - max_k = 0 - k_core_subgraph = {} - - k = 1 - while True: - queue = deque(node for node, d in degree.items() if d <= k) - - while queue: - node = queue.popleft() - for neighbor in adj_list[node]: - if degree[neighbor] >= k: - degree[neighbor] -= 1 - if degree[neighbor] < k: - queue.append(neighbor) - degree[node] = 0 - - k_core = { - node: {neighbor for neighbor in neighbors if degree[neighbor] >= k} - for node, neighbors in adj_list.items() - if degree[node] >= k - } - - if k_core: - k_core_subgraph = k_core - max_k = k - else: - break - - k += 1 - - return max_k, k_core_subgraph - - def compute_global_clustering(adj_list): """Compute the global clustering coefficient of a graph.""" closed_triplets = 0 @@ -350,35 +296,6 @@ def compute_global_clustering(adj_list): return (3 * closed_triplets) / total_triplets if total_triplets else 0 -def compute_local_clustering(adj_list): - """Compute the local clustering coefficient of a graph.""" - local_clustering_coefficients = [] - - for node in adj_list: - neighbors = set(adj_list[node]) - degree = len(neighbors) - - if degree < 2: - local_clustering_coefficients.append(0) - continue - - closed_triplets = 0 - - for neighbor in neighbors: - common_neighbors = neighbors.intersection(set(adj_list[neighbor])) - closed_triplets += len(common_neighbors) - - local_clustering_coefficients.append( - (closed_triplets) / (degree * (degree - 1)) if degree > 1 else 0 - ) - - return ( - sum(local_clustering_coefficients) / len(local_clustering_coefficients) - if local_clustering_coefficients - else 0 - ) - - def get_ip_name_from_label(label): ip_name = ("_").join(label.split("_")[2:]) return ip_name if ip_name else label From 5961870d4bd91b3496841ccb56d69170b21c5410 Mon Sep 17 00:00:00 2001 From: KeenanRileyFaulkner Date: Mon, 18 Nov 2024 12:31:30 -0700 Subject: 
[PATCH 16/22] added utility scripts as deps to dataset_metrics tools --- bfasst/tools/dataset_metrics/accumulate_metrics.py | 1 + .../dataset_metrics/accumulate_metrics_build.ninja.mustache | 2 +- bfasst/tools/dataset_metrics/graph_metrics.py | 6 +++++- .../dataset_metrics/process_graph_build.ninja.mustache | 2 +- 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/bfasst/tools/dataset_metrics/accumulate_metrics.py b/bfasst/tools/dataset_metrics/accumulate_metrics.py index 74473b62..7869749c 100644 --- a/bfasst/tools/dataset_metrics/accumulate_metrics.py +++ b/bfasst/tools/dataset_metrics/accumulate_metrics.py @@ -30,6 +30,7 @@ def create_build_snippets(self): "summary_stats": self.summary_stats, "aggregation_dir": self.build_path, "pieces": self.pieces, + "accumulate_metrics_util": BFASST_UTILS_PATH / "accumulate_metrics.py", }, ) diff --git a/bfasst/tools/dataset_metrics/accumulate_metrics_build.ninja.mustache b/bfasst/tools/dataset_metrics/accumulate_metrics_build.ninja.mustache index 2299f729..2b0b775c 100644 --- a/bfasst/tools/dataset_metrics/accumulate_metrics_build.ninja.mustache +++ b/bfasst/tools/dataset_metrics/accumulate_metrics_build.ninja.mustache @@ -1,4 +1,4 @@ -build {{ metrics_file }} {{ summary_stats }}: accumulate_metrics {{ aggregation_dir }} | {{#pieces}}{{.}} {{/pieces}} +build {{ metrics_file }} {{ summary_stats }}: accumulate_metrics {{ aggregation_dir }} | {{#pieces}}{{.}} {{/pieces}} {{ accumulate_metrics_util }} metrics_file = {{ metrics_file }} summary_stats = {{ summary_stats }} diff --git a/bfasst/tools/dataset_metrics/graph_metrics.py b/bfasst/tools/dataset_metrics/graph_metrics.py index 8e4917c4..f00ebf9c 100644 --- a/bfasst/tools/dataset_metrics/graph_metrics.py +++ b/bfasst/tools/dataset_metrics/graph_metrics.py @@ -28,7 +28,11 @@ def create_build_snippets(self): with open(DATASET_METRICS_TOOLS_PATH / "process_graph_build.ninja.mustache", "r") as f: build = chevron.render( f, - {"output": self.metrics_path, "graph": self.graph}, + { + "output": self.metrics_path, + "graph": self.graph, + "process_graph_util": BFASST_UTILS_PATH / "process_graph.py", + }, ) with open(NINJA_BUILD_PATH, "a") as f: diff --git a/bfasst/tools/dataset_metrics/process_graph_build.ninja.mustache b/bfasst/tools/dataset_metrics/process_graph_build.ninja.mustache index 92cf4887..954d06ab 100644 --- a/bfasst/tools/dataset_metrics/process_graph_build.ninja.mustache +++ b/bfasst/tools/dataset_metrics/process_graph_build.ninja.mustache @@ -1,2 +1,2 @@ -build {{ output }}: process_graph {{ graph }} +build {{ output }}: process_graph {{ graph }} | {{ process_graph_util }} From e9e09c65c5037a97796ecd754965e4a4048a7057 Mon Sep 17 00:00:00 2001 From: KeenanRileyFaulkner Date: Mon, 18 Nov 2024 12:46:48 -0700 Subject: [PATCH 17/22] updated scripts to work per-component and per-instance, updated summary stats file name --- bfasst/utils/accumulate_metrics.py | 4 +- bfasst/utils/process_graph.py | 118 ++++++++++++++++++++++++++--- 2 files changed, 109 insertions(+), 13 deletions(-) diff --git a/bfasst/utils/accumulate_metrics.py b/bfasst/utils/accumulate_metrics.py index 3b163f00..e2d8f45d 100644 --- a/bfasst/utils/accumulate_metrics.py +++ b/bfasst/utils/accumulate_metrics.py @@ -71,10 +71,12 @@ def compute_master_metrics(analysis_dir, master_metrics_output, stats_summary_ou # Even if the user has specified different names # for this run "master_metrics.log", - "summary_statistics.log", + "summary_stats.log", ): continue + logger.debug(f"Processing {file}") + with open(file, "r") as f: 
graph_metrics = json.loads(f.readline()) diff --git a/bfasst/utils/process_graph.py b/bfasst/utils/process_graph.py index ee1853ee..f916acf6 100644 --- a/bfasst/utils/process_graph.py +++ b/bfasst/utils/process_graph.py @@ -25,6 +25,9 @@ def main(): parser.add_argument( "--diameter", action="store_true", help="Compute the average diameter of the graph." ) + parser.add_argument( + "--component_count", action="store_true", help="Compute the number of components." + ) parser.add_argument( "--global_clustering_coeff", action="store_true", @@ -128,35 +131,61 @@ def compute_metrics_per_ip(adj_lists, args): metrics_per_ip = {} for label, adj_list in adj_lists.items(): + # Compute components + components = compute_components(adj_list) + # set up default entries ip = get_ip_name_from_label(label) if ip not in metrics_per_ip: metrics_per_ip[ip] = { - "order": [], - "size": [], - "degree": [], - "diameter": [], + "instance_order": [], + "component_orders": [], + "instance_size": [], + "component_sizes": [], + "avg_degree": [], + "avg_diameter": [], + "component_diameters": [], + "component_count": [], "global_clustering_coeff": [], } # Order if args.all or args.order: - metrics_per_ip[ip]["order"].append(len(adj_list)) + metrics_per_ip[ip]["instance_order"].append(len(adj_list)) + + # Component-wise order + if args.all or args.order: + component_orders = compute_component_orders(components) + metrics_per_ip[ip]["component_orders"].extend(component_orders) # Size if args.all or args.size: edge_count = compute_size(adj_list) - metrics_per_ip[ip]["size"].append(edge_count) + metrics_per_ip[ip]["instance_size"].append(edge_count) - # Degree + # Component-wise size + if args.all or args.size: + component_sizes = compute_component_sizes(components, adj_list) + metrics_per_ip[ip]["component_sizes"].extend(component_sizes) + + # Avg Degree if args.all or args.degree: avg_desgree = compute_average_degree(adj_list) - metrics_per_ip[ip]["degree"].append(avg_desgree) + metrics_per_ip[ip]["avg_degree"].append(avg_desgree) + + # Avg Diameter + if args.all or args.diameter: + avg_diameter = compute_average_diameter(components, adj_list) + metrics_per_ip[ip]["avg_diameter"].append(avg_diameter) - # Diameter + # Component Diameters if args.all or args.diameter: - avg_diameter = compute_average_diameter(adj_list) - metrics_per_ip[ip]["diameter"].append(avg_diameter) + component_diameters = compute_component_diameters(components, adj_list) + metrics_per_ip[ip]["component_diameters"].extend(component_diameters) + + # Component Count + if args.all or args.component_count: + metrics_per_ip[ip]["component_count"].append(len(components)) # Global Clustering Coefficient if args.all or args.global_clustering_coeff: @@ -173,6 +202,32 @@ def compute_metrics_per_ip(adj_lists, args): return metrics_per_ip +def compute_components(adj_list): + """Compute the components of a graph.""" + uf = UnionFind() + + for u in adj_list: + for v in adj_list[u]: + uf.union(u, v) + + components = {} + for node in adj_list: + root = uf.find(node) + if root not in components: + components[root] = set() + components[root].add(node) + + return components + + +def compute_component_orders(components): + """Compute the order of each component in a graph.""" + orders = [] + for component in components.values(): + orders.append(len(component)) + return orders + + def compute_size(adj_list): edge_count = 0 for node in adj_list: @@ -181,7 +236,20 @@ def compute_size(adj_list): return edge_count // 2 -def compute_average_diameter(adj_list): +def 
compute_component_sizes(components, adj_list): + """Compute the size of each component in a graph.""" + sizes = [] + for component in components.values(): + edge_count = 0 + for node in component: + for neighbor in adj_list[node]: + if neighbor in component: + edge_count += 1 + sizes.append(edge_count // 2) + return sizes + + +def compute_average_diameter(components, adj_list): """Compute the average diameter of a graph.""" uf = UnionFind() @@ -207,6 +275,17 @@ def compute_average_diameter(adj_list): return sum(diameters) / len(diameters) if diameters else 0 +def compute_component_diameters(components, adj_list): + """Compute the diameter of each component in a graph.""" + diameters = [] + for component in components.values(): + node = next(iter(component)) + u, _ = bfs_farthest(adj_list, node) + _, diameter = bfs_farthest(adj_list, u) + diameters.append(diameter) + return diameters + + def compute_average_degree(adj_list): degrees = [] for node in adj_list: @@ -301,5 +380,20 @@ def get_ip_name_from_label(label): return ip_name if ip_name else label +def run_test(): + adj_list = { + "A": ["B", "C"], + "B": ["A", "C"], + "C": ["A", "B"], + "D": ["E"], + "E": ["D"], + } + + components = compute_components(adj_list) + assert len(components) == 2 + logger.debug(components) + + if __name__ == "__main__": main() + run_test() From a55111d887bd436eb298c92c34d2eacc09d4c5f3 Mon Sep 17 00:00:00 2001 From: KeenanRileyFaulkner Date: Mon, 18 Nov 2024 12:50:54 -0700 Subject: [PATCH 18/22] pylint --- bfasst/utils/accumulate_metrics.py | 2 +- bfasst/utils/process_graph.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/bfasst/utils/accumulate_metrics.py b/bfasst/utils/accumulate_metrics.py index e2d8f45d..8461ffa7 100644 --- a/bfasst/utils/accumulate_metrics.py +++ b/bfasst/utils/accumulate_metrics.py @@ -75,7 +75,7 @@ def compute_master_metrics(analysis_dir, master_metrics_output, stats_summary_ou ): continue - logger.debug(f"Processing {file}") + logger.debug("Processing %s", file) with open(file, "r") as f: graph_metrics = json.loads(f.readline()) diff --git a/bfasst/utils/process_graph.py b/bfasst/utils/process_graph.py index f916acf6..1332b3be 100644 --- a/bfasst/utils/process_graph.py +++ b/bfasst/utils/process_graph.py @@ -1,7 +1,7 @@ """Compute metrics on a single graph in a dataset.""" import argparse -from collections import defaultdict, deque +from collections import defaultdict import logging import os import json @@ -381,6 +381,7 @@ def get_ip_name_from_label(label): def run_test(): + """Ensure union find works.""" adj_list = { "A": ["B", "C"], "B": ["A", "C"], From a1c3dd0ee7bf9aefe0b0b7986b66f2d1e259401a Mon Sep 17 00:00:00 2001 From: KeenanRileyFaulkner Date: Mon, 18 Nov 2024 14:53:49 -0700 Subject: [PATCH 19/22] added k core --- bfasst/utils/process_graph.py | 76 +++++++++++++++++++++++++++++++++-- 1 file changed, 73 insertions(+), 3 deletions(-) diff --git a/bfasst/utils/process_graph.py b/bfasst/utils/process_graph.py index 1332b3be..d53898af 100644 --- a/bfasst/utils/process_graph.py +++ b/bfasst/utils/process_graph.py @@ -1,7 +1,7 @@ """Compute metrics on a single graph in a dataset.""" import argparse -from collections import defaultdict +from collections import defaultdict, deque import logging import os import json @@ -33,6 +33,9 @@ def main(): action="store_true", help="Compute the global clustering coefficient of the graph.", ) + parser.add_argument( + "--k_core", action="store_true", help="Compute the maximal k-core of the graph." 
+ ) parser.add_argument("--all", action="store_true", help="Compute all metrics.", default=True) args = parser.parse_args() @@ -147,6 +150,7 @@ def compute_metrics_per_ip(adj_lists, args): "component_diameters": [], "component_count": [], "global_clustering_coeff": [], + "max_k_core": [], } # Order @@ -192,6 +196,11 @@ def compute_metrics_per_ip(adj_lists, args): global_clustering = compute_global_clustering(adj_list) metrics_per_ip[ip]["global_clustering_coeff"].append(global_clustering) + # K-Core + if args.all or args.k_core: + max_k, _ = compute_k_core(adj_list) + metrics_per_ip[ip]["max_k_core"].append(max_k) + # Debug (verbose flag only) logger.debug("IP: %s", ip) logger.debug("Component: %s", label) @@ -375,12 +384,50 @@ def compute_global_clustering(adj_list): return (3 * closed_triplets) / total_triplets if total_triplets else 0 +def compute_k_core(adj_list): + """Compute the k-core of a graph.""" + degree = {node: len(neighbors) for node, neighbors in adj_list.items()} + max_k = 0 + k_core_subgraph = {} + + k = 1 + while True: + queue = deque(node for node, d in degree.items() if d <= k) + + while queue: + node = queue.popleft() + for neighbor in adj_list[node]: + if degree[neighbor] >= k: + degree[neighbor] -= 1 + if degree[neighbor] < k: + queue.append(neighbor) + degree[node] = 0 + + k_core = { + node: {neighbor for neighbor in neighbors if degree[neighbor] >= k} + for node, neighbors in adj_list.items() + if degree[node] >= k + } + + if k_core: + k_core_subgraph = k_core + max_k = k + else: + if max_k != 0: + max_k += 1 + break + + k += 1 + + return max_k, k_core_subgraph + + def get_ip_name_from_label(label): ip_name = ("_").join(label.split("_")[2:]) return ip_name if ip_name else label -def run_test(): +def test_uf_components(): """Ensure union find works.""" adj_list = { "A": ["B", "C"], @@ -395,6 +442,29 @@ def run_test(): logger.debug(components) +def test_k_core(): + """Ensure k-core works.""" + adj_list = { + "A": ["B", "C", "D", "E"], + "B": ["A", "C", "D", "E"], + "C": ["A", "B", "D", "F"], + "D": ["A", "B", "C", "J"], + "E": ["A", "B", "F", "I"], + "F": ["C", "E", "G", "H"], + "G": ["F"], + "H": ["F"], + "I": ["E"], + "J": ["D", "K", "L"], + "K": ["J"], + "L": ["J"], + } + + max_k, k_core = compute_k_core(adj_list) + assert max_k == 3 # A, B, C, D is a 3-core + logger.debug(k_core) + + if __name__ == "__main__": main() - run_test() + test_uf_components() + test_k_core() From 36ebf50979789f22d62d99de6a01fa241ab0448f Mon Sep 17 00:00:00 2001 From: KeenanRileyFaulkner Date: Mon, 18 Nov 2024 15:05:41 -0700 Subject: [PATCH 20/22] make sure k core increments correctly --- bfasst/utils/process_graph.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/bfasst/utils/process_graph.py b/bfasst/utils/process_graph.py index d53898af..9ee12b9d 100644 --- a/bfasst/utils/process_graph.py +++ b/bfasst/utils/process_graph.py @@ -409,15 +409,13 @@ def compute_k_core(adj_list): if degree[node] >= k } + k += 1 if k_core: k_core_subgraph = k_core max_k = k else: - if max_k != 0: - max_k += 1 break - k += 1 return max_k, k_core_subgraph From 1bccb274b643c1b7b7d3768cc3e42ab5dc96ad1a Mon Sep 17 00:00:00 2001 From: KeenanRileyFaulkner Date: Mon, 9 Dec 2024 10:11:40 -0700 Subject: [PATCH 21/22] format --- bfasst/utils/process_graph.py | 1 - bfasst/utils/structural.py | 12 ++++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/bfasst/utils/process_graph.py b/bfasst/utils/process_graph.py index 9ee12b9d..b46375e2 100644 --- a/bfasst/utils/process_graph.py 
+++ b/bfasst/utils/process_graph.py @@ -416,7 +416,6 @@ def compute_k_core(adj_list): else: break - return max_k, k_core_subgraph diff --git a/bfasst/utils/structural.py b/bfasst/utils/structural.py index 65c1364f..6e6933bb 100644 --- a/bfasst/utils/structural.py +++ b/bfasst/utils/structural.py @@ -727,10 +727,14 @@ def check_for_potential_bram_mapping(self, instance_name: str) -> set[str]: if bram_do: assert named_instance.properties["DOB_REG"] == "0" - bram_a_only = named_instance.properties["RAM_MODE"] == '"TDP"' and { - None, - SdnInstanceWrapper.GND_PIN.net, - } >= {named_instance.get_pin("DOBDO", i).net for i in range(32)} + bram_a_only = ( + named_instance.properties["RAM_MODE"] == '"TDP"' + and { + None, + SdnInstanceWrapper.GND_PIN.net, + } + >= {named_instance.get_pin("DOBDO", i).net for i in range(32)} + ) if named_instance.cell_type.startswith("RAMB36E1"): # A15 is only connected to a non-const net when cascade is enabled From 726b49859e24b5ce4c7c5cc009a4f193ff8cf712 Mon Sep 17 00:00:00 2001 From: KeenanRileyFaulkner Date: Mon, 9 Dec 2024 10:14:43 -0700 Subject: [PATCH 22/22] revert changes --- bfasst/utils/structural.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/bfasst/utils/structural.py b/bfasst/utils/structural.py index 6e6933bb..65c1364f 100644 --- a/bfasst/utils/structural.py +++ b/bfasst/utils/structural.py @@ -727,14 +727,10 @@ def check_for_potential_bram_mapping(self, instance_name: str) -> set[str]: if bram_do: assert named_instance.properties["DOB_REG"] == "0" - bram_a_only = ( - named_instance.properties["RAM_MODE"] == '"TDP"' - and { - None, - SdnInstanceWrapper.GND_PIN.net, - } - >= {named_instance.get_pin("DOBDO", i).net for i in range(32)} - ) + bram_a_only = named_instance.properties["RAM_MODE"] == '"TDP"' and { + None, + SdnInstanceWrapper.GND_PIN.net, + } >= {named_instance.get_pin("DOBDO", i).net for i in range(32)} if named_instance.cell_type.startswith("RAMB36E1"): # A15 is only connected to a non-const net when cascade is enabled
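
For reference, the "maximal k-core" that patches 19 and 20 above compute and then fix the increment for is the largest k for which repeatedly peeling nodes of degree < k still leaves a non-empty subgraph. The following is a minimal standalone sketch of that textbook peeling loop over a plain undirected adjacency-list dict; the function name max_k_core is illustrative only and this is not the compute_k_core implementation from the patches.

    from collections import deque

    def max_k_core(adj):
        """Largest k whose k-core (subgraph in which every remaining node
        keeps degree >= k) is non-empty, for an undirected adjacency-list dict.
        Standalone sketch, independent of the bfasst compute_k_core code."""
        degree = {v: len(neighbors) for v, neighbors in adj.items()}
        alive = set(adj)
        k = 0
        while alive:
            k += 1
            # Peel every node whose remaining degree falls below k.
            queue = deque(v for v in alive if degree[v] < k)
            while queue:
                v = queue.popleft()
                if v not in alive:
                    continue  # already peeled via another neighbor
                alive.remove(v)
                for u in adj[v]:
                    if u in alive:
                        degree[u] -= 1
                        if degree[u] < k:
                            queue.append(u)
            if not alive:
                return k - 1  # the (k-1)-core was the last non-empty one
        return 0  # empty input graph

Run on the adjacency list used by test_k_core in patch 19, this sketch returns 3 (nodes A, B, C, and D form the 3-core), which is consistent with the assert max_k == 3 added there.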