Dataset metrics #514

Merged · 22 commits · merged Dec 9, 2024

Commits
745b7cc
added dataset processing on per-graph basis to bfasst
KeenanRileyFaulkner Oct 30, 2024
c7876bb
minor format fix
KeenanRileyFaulkner Oct 30, 2024
fc704b1
Added basics for accumulation of graph metrics
KeenanRileyFaulkner Oct 30, 2024
75883bd
updated accumulation script to write to file
KeenanRileyFaulkner Oct 30, 2024
aff1897
Added accumulation of metrics
KeenanRileyFaulkner Oct 31, 2024
8155cfc
refactored to use FlowNoDesign
KeenanRileyFaulkner Oct 31, 2024
604ac18
added diameter
KeenanRileyFaulkner Oct 31, 2024
1657e9c
added degree
KeenanRileyFaulkner Oct 31, 2024
f28c449
added kcore and global/local clustering coefficients
KeenanRileyFaulkner Oct 31, 2024
d01031f
updated names for clustering coefficients
KeenanRileyFaulkner Oct 31, 2024
0c4a665
added options on each metric so they can be turned off/on
KeenanRileyFaulkner Oct 31, 2024
80e3eda
do not iterate over the summary or master metrics logs
KeenanRileyFaulkner Oct 31, 2024
705feda
pylint
KeenanRileyFaulkner Oct 31, 2024
2f77374
pylint
KeenanRileyFaulkner Nov 1, 2024
76dbad6
removed kcore and local clustering
KeenanRileyFaulkner Nov 4, 2024
5961870
added utility scripts as deps to dataset_metrics tools
KeenanRileyFaulkner Nov 18, 2024
e9e09c6
updated scripts to work per-component and per-instance, updated summa…
KeenanRileyFaulkner Nov 18, 2024
a55111d
pylint
KeenanRileyFaulkner Nov 18, 2024
a1c3dd0
added k core
KeenanRileyFaulkner Nov 18, 2024
36ebf50
make sure k core increments correctly
KeenanRileyFaulkner Nov 18, 2024
1bccb27
format
KeenanRileyFaulkner Dec 9, 2024
726b498
revert changes
KeenanRileyFaulkner Dec 9, 2024
48 changes: 48 additions & 0 deletions bfasst/flows/analyze_dataset.py
@@ -0,0 +1,48 @@
"""Analyze dataset metrics."""

from pathlib import Path
import pathlib
from bfasst.flows.flow import FlowNoDesign
from bfasst.tools.dataset_metrics.accumulate_metrics import AccumulateMetrics
from bfasst.tools.dataset_metrics.graph_metrics import GraphMetrics


class AnalyzeDataset(FlowNoDesign):
"""Analyze dataset metrics."""

def __init__(self, dataset):
# pylint: disable=duplicate-code
super().__init__()
self.dataset = Path(dataset)

# only used for configuring ninja rule snippets
self.graph_metrics_default_tool = GraphMetrics(self, None, None)
self.accumulate_metrics_tool = AccumulateMetrics(self, None)
# pylint: enable=duplicate-code

def create_build_snippets(self):
# get the size of the dataset
directories = [x for x in self.dataset.iterdir() if x.is_dir()]
iterations = len(directories)
pieces = []

for i in range(1, iterations + 1):
num = int(directories[i - 1].name.split("_")[-1])
graph_metrics_tool = GraphMetrics(
self, directories[i - 1] / f"{directories[i - 1].name}.dump", num
)
pieces.append(graph_metrics_tool.metrics_path)
graph_metrics_tool.create_build_snippets()

AccumulateMetrics(self, pieces).create_build_snippets()

@classmethod
def flow_build_dir_name(cls) -> str:
"""Get the name of the build directory for this flow"""
return "dataset_metrics"

    def add_ninja_deps(self, deps):
        super().add_ninja_deps(deps)

Review comment (Member): I don't think this is required if it's just calling super. The behavior will be the same if you remove it.

def get_top_level_flow_path(self):
return pathlib.Path(__file__).resolve()
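
For context, create_build_snippets assumes each dataset subdirectory ends in a numeric suffix and contains a .dump file named after the directory. A minimal sketch of the naming it relies on (the directory name here is hypothetical; the PR doesn't show the real scheme):

from pathlib import Path

d = Path("dataset/design_3")        # hypothetical per-graph directory
num = int(d.name.split("_")[-1])    # -> 3; used to name metrics_3.log
dump = d / f"{d.name}.dump"         # -> dataset/design_3/design_3.dump, passed to GraphMetrics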
8 changes: 7 additions & 1 deletion bfasst/flows/flow_descriptions.yaml
@@ -156,4 +156,10 @@ flows:
class: OpenTitan
external_tools:
- vivado
- opentitan
- opentitan

- name: AnalyzeDataset
description: Compute Metrics on an FPGA Circuit dataset for GNNs.
module: analyze_dataset
class: AnalyzeDataset

2 changes: 2 additions & 0 deletions bfasst/paths.py
@@ -20,6 +20,8 @@

COMMON_TOOLS_PATH = TOOLS_PATH / "common"

DATASET_METRICS_TOOLS_PATH = TOOLS_PATH / "dataset_metrics"

REV_BIT_TOOLS_PATH = TOOLS_PATH / "rev_bit"
NINJA_TRANSFORM_TOOLS_PATH = TOOLS_PATH / "transform"

46 changes: 46 additions & 0 deletions bfasst/tools/dataset_metrics/accumulate_metrics.py
@@ -0,0 +1,46 @@
"""Accumulate metrics from the graph_metrics tool."""

import chevron

from bfasst.tools.tool import ToolBase
from bfasst.paths import BUILD_PATH, NINJA_BUILD_PATH, DATASET_METRICS_TOOLS_PATH, BFASST_UTILS_PATH


class AccumulateMetrics(ToolBase):
"""Accumulate metrics from the graph_metrics tool."""

def __init__(self, flow, pieces):
super().__init__(flow)
self.pieces = pieces
self.build_path = BUILD_PATH / "dataset_metrics"
self.metrics_path = self.build_path / "master_metrics.log"
self.summary_stats = self.build_path / "summary_stats.log"

self._init_outputs()
self.rule_snippet_path = (
DATASET_METRICS_TOOLS_PATH / "accumulate_metrics_rules.ninja.mustache"
)

def create_build_snippets(self):
with open(DATASET_METRICS_TOOLS_PATH / "accumulate_metrics_build.ninja.mustache", "r") as f:
build = chevron.render(
f,
{
"metrics_file": self.metrics_path,
"summary_stats": self.summary_stats,
"aggregation_dir": self.build_path,
"pieces": self.pieces,
"accumulate_metrics_util": BFASST_UTILS_PATH / "accumulate_metrics.py",
},
)

with open(NINJA_BUILD_PATH, "a") as f:
f.write(build)

def _init_outputs(self):
self.outputs["metrics_path"] = self.metrics_path
self.outputs["summary_stats"] = self.summary_stats

def add_ninja_deps(self, deps):
self._add_ninja_deps_default(deps, __file__)
deps.append(BFASST_UTILS_PATH / "accumulate_metrics.py")
4 changes: 4 additions & 0 deletions bfasst/tools/dataset_metrics/accumulate_metrics_build.ninja.mustache
@@ -0,0 +1,4 @@
build {{ metrics_file }} {{ summary_stats }}: accumulate_metrics {{ aggregation_dir }} | {{#pieces}}{{.}} {{/pieces}} {{ accumulate_metrics_util }}
metrics_file = {{ metrics_file }}
summary_stats = {{ summary_stats }}
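
For a sense of what this template produces, here is a sketch of rendering it directly with chevron, using made-up paths (the real values are filled in by AccumulateMetrics.create_build_snippets):

import chevron

template = (
    "build {{ metrics_file }} {{ summary_stats }}: accumulate_metrics "
    "{{ aggregation_dir }} | {{#pieces}}{{.}} {{/pieces}} {{ accumulate_metrics_util }}"
)
print(chevron.render(template, {
    "metrics_file": "build/dataset_metrics/master_metrics.log",    # hypothetical paths
    "summary_stats": "build/dataset_metrics/summary_stats.log",
    "aggregation_dir": "build/dataset_metrics",
    "pieces": ["build/dataset_metrics/metrics_1.log", "build/dataset_metrics/metrics_2.log"],
    "accumulate_metrics_util": "bfasst/utils/accumulate_metrics.py",
}))
# build build/dataset_metrics/master_metrics.log build/dataset_metrics/summary_stats.log: accumulate_metrics build/dataset_metrics | build/dataset_metrics/metrics_1.log build/dataset_metrics/metrics_2.log  bfasst/utils/accumulate_metrics.py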

4 changes: 4 additions & 0 deletions bfasst/tools/dataset_metrics/accumulate_metrics_rules.ninja.mustache
@@ -0,0 +1,4 @@
rule accumulate_metrics
command = python {{ bfasst_path }}/bfasst/utils/accumulate_metrics.py $in -m $metrics_file -s $summary_stats
description = accumulate metrics from $in to produce master_metrics and summary_stats files

46 changes: 46 additions & 0 deletions bfasst/tools/dataset_metrics/graph_metrics.py
@@ -0,0 +1,46 @@
"""Create the rule and build snippets for computing gnn dataset metrics."""

import chevron

from bfasst.tools.tool import ToolBase
from bfasst.paths import BUILD_PATH, NINJA_BUILD_PATH, DATASET_METRICS_TOOLS_PATH, BFASST_UTILS_PATH


class GraphMetrics(ToolBase):
"""Create the rule and build snippets for computing gnn dataset metrics."""

def __init__(
self,
flow,
graph,
num,
):
super().__init__(flow)
self.graph = graph
self.num = num
self.build_path = BUILD_PATH / "dataset_metrics"
self.metrics_path = self.build_path / f"metrics_{num}.log"

self._init_outputs()
self.rule_snippet_path = DATASET_METRICS_TOOLS_PATH / "process_graph_rules.ninja.mustache"

def create_build_snippets(self):
with open(DATASET_METRICS_TOOLS_PATH / "process_graph_build.ninja.mustache", "r") as f:
build = chevron.render(
f,
{
"output": self.metrics_path,
"graph": self.graph,
"process_graph_util": BFASST_UTILS_PATH / "process_graph.py",
},
)

with open(NINJA_BUILD_PATH, "a") as f:
f.write(build)

def _init_outputs(self):
self.outputs["metrics_path"] = self.metrics_path

def add_ninja_deps(self, deps):
self._add_ninja_deps_default(deps, __file__)
deps.append(BFASST_UTILS_PATH / "process_graph.py")
2 changes: 2 additions & 0 deletions bfasst/tools/dataset_metrics/process_graph_build.ninja.mustache
@@ -0,0 +1,2 @@
build {{ output }}: process_graph {{ graph }} | {{ process_graph_util }}

4 changes: 4 additions & 0 deletions bfasst/tools/dataset_metrics/process_graph_rules.ninja.mustache
@@ -0,0 +1,4 @@
rule process_graph
command = python {{ bfasst_path }}/bfasst/utils/process_graph.py $in -o $out
description = compute metrics on $in and save them to $out

148 changes: 148 additions & 0 deletions bfasst/utils/accumulate_metrics.py
@@ -0,0 +1,148 @@
"""Accumulate metrics from graphs in a dataset after computing them for all graphs"""

import argparse
import logging
import json
from pathlib import Path
import statistics

logger = logging.getLogger(__name__)


def main():
"""Load the graph, convert to adj_list, and compute metrics."""
# ArgParse
args = get_args()

# Logging (for debug, don't use in parallel)
logging.basicConfig(
level=logging.DEBUG if args.verbose else logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s",
)

    # Resolve the output file names
    master_metrics_output = args.m if args.m else "master_metrics.log"
    stats_summary_output = args.s if args.s else "summary_stats.log"

# Iterate through the files in the analysis directory
master_metrics = compute_master_metrics(
args.analysis_dir, master_metrics_output, stats_summary_output
)

# sort the values for each metric after merging
master_metrics = sort_metrics(master_metrics)

# Compute the stats for each metric
stats_summary = get_stats_summary(master_metrics)

# write master_metrics to a file
with open(master_metrics_output, "w") as f:
f.write(json.dumps(master_metrics, indent=4))

with open(stats_summary_output, "w") as f:
f.write(json.dumps(stats_summary, indent=4))


def get_args():
"""Get the command line arguments."""
parser = argparse.ArgumentParser(description="Compute metrics on a graph.")
parser.add_argument(
"analysis_dir", help="The path to the folder containing all analysis files for all graphs."
)
parser.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging.")
parser.add_argument("-m", help="The name of the metrics file to create")
parser.add_argument(
"-s", help="The name of the stats (5-num summary, mean, stddev) file to create"
)
return parser.parse_args()


def compute_master_metrics(analysis_dir, master_metrics_output, stats_summary_output):
"""Compute the master metrics from the analysis directory."""
master_metrics = {}
for file in Path(analysis_dir).iterdir():
if file.is_dir():
continue

if file.name in (
master_metrics_output,
stats_summary_output,
# Skip the master_metrics and stats_summary files
# Even if the user has specified different names
# for this run
"master_metrics.log",
"summary_stats.log",
):
continue

logger.debug("Processing %s", file)

with open(file, "r") as f:
graph_metrics = json.loads(f.readline())

for ip, metrics in graph_metrics.items():
# Initialize the IP entry in the master dictionary if it doesn't exist
if ip not in master_metrics:
master_metrics[ip] = {}

for metric, values in metrics.items():
# Initialize the metric entry if it doesn't exist
if metric not in master_metrics[ip]:
master_metrics[ip][metric] = []

# Concatenate the lists
master_metrics[ip][metric].extend(values)

return master_metrics
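
compute_master_metrics expects each metrics file to be a single JSON line mapping an IP/component name to lists of metric values; merging simply extends those lists across graphs. A hypothetical pair of inputs and the merged result (names and numbers invented for illustration):

# metrics_1.log: {"alu": {"diameter": [4], "degree": [2, 3]}}
# metrics_2.log: {"alu": {"diameter": [6], "degree": [1]}}
# merged master_metrics: {"alu": {"diameter": [4, 6], "degree": [2, 3, 1]}}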


def sort_metrics(metrics):
"""Sort the values for each metric in the dictionary."""
    for ip, _ in metrics.items():
        for metric in metrics[ip]:
            metrics[ip][metric] = sorted(metrics[ip][metric])
    return metrics

Review comment (Member): If you want to loop through keys, just use: for ip in metrics:


def get_stats_summary(master_metrics):
"""Compute the 5-number summary, mean, and standard deviation for each metric."""
summary = {}
for ip, metrics in master_metrics.items():
for metric, values in metrics.items():
# Calculate statistics
if values: # Check if the list is not empty
min_val, first_quartile, median, third_quartile, max_val = five_number_summary(
values
)
mean = sum(values) / len(values)
stddev = statistics.stdev(values) if len(values) > 1 else 0.0

# Prepare the summary dictionary
if ip not in summary:
summary[ip] = {}

summary[ip][metric] = {
"min": min_val,
"Q1": first_quartile,
"median": median,
"Q3": third_quartile,
"max": max_val,
"mean": mean,
"stddev": stddev,
}
return summary


def five_number_summary(data):
"""Compute the 5-number summary for the given data."""
n = len(data)
min_val = data[0]
max_val = data[-1]
first_quartile = data[n // 4]
median = data[n // 2]
third_quartile = data[(3 * n) // 4]
return min_val, first_quartile, median, third_quartile, max_val
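
A quick worked example of the index-based quartiles (data invented; note the list must already be sorted, which sort_metrics guarantees):

data = [1, 2, 3, 4, 5, 6, 7, 8]          # n = 8
# min = data[0] = 1, Q1 = data[8 // 4] = 3, median = data[8 // 2] = 5,
# Q3 = data[(3 * 8) // 4] = 7, max = data[-1] = 8
assert five_number_summary(data) == (1, 3, 5, 7, 8)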


if __name__ == "__main__":
main()