Skip to content

Commit

Permalink
Merge pull request #514 from byuccl/dataset_metrics
Browse files Browse the repository at this point in the history
Dataset metrics
  • Loading branch information
jgoeders authored Dec 9, 2024
2 parents 2919d1b + 726b498 commit 1461dd1
Show file tree
Hide file tree
Showing 11 changed files with 778 additions and 1 deletion.
48 changes: 48 additions & 0 deletions bfasst/flows/analyze_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
"""Analyze dataset metrics."""

from pathlib import Path
import pathlib
from bfasst.flows.flow import FlowNoDesign
from bfasst.tools.dataset_metrics.accumulate_metrics import AccumulateMetrics
from bfasst.tools.dataset_metrics.graph_metrics import GraphMetrics


class AnalyzeDataset(FlowNoDesign):
    """Analyze dataset metrics.

    For each design subdirectory of the dataset, emits a graph-metrics build
    snippet, then one accumulate-metrics snippet that merges all results.
    """

    def __init__(self, dataset):
        """dataset: path to a directory whose subdirectories each hold one graph dump."""
        # pylint: disable=duplicate-code
        super().__init__()
        self.dataset = Path(dataset)

        # only used for configuring ninja rule snippets
        self.graph_metrics_default_tool = GraphMetrics(self, None, None)
        self.accumulate_metrics_tool = AccumulateMetrics(self, None)
        # pylint: enable=duplicate-code

    def create_build_snippets(self):
        """Create per-directory graph-metrics snippets plus one accumulate snippet."""
        directories = [x for x in self.dataset.iterdir() if x.is_dir()]
        pieces = []

        # Iterate the directories directly instead of a 1-based index loop
        # with repeated directories[i - 1] lookups.
        for directory in directories:
            # Directory names are expected to end in "_<num>"; <num> tags the
            # per-graph metrics output file.
            num = int(directory.name.split("_")[-1])
            graph_metrics_tool = GraphMetrics(self, directory / f"{directory.name}.dump", num)
            pieces.append(graph_metrics_tool.metrics_path)
            graph_metrics_tool.create_build_snippets()

        AccumulateMetrics(self, pieces).create_build_snippets()

    @classmethod
    def flow_build_dir_name(cls) -> str:
        """Get the name of the build directory for this flow"""
        return "dataset_metrics"

    def add_ninja_deps(self, deps):
        """No flow-specific deps beyond the parent class's defaults."""
        super().add_ninja_deps(deps)

    def get_top_level_flow_path(self):
        """Return the absolute path to this flow's module file."""
        return pathlib.Path(__file__).resolve()
8 changes: 7 additions & 1 deletion bfasst/flows/flow_descriptions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -156,4 +156,10 @@ flows:
class: OpenTitan
external_tools:
- vivado
- opentitan
- opentitan

- name: AnalyzeDataset
  description: Compute metrics on an FPGA circuit dataset for GNNs.
module: analyze_dataset
class: AnalyzeDataset

2 changes: 2 additions & 0 deletions bfasst/paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@

COMMON_TOOLS_PATH = TOOLS_PATH / "common"

DATASET_METRICS_TOOLS_PATH = TOOLS_PATH / "dataset_metrics"

REV_BIT_TOOLS_PATH = TOOLS_PATH / "rev_bit"
NINJA_TRANSFORM_TOOLS_PATH = TOOLS_PATH / "transform"

Expand Down
46 changes: 46 additions & 0 deletions bfasst/tools/dataset_metrics/accumulate_metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""Accumulate metrics from the graph_metrics tool."""

import chevron

from bfasst.tools.tool import ToolBase
from bfasst.paths import BUILD_PATH, NINJA_BUILD_PATH, DATASET_METRICS_TOOLS_PATH, BFASST_UTILS_PATH


class AccumulateMetrics(ToolBase):
    """Accumulate metrics from the graph_metrics tool."""

    def __init__(self, flow, pieces):
        """flow: owning flow; pieces: list of per-graph metrics files to merge."""
        super().__init__(flow)
        self.pieces = pieces
        self.build_path = BUILD_PATH / "dataset_metrics"
        self.metrics_path = self.build_path / "master_metrics.log"
        self.summary_stats = self.build_path / "summary_stats.log"

        self._init_outputs()
        self.rule_snippet_path = (
            DATASET_METRICS_TOOLS_PATH / "accumulate_metrics_rules.ninja.mustache"
        )

    def create_build_snippets(self):
        """Render the accumulate-metrics build snippet and append it to the ninja file."""
        template_path = DATASET_METRICS_TOOLS_PATH / "accumulate_metrics_build.ninja.mustache"
        # Specify the encoding explicitly so rendering does not depend on the
        # system locale.
        with open(template_path, "r", encoding="utf-8") as f:
            build = chevron.render(
                f,
                {
                    "metrics_file": self.metrics_path,
                    "summary_stats": self.summary_stats,
                    "aggregation_dir": self.build_path,
                    "pieces": self.pieces,
                    "accumulate_metrics_util": BFASST_UTILS_PATH / "accumulate_metrics.py",
                },
            )

        with open(NINJA_BUILD_PATH, "a", encoding="utf-8") as f:
            f.write(build)

    def _init_outputs(self):
        # Record both output files in this tool's outputs mapping.
        self.outputs["metrics_path"] = self.metrics_path
        self.outputs["summary_stats"] = self.summary_stats

    def add_ninja_deps(self, deps):
        """Add the default snippet deps plus the accumulate_metrics utility script."""
        self._add_ninja_deps_default(deps, __file__)
        deps.append(BFASST_UTILS_PATH / "accumulate_metrics.py")
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
build {{ metrics_file }} {{ summary_stats }}: accumulate_metrics {{ aggregation_dir }} | {{#pieces}}{{.}} {{/pieces}} {{ accumulate_metrics_util }}
metrics_file = {{ metrics_file }}
summary_stats = {{ summary_stats }}

Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
rule accumulate_metrics
command = python {{ bfasst_path }}/bfasst/utils/accumulate_metrics.py $in -m $metrics_file -s $summary_stats
description = accumulate metrics from $in to produce master_metrics and summary_stats files

46 changes: 46 additions & 0 deletions bfasst/tools/dataset_metrics/graph_metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""Create the rule and build snippets for computing gnn dataset metrics."""

import chevron

from bfasst.tools.tool import ToolBase
from bfasst.paths import BUILD_PATH, NINJA_BUILD_PATH, DATASET_METRICS_TOOLS_PATH, BFASST_UTILS_PATH


class GraphMetrics(ToolBase):
    """Create the rule and build snippets for computing gnn dataset metrics."""

    def __init__(
        self,
        flow,
        graph,
        num,
    ):
        """flow: owning flow; graph: path to the graph dump file; num: integer
        tag used to name this graph's metrics output file."""
        super().__init__(flow)
        self.graph = graph
        self.num = num
        self.build_path = BUILD_PATH / "dataset_metrics"
        self.metrics_path = self.build_path / f"metrics_{num}.log"

        self._init_outputs()
        self.rule_snippet_path = DATASET_METRICS_TOOLS_PATH / "process_graph_rules.ninja.mustache"

    def create_build_snippets(self):
        """Render the process-graph build snippet and append it to the ninja file."""
        template_path = DATASET_METRICS_TOOLS_PATH / "process_graph_build.ninja.mustache"
        # Specify the encoding explicitly so rendering does not depend on the
        # system locale.
        with open(template_path, "r", encoding="utf-8") as f:
            build = chevron.render(
                f,
                {
                    "output": self.metrics_path,
                    "graph": self.graph,
                    "process_graph_util": BFASST_UTILS_PATH / "process_graph.py",
                },
            )

        with open(NINJA_BUILD_PATH, "a", encoding="utf-8") as f:
            f.write(build)

    def _init_outputs(self):
        # Record the metrics file in this tool's outputs mapping.
        self.outputs["metrics_path"] = self.metrics_path

    def add_ninja_deps(self, deps):
        """Add the default snippet deps plus the process_graph utility script."""
        self._add_ninja_deps_default(deps, __file__)
        deps.append(BFASST_UTILS_PATH / "process_graph.py")
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
build {{ output }}: process_graph {{ graph }} | {{ process_graph_util }}

Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
rule process_graph
command = python {{ bfasst_path }}/bfasst/utils/process_graph.py $in -o $out
description = compute metrics on $in and save them to $out

148 changes: 148 additions & 0 deletions bfasst/utils/accumulate_metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
"""Accumulate metrics from graphs in a dataset after computing them for all graphs"""

import argparse
import logging
import json
from pathlib import Path
import statistics

logger = logging.getLogger(__name__)


def main():
    """Accumulate per-graph metrics, compute summary statistics, and write both files."""
    # ArgParse
    args = get_args()

    # Logging (for debug, don't use in parallel)
    logging.basicConfig(
        level=logging.DEBUG if args.verbose else logging.INFO,
        format="%(asctime)s - %(levelname)s - %(message)s",
    )

    # Default to the same names that compute_master_metrics hard-codes in its
    # skip list, so a rerun never accumulates its own previous outputs.
    master_metrics_output = args.m if args.m else "master_metrics.log"
    stats_summary_output = args.s if args.s else "summary_stats.log"

    # Iterate through the files in the analysis directory
    master_metrics = compute_master_metrics(
        args.analysis_dir, master_metrics_output, stats_summary_output
    )

    # sort the values for each metric after merging
    master_metrics = sort_metrics(master_metrics)

    # Compute the stats for each metric
    stats_summary = get_stats_summary(master_metrics)

    # write master_metrics to a file
    with open(master_metrics_output, "w", encoding="utf-8") as f:
        f.write(json.dumps(master_metrics, indent=4))

    with open(stats_summary_output, "w", encoding="utf-8") as f:
        f.write(json.dumps(stats_summary, indent=4))


def get_args():
    """Build the command-line parser and return the parsed arguments."""
    arg_parser = argparse.ArgumentParser(description="Compute metrics on a graph.")

    # Required positional argument: where the per-graph analysis files live.
    arg_parser.add_argument(
        "analysis_dir",
        help="The path to the folder containing all analysis files for all graphs.",
    )

    # Optional flags.
    arg_parser.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging.")
    arg_parser.add_argument("-m", help="The name of the metrics file to create")
    arg_parser.add_argument(
        "-s", help="The name of the stats (5-num summary, mean, stddev) file to create"
    )

    return arg_parser.parse_args()


def compute_master_metrics(analysis_dir, master_metrics_output, stats_summary_output):
    """Merge the per-graph metric files in analysis_dir into one dictionary.

    Each non-directory file is expected to hold a one-line JSON object mapping
    ip -> {metric -> [values]}.  Values for the same ip/metric are concatenated
    across files.  The output files of this tool (both the caller-specified
    names and the defaults) are skipped so reruns do not accumulate them.
    """
    log = logging.getLogger(__name__)

    # Compare basenames so output paths that include directory components
    # (e.g. "build/master_metrics.log") are still skipped correctly.
    skip_names = {
        Path(master_metrics_output).name,
        Path(stats_summary_output).name,
        # Skip the default master_metrics and stats_summary names even if the
        # user has specified different names for this run.
        "master_metrics.log",
        "summary_stats.log",
    }

    master_metrics = {}
    for file in Path(analysis_dir).iterdir():
        if file.is_dir():
            continue

        if file.name in skip_names:
            continue

        log.debug("Processing %s", file)

        # Each metrics file holds its JSON object on the first line.
        with open(file, "r", encoding="utf-8") as f:
            graph_metrics = json.loads(f.readline())

        for ip, metrics in graph_metrics.items():
            # Initialize the IP entry in the master dictionary if it doesn't exist
            if ip not in master_metrics:
                master_metrics[ip] = {}

            for metric, values in metrics.items():
                # Initialize the metric entry if it doesn't exist
                if metric not in master_metrics[ip]:
                    master_metrics[ip][metric] = []

                # Concatenate the lists
                master_metrics[ip][metric].extend(values)

    return master_metrics


def sort_metrics(metrics):
    """Sort the value list for each metric, in place; return the same dict.

    Sorted value lists are required downstream, where quartiles are taken by
    indexing into ordered data.
    """
    # Iterate the inner dicts directly instead of unpacking .items() and
    # discarding the value just to re-look it up by key.
    for ip_metrics in metrics.values():
        for metric, values in ip_metrics.items():
            ip_metrics[metric] = sorted(values)
    return metrics


def get_stats_summary(master_metrics):
    """Compute the 5-number summary, mean, and standard deviation for each metric."""
    summary = {}
    for ip, metrics in master_metrics.items():
        for metric, values in metrics.items():
            # Skip empty value lists; there is nothing to summarize.
            if not values:
                continue

            minimum, q1, med, q3, maximum = five_number_summary(values)
            count = len(values)

            summary.setdefault(ip, {})[metric] = {
                "min": minimum,
                "Q1": q1,
                "median": med,
                "Q3": q3,
                "max": maximum,
                "mean": sum(values) / count,
                # stdev needs at least two samples; report 0.0 otherwise.
                "stddev": statistics.stdev(values) if count > 1 else 0.0,
            }
    return summary


def five_number_summary(data):
    """Compute the 5-number summary for the given data.

    Assumes data is already sorted ascending; quartiles are taken by simple
    indexing rather than interpolation.
    """
    count = len(data)
    return (
        data[0],                  # min
        data[count // 4],         # Q1
        data[count // 2],         # median
        data[(3 * count) // 4],   # Q3
        data[-1],                 # max
    )


if __name__ == "__main__":
    # Script entry point (invoked by the ninja accumulate_metrics rule).
    main()
Loading

0 comments on commit 1461dd1

Please sign in to comment.