-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #514 from byuccl/dataset_metrics
Dataset metrics
- Loading branch information
Showing
11 changed files
with
778 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
"""Analyze dataset metrics.""" | ||
|
||
from pathlib import Path | ||
import pathlib | ||
from bfasst.flows.flow import FlowNoDesign | ||
from bfasst.tools.dataset_metrics.accumulate_metrics import AccumulateMetrics | ||
from bfasst.tools.dataset_metrics.graph_metrics import GraphMetrics | ||
|
||
|
||
class AnalyzeDataset(FlowNoDesign):
    """Flow that computes metrics for every graph in a dataset and aggregates them.

    Expects ``dataset`` to be a directory whose subdirectories each hold one
    graph dump named ``<dirname>.dump``, with the directory name ending in
    ``_<num>``.
    """

    def __init__(self, dataset):
        # pylint: disable=duplicate-code
        super().__init__()
        self.dataset = Path(dataset)

        # only used for configuring ninja rule snippets
        self.graph_metrics_default_tool = GraphMetrics(self, None, None)
        self.accumulate_metrics_tool = AccumulateMetrics(self, None)
        # pylint: enable=duplicate-code

    def create_build_snippets(self):
        """Emit one graph-metrics snippet per dataset entry, then one aggregation snippet."""
        directories = [x for x in self.dataset.iterdir() if x.is_dir()]
        pieces = []

        # Iterate the directories directly rather than indexing by position.
        for directory in directories:
            # Directory names end in "_<num>"; <num> keys the per-graph metrics file.
            num = int(directory.name.split("_")[-1])
            graph_metrics_tool = GraphMetrics(self, directory / f"{directory.name}.dump", num)
            pieces.append(graph_metrics_tool.metrics_path)
            graph_metrics_tool.create_build_snippets()

        AccumulateMetrics(self, pieces).create_build_snippets()

    @classmethod
    def flow_build_dir_name(cls) -> str:
        """Get the name of the build directory for this flow"""
        return "dataset_metrics"

    def add_ninja_deps(self, deps):
        """No deps beyond the defaults added by the base class."""
        super().add_ninja_deps(deps)

    def get_top_level_flow_path(self):
        """Return the absolute path of this flow's module file."""
        return pathlib.Path(__file__).resolve()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
"""Accumulate metrics from the graph_metrics tool.""" | ||
|
||
import chevron | ||
|
||
from bfasst.tools.tool import ToolBase | ||
from bfasst.paths import BUILD_PATH, NINJA_BUILD_PATH, DATASET_METRICS_TOOLS_PATH, BFASST_UTILS_PATH | ||
|
||
|
||
class AccumulateMetrics(ToolBase):
    """Accumulate metrics from the graph_metrics tool.

    Renders the ninja build snippet that merges the per-graph metrics files
    (``pieces``) into a master metrics file and a summary statistics file.
    """

    def __init__(self, flow, pieces):
        """flow: parent flow; pieces: per-graph metrics paths to aggregate
        (None when the tool is created only to configure rule snippets)."""
        super().__init__(flow)
        self.pieces = pieces
        self.build_path = BUILD_PATH / "dataset_metrics"
        self.metrics_path = self.build_path / "master_metrics.log"
        self.summary_stats = self.build_path / "summary_stats.log"

        self._init_outputs()
        self.rule_snippet_path = (
            DATASET_METRICS_TOOLS_PATH / "accumulate_metrics_rules.ninja.mustache"
        )

    def create_build_snippets(self):
        """Render the aggregation build snippet and append it to the ninja build file."""
        template = DATASET_METRICS_TOOLS_PATH / "accumulate_metrics_build.ninja.mustache"
        # Specify the encoding explicitly (pylint: unspecified-encoding).
        with open(template, "r", encoding="utf-8") as f:
            build = chevron.render(
                f,
                {
                    "metrics_file": self.metrics_path,
                    "summary_stats": self.summary_stats,
                    "aggregation_dir": self.build_path,
                    "pieces": self.pieces,
                    "accumulate_metrics_util": BFASST_UTILS_PATH / "accumulate_metrics.py",
                },
            )

        with open(NINJA_BUILD_PATH, "a", encoding="utf-8") as f:
            f.write(build)

    def _init_outputs(self):
        """Register this tool's outputs with the base class."""
        self.outputs["metrics_path"] = self.metrics_path
        self.outputs["summary_stats"] = self.summary_stats

    def add_ninja_deps(self, deps):
        """Add the default deps plus the accumulate_metrics utility script."""
        self._add_ninja_deps_default(deps, __file__)
        deps.append(BFASST_UTILS_PATH / "accumulate_metrics.py")
4 changes: 4 additions & 0 deletions
4
bfasst/tools/dataset_metrics/accumulate_metrics_build.ninja.mustache
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
build {{ metrics_file }} {{ summary_stats }}: accumulate_metrics {{ aggregation_dir }} | {{#pieces}}{{.}} {{/pieces}} {{ accumulate_metrics_util }} | ||
metrics_file = {{ metrics_file }} | ||
summary_stats = {{ summary_stats }} | ||
|
4 changes: 4 additions & 0 deletions
4
bfasst/tools/dataset_metrics/accumulate_metrics_rules.ninja.mustache
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
rule accumulate_metrics | ||
command = python {{ bfasst_path }}/bfasst/utils/accumulate_metrics.py $in -m $metrics_file -s $summary_stats | ||
description = accumulate metrics from $in to produce master_metrics and summary_stats files | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
"""Create the rule and build snippets for computing gnn dataset metrics.""" | ||
|
||
import chevron | ||
|
||
from bfasst.tools.tool import ToolBase | ||
from bfasst.paths import BUILD_PATH, NINJA_BUILD_PATH, DATASET_METRICS_TOOLS_PATH, BFASST_UTILS_PATH | ||
|
||
|
||
class GraphMetrics(ToolBase):
    """Create the rule and build snippets for computing gnn dataset metrics.

    Renders the ninja build snippet that runs process_graph.py on one graph
    dump and writes its metrics to ``metrics_<num>.log``.
    """

    def __init__(
        self,
        flow,
        graph,
        num,
    ):
        """flow: parent flow; graph: path to the graph dump file; num: index
        used to name this graph's metrics output file (None for rule-only use)."""
        super().__init__(flow)
        self.graph = graph
        self.num = num
        self.build_path = BUILD_PATH / "dataset_metrics"
        self.metrics_path = self.build_path / f"metrics_{num}.log"

        self._init_outputs()
        self.rule_snippet_path = DATASET_METRICS_TOOLS_PATH / "process_graph_rules.ninja.mustache"

    def create_build_snippets(self):
        """Render this graph's build snippet and append it to the ninja build file."""
        template = DATASET_METRICS_TOOLS_PATH / "process_graph_build.ninja.mustache"
        # Specify the encoding explicitly (pylint: unspecified-encoding).
        with open(template, "r", encoding="utf-8") as f:
            build = chevron.render(
                f,
                {
                    "output": self.metrics_path,
                    "graph": self.graph,
                    "process_graph_util": BFASST_UTILS_PATH / "process_graph.py",
                },
            )

        with open(NINJA_BUILD_PATH, "a", encoding="utf-8") as f:
            f.write(build)

    def _init_outputs(self):
        """Register this tool's outputs with the base class."""
        self.outputs["metrics_path"] = self.metrics_path

    def add_ninja_deps(self, deps):
        """Add the default deps plus the process_graph utility script."""
        self._add_ninja_deps_default(deps, __file__)
        deps.append(BFASST_UTILS_PATH / "process_graph.py")
2 changes: 2 additions & 0 deletions
2
bfasst/tools/dataset_metrics/process_graph_build.ninja.mustache
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
build {{ output }}: process_graph {{ graph }} | {{ process_graph_util }} | ||
|
4 changes: 4 additions & 0 deletions
4
bfasst/tools/dataset_metrics/process_graph_rules.ninja.mustache
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
rule process_graph | ||
command = python {{ bfasst_path }}/bfasst/utils/process_graph.py $in -o $out | ||
description = compute metrics on $in and save them to $out | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,148 @@ | ||
"""Accumulate metrics from graphs in a dataset after computing them for all graphs""" | ||
|
||
import argparse | ||
import logging | ||
import json | ||
from pathlib import Path | ||
import statistics | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
def main():
    """Aggregate per-graph metrics files into master-metrics and summary-stats files."""
    args = get_args()

    # Logging (for debug, don't use in parallel)
    logging.basicConfig(
        level=logging.DEBUG if args.verbose else logging.INFO,
        format="%(asctime)s - %(levelname)s - %(message)s",
    )

    # Fall back to default output names when not given on the command line.
    master_metrics_output = args.m or "master_metrics.log"
    stats_summary_output = args.s or "summary_statistics.log"

    # Merge every per-graph metrics file, then sort each metric's value list.
    master_metrics = sort_metrics(
        compute_master_metrics(args.analysis_dir, master_metrics_output, stats_summary_output)
    )

    # Five-number summary, mean, and stddev for every metric.
    stats_summary = get_stats_summary(master_metrics)

    with open(master_metrics_output, "w") as out:
        json.dump(master_metrics, out, indent=4)

    with open(stats_summary_output, "w") as out:
        json.dump(stats_summary, out, indent=4)
|
||
|
||
def get_args():
    """Build the argument parser and return the parsed command line arguments."""
    arg_parser = argparse.ArgumentParser(description="Compute metrics on a graph.")
    # Required positional: where the per-graph analysis files live.
    arg_parser.add_argument(
        "analysis_dir", help="The path to the folder containing all analysis files for all graphs."
    )
    # Optional flags: debug logging and custom output file names.
    arg_parser.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging.")
    arg_parser.add_argument("-m", help="The name of the metrics file to create")
    arg_parser.add_argument(
        "-s", help="The name of the stats (5-num summary, mean, stddev) file to create"
    )
    return arg_parser.parse_args()
|
||
|
||
def compute_master_metrics(analysis_dir, master_metrics_output, stats_summary_output):
    """Merge the per-graph metrics files in analysis_dir into one dictionary.

    Each file is expected to hold one JSON line mapping ip -> {metric -> [values]};
    value lists for the same ip/metric are concatenated across files.
    """
    # Compare by basename so the output files are skipped even when the caller
    # passes them as paths (as the ninja rule does) rather than bare names.
    skip_names = {
        Path(master_metrics_output).name,
        Path(stats_summary_output).name,
        # Skip the master_metrics and stats_summary files
        # Even if the user has specified different names
        # for this run
        "master_metrics.log",
        "summary_stats.log",
    }

    master_metrics = {}
    for file in Path(analysis_dir).iterdir():
        if file.is_dir() or file.name in skip_names:
            continue

        # Same logger instance as the module-level "logger" (keyed on __name__).
        logging.getLogger(__name__).debug("Processing %s", file)

        # Each metrics file holds a single JSON line.
        with open(file, "r", encoding="utf-8") as f:
            graph_metrics = json.loads(f.readline())

        for ip, metrics in graph_metrics.items():
            # Initialize the IP entry in the master dictionary if it doesn't exist
            ip_entry = master_metrics.setdefault(ip, {})
            for metric, values in metrics.items():
                # Initialize the metric entry if needed, then concatenate the lists
                ip_entry.setdefault(metric, []).extend(values)

    return master_metrics
|
||
|
||
def sort_metrics(metrics):
    """Sort every metric's value list ascending, in place; return the dict."""
    for ip_metrics in metrics.values():
        for name in ip_metrics:
            ip_metrics[name] = sorted(ip_metrics[name])
    return metrics
|
||
|
||
def get_stats_summary(master_metrics):
    """Compute the 5-number summary, mean, and standard deviation for each metric."""
    summary = {}
    for ip, metrics in master_metrics.items():
        for metric, values in metrics.items():
            # Skip empty lists — there is nothing to summarize.
            if not values:
                continue

            low, q1, med, q3, high = five_number_summary(values)
            entry = summary.setdefault(ip, {})
            entry[metric] = {
                "min": low,
                "Q1": q1,
                "median": med,
                "Q3": q3,
                "max": high,
                "mean": sum(values) / len(values),
                # stdev needs at least two samples; report 0.0 otherwise.
                "stddev": statistics.stdev(values) if len(values) > 1 else 0.0,
            }
    return summary
|
||
|
||
def five_number_summary(data):
    """Return (min, Q1, median, Q3, max) of *data*, assumed sorted ascending."""
    n = len(data)
    # Index-based quartiles: elements at positions n//4, n//2, and 3n//4.
    return data[0], data[n // 4], data[n // 2], data[(3 * n) // 4], data[-1]
|
||
|
||
# Script entry point: python accumulate_metrics.py <analysis_dir> [-v] [-m FILE] [-s FILE]
if __name__ == "__main__":
    main()
Oops, something went wrong.