pylint
KeenanRileyFaulkner committed Oct 31, 2024
1 parent 80e3eda commit 705feda
Showing 3 changed files with 78 additions and 43 deletions.
1 change: 0 additions & 1 deletion bfasst/flows/analyze_dataset.py
@@ -3,7 +3,6 @@
 from pathlib import Path
 import pathlib
 from bfasst.flows.flow import FlowNoDesign
-from bfasst.paths import FLOWS_PATH
 from bfasst.tools.dataset_metrics.accumulate_metrics import AccumulateMetrics
 from bfasst.tools.dataset_metrics.graph_metrics import GraphMetrics

109 changes: 68 additions & 41 deletions bfasst/utils/accumulate_metrics.py
@@ -12,16 +12,7 @@
 def main():
     """Load the graph, convert to adj_list, and compute metrics."""
     # ArgParse
-    parser = argparse.ArgumentParser(description="Compute metrics on a graph.")
-    parser.add_argument(
-        "analysis_dir", help="The path to the folder containing all analysis files for all graphs."
-    )
-    parser.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging.")
-    parser.add_argument("-m", help="The name of the metrics file to create")
-    parser.add_argument(
-        "-s", help="The name of the stats (5-num summary, mean, stddev) file to create"
-    )
-    args = parser.parse_args()
+    args = get_args()

     # Logging (for debug, don't use in parallel)
     logging.basicConfig(
@@ -30,21 +21,55 @@ def main():
     )
 
     # Initialize the master dictionary
-    master_metrics = {}
     master_metrics_output = args.m if args.m else "master_metrics.log"
     stats_summary_output = args.s if args.s else "summary_statistics.log"
 
-    # Iterate through the files in the analysis directory
-    for file in Path(args.analysis_dir).iterdir():
+    master_metrics = compute_master_metrics(
+        args.analysis_dir, master_metrics_output, stats_summary_output
+    )

+    # sort the values for each metric after merging
+    master_metrics = sort_metrics(master_metrics)
+
+    # Compute the stats for each metric
+    stats_summary = get_stats_summary(master_metrics)
+
+    # write master_metrics to a file
+    with open(master_metrics_output, "w") as f:
+        f.write(json.dumps(master_metrics, indent=4))
+
+    with open(stats_summary_output, "w") as f:
+        f.write(json.dumps(stats_summary, indent=4))


+def get_args():
+    parser = argparse.ArgumentParser(description="Compute metrics on a graph.")
+    parser.add_argument(
+        "analysis_dir", help="The path to the folder containing all analysis files for all graphs."
+    )
+    parser.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging.")
+    parser.add_argument("-m", help="The name of the metrics file to create")
+    parser.add_argument(
+        "-s", help="The name of the stats (5-num summary, mean, stddev) file to create"
+    )
+    return parser.parse_args()
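
As a usage sketch, the refactored parser can be exercised directly; the directory and file names below are illustrative, not taken from the repository:

# Sketch: drive get_args() programmatically by faking the command line.
import sys

sys.argv = ["accumulate_metrics.py", "graph_analysis/", "-m", "metrics.log", "-s", "stats.log"]
args = get_args()
assert args.analysis_dir == "graph_analysis/"
assert args.m == "metrics.log" and args.s == "stats.log"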


+def compute_master_metrics(analysis_dir, master_metrics_output, stats_summary_output):
+    master_metrics = {}
+    for file in Path(analysis_dir).iterdir():
         if file.is_dir():
             continue

-        if (
-            file.name == master_metrics_output
-            or file.name == stats_summary_output
-            # if these exist, don't read them even if master_metrics_output and stats_summary_output are different
-            or file.name == "master_metrics.log"
-            or file.name == "summary_statistics.log"
+        if file.name in (
+            master_metrics_output,
+            stats_summary_output,
+            # Skip the master_metrics and stats_summary files
+            # Even if the user has specified different names
+            # for this run
+            "master_metrics.log",
+            "summary_statistics.log",
         ):
             continue

@@ -64,51 +89,53 @@ def main():
             # Concatenate the lists
             master_metrics[ip][metric].extend(values)

-    # sort the values for each metric after merging
-    for ip in master_metrics:
-        for metric in master_metrics[ip]:
-            master_metrics[ip][metric] = sorted(master_metrics[ip][metric])
+    return master_metrics
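
For orientation, the dictionary returned here nests lists of values per metric per IP, as implied by the extend() call above; names and numbers are invented:

# Hypothetical shape of the merged metrics:
master_metrics = {
    "ip_core_a": {"degree": [1, 3, 2, 7], "size": [42]},
    "ip_core_b": {"degree": [2, 2, 5]},
}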

-    # Compute the stats for each metric
-    stats_summary = {}
-    for ip, metrics in master_metrics.items():
+
+def sort_metrics(metrics):
+    """Sort the values for each metric in the dictionary."""
+    for ip, _ in metrics.items():
+        for metric in metrics[ip]:
+            metrics[ip][metric] = sorted(metrics[ip][metric])
+    return metrics
+
+
+def get_stats_summary(metrics):
+    summary = {}
+    for ip, metrics in metrics.items():
         for metric, values in metrics.items():
             # Calculate statistics
             if values:  # Check if the list is not empty
-                min_val, Q1, median, Q3, max_val = five_number_summary(values)
+                min_val, first_quartile, median, third_quartile, max_val = five_number_summary(
+                    values
+                )
                 mean = sum(values) / len(values)
                 stddev = statistics.stdev(values) if len(values) > 1 else 0.0

                 # Prepare the summary dictionary
-                if ip not in stats_summary:
-                    stats_summary[ip] = {}
+                if ip not in summary:
+                    summary[ip] = {}
 
-                stats_summary[ip][metric] = {
+                summary[ip][metric] = {
                     "min": min_val,
-                    "Q1": Q1,
+                    "Q1": first_quartile,
                     "median": median,
-                    "Q3": Q3,
+                    "Q3": third_quartile,
                     "max": max_val,
                     "mean": mean,
                     "stddev": stddev,
                 }

-    # write master_metrics to a file
-    with open(master_metrics_output, "w") as f:
-        f.write(json.dumps(master_metrics, indent=4))
-
-    with open(stats_summary_output, "w") as f:
-        f.write(json.dumps(stats_summary, indent=4))
+    return summary
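
The summary returned here mirrors that nesting, one stats record per metric; a hypothetical instance (values invented):

# Hypothetical shape of the stats summary:
summary = {
    "ip_core_a": {
        "degree": {"min": 1, "Q1": 2, "median": 3, "Q3": 5,
                   "max": 9, "mean": 3.8, "stddev": 2.9},
    },
}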


 def five_number_summary(data):
     n = len(data)
     min_val = data[0]
     max_val = data[-1]
-    Q1 = data[n // 4]
+    first_quartile = data[n // 4]
     median = data[n // 2]
-    Q3 = data[(3 * n) // 4]
-    return min_val, Q1, median, Q3, max_val
+    third_quartile = data[(3 * n) // 4]
+    return min_val, first_quartile, median, third_quartile, max_val
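
Note that the index-based quartiles assume the input is already sorted, which sort_metrics() guarantees earlier in the pipeline; a quick check:

# five_number_summary picks elements by index, so it relies on sorted input.
data = [1, 2, 4, 7, 9, 12, 15, 18]
assert five_number_summary(data) == (1, 4, 9, 15, 18)
# indices used: 0, 8 // 4 = 2, 8 // 2 = 4, (3 * 8) // 4 = 6, -1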


 if __name__ == "__main__":
11 changes: 10 additions & 1 deletion bfasst/utils/process_graph.py
@@ -130,6 +130,7 @@ def convert_to_adj_list(component_nodes, component_edges):


 def compute_metrics_per_ip(adj_lists, args):
+    """Compute metrics for each IP in the graph."""
     metrics_per_ip = {}
     for label, adj_list in adj_lists.items():

@@ -193,7 +194,7 @@ def compute_metrics_per_ip(adj_lists, args):
 def compute_size(adj_list):
     edge_count = 0
     for node in adj_list:
-        for neighbor in adj_list[node]:
+        for _ in adj_list[node]:
             edge_count += 1
     return edge_count // 2
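
The halving works because an undirected adjacency list stores every edge twice, once per endpoint:

# Each undirected edge is counted from both ends, so the raw count is doubled.
adj = {"a": ["b", "c"], "b": ["a"], "c": ["a"]}
assert compute_size(adj) == 2  # edges a-b and a-c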

@@ -231,6 +232,8 @@ def compute_average_degree(adj_list):


 class UnionFind:
+    """Union-find data structure."""
+
     def __init__(self):
         self.parent = {}
         self.rank = {}
@@ -241,6 +244,7 @@ def add(self, u):
             self.rank[u] = 0
 
     def find(self, u):
+        """Find the parent of a node."""
         # Ensure u is in the union find
         self.add(u)

@@ -250,6 +254,7 @@ def find(self, u):
         return self.parent[u]
 
     def union(self, u, v):
+        """Union two nodes."""
         self.add(u)
         self.add(v)
         pu, pv = self.find(u), self.find(v)
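
A minimal usage sketch of the class; the tail of union() is collapsed in this diff, so the standard union-by-rank bookkeeping is assumed:

# Nodes are added lazily by find()/union(); connectivity queries reduce
# to comparing representatives.
uf = UnionFind()
uf.union("a", "b")
uf.union("b", "c")
assert uf.find("a") == uf.find("c")  # one connected component
assert uf.find("a") != uf.find("d")  # "d" is auto-added, still isolated
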
@@ -265,6 +270,7 @@


 def bfs_farthest(adj_list, start_node):
+    """Breadth-first search to find the farthest node from a starting node."""
     queue = [(start_node, 0)]
     visited = {start_node}
     farthest_node = start_node
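
Assuming the collapsed tail returns the usual (farthest_node, distance) pair, this helper supports the common double-sweep diameter estimate; the graph below is made up:

# Hypothetical double sweep; the return signature is an assumption, since
# the rest of the function body is collapsed in this diff.
adj = {"a": ["b"], "b": ["a", "c"], "c": ["b", "d"], "d": ["c"]}
far, _ = bfs_farthest(adj, "a")           # farthest node from an arbitrary start
_, diameter_estimate = bfs_farthest(adj, far)
assert diameter_estimate == 3             # the path a-b-c-d
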
@@ -285,6 +291,7 @@


 def compute_k_core(adj_list):
+    """Compute the k-core of a graph."""
     degree = {node: len(neighbors) for node, neighbors in adj_list.items()}
     max_k = 0
     k_core_subgraph = {}
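
For orientation: the k-core is the maximal subgraph in which every remaining node has degree at least k, found by repeatedly peeling lower-degree nodes. A small example:

# A triangle with a pendant vertex: peeling "d" (degree 1) leaves the
# triangle a-b-c, where every degree is 2, so the maximum k is 2.
adj = {"a": ["b", "c"], "b": ["a", "c"], "c": ["a", "b", "d"], "d": ["c"]}
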
@@ -320,6 +327,7 @@


 def compute_global_clustering(adj_list):
+    """Compute the global clustering coefficient of a graph."""
     closed_triplets = 0
     total_triplets = 0
     visited_pairs = set()
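
The global coefficient is the ratio of closed triplets to all triplets, with each triangle closing three of them; assuming the collapsed body implements that standard count, a hand-worked example:

# Triangle a-b-c plus pendant edge c-d:
#   centered at a: (b, c) closed                       -> 1 of 1
#   centered at b: (a, c) closed                       -> 1 of 1
#   centered at c: (a, b) closed; (a, d), (b, d) open  -> 1 of 3
# global clustering = 3 closed / 5 total = 0.6
adj = {"a": ["b", "c"], "b": ["a", "c"], "c": ["a", "b", "d"], "d": ["c"]}
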
@@ -342,6 +350,7 @@


 def compute_local_clustering(adj_list):
+    """Compute the local clustering coefficient of a graph."""
     local_clustering_coefficients = []
 
     for node in adj_list:
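
Each node's local coefficient is the fraction of its neighbor pairs that are themselves connected, 2e / (k(k - 1)) for a node of degree k with e edges among its neighbors; presumably those per-node values are what this list collects.

# For the triangle-plus-pendant graph above: a and b score 1.0 (their one
# neighbor pair is connected), c scores 1/3, and degree-1 nodes like d are
# conventionally scored 0 or skipped.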
