pylint
KeenanRileyFaulkner committed Oct 31, 2024
1 parent 80e3eda commit 705feda
Showing 3 changed files with 78 additions and 43 deletions.
1 change: 0 additions & 1 deletion bfasst/flows/analyze_dataset.py
@@ -3,7 +3,6 @@
 from pathlib import Path
 import pathlib
 from bfasst.flows.flow import FlowNoDesign
-from bfasst.paths import FLOWS_PATH
 from bfasst.tools.dataset_metrics.accumulate_metrics import AccumulateMetrics
 from bfasst.tools.dataset_metrics.graph_metrics import GraphMetrics

109 changes: 68 additions & 41 deletions bfasst/utils/accumulate_metrics.py
@@ -12,16 +12,7 @@
 def main():
     """Load the graph, convert to adj_list, and compute metrics."""
     # ArgParse
-    parser = argparse.ArgumentParser(description="Compute metrics on a graph.")
-    parser.add_argument(
-        "analysis_dir", help="The path to the folder containing all analysis files for all graphs."
-    )
-    parser.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging.")
-    parser.add_argument("-m", help="The name of the metrics file to create")
-    parser.add_argument(
-        "-s", help="The name of the stats (5-num summary, mean, stddev) file to create"
-    )
-    args = parser.parse_args()
+    args = get_args()

     # Logging (for debug, don't use in parallel)
     logging.basicConfig(
@@ -30,21 +21,55 @@ def main():
     )
 
     # Initialize the master dictionary
-    master_metrics = {}
     master_metrics_output = args.m if args.m else "master_metrics.log"
     stats_summary_output = args.s if args.s else "summary_statistics.log"
 
-    # Iterate through the files in the analysis directory
-    for file in Path(args.analysis_dir).iterdir():
+    master_metrics = compute_master_metrics(
+        args.analysis_dir, master_metrics_output, stats_summary_output
+    )

+    # sort the values for each metric after merging
+    master_metrics = sort_metrics(master_metrics)
+
+    # Compute the stats for each metric
+    stats_summary = get_stats_summary(master_metrics)
+
+    # write master_metrics to a file
+    with open(master_metrics_output, "w") as f:
+        f.write(json.dumps(master_metrics, indent=4))
+
+    with open(stats_summary_output, "w") as f:
+        f.write(json.dumps(stats_summary, indent=4))


+def get_args():
+    parser = argparse.ArgumentParser(description="Compute metrics on a graph.")
+    parser.add_argument(
+        "analysis_dir", help="The path to the folder containing all analysis files for all graphs."
+    )
+    parser.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging.")
+    parser.add_argument("-m", help="The name of the metrics file to create")
+    parser.add_argument(
+        "-s", help="The name of the stats (5-num summary, mean, stddev) file to create"
+    )
+    return parser.parse_args()
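
As a usage sketch, the refactored parser can be exercised directly; the directory and file names below are illustrative, not taken from the repository:

# Sketch: drive get_args() programmatically by faking the command line.
import sys

sys.argv = ["accumulate_metrics.py", "graph_analysis/", "-m", "metrics.log", "-s", "stats.log"]
args = get_args()
assert args.analysis_dir == "graph_analysis/"
assert args.m == "metrics.log" and args.s == "stats.log"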


+def compute_master_metrics(analysis_dir, master_metrics_output, stats_summary_output):
+    master_metrics = {}
+    for file in Path(analysis_dir).iterdir():
         if file.is_dir():
             continue

-        if (
-            file.name == master_metrics_output
-            or file.name == stats_summary_output
-            # if these exist, don't read them even if master_metrics_output and stats_summary_output are different
-            or file.name == "master_metrics.log"
-            or file.name == "summary_statistics.log"
+        if file.name in (
+            master_metrics_output,
+            stats_summary_output,
+            # Skip the master_metrics and stats_summary files
+            # Even if the user has specified different names
+            # for this run
+            "master_metrics.log",
+            "summary_statistics.log",
         ):
             continue

@@ -64,51 +89,53 @@ def main():
             # Concatenate the lists
             master_metrics[ip][metric].extend(values)

-    # sort the values for each metric after merging
-    for ip in master_metrics:
-        for metric in master_metrics[ip]:
-            master_metrics[ip][metric] = sorted(master_metrics[ip][metric])
+    return master_metrics
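
For orientation, the dictionary returned here nests lists of values per metric per IP, as implied by the extend() call above; names and numbers are invented:

# Hypothetical shape of the merged metrics:
master_metrics = {
    "ip_core_a": {"degree": [1, 3, 2, 7], "size": [42]},
    "ip_core_b": {"degree": [2, 2, 5]},
}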

-    # Compute the stats for each metric
-    stats_summary = {}
-    for ip, metrics in master_metrics.items():
+
+def sort_metrics(metrics):
+    """Sort the values for each metric in the dictionary."""
+    for ip, _ in metrics.items():
+        for metric in metrics[ip]:
+            metrics[ip][metric] = sorted(metrics[ip][metric])
+    return metrics
+
+
+def get_stats_summary(metrics):
+    summary = {}
+    for ip, metrics in metrics.items():
         for metric, values in metrics.items():
             # Calculate statistics
             if values:  # Check if the list is not empty
-                min_val, Q1, median, Q3, max_val = five_number_summary(values)
+                min_val, first_quartile, median, third_quartile, max_val = five_number_summary(
+                    values
+                )
                 mean = sum(values) / len(values)
                 stddev = statistics.stdev(values) if len(values) > 1 else 0.0

                 # Prepare the summary dictionary
-                if ip not in stats_summary:
-                    stats_summary[ip] = {}
+                if ip not in summary:
+                    summary[ip] = {}
 
-                stats_summary[ip][metric] = {
+                summary[ip][metric] = {
                     "min": min_val,
-                    "Q1": Q1,
+                    "Q1": first_quartile,
                     "median": median,
-                    "Q3": Q3,
+                    "Q3": third_quartile,
                     "max": max_val,
                     "mean": mean,
                     "stddev": stddev,
                 }

-    # write master_metrics to a file
-    with open(master_metrics_output, "w") as f:
-        f.write(json.dumps(master_metrics, indent=4))
-
-    with open(stats_summary_output, "w") as f:
-        f.write(json.dumps(stats_summary, indent=4))
+    return summary
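
The summary returned here mirrors that nesting, one stats record per metric; a hypothetical instance (values invented):

# Hypothetical shape of the stats summary:
summary = {
    "ip_core_a": {
        "degree": {"min": 1, "Q1": 2, "median": 3, "Q3": 5,
                   "max": 9, "mean": 3.8, "stddev": 2.9},
    },
}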


 def five_number_summary(data):
     n = len(data)
     min_val = data[0]
     max_val = data[-1]
-    Q1 = data[n // 4]
+    first_quartile = data[n // 4]
     median = data[n // 2]
-    Q3 = data[(3 * n) // 4]
-    return min_val, Q1, median, Q3, max_val
+    third_quartile = data[(3 * n) // 4]
+    return min_val, first_quartile, median, third_quartile, max_val
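
Note that the index-based quartiles assume the input is already sorted, which sort_metrics() guarantees earlier in the pipeline; a quick check:

# five_number_summary picks elements by index, so it relies on sorted input.
data = [1, 2, 4, 7, 9, 12, 15, 18]
assert five_number_summary(data) == (1, 4, 9, 15, 18)
# indices used: 0, 8 // 4 = 2, 8 // 2 = 4, (3 * 8) // 4 = 6, -1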


 if __name__ == "__main__":
11 changes: 10 additions & 1 deletion bfasst/utils/process_graph.py
@@ -130,6 +130,7 @@ def convert_to_adj_list(component_nodes, component_edges):


 def compute_metrics_per_ip(adj_lists, args):
+    """Compute metrics for each IP in the graph."""
     metrics_per_ip = {}
     for label, adj_list in adj_lists.items():

@@ -193,7 +194,7 @@ def compute_metrics_per_ip(adj_lists, args):
 def compute_size(adj_list):
     edge_count = 0
     for node in adj_list:
-        for neighbor in adj_list[node]:
+        for _ in adj_list[node]:
             edge_count += 1
     return edge_count // 2
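
The halving works because an undirected adjacency list stores every edge twice, once per endpoint:

# Each undirected edge is counted from both ends, so the raw count is doubled.
adj = {"a": ["b", "c"], "b": ["a"], "c": ["a"]}
assert compute_size(adj) == 2  # edges a-b and a-c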

@@ -231,6 +232,8 @@ def compute_average_degree(adj_list):


 class UnionFind:
+    """Union-find data structure."""
+
     def __init__(self):
         self.parent = {}
         self.rank = {}
@@ -241,6 +244,7 @@ def add(self, u):
             self.rank[u] = 0
 
     def find(self, u):
+        """Find the parent of a node."""
         # Ensure u is in the union find
         self.add(u)

@@ -250,6 +254,7 @@ def find(self, u):
         return self.parent[u]
 
     def union(self, u, v):
+        """Union two nodes."""
         self.add(u)
         self.add(v)
         pu, pv = self.find(u), self.find(v)
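
A minimal usage sketch of the class; the tail of union() is collapsed in this diff, so the standard union-by-rank bookkeeping is assumed:

# Nodes are added lazily by find()/union(); connectivity queries reduce
# to comparing representatives.
uf = UnionFind()
uf.union("a", "b")
uf.union("b", "c")
assert uf.find("a") == uf.find("c")  # one connected component
assert uf.find("a") != uf.find("d")  # "d" is auto-added, still isolated
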
@@ -265,6 +270,7 @@


 def bfs_farthest(adj_list, start_node):
+    """Breadth-first search to find the farthest node from a starting node."""
     queue = [(start_node, 0)]
     visited = {start_node}
     farthest_node = start_node
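
Assuming the collapsed tail returns the usual (farthest_node, distance) pair, this helper supports the common double-sweep diameter estimate; the graph below is made up:

# Hypothetical double sweep; the return signature is an assumption, since
# the rest of the function body is collapsed in this diff.
adj = {"a": ["b"], "b": ["a", "c"], "c": ["b", "d"], "d": ["c"]}
far, _ = bfs_farthest(adj, "a")           # farthest node from an arbitrary start
_, diameter_estimate = bfs_farthest(adj, far)
assert diameter_estimate == 3             # the path a-b-c-d
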
@@ -285,6 +291,7 @@


 def compute_k_core(adj_list):
+    """Compute the k-core of a graph."""
     degree = {node: len(neighbors) for node, neighbors in adj_list.items()}
     max_k = 0
     k_core_subgraph = {}
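
For orientation: the k-core is the maximal subgraph in which every remaining node has degree at least k, found by repeatedly peeling lower-degree nodes. A small example:

# A triangle with a pendant vertex: peeling "d" (degree 1) leaves the
# triangle a-b-c, where every degree is 2, so the maximum k is 2.
adj = {"a": ["b", "c"], "b": ["a", "c"], "c": ["a", "b", "d"], "d": ["c"]}
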
@@ -320,6 +327,7 @@


 def compute_global_clustering(adj_list):
+    """Compute the global clustering coefficient of a graph."""
     closed_triplets = 0
     total_triplets = 0
     visited_pairs = set()
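
The global coefficient is the ratio of closed triplets to all triplets, with each triangle closing three of them; assuming the collapsed body implements that standard count, a hand-worked example:

# Triangle a-b-c plus pendant edge c-d:
#   centered at a: (b, c) closed                       -> 1 of 1
#   centered at b: (a, c) closed                       -> 1 of 1
#   centered at c: (a, b) closed; (a, d), (b, d) open  -> 1 of 3
# global clustering = 3 closed / 5 total = 0.6
adj = {"a": ["b", "c"], "b": ["a", "c"], "c": ["a", "b", "d"], "d": ["c"]}
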
@@ -342,6 +350,7 @@


 def compute_local_clustering(adj_list):
+    """Compute the local clustering coefficient of a graph."""
     local_clustering_coefficients = []
 
     for node in adj_list:
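
Each node's local coefficient is the fraction of its neighbor pairs that are themselves connected, 2e / (k(k - 1)) for a node of degree k with e edges among its neighbors; presumably those per-node values are what this list collects.

# For the triangle-plus-pendant graph above: a and b score 1.0 (their one
# neighbor pair is connected), c scores 1/3, and degree-1 nodes like d are
# conventionally scored 0 or skipped.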
