From 7af4f2b3672aab637fd5d1f911ea6edaaf525eb5 Mon Sep 17 00:00:00 2001
From: AlexandreSinger <alex.singer@mail.utoronto.ca>
Date: Thu, 21 Nov 2024 15:53:22 -0500
Subject: [PATCH 1/2] [Packer] Clean Up Clustering Algorithm

Cleaned up the overall clustering algorithm in the greedy clusterer
class. This change is non-functional; the control flow was rearranged and
inlined / outlined to make the overall clustering algorithm more clear.

More needs to be done to abstract timing and the gain calculation.
---
 vpr/src/base/SetupVPR.cpp          |   1 -
 vpr/src/base/ShowSetup.cpp         |   1 -
 vpr/src/base/vpr_types.h           |   1 -
 vpr/src/pack/attraction_groups.cpp |   1 +
 vpr/src/pack/attraction_groups.h   |   7 +-
 vpr/src/pack/cluster_util.cpp      | 354 ++------------
 vpr/src/pack/cluster_util.h        | 107 +----
 vpr/src/pack/greedy_clusterer.cpp  | 741 +++++++++++++++++++----------
 vpr/src/pack/greedy_clusterer.h    | 121 ++++-
 vpr/src/pack/pack.cpp              |   8 +-
 vpr/src/pack/pack_types.h          |   2 +
 11 files changed, 678 insertions(+), 666 deletions(-)

diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp
index 6900fa80bd2..609b85df751 100644
--- a/vpr/src/base/SetupVPR.cpp
+++ b/vpr/src/base/SetupVPR.cpp
@@ -552,7 +552,6 @@ void SetupPackerOpts(const t_options& Options,
 
     //TODO: document?
     PackerOpts->global_clocks = true;       /* DEFAULT */
-    PackerOpts->hill_climbing_flag = false; /* DEFAULT */
 
     PackerOpts->allow_unrelated_clustering = Options.allow_unrelated_clustering;
     PackerOpts->connection_driven = Options.connection_driven_clustering;
diff --git a/vpr/src/base/ShowSetup.cpp b/vpr/src/base/ShowSetup.cpp
index ab261e6b7b1..b89c21ca4e2 100644
--- a/vpr/src/base/ShowSetup.cpp
+++ b/vpr/src/base/ShowSetup.cpp
@@ -706,7 +706,6 @@ static void ShowPackerOpts(const t_packer_opts& PackerOpts) {
     }
     VTR_LOG("PackerOpts.connection_driven: %s", (PackerOpts.connection_driven ? "true\n" : "false\n"));
     VTR_LOG("PackerOpts.global_clocks: %s", (PackerOpts.global_clocks ? "true\n" : "false\n"));
-    VTR_LOG("PackerOpts.hill_climbing_flag: %s", (PackerOpts.hill_climbing_flag ? "true\n" : "false\n"));
     VTR_LOG("PackerOpts.inter_cluster_net_delay: %f\n", PackerOpts.inter_cluster_net_delay);
     VTR_LOG("PackerOpts.timing_driven: %s", (PackerOpts.timing_driven ? "true\n" : "false\n"));
     VTR_LOG("PackerOpts.target_external_pin_util: %s", vtr::join(PackerOpts.target_external_pin_util, " ").c_str());
diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index df280c52c53..e33c1ac004e 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -790,7 +790,6 @@ struct t_packer_opts {
     std::string sdc_file_name;
     std::string output_file;
     bool global_clocks;
-    bool hill_climbing_flag;
     bool timing_driven;
     enum e_cluster_seed cluster_seed_type;
     float alpha;
diff --git a/vpr/src/pack/attraction_groups.cpp b/vpr/src/pack/attraction_groups.cpp
index 1cf6f428e37..8d151c6c6aa 100644
--- a/vpr/src/pack/attraction_groups.cpp
+++ b/vpr/src/pack/attraction_groups.cpp
@@ -1,4 +1,5 @@
 #include "attraction_groups.h"
+#include "globals.h"
 
 AttractionInfo::AttractionInfo(bool attraction_groups_on) {
     const auto& floorplanning_ctx = g_vpr_ctx.floorplanning();
diff --git a/vpr/src/pack/attraction_groups.h b/vpr/src/pack/attraction_groups.h
index 813d6e0fb1b..ae2409cf772 100644
--- a/vpr/src/pack/attraction_groups.h
+++ b/vpr/src/pack/attraction_groups.h
@@ -10,8 +10,7 @@
 
 #include "vtr_strong_id.h"
 #include "vtr_vector.h"
-#include "atom_netlist.h"
-#include "globals.h"
+#include "atom_netlist_fwd.h"
 
 /**
  * @file
@@ -78,7 +77,7 @@ class AttractionInfo {
 
     void add_attraction_group(const AttractionGroup& group_info);
 
-    int num_attraction_groups();
+    int num_attraction_groups() const;
 
     int get_att_group_pulls() const;
 
@@ -118,7 +117,7 @@ inline void AttractionInfo::set_atom_attraction_group(const AtomBlockId atom_id,
     attraction_groups[group_id].group_atoms.push_back(atom_id);
 }
 
-inline int AttractionInfo::num_attraction_groups() {
+inline int AttractionInfo::num_attraction_groups() const {
     return attraction_groups.size();
 }
 
diff --git a/vpr/src/pack/cluster_util.cpp b/vpr/src/pack/cluster_util.cpp
index 75a1ce82a53..c2cc00424be 100644
--- a/vpr/src/pack/cluster_util.cpp
+++ b/vpr/src/pack/cluster_util.cpp
@@ -1,6 +1,6 @@
 #include "cluster_util.h"
 #include <algorithm>
-#include <tuple>
+#include <unordered_set>
 
 #include "PreClusterTimingGraphResolver.h"
 #include "PreClusterDelayCalculator.h"
@@ -14,8 +14,6 @@
 #include "tatum/TimingReporter.hpp"
 #include "tatum/echo_writer.hpp"
 #include "vpr_context.h"
-#include "vtr_math.h"
-#include "SetupGrid.h"
 
 /**********************************/
 /* Global variables in clustering */
@@ -125,12 +123,7 @@ void calc_init_packing_timing(const t_packer_opts& packer_opts,
     }
 }
 
-void free_clustering_data(const t_packer_opts& packer_opts,
-                          t_clustering_data& clustering_data) {
-
-    if (packer_opts.hill_climbing_flag)
-        delete[] clustering_data.hill_climbing_inputs_avail;
-
+void free_clustering_data(t_clustering_data& clustering_data) {
     delete[] clustering_data.unclustered_list_head;
     delete[] clustering_data.memory_pool;
 }
@@ -161,8 +154,7 @@ void print_pack_status_header() {
     VTR_LOG("-------------------   --------------------------   ---------\n");
 }
 
-void print_pack_status(int num_clb,
-                       int tot_num_molecules,
+void print_pack_status(int tot_num_molecules,
                        int num_molecules_processed,
                        int& mols_since_last_print,
                        int device_width,
@@ -178,6 +170,8 @@ void print_pack_status(int num_clb,
 
     int int_molecule_increment = (int)(print_frequency * tot_num_molecules);
 
+    int num_clusters_created = cluster_legalizer.clusters().size();
+
     if (mols_since_last_print == int_molecule_increment) {
         VTR_LOG(
             "%6d/%-6d  %3d%%   "
@@ -186,13 +180,16 @@ void print_pack_status(int num_clb,
             num_molecules_processed,
             tot_num_molecules,
             int_percentage,
-            num_clb,
+            num_clusters_created,
             device_width,
             device_height);
 
         VTR_LOG("\n");
         fflush(stdout);
         mols_since_last_print = 0;
+        // FIXME: This really should not be here. This has nothing to do with
+        //        printing the pack status! Abstract this into the candidate
+        //        selector class.
         if (attraction_groups.num_attraction_groups() > 0) {
             rebuild_attraction_groups(attraction_groups, cluster_legalizer);
         }
@@ -326,8 +323,6 @@ void add_molecule_to_pb_stats_candidates(t_pack_molecule* molecule,
 void alloc_and_init_clustering(const t_molecule_stats& max_molecule_stats,
                                const Prepacker& prepacker,
                                t_clustering_data& clustering_data,
-                               std::unordered_map<AtomNetId, int>& net_output_feeds_driving_block_input,
-                               int& unclustered_list_head_size,
                                int num_molecules) {
     /* Allocates the main data structures used for clustering and properly *
      * initializes them.                                                   */
@@ -335,7 +330,7 @@ void alloc_and_init_clustering(const t_molecule_stats& max_molecule_stats,
 
     /* alloc and load list of molecules to pack */
     clustering_data.unclustered_list_head = new t_molecule_link[max_molecule_stats.num_used_ext_inputs + 1];
-    unclustered_list_head_size = max_molecule_stats.num_used_ext_inputs + 1;
+    clustering_data.unclustered_list_head_size = max_molecule_stats.num_used_ext_inputs + 1;
 
     for (int i = 0; i <= max_molecule_stats.num_used_ext_inputs; i++) {
         clustering_data.unclustered_list_head[i] = t_molecule_link();
@@ -366,20 +361,6 @@ void alloc_and_init_clustering(const t_molecule_stats& max_molecule_stats,
 
         next_ptr++;
     }
-
-    /* load net info */
-    for (AtomNetId net : atom_ctx.nlist.nets()) {
-        AtomPinId driver_pin = atom_ctx.nlist.net_driver(net);
-        AtomBlockId driver_block = atom_ctx.nlist.pin_block(driver_pin);
-
-        for (AtomPinId sink_pin : atom_ctx.nlist.net_sinks(net)) {
-            AtomBlockId sink_block = atom_ctx.nlist.pin_block(sink_pin);
-
-            if (driver_block == sink_block) {
-                net_output_feeds_driving_block_input[net]++;
-            }
-        }
-    }
 }
 
 /*****************************************/
@@ -521,164 +502,6 @@ void update_connection_gain_values(const AtomNetId net_id,
     }
 }
 
-void try_fill_cluster(ClusterLegalizer& cluster_legalizer,
-                      const Prepacker& prepacker,
-                      const t_packer_opts& packer_opts,
-                      t_pack_molecule*& prev_molecule,
-                      t_pack_molecule*& next_molecule,
-                      int& num_same_molecules,
-                      t_cluster_progress_stats& cluster_stats,
-                      int num_clb,
-                      const LegalizationClusterId legalization_cluster_id,
-                      AttractionInfo& attraction_groups,
-                      vtr::vector<LegalizationClusterId, std::vector<AtomNetId>>& clb_inter_blk_nets,
-                      bool allow_unrelated_clustering,
-                      const int& high_fanout_threshold,
-                      const std::unordered_set<AtomNetId>& is_clock,
-                      const std::unordered_set<AtomNetId>& is_global,
-                      const std::shared_ptr<SetupTimingInfo>& timing_info,
-                      e_block_pack_status& block_pack_status,
-                      t_molecule_link* unclustered_list_head,
-                      const int& unclustered_list_head_size,
-                      std::unordered_map<AtomNetId, int>& net_output_feeds_driving_block_input,
-                      std::map<const t_model*, std::vector<t_logical_block_type_ptr>>& primitive_candidate_block_types) {
-    const AtomContext& atom_ctx = g_vpr_ctx.atom();
-    const DeviceContext& device_ctx = g_vpr_ctx.device();
-
-    block_pack_status = cluster_legalizer.add_mol_to_cluster(next_molecule,
-                                                             legalization_cluster_id);
-
-    auto blk_id = next_molecule->atom_block_ids[next_molecule->root];
-    VTR_ASSERT(blk_id);
-
-    std::string blk_name = atom_ctx.nlist.block_name(blk_id);
-    const t_model* blk_model = atom_ctx.nlist.block_model(blk_id);
-
-    if (block_pack_status != e_block_pack_status::BLK_PASSED) {
-        if (packer_opts.pack_verbosity > 2) {
-            if (block_pack_status == e_block_pack_status::BLK_FAILED_ROUTE) {
-                VTR_LOG("\tNO_ROUTE: '%s' (%s)", blk_name.c_str(), blk_model->name);
-                VTR_LOGV(next_molecule->pack_pattern, " molecule %s molecule_size %zu",
-                         next_molecule->pack_pattern->name, next_molecule->atom_block_ids.size());
-                VTR_LOG("\n");
-                fflush(stdout);
-            } else if (block_pack_status == e_block_pack_status::BLK_FAILED_FLOORPLANNING) {
-                VTR_LOG("\tFAILED_FLOORPLANNING_CONSTRAINTS_CHECK: '%s' (%s)", blk_name.c_str(), blk_model->name);
-                VTR_LOG("\n");
-            } else {
-                VTR_LOG("\tFAILED_FEASIBILITY_CHECK: '%s' (%s)", blk_name.c_str(), blk_model->name, block_pack_status);
-                VTR_LOGV(next_molecule->pack_pattern, " molecule %s molecule_size %zu",
-                         next_molecule->pack_pattern->name, next_molecule->atom_block_ids.size());
-                VTR_LOG("\n");
-                fflush(stdout);
-            }
-        }
-
-        next_molecule = get_molecule_for_cluster(cluster_legalizer.get_cluster_pb(legalization_cluster_id),
-                                                 attraction_groups,
-                                                 allow_unrelated_clustering,
-                                                 packer_opts.prioritize_transitive_connectivity,
-                                                 packer_opts.transitive_fanout_threshold,
-                                                 packer_opts.feasible_block_array_size,
-                                                 &cluster_stats.num_unrelated_clustering_attempts,
-                                                 prepacker,
-                                                 cluster_legalizer,
-                                                 clb_inter_blk_nets,
-                                                 legalization_cluster_id,
-                                                 packer_opts.pack_verbosity,
-                                                 unclustered_list_head,
-                                                 unclustered_list_head_size,
-                                                 primitive_candidate_block_types);
-        if (prev_molecule == next_molecule) {
-            num_same_molecules++;
-        }
-        return;
-    }
-
-    /* Continue packing by filling smallest cluster */
-    if (packer_opts.pack_verbosity > 2) {
-        VTR_LOG("\tPASSED: '%s' (%s)", blk_name.c_str(), blk_model->name);
-        VTR_LOGV(next_molecule->pack_pattern, " molecule %s molecule_size %zu",
-                 next_molecule->pack_pattern->name, next_molecule->atom_block_ids.size());
-        VTR_LOG("\n");
-    }
-
-    fflush(stdout);
-
-    //Since molecule passed, update num_molecules_processed
-    cluster_stats.num_molecules_processed++;
-    cluster_stats.mols_since_last_print++;
-    print_pack_status(num_clb, cluster_stats.num_molecules,
-                      cluster_stats.num_molecules_processed,
-                      cluster_stats.mols_since_last_print,
-                      device_ctx.grid.width(),
-                      device_ctx.grid.height(),
-                      attraction_groups,
-                      cluster_legalizer);
-
-    update_cluster_stats(next_molecule,
-                         cluster_legalizer,
-                         is_clock,  //Set of all clocks
-                         is_global, //Set of all global signals (currently clocks)
-                         packer_opts.global_clocks, packer_opts.alpha, packer_opts.beta, packer_opts.timing_driven,
-                         packer_opts.connection_driven,
-                         high_fanout_threshold,
-                         *timing_info,
-                         attraction_groups,
-                         net_output_feeds_driving_block_input);
-    cluster_stats.num_unrelated_clustering_attempts = 0;
-
-    if (packer_opts.timing_driven) {
-        cluster_stats.blocks_since_last_analysis++; /* historically, timing slacks were recomputed after X number of blocks were packed, but this doesn't significantly alter results so I (jluu) did not port the code */
-    }
-    next_molecule = get_molecule_for_cluster(cluster_legalizer.get_cluster_pb(legalization_cluster_id),
-                                             attraction_groups,
-                                             allow_unrelated_clustering,
-                                             packer_opts.prioritize_transitive_connectivity,
-                                             packer_opts.transitive_fanout_threshold,
-                                             packer_opts.feasible_block_array_size,
-                                             &cluster_stats.num_unrelated_clustering_attempts,
-                                             prepacker,
-                                             cluster_legalizer,
-                                             clb_inter_blk_nets,
-                                             legalization_cluster_id,
-                                             packer_opts.pack_verbosity,
-                                             unclustered_list_head,
-                                             unclustered_list_head_size,
-                                             primitive_candidate_block_types);
-
-    if (prev_molecule == next_molecule) {
-        num_same_molecules++;
-    }
-}
-
-void store_cluster_info_and_free(const t_packer_opts& packer_opts,
-                                 const LegalizationClusterId legalization_cluster_id,
-                                 const t_logical_block_type_ptr logic_block_type,
-                                 const t_pb_type* le_pb_type,
-                                 std::vector<int>& le_count,
-                                 const ClusterLegalizer& cluster_legalizer,
-                                 vtr::vector<LegalizationClusterId, std::vector<AtomNetId>>& clb_inter_blk_nets) {
-    const AtomContext& atom_ctx = g_vpr_ctx.atom();
-
-    /* store info that will be used later in packing from pb_stats and free the rest */
-    t_pb* cur_pb = cluster_legalizer.get_cluster_pb(legalization_cluster_id);
-    t_pb_stats* pb_stats = cur_pb->pb_stats;
-    for (const AtomNetId mnet_id : pb_stats->marked_nets) {
-        int external_terminals = atom_ctx.nlist.net_pins(mnet_id).size() - pb_stats->num_pins_of_net_in_pb[mnet_id];
-        /* Check if external terminals of net is within the fanout limit and that there exists external terminals */
-        if (external_terminals < packer_opts.transitive_fanout_threshold && external_terminals > 0) {
-            clb_inter_blk_nets[legalization_cluster_id].push_back(mnet_id);
-        }
-    }
-
-    // update the data structure holding the LE counts
-    update_le_count(cur_pb, logic_block_type, le_pb_type, le_count);
-
-    //print clustering progress incrementally
-    //print_pack_status(num_clb, num_molecules, num_molecules_processed, mols_since_last_print, device_ctx.grid.width(), device_ctx.grid.height());
-}
-
 /*****************************************/
 void update_timing_gain_values(const AtomNetId net_id,
                                t_pb* cur_pb,
@@ -686,7 +509,7 @@ void update_timing_gain_values(const AtomNetId net_id,
                                enum e_net_relation_to_clustered_block net_relation_to_clustered_block,
                                const SetupTimingInfo& timing_info,
                                const std::unordered_set<AtomNetId>& is_global,
-                               std::unordered_map<AtomNetId, int>& net_output_feeds_driving_block_input) {
+                               const std::unordered_set<AtomNetId>& net_output_feeds_driving_block_input) {
     /*This function is called when the timing_gain values on the atom net*
      *net_id requires updating.   */
     float timinggain;
@@ -696,7 +519,7 @@ void update_timing_gain_values(const AtomNetId net_id,
     /* Check if this atom net lists its driving atom block twice.  If so, avoid  *
      * double counting this atom block by skipping the first (driving) pin. */
     auto pins = atom_ctx.nlist.net_pins(net_id);
-    if (net_output_feeds_driving_block_input[net_id] != 0)
+    if (net_output_feeds_driving_block_input.count(net_id) != 0)
         pins = atom_ctx.nlist.net_sinks(net_id);
 
     if (net_relation_to_clustered_block == OUTPUT
@@ -747,7 +570,7 @@ void mark_and_update_partial_gain(const AtomNetId net_id,
                                   const SetupTimingInfo& timing_info,
                                   const std::unordered_set<AtomNetId>& is_global,
                                   const int high_fanout_net_threshold,
-                                  std::unordered_map<AtomNetId, int>& net_output_feeds_driving_block_input) {
+                                  const std::unordered_set<AtomNetId>& net_output_feeds_driving_block_input) {
 
     const AtomContext& atom_ctx = g_vpr_ctx.atom();
     t_pb* cur_pb = atom_ctx.lookup.atom_pb(clustered_blk_id)->parent_pb;
@@ -781,7 +604,7 @@ void mark_and_update_partial_gain(const AtomNetId net_id,
          * If so, avoid double counting by skipping the first (driving) pin. */
 
         auto pins = atom_ctx.nlist.net_pins(net_id);
-        if (net_output_feeds_driving_block_input[net_id] != 0)
+        if (net_output_feeds_driving_block_input.count(net_id) != 0)
             //We implicitly assume here that net_output_feeds_driver_block_input[net_id] is 2
             //(i.e. the net loops back to the block only once)
             pins = atom_ctx.nlist.net_sinks(net_id);
@@ -891,7 +714,7 @@ void update_cluster_stats(const t_pack_molecule* molecule,
                           const int high_fanout_net_threshold,
                           const SetupTimingInfo& timing_info,
                           AttractionInfo& attraction_groups,
-                          std::unordered_map<AtomNetId, int>& net_output_feeds_driving_block_input) {
+                          const std::unordered_set<AtomNetId>& net_output_feeds_driving_block_input) {
 
     int molecule_size;
     int iblock;
@@ -995,115 +818,6 @@ void update_cluster_stats(const t_pack_molecule* molecule,
     }
 }
 
-void start_new_cluster(ClusterLegalizer& cluster_legalizer,
-                       LegalizationClusterId& legalization_cluster_id,
-                       t_pack_molecule* molecule,
-                       std::map<t_logical_block_type_ptr, size_t>& num_used_type_instances,
-                       const float target_device_utilization,
-                       const t_arch* arch,
-                       const std::string& device_layout_name,
-                       const std::map<const t_model*, std::vector<t_logical_block_type_ptr>>& primitive_candidate_block_types,
-                       int verbosity,
-                       bool balance_block_type_utilization) {
-
-    const AtomContext& atom_ctx = g_vpr_ctx.atom();
-    DeviceContext& mutable_device_ctx = g_vpr_ctx.mutable_device();
-    const DeviceContext& device_ctx = g_vpr_ctx.mutable_device();
-
-    /* Allocate a dummy initial cluster and load a atom block as a seed and check if it is legal */
-    AtomBlockId root_atom = molecule->atom_block_ids[molecule->root];
-    const std::string& root_atom_name = atom_ctx.nlist.block_name(root_atom);
-    const t_model* root_model = atom_ctx.nlist.block_model(root_atom);
-
-    auto itr = primitive_candidate_block_types.find(root_model);
-    VTR_ASSERT(itr != primitive_candidate_block_types.end());
-    std::vector<t_logical_block_type_ptr> candidate_types = itr->second;
-
-    if (balance_block_type_utilization) {
-        //We sort the candidate types in ascending order by their current utilization.
-        //This means that the packer will prefer to use types with lower utilization.
-        //This is a naive approach to try balancing utilization when multiple types can
-        //support the same primitive(s).
-        std::stable_sort(candidate_types.begin(), candidate_types.end(),
-                         [&](t_logical_block_type_ptr lhs, t_logical_block_type_ptr rhs) {
-                             int lhs_num_instances = 0;
-                             int rhs_num_instances = 0;
-                             // Count number of instances for each type
-                             for (auto type : lhs->equivalent_tiles)
-                                 lhs_num_instances += device_ctx.grid.num_instances(type, -1);
-                             for (auto type : rhs->equivalent_tiles)
-                                 rhs_num_instances += device_ctx.grid.num_instances(type, -1);
-
-                             float lhs_util = vtr::safe_ratio<float>(num_used_type_instances[lhs], lhs_num_instances);
-                             float rhs_util = vtr::safe_ratio<float>(num_used_type_instances[rhs], rhs_num_instances);
-                             //Lower util first
-                             return lhs_util < rhs_util;
-                         });
-    }
-
-    if (verbosity > 2) {
-        VTR_LOG("\tSeed: '%s' (%s)", root_atom_name.c_str(), root_model->name);
-        VTR_LOGV(molecule->pack_pattern, " molecule_type %s molecule_size %zu",
-                 molecule->pack_pattern->name, molecule->atom_block_ids.size());
-        VTR_LOG("\n");
-    }
-
-    //Try packing into each candidate type
-    bool success = false;
-    t_logical_block_type_ptr block_type;
-    LegalizationClusterId new_cluster_id;
-    for (auto type : candidate_types) {
-        //Try packing into each mode
-        e_block_pack_status pack_result = e_block_pack_status::BLK_STATUS_UNDEFINED;
-        for (int j = 0; j < type->pb_graph_head->pb_type->num_modes && !success; j++) {
-            std::tie(pack_result, new_cluster_id) = cluster_legalizer.start_new_cluster(molecule, type, j);
-            success = (pack_result == e_block_pack_status::BLK_PASSED);
-        }
-
-        if (success) {
-            VTR_LOGV(verbosity > 2, "\tPASSED_SEED: Block Type %s\n", type->name.c_str());
-            // If clustering succeeds return the new_cluster_id and type.
-            legalization_cluster_id = new_cluster_id;
-            block_type = type;
-            break;
-        } else {
-            VTR_LOGV(verbosity > 2, "\tFAILED_SEED: Block Type %s\n", type->name.c_str());
-        }
-    }
-
-    if (!success) {
-        //Explored all candidates
-        if (molecule->type == MOLECULE_FORCED_PACK) {
-            VPR_FATAL_ERROR(VPR_ERROR_PACK,
-                            "Can not find any logic block that can implement molecule.\n"
-                            "\tPattern %s %s\n",
-                            molecule->pack_pattern->name,
-                            root_atom_name.c_str());
-        } else {
-            VPR_FATAL_ERROR(VPR_ERROR_PACK,
-                            "Can not find any logic block that can implement molecule.\n"
-                            "\tAtom %s (%s)\n",
-                            root_atom_name.c_str(), root_model->name);
-        }
-    }
-
-    VTR_ASSERT(success);
-
-    //Successfully create cluster
-    num_used_type_instances[block_type]++;
-
-    /* Expand FPGA size if needed */
-    // Check used type instances against the possible equivalent physical locations
-    unsigned int num_instances = 0;
-    for (auto equivalent_tile : block_type->equivalent_tiles) {
-        num_instances += device_ctx.grid.num_instances(equivalent_tile, -1);
-    }
-
-    if (num_used_type_instances[block_type] > num_instances) {
-        mutable_device_ctx.grid = create_device_grid(device_layout_name, arch->grid_layouts, num_used_type_instances, target_device_utilization);
-    }
-}
-
 t_pack_molecule* get_highest_gain_molecule(t_pb* cur_pb,
                                            AttractionInfo& attraction_groups,
                                            const enum e_gain_type gain_mode,
@@ -1114,7 +828,7 @@ t_pack_molecule* get_highest_gain_molecule(t_pb* cur_pb,
                                            bool prioritize_transitive_connectivity,
                                            int transitive_fanout_threshold,
                                            const int feasible_block_array_size,
-                                           std::map<const t_model*, std::vector<t_logical_block_type_ptr>>& primitive_candidate_block_types) {
+                                           const std::map<const t_model*, std::vector<t_logical_block_type_ptr>>& primitive_candidate_block_types) {
     /*
      * This routine populates a list of feasible blocks outside the cluster, then returns the best candidate for the cluster.
      * If there are no feasible blocks it returns a nullptr.
@@ -1269,7 +983,7 @@ void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb,
                                                          AttractionInfo& attraction_groups,
                                                          const int feasible_block_array_size,
                                                          LegalizationClusterId legalization_cluster_id,
-                                                         std::map<const t_model*, std::vector<t_logical_block_type_ptr>>& primitive_candidate_block_types) {
+                                                         const std::map<const t_model*, std::vector<t_logical_block_type_ptr>>& primitive_candidate_block_types) {
     const AtomNetlist& atom_nlist = g_vpr_ctx.atom().nlist;
 
     auto cluster_type = cluster_legalizer.get_cluster_type(legalization_cluster_id);
@@ -1301,7 +1015,7 @@ void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb,
         const auto& atom_model = atom_nlist.block_model(atom_id);
         auto itr = primitive_candidate_block_types.find(atom_model);
         VTR_ASSERT(itr != primitive_candidate_block_types.end());
-        std::vector<t_logical_block_type_ptr>& candidate_types = itr->second;
+        const std::vector<t_logical_block_type_ptr>& candidate_types = itr->second;
 
         //Only consider molecules that are unpacked and of the correct type
         if (!cluster_legalizer.is_atom_clustered(atom_id)
@@ -1322,7 +1036,7 @@ void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb,
             const auto& atom_model = atom_nlist.block_model(atom_id);
             auto itr = primitive_candidate_block_types.find(atom_model);
             VTR_ASSERT(itr != primitive_candidate_block_types.end());
-            std::vector<t_logical_block_type_ptr>& candidate_types = itr->second;
+            const std::vector<t_logical_block_type_ptr>& candidate_types = itr->second;
 
             //Only consider molecules that are unpacked and of the correct type
             if (!cluster_legalizer.is_atom_clustered(atom_id)
@@ -1353,7 +1067,7 @@ void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb,
         const auto& atom_model = atom_nlist.block_model(blk_id);
         auto itr = primitive_candidate_block_types.find(atom_model);
         VTR_ASSERT(itr != primitive_candidate_block_types.end());
-        std::vector<t_logical_block_type_ptr>& candidate_types = itr->second;
+        const std::vector<t_logical_block_type_ptr>& candidate_types = itr->second;
 
         //Only consider molecules that are unpacked and of the correct type
         if (!cluster_legalizer.is_atom_clustered(blk_id)
@@ -1414,7 +1128,7 @@ t_pack_molecule* get_molecule_for_cluster(t_pb* cur_pb,
                                           int verbosity,
                                           t_molecule_link* unclustered_list_head,
                                           const int& unclustered_list_head_size,
-                                          std::map<const t_model*, std::vector<t_logical_block_type_ptr>>& primitive_candidate_block_types) {
+                                          const std::map<const t_model*, std::vector<t_logical_block_type_ptr>>& primitive_candidate_block_types) {
     /* Finds the block with the greatest gain that satisfies the
      * input, clock and capacity constraints of a cluster that are
      * passed in.  If no suitable block is found it returns nullptr.
@@ -1654,6 +1368,26 @@ std::map<const t_model*, std::vector<t_logical_block_type_ptr>> identify_primiti
     return model_candidates;
 }
 
+std::unordered_set<AtomNetId> identify_net_output_feeds_driving_block_input(const AtomNetlist& atom_netlist) { // Returns the nets that have a sink pin on their own driver block.
+    std::unordered_set<AtomNetId> net_output_feeds_driving_block_input; // Result set, filled in by the scan below.
+
+    for (AtomNetId net : atom_netlist.nets()) { // Examine every net in the given netlist.
+        AtomPinId driver_pin = atom_netlist.net_driver(net);
+        AtomBlockId driver_block = atom_netlist.pin_block(driver_pin); // Block that owns the driver pin.
+
+        for (AtomPinId sink_pin : atom_netlist.net_sinks(net)) {
+            AtomBlockId sink_block = atom_netlist.pin_block(sink_pin);
+
+            if (driver_block == sink_block) { // This sink is on the same block that drives the net.
+                net_output_feeds_driving_block_input.insert(net);
+                break; // Only membership is recorded, so the first match is enough for this net.
+            }
+        }
+    }
+
+    return net_output_feeds_driving_block_input;
+}
+
 size_t update_pb_type_count(const t_pb* pb, std::map<t_pb_type*, int>& pb_type_count, size_t depth) {
     size_t max_depth = depth;
 
@@ -1723,7 +1457,7 @@ void print_pb_type_count(const ClusteredNetlist& clb_nlist) {
     VTR_LOG("\n");
 }
 
-t_logical_block_type_ptr identify_logic_block_type(std::map<const t_model*, std::vector<t_logical_block_type_ptr>>& primitive_candidate_block_types) {
+t_logical_block_type_ptr identify_logic_block_type(const std::map<const t_model*, std::vector<t_logical_block_type_ptr>>& primitive_candidate_block_types) {
     std::string lut_name = ".names";
 
     for (auto& model : primitive_candidate_block_types) {
@@ -1759,7 +1493,7 @@ t_pb_type* identify_le_block_type(t_logical_block_type_ptr logic_block_type) {
     return nullptr;
 }
 
-void update_le_count(const t_pb* pb, const t_logical_block_type_ptr logic_block_type, const t_pb_type* le_pb_type, std::vector<int>& le_count) {
+void update_le_count(const t_pb* pb, const t_logical_block_type_ptr logic_block_type, const t_pb_type* le_pb_type, std::array<int, 3>& le_count) {
     // if this cluster doesn't contain LEs or there
     // are no les in this architecture, ignore it
     if (!logic_block_type || pb->pb_graph_node != logic_block_type->pb_graph_head || !le_pb_type)
@@ -1825,7 +1559,7 @@ bool pb_used_for_blif_model(const t_pb* pb, const std::string& blif_model_name)
     return false;
 }
 
-void print_le_count(std::vector<int>& le_count, const t_pb_type* le_pb_type) {
+void print_le_count(const std::array<int, 3>& le_count, const t_pb_type* le_pb_type) {
     VTR_LOG("\nLogic Element (%s) detailed count:\n", le_pb_type->name);
     VTR_LOG("  Total number of Logic Elements used : %d\n", le_count[0] + le_count[1] + le_count[2]);
     VTR_LOG("  LEs used for logic and registers    : %d\n", le_count[0]);
diff --git a/vpr/src/pack/cluster_util.h b/vpr/src/pack/cluster_util.h
index c794daf066d..b4f4e3a9440 100644
--- a/vpr/src/pack/cluster_util.h
+++ b/vpr/src/pack/cluster_util.h
@@ -1,6 +1,7 @@
 #ifndef CLUSTER_UTIL_H
 #define CLUSTER_UTIL_H
 
+#include <unordered_set>
 #include <vector>
 #include "cluster_legalizer.h"
 #include "pack_types.h"
@@ -63,18 +64,9 @@ struct t_molecule_stats {
     int num_used_ext_outputs = 0; //Number of *used external* output pins across all primitives in molecule
 };
 
-struct t_cluster_progress_stats {
-    int num_molecules = 0;
-    int num_molecules_processed = 0;
-    int mols_since_last_print = 0;
-    int blocks_since_last_analysis = 0;
-    int num_unrelated_clustering_attempts = 0;
-};
-
 /* Useful data structures for creating or modifying clusters */
 struct t_clustering_data {
-    int* hill_climbing_inputs_avail;
-
+    int unclustered_list_head_size = 0;
     /* Keeps a linked list of the unclustered blocks to speed up looking for *                                                                  
      * unclustered blocks with a certain number of *external* inputs.        *
      * [0..lut_size].  Unclustered_list_head[i] points to the head of the    *
@@ -83,16 +75,6 @@ struct t_clustering_data {
 
     //Maintaining a linked list of free molecule data for speed
     t_molecule_link* memory_pool = nullptr;
-
-    /* Does the atom block that drives the output of this atom net also appear as a   *
-     * receiver (input) pin of the atom net? If so, then by how much?
-     *
-     * This is used in the gain routines to avoid double counting the connections from   *
-     * the current cluster to other blocks (hence yielding better clusterings). *
-     * The only time an atom block should connect to the same atom net *
-     * twice is when one connection is an output and the other is an input, *
-     * so this should take care of all multiple connections.                */
-    std::unordered_map<AtomNetId, int> net_output_feeds_driving_block_input;
 };
 
 /***********************************/
@@ -112,8 +94,7 @@ void calc_init_packing_timing(const t_packer_opts& packer_opts,
 /*
  * @brief Free the clustering data structures.
  */
-void free_clustering_data(const t_packer_opts& packer_opts,
-                          t_clustering_data& clustering_data);
+void free_clustering_data(t_clustering_data& clustering_data);
 
 /*
  * @brief Check clustering legality and output it.
@@ -154,8 +135,6 @@ void remove_molecule_from_pb_stats_candidates(t_pack_molecule* molecule,
 void alloc_and_init_clustering(const t_molecule_stats& max_molecule_stats,
                                const Prepacker& prepacker,
                                t_clustering_data& clustering_data,
-                               std::unordered_map<AtomNetId, int>& net_output_feeds_driving_block_input,
-                               int& unclustered_list_head_size,
                                int num_molecules);
 
 /*
@@ -195,8 +174,7 @@ void print_pack_status_header();
 /*
  * @brief Incrementally print progress updates during clustering.
  */
-void print_pack_status(int num_clb,
-                       int tot_num_molecules,
+void print_pack_status(int tot_num_molecules,
                        int num_molecules_processed,
                        int& mols_since_last_print,
                        int device_width,
@@ -212,42 +190,6 @@ void print_pack_status(int num_clb,
 void rebuild_attraction_groups(AttractionInfo& attraction_groups,
                                const ClusterLegalizer& cluster_legalizer);
 
-/*
- * @brief Try to pack next_molecule into the given cluster. If this succeeds
- *        prepares the next_molecule with a new value to pack next iteration.
- *
- * This method will print the pack status and update the cluster stats.
- */
-void try_fill_cluster(ClusterLegalizer& cluster_legalizer,
-                      const Prepacker& prepacker,
-                      const t_packer_opts& packer_opts,
-                      t_pack_molecule*& prev_molecule,
-                      t_pack_molecule*& next_molecule,
-                      int& num_same_molecules,
-                      t_cluster_progress_stats& cluster_stats,
-                      int num_clb,
-                      const LegalizationClusterId legalization_cluster_id,
-                      AttractionInfo& attraction_groups,
-                      vtr::vector<LegalizationClusterId, std::vector<AtomNetId>>& clb_inter_blk_nets,
-                      bool allow_unrelated_clustering,
-                      const int& high_fanout_threshold,
-                      const std::unordered_set<AtomNetId>& is_clock,
-                      const std::unordered_set<AtomNetId>& is_global,
-                      const std::shared_ptr<SetupTimingInfo>& timing_info,
-                      e_block_pack_status& block_pack_status,
-                      t_molecule_link* unclustered_list_head,
-                      const int& unclustered_list_head_size,
-                      std::unordered_map<AtomNetId, int>& net_output_feeds_driving_block_input,
-                      std::map<const t_model*, std::vector<t_logical_block_type_ptr>>& primitive_candidate_block_types);
-
-void store_cluster_info_and_free(const t_packer_opts& packer_opts,
-                                 const LegalizationClusterId clb_index,
-                                 const t_logical_block_type_ptr logic_block_type,
-                                 const t_pb_type* le_pb_type,
-                                 std::vector<int>& le_count,
-                                 const ClusterLegalizer& cluster_legalizer,
-                                 vtr::vector<LegalizationClusterId, std::vector<AtomNetId>>& clb_inter_blk_nets);
-
 void update_connection_gain_values(const AtomNetId net_id,
                                    const AtomBlockId clustered_blk_id,
                                    t_pb* cur_pb,
@@ -260,7 +202,7 @@ void update_timing_gain_values(const AtomNetId net_id,
                                enum e_net_relation_to_clustered_block net_relation_to_clustered_block,
                                const SetupTimingInfo& timing_info,
                                const std::unordered_set<AtomNetId>& is_global,
-                               std::unordered_map<AtomNetId, int>& net_output_feeds_driving_block_input);
+                               const std::unordered_set<AtomNetId>& net_output_feeds_driving_block_input);
 
 /*
  * @brief Updates the marked data structures, and if gain_flag is GAIN, the gain
@@ -281,7 +223,7 @@ void mark_and_update_partial_gain(const AtomNetId net_id,
                                   const SetupTimingInfo& timing_info,
                                   const std::unordered_set<AtomNetId>& is_global,
                                   const int high_fanout_net_threshold,
-                                  std::unordered_map<AtomNetId, int>& net_output_feeds_driving_block_input);
+                                  const std::unordered_set<AtomNetId>& net_output_feeds_driving_block_input);
 
 /*
  * @brief Updates the total  gain array to reflect the desired tradeoff between
@@ -309,24 +251,7 @@ void update_cluster_stats(const t_pack_molecule* molecule,
                           const int high_fanout_net_threshold,
                           const SetupTimingInfo& timing_info,
                           AttractionInfo& attraction_groups,
-                          std::unordered_map<AtomNetId, int>& net_output_feeds_driving_block_input);
-
-/*
- * @brief Given a starting seed block, start_new_cluster determines the next
- *        cluster type to use.
- *
- * It expands the FPGA if it cannot find a legal cluster for the atom block
- */
-void start_new_cluster(ClusterLegalizer& cluster_legalizer,
-                       LegalizationClusterId& legalization_cluster_id,
-                       t_pack_molecule* molecule,
-                       std::map<t_logical_block_type_ptr, size_t>& num_used_type_instances,
-                       const float target_device_utilization,
-                       const t_arch* arch,
-                       const std::string& device_layout_name,
-                       const std::map<const t_model*, std::vector<t_logical_block_type_ptr>>& primitive_candidate_block_types,
-                       int verbosity,
-                       bool balance_block_type_utilization);
+                          const std::unordered_set<AtomNetId>& net_output_feeds_driving_block_input);
 
 /*
  * @brief Get candidate molecule to pack into currently open cluster
@@ -351,7 +276,7 @@ t_pack_molecule* get_highest_gain_molecule(t_pb* cur_pb,
                                            bool prioritize_transitive_connectivity,
                                            int transitive_fanout_threshold,
                                            const int feasible_block_array_size,
-                                           std::map<const t_model*, std::vector<t_logical_block_type_ptr>>& primitive_candidate_block_types);
+                                           const std::map<const t_model*, std::vector<t_logical_block_type_ptr>>& primitive_candidate_block_types);
 
 /*
  * @brief Add molecules with strong connectedness to the current cluster to the
@@ -392,7 +317,7 @@ void add_cluster_molecule_candidates_by_attraction_group(t_pb* cur_pb,
                                                          AttractionInfo& attraction_groups,
                                                          const int feasible_block_array_size,
                                                          LegalizationClusterId clb_index,
-                                                         std::map<const t_model*, std::vector<t_logical_block_type_ptr>>& primitive_candidate_block_types);
+                                                         const std::map<const t_model*, std::vector<t_logical_block_type_ptr>>& primitive_candidate_block_types);
 
 /*
  * @brief Add molecules based on transitive connections (eg. 2 hops away) with
@@ -421,7 +346,7 @@ t_pack_molecule* get_molecule_for_cluster(t_pb* cur_pb,
                                           int verbosity,
                                           t_molecule_link* unclustered_list_head,
                                           const int& unclustered_list_head_size,
-                                          std::map<const t_model*, std::vector<t_logical_block_type_ptr>>& primitive_candidate_block_types);
+                                          const std::map<const t_model*, std::vector<t_logical_block_type_ptr>>& primitive_candidate_block_types);
 
 /*
  * @brief Calculates molecule statistics for a single molecule.
@@ -455,6 +380,12 @@ void load_transitive_fanout_candidates(LegalizationClusterId cluster_index,
 
 std::map<const t_model*, std::vector<t_logical_block_type_ptr>> identify_primitive_candidate_block_types();
 
+/**
+ * @brief Identify the nets in the atom netlist whose driving atom block also
+ *        has a receiver (input) pin on that same net.
+ */
+std::unordered_set<AtomNetId> identify_net_output_feeds_driving_block_input(const AtomNetlist& atom_netlist);
+
 /**
  * @brief This function update the pb_type_count data structure by incrementing
  *        the number of used pb_types in the given packed cluster t_pb
@@ -465,7 +396,7 @@ size_t update_pb_type_count(const t_pb* pb, std::map<t_pb_type*, int>& pb_type_c
  * @brief This function updates the le_count data structure from the given
  *        packed cluster.
  */
-void update_le_count(const t_pb* pb, const t_logical_block_type_ptr logic_block_type, const t_pb_type* le_pb_type, std::vector<int>& le_count);
+void update_le_count(const t_pb* pb, const t_logical_block_type_ptr logic_block_type, const t_pb_type* le_pb_type, std::array<int, 3>& le_count);
 
 void print_pb_type_count_recurr(t_pb_type* type, size_t max_name_chars, size_t curr_depth, std::map<t_pb_type*, int>& pb_type_count);
 
@@ -478,7 +409,7 @@ void print_pb_type_count(const ClusteredNetlist& clb_nlist);
  * @brief This function identifies the logic block type which is defined by the
  *        block type which has a lut primitive.
  */
-t_logical_block_type_ptr identify_logic_block_type(std::map<const t_model*, std::vector<t_logical_block_type_ptr>>& primitive_candidate_block_types);
+t_logical_block_type_ptr identify_logic_block_type(const std::map<const t_model*, std::vector<t_logical_block_type_ptr>>& primitive_candidate_block_types);
 
 /*
  * @brief This function returns the pb_type that is similar to Logic Element (LE)
@@ -499,7 +430,7 @@ bool pb_used_for_blif_model(const t_pb* pb, const std::string& blif_model_name);
 /*
  * @brief Print the LE count data strurture.
  */
-void print_le_count(std::vector<int>& le_count, const t_pb_type* le_pb_type);
+void print_le_count(const std::array<int, 3>& le_count, const t_pb_type* le_pb_type);
 
 /*
  * @brief Given a pointer to a pb in a cluster, this routine returns a pointer
diff --git a/vpr/src/pack/greedy_clusterer.cpp b/vpr/src/pack/greedy_clusterer.cpp
index 98ca4243625..bd1ef859a48 100644
--- a/vpr/src/pack/greedy_clusterer.cpp
+++ b/vpr/src/pack/greedy_clusterer.cpp
@@ -37,22 +37,29 @@
  */
 
 #include "greedy_clusterer.h"
+#include <array>
+#include <cstdio>
 #include <map>
+#include <string>
 #include <vector>
+#include "SetupGrid.h"
 #include "atom_netlist.h"
 #include "attraction_groups.h"
 #include "cluster_legalizer.h"
 #include "cluster_util.h"
 #include "constraints_report.h"
 #include "greedy_seed_selector.h"
+#include "pack_types.h"
 #include "physical_types.h"
 #include "prepack.h"
+#include "vpr_context.h"
+#include "vtr_math.h"
 #include "vtr_vector.h"
 
 GreedyClusterer::GreedyClusterer(const t_packer_opts& packer_opts,
                                  const t_analysis_opts& analysis_opts,
                                  const AtomNetlist& atom_netlist,
-                                 const t_arch* arch,
+                                 const t_arch& arch,
                                  const t_pack_high_fanout_thresholds& high_fanout_thresholds,
                                  const std::unordered_set<AtomNetId>& is_clock,
                                  const std::unordered_set<AtomNetId>& is_global)
@@ -63,113 +70,61 @@ GreedyClusterer::GreedyClusterer(const t_packer_opts& packer_opts,
           high_fanout_thresholds_(high_fanout_thresholds),
           is_clock_(is_clock),
           is_global_(is_global),
-          primitive_candidate_block_types_(identify_primitive_candidate_block_types()) {}
+          primitive_candidate_block_types_(identify_primitive_candidate_block_types()),
+          log_verbosity_(packer_opts.pack_verbosity),
+          net_output_feeds_driving_block_input_(identify_net_output_feeds_driving_block_input(atom_netlist)) {
+
+}
 
 std::map<t_logical_block_type_ptr, size_t>
 GreedyClusterer::do_clustering(ClusterLegalizer& cluster_legalizer,
                                Prepacker& prepacker,
                                bool allow_unrelated_clustering,
                                bool balance_block_type_utilization,
-                               AttractionInfo& attraction_groups) {
-
-    /* Does the actual work of clustering multiple netlist blocks *
-     * into clusters.                                                  */
-
-    /* Algorithm employed
-     * 1.  Find type that can legally hold block and create cluster with pb info
-     * 2.  Populate started cluster
-     * 3.  Repeat 1 until no more blocks need to be clustered
-     *
-     */
-
-    /* This routine returns a map that details the number of used block type instances.
-     * The bool floorplan_regions_overfull also acts as a return value - it is set to
-     * true when one or more floorplan regions have more blocks assigned to them than
-     * they can fit.
-     */
+                               AttractionInfo& attraction_groups,
+                               DeviceContext& mutable_device_ctx) {
+    // This routine returns a map that details the number of used block type
+    // instances.
+    std::map<t_logical_block_type_ptr, size_t> num_used_type_instances;
 
     /****************************************************************
      * Initialization
      *****************************************************************/
-    t_clustering_data clustering_data;
-    t_cluster_progress_stats cluster_stats;
 
-    //int num_molecules, num_molecules_processed, mols_since_last_print, blocks_since_last_analysis,
-    int num_blocks_hill_added;
-
-    const int verbosity = packer_opts_.pack_verbosity;
-
-    int unclustered_list_head_size;
-    std::unordered_map<AtomNetId, int> net_output_feeds_driving_block_input;
-
-    cluster_stats.num_molecules_processed = 0;
-    cluster_stats.mols_since_last_print = 0;
-
-    std::map<t_logical_block_type_ptr, size_t> num_used_type_instances;
-
-    enum e_block_pack_status block_pack_status;
-
-    t_pack_molecule *next_molecule, *prev_molecule;
-
-    auto& device_ctx = g_vpr_ctx.mutable_device();
+    // The clustering stats holds information used for logging the progress
+    // of the clustering to the user.
+    // Reset the clustering stats in case the clusterer is called multiple times.
+    clustering_stats_ = t_cluster_progress_stats();
+    clustering_stats_.num_molecules = prepacker.get_num_molecules();
 
+    // TODO: Create a ClusteringTimingManager class.
+    //       This code relies on the prepacker, once the prepacker is moved to
+    //       the constructor, this code can also move to the constructor.
     std::shared_ptr<PreClusterDelayCalculator> clustering_delay_calc;
     std::shared_ptr<SetupTimingInfo> timing_info;
+    // Default criticalities set to zero (e.g. if not timing driven)
+    vtr::vector<AtomBlockId, float> atom_criticality(atom_netlist_.blocks().size(), 0.f);
+    if (packer_opts_.timing_driven) {
+        calc_init_packing_timing(packer_opts_, analysis_opts_, prepacker,
+                                 clustering_delay_calc, timing_info, atom_criticality);
+    }
 
-    // this data structure tracks the number of Logic Elements (LEs) used. It is
-    // populated only for architectures which has LEs. The architecture is assumed
-    // to have LEs only iff it has a logic block that contains LUT primitives and is
-    // the first pb_block to have more than one instance from the top of the hierarchy
-    // (All parent pb_block have one instance only and one mode only). Index 0 holds
-    // the number of LEs that are used for both logic (LUTs/adders) and registers.
-    // Index 1 holds the number of LEs that are used for logic (LUTs/adders) only.
-    // Index 2 holds the number of LEs that are used for registers only.
-    std::vector<int> le_count(3, 0);
-
-    int total_clb_num = 0;
+    // Calculate the max molecule stats, which is used for gain calculation.
+    const t_molecule_stats max_molecule_stats = prepacker.calc_max_molecule_stats(atom_netlist_);
 
+    // Initialize the information for the greedy candidate selector.
+    // TODO: Abstract into a candidate selector class.
     /* TODO: This is memory inefficient, fix if causes problems */
     /* Store stats on nets used by packed block, useful for determining transitively connected blocks
      * (eg. [A1, A2, ..]->[B1, B2, ..]->C implies cluster [A1, A2, ...] and C have a weak link) */
     vtr::vector<LegalizationClusterId, std::vector<AtomNetId>> clb_inter_blk_nets(atom_netlist_.blocks().size());
-
-    const t_molecule_stats max_molecule_stats = prepacker.calc_max_molecule_stats(atom_netlist_);
-
-    cluster_stats.num_molecules = prepacker.get_num_molecules();
-
-    if (packer_opts_.hill_climbing_flag) {
-        size_t max_cluster_size = cluster_legalizer.get_max_cluster_size();
-        clustering_data.hill_climbing_inputs_avail = new int[max_cluster_size + 1];
-        for (size_t i = 0; i < max_cluster_size + 1; i++)
-            clustering_data.hill_climbing_inputs_avail[i] = 0;
-    } else {
-        clustering_data.hill_climbing_inputs_avail = nullptr; /* if used, die hard */
-    }
-
-#if 0
-	check_for_duplicate_inputs ();
-#endif
-
+    // FIXME: This should be abstracted into a selector class. This is only used
+    //        for gain calculation and selecting candidate molecules.
+    t_clustering_data clustering_data;
     alloc_and_init_clustering(max_molecule_stats,
                               prepacker,
-                              clustering_data, net_output_feeds_driving_block_input,
-                              unclustered_list_head_size, cluster_stats.num_molecules);
-
-    // find the cluster type that has lut primitives
-    auto logic_block_type = identify_logic_block_type(primitive_candidate_block_types_);
-    // find a LE pb_type within the found logic_block_type
-    auto le_pb_type = identify_le_block_type(logic_block_type);
-
-    cluster_stats.blocks_since_last_analysis = 0;
-    num_blocks_hill_added = 0;
-
-    //Default criticalities set to zero (e.g. if not timing driven)
-    vtr::vector<AtomBlockId, float> atom_criticality(atom_netlist_.blocks().size(), 0.);
-
-    if (packer_opts_.timing_driven) {
-        calc_init_packing_timing(packer_opts_, analysis_opts_, prepacker,
-                                 clustering_delay_calc, timing_info, atom_criticality);
-    }
+                              clustering_data,
+                              clustering_stats_.num_molecules);
 
     // Create the greedy seed selector.
     GreedySeedSelector seed_selector(atom_netlist_,
@@ -179,195 +134,477 @@ GreedyClusterer::do_clustering(ClusterLegalizer& cluster_legalizer,
                                      atom_criticality);
 
     // Pick the first seed molecule.
-    t_pack_molecule* istart = seed_selector.get_next_seed(prepacker,
-                                                          cluster_legalizer);
-
-    print_pack_status_header();
+    t_pack_molecule* seed_mol = seed_selector.get_next_seed(prepacker,
+                                                            cluster_legalizer);
 
     /****************************************************************
      * Clustering
      *****************************************************************/
 
-    while (istart != nullptr) {
-        bool is_cluster_legal = false;
+    print_pack_status_header();
+
+    // Continue clustering as long as a valid seed is returned from the seed
+    // selector.
+    while (seed_mol != nullptr) {
+        // Check to ensure that this molecule is unclustered.
+        VTR_ASSERT(!cluster_legalizer.is_mol_clustered(seed_mol));
+
         // The basic algorithm:
         // 1) Try to put all the molecules in that you can without doing the
         //    full intra-lb route. Then do full legalization at the end.
         // 2) If the legalization at the end fails, try again, but this time
         //    do full legalization for each molecule added to the cluster.
-        const ClusterLegalizationStrategy legalization_strategies[] = {ClusterLegalizationStrategy::SKIP_INTRA_LB_ROUTE,
-                                                                       ClusterLegalizationStrategy::FULL};
-        for (const ClusterLegalizationStrategy strategy : legalization_strategies) {
-            // If the cluster is legal, no need to try a stronger cluster legalizer
-            // mode.
-            if (is_cluster_legal)
-                break;
-            // Set the legalization strategy of the cluster legalizer.
-            cluster_legalizer.set_legalization_strategy(strategy);
-
-            LegalizationClusterId legalization_cluster_id;
-
-            VTR_LOGV(verbosity > 2, "Complex block %d:\n", total_clb_num);
-
-            start_new_cluster(cluster_legalizer,
-                              legalization_cluster_id,
-                              istart,
-                              num_used_type_instances,
-                              packer_opts_.target_device_utilization,
-                              arch_, packer_opts_.device_layout,
-                              primitive_candidate_block_types_,
-                              verbosity,
-                              balance_block_type_utilization);
-
-            //initial molecule in cluster has been processed
-            cluster_stats.num_molecules_processed++;
-            cluster_stats.mols_since_last_print++;
-            print_pack_status(total_clb_num,
-                              cluster_stats.num_molecules,
-                              cluster_stats.num_molecules_processed,
-                              cluster_stats.mols_since_last_print,
-                              device_ctx.grid.width(),
-                              device_ctx.grid.height(),
+
+        // Try to grow a cluster from the seed molecule without doing intra-lb
+        // route for each molecule.
+        LegalizationClusterId new_cluster_id = try_grow_cluster(seed_mol,
+                                        ClusterLegalizationStrategy::SKIP_INTRA_LB_ROUTE,
+                                        cluster_legalizer,
+                                        prepacker,
+                                        allow_unrelated_clustering,
+                                        balance_block_type_utilization,
+                                        *timing_info,
+                                        clb_inter_blk_nets,
+                                        clustering_data,
+                                        attraction_groups,
+                                        num_used_type_instances,
+                                        mutable_device_ctx);
+
+        if (!new_cluster_id.is_valid()) {
+            // If the previous strategy failed, try to grow the cluster again,
+            // but this time perform full legalization for each molecule added
+            // to the cluster.
+            new_cluster_id = try_grow_cluster(seed_mol,
+                                       ClusterLegalizationStrategy::FULL,
+                                       cluster_legalizer,
+                                       prepacker,
+                                       allow_unrelated_clustering,
+                                       balance_block_type_utilization,
+                                       *timing_info,
+                                       clb_inter_blk_nets,
+                                       clustering_data,
+                                       attraction_groups,
+                                       num_used_type_instances,
+                                       mutable_device_ctx);
+        }
+
+        // Ensure that the seed was packed successfully.
+        VTR_ASSERT(new_cluster_id.is_valid());
+        VTR_ASSERT(cluster_legalizer.is_mol_clustered(seed_mol));
+
+        // Pick new seed.
+        seed_mol = seed_selector.get_next_seed(prepacker,
+                                               cluster_legalizer);
+    }
+
+    // If this architecture has an LE physical block, report its usage.
+    report_le_physical_block_usage(cluster_legalizer);
+
+    // Free the clustering data.
+    // FIXME: This struct should use standard data structures so it does not
+    //        have to be freed like this. This is also specific to the candidate
+    //        gain calculation.
+    free_clustering_data(clustering_data);
+
+    return num_used_type_instances;
+}
+
+LegalizationClusterId GreedyClusterer::try_grow_cluster(
+                                       t_pack_molecule* seed_mol,
+                                       ClusterLegalizationStrategy strategy,
+                                       ClusterLegalizer& cluster_legalizer,
+                                       Prepacker& prepacker,
+                                       bool allow_unrelated_clustering,
+                                       bool balance_block_type_utilization,
+                                       SetupTimingInfo& timing_info,
+                                       vtr::vector<LegalizationClusterId, std::vector<AtomNetId>>& clb_inter_blk_nets,
+                                       t_clustering_data& clustering_data,
+                                       AttractionInfo& attraction_groups,
+                                       std::map<t_logical_block_type_ptr, size_t>& num_used_type_instances,
+                                       DeviceContext& mutable_device_ctx) {
+
+    // Check to ensure that this molecule is unclustered.
+    VTR_ASSERT(!cluster_legalizer.is_mol_clustered(seed_mol));
+
+    // Set the legalization strategy of the cluster legalizer.
+    cluster_legalizer.set_legalization_strategy(strategy);
+
+    // Use the seed to start a new cluster.
+    LegalizationClusterId legalization_cluster_id = start_new_cluster(seed_mol,
+                                                                      cluster_legalizer,
+                                                                      balance_block_type_utilization,
+                                                                      num_used_type_instances,
+                                                                      mutable_device_ctx);
+
+    //initial molecule in cluster has been processed
+    print_pack_status(clustering_stats_.num_molecules,
+                      clustering_stats_.num_molecules_processed,
+                      clustering_stats_.mols_since_last_print,
+                      mutable_device_ctx.grid.width(),
+                      mutable_device_ctx.grid.height(),
+                      attraction_groups,
+                      cluster_legalizer);
+
+    int high_fanout_threshold = high_fanout_thresholds_.get_threshold(cluster_legalizer.get_cluster_type(legalization_cluster_id)->name);
+    update_cluster_stats(seed_mol,
+                         cluster_legalizer,
+                         is_clock_,  //Set of clock nets
+                         is_global_, //Set of global nets (currently all clocks)
+                         packer_opts_.global_clocks,
+                         packer_opts_.alpha, packer_opts_.beta,
+                         packer_opts_.timing_driven, packer_opts_.connection_driven,
+                         high_fanout_threshold,
+                         timing_info,
+                         attraction_groups,
+                         net_output_feeds_driving_block_input_);
+
+    int num_unrelated_clustering_attempts = 0;
+    t_pack_molecule *candidate_mol;
+    candidate_mol = get_molecule_for_cluster(cluster_legalizer.get_cluster_pb(legalization_cluster_id),
+                                             attraction_groups,
+                                             allow_unrelated_clustering,
+                                             packer_opts_.prioritize_transitive_connectivity,
+                                             packer_opts_.transitive_fanout_threshold,
+                                             packer_opts_.feasible_block_array_size,
+                                             &num_unrelated_clustering_attempts,
+                                             prepacker,
+                                             cluster_legalizer,
+                                             clb_inter_blk_nets,
+                                             legalization_cluster_id,
+                                             log_verbosity_,
+                                             clustering_data.unclustered_list_head,
+                                             clustering_data.unclustered_list_head_size,
+                                             primitive_candidate_block_types_);
+
+    /*
+     * When attraction groups are created, the purpose is to pack more densely by adding more molecules
+     * from the cluster's attraction group to the cluster. In a normal flow, (when attraction groups are
+     * not on), the cluster keeps being packed until the get_molecule routines return either a repeated
+     * molecule or a nullptr. When attraction groups are on, we want to keep exploring molecules for the
+     * cluster until a nullptr is returned. So, the number of repeated molecules allowed is increased to a
+     * large value.
+     */
+    int max_num_repeated_molecules = 1;
+    if (attraction_groups.num_attraction_groups() > 0)
+        max_num_repeated_molecules = attraction_groups_max_repeated_molecules_;
+
+    // Continuously try to cluster candidate molecules into the cluster
+    // until one of the following occurs:
+    //  1) No candidate molecule is proposed.
+    //  2) The same candidate was proposed multiple times.
+    int num_repeated_molecules = 0;
+    while (candidate_mol != nullptr && num_repeated_molecules < max_num_repeated_molecules) {
+        // Try to cluster the candidate molecule into the cluster.
+        bool success = try_add_candidate_mol_to_cluster(candidate_mol,
+                                                        legalization_cluster_id,
+                                                        cluster_legalizer);
+
+        // If the candidate molecule was clustered successfully, update
+        // the cluster stats.
+        if (success) {
+            print_pack_status(clustering_stats_.num_molecules,
+                              clustering_stats_.num_molecules_processed,
+                              clustering_stats_.mols_since_last_print,
+                              mutable_device_ctx.grid.width(),
+                              mutable_device_ctx.grid.height(),
                               attraction_groups,
                               cluster_legalizer);
 
-            VTR_LOGV(verbosity > 2,
-                     "Complex block %d: '%s' (%s) ", total_clb_num,
-                     cluster_legalizer.get_cluster_pb(legalization_cluster_id)->name,
-                     cluster_legalizer.get_cluster_type(legalization_cluster_id)->name.c_str());
-            VTR_LOGV(verbosity > 2, ".");
-            //Progress dot for seed-block
-            fflush(stdout);
-
-            int high_fanout_threshold = high_fanout_thresholds_.get_threshold(cluster_legalizer.get_cluster_type(legalization_cluster_id)->name);
-            update_cluster_stats(istart,
+            update_cluster_stats(candidate_mol,
                                  cluster_legalizer,
-                                 is_clock_,  //Set of clock nets
-                                 is_global_, //Set of global nets (currently all clocks)
+                                 is_clock_,  //Set of all clocks
+                                 is_global_, //Set of all global signals (currently clocks)
                                  packer_opts_.global_clocks,
-                                 packer_opts_.alpha, packer_opts_.beta,
-                                 packer_opts_.timing_driven, packer_opts_.connection_driven,
+                                 packer_opts_.alpha,
+                                 packer_opts_.beta,
+                                 packer_opts_.timing_driven,
+                                 packer_opts_.connection_driven,
                                  high_fanout_threshold,
-                                 *timing_info,
-                                 attraction_groups,
-                                 net_output_feeds_driving_block_input);
-            total_clb_num++;
-
-            if (packer_opts_.timing_driven) {
-                cluster_stats.blocks_since_last_analysis++;
-                /*it doesn't make sense to do a timing analysis here since there*
-                 *is only one atom block clustered it would not change anything      */
-            }
-            cluster_stats.num_unrelated_clustering_attempts = 0;
-            next_molecule = get_molecule_for_cluster(cluster_legalizer.get_cluster_pb(legalization_cluster_id),
-                                                     attraction_groups,
-                                                     allow_unrelated_clustering,
-                                                     packer_opts_.prioritize_transitive_connectivity,
-                                                     packer_opts_.transitive_fanout_threshold,
-                                                     packer_opts_.feasible_block_array_size,
-                                                     &cluster_stats.num_unrelated_clustering_attempts,
-                                                     prepacker,
-                                                     cluster_legalizer,
-                                                     clb_inter_blk_nets,
-                                                     legalization_cluster_id,
-                                                     verbosity,
-                                                     clustering_data.unclustered_list_head,
-                                                     unclustered_list_head_size,
-                                                     primitive_candidate_block_types_);
-            prev_molecule = istart;
-
-            /*
-             * When attraction groups are created, the purpose is to pack more densely by adding more molecules
-             * from the cluster's attraction group to the cluster. In a normal flow, (when attraction groups are
-             * not on), the cluster keeps being packed until the get_molecule routines return either a repeated
-             * molecule or a nullptr. When attraction groups are on, we want to keep exploring molecules for the
-             * cluster until a nullptr is returned. So, the number of repeated molecules allowed is increased to a
-             * large value.
-             */
-            int max_num_repeated_molecules = 0;
-            if (attraction_groups.num_attraction_groups() > 0) {
-                max_num_repeated_molecules = attraction_groups_max_repeated_molecules_;
-            } else {
-                max_num_repeated_molecules = 1;
-            }
-            int num_repeated_molecules = 0;
-
-            while (next_molecule != nullptr && num_repeated_molecules < max_num_repeated_molecules) {
-                prev_molecule = next_molecule;
-
-                try_fill_cluster(cluster_legalizer,
-                                 prepacker,
-                                 packer_opts_,
-                                 prev_molecule,
-                                 next_molecule,
-                                 num_repeated_molecules,
-                                 cluster_stats,
-                                 total_clb_num,
-                                 legalization_cluster_id,
-                                 attraction_groups,
-                                 clb_inter_blk_nets,
-                                 allow_unrelated_clustering,
-                                 high_fanout_threshold,
-                                 is_clock_,
-                                 is_global_,
                                  timing_info,
-                                 block_pack_status,
-                                 clustering_data.unclustered_list_head,
-                                 unclustered_list_head_size,
-                                 net_output_feeds_driving_block_input,
-                                 primitive_candidate_block_types_);
-            }
-
-            if (strategy == ClusterLegalizationStrategy::FULL) {
-                // If the legalizer fully legalized for every molecule added,
-                // the cluster should be legal.
-                is_cluster_legal = true;
-            } else {
-                // If the legalizer did not check everything for every molecule,
-                // need to check that the full cluster is legal (need to perform
-                // intra-lb routing).
-                is_cluster_legal = cluster_legalizer.check_cluster_legality(legalization_cluster_id);
-            }
-
-            if (is_cluster_legal) {
-                // Pick new seed.
-                istart = seed_selector.get_next_seed(prepacker,
-                                                     cluster_legalizer);
-                // Update cluster stats.
-                if (packer_opts_.timing_driven && num_blocks_hill_added > 0)
-                    cluster_stats.blocks_since_last_analysis += num_blocks_hill_added;
-
-                store_cluster_info_and_free(packer_opts_, legalization_cluster_id, logic_block_type, le_pb_type, le_count, cluster_legalizer, clb_inter_blk_nets);
-                // Since the cluster will no longer be added to beyond this point,
-                // clean the cluster of any data not strictly necessary for
-                // creating the clustered netlist.
-                cluster_legalizer.clean_cluster(legalization_cluster_id);
-            } else {
-                // If the cluster is not legal, requeue used mols.
-                num_used_type_instances[cluster_legalizer.get_cluster_type(legalization_cluster_id)]--;
-                total_clb_num--;
-                // Destroy the illegal cluster.
-                cluster_legalizer.destroy_cluster(legalization_cluster_id);
-                cluster_legalizer.compress();
-            }
+                                 attraction_groups,
+                                 net_output_feeds_driving_block_input_);
+            num_unrelated_clustering_attempts = 0;
         }
+
+        // Get the next candidate molecule.
+        t_pack_molecule* prev_candidate_mol = candidate_mol;
+        candidate_mol = get_molecule_for_cluster(cluster_legalizer.get_cluster_pb(legalization_cluster_id),
+                                                 attraction_groups,
+                                                 allow_unrelated_clustering,
+                                                 packer_opts_.prioritize_transitive_connectivity,
+                                                 packer_opts_.transitive_fanout_threshold,
+                                                 packer_opts_.feasible_block_array_size,
+                                                 &num_unrelated_clustering_attempts,
+                                                 prepacker,
+                                                 cluster_legalizer,
+                                                 clb_inter_blk_nets,
+                                                 legalization_cluster_id,
+                                                 log_verbosity_,
+                                                 clustering_data.unclustered_list_head,
+                                                 clustering_data.unclustered_list_head_size,
+                                                 primitive_candidate_block_types_);
+
+        // If the next candidate molecule is the same as the previous
+        // candidate molecule, increment the number of repeated
+        // molecules counter.
+        if (candidate_mol == prev_candidate_mol)
+            num_repeated_molecules++;
     }
 
-    // if this architecture has LE physical block, report its usage
-    if (le_pb_type) {
-        print_le_count(le_count, le_pb_type);
+    // Ensure that the cluster is legal. When the cluster legalization
+    // strategy is full, it must be legal.
+    if (strategy != ClusterLegalizationStrategy::FULL) {
+        // If the legalizer did not check everything for every molecule,
+        // need to check that the full cluster is legal (need to perform
+        // intra-lb routing).
+        bool is_cluster_legal = cluster_legalizer.check_cluster_legality(legalization_cluster_id);
+
+        if (!is_cluster_legal) {
+            // If the cluster is not legal, undo the cluster.
+            // Update the used type instances.
+            num_used_type_instances[cluster_legalizer.get_cluster_type(legalization_cluster_id)]--;
+            // Destroy the illegal cluster.
+            cluster_legalizer.destroy_cluster(legalization_cluster_id);
+            cluster_legalizer.compress();
+            // Cluster failed to grow.
+            return LegalizationClusterId();
+        }
     }
 
-    // Ensure that we have kept track of the number of clusters correctly.
-    // TODO: The total_clb_num variable could probably just be replaced by
-    //       clusters().size().
-    VTR_ASSERT(cluster_legalizer.clusters().size() == (size_t)total_clb_num);
+    VTR_ASSERT(legalization_cluster_id.is_valid());
+
+    // Legal cluster was created. Store cluster info and clean cluster.
+
+    // store info that will be used later in packing from pb_stats.
+    // FIXME: If this is used for gain, it should be moved into the selector
+    //        class. Perhaps a finalize_cluster_gain method.
+    t_pb* cur_pb = cluster_legalizer.get_cluster_pb(legalization_cluster_id);
+    t_pb_stats* pb_stats = cur_pb->pb_stats;
+    for (const AtomNetId mnet_id : pb_stats->marked_nets) {
+        int external_terminals = atom_netlist_.net_pins(mnet_id).size() - pb_stats->num_pins_of_net_in_pb[mnet_id];
+        // Check if external terminals of net is within the fanout limit and
+        // that there exists external terminals.
+        if (external_terminals < packer_opts_.transitive_fanout_threshold && external_terminals > 0) {
+            clb_inter_blk_nets[legalization_cluster_id].push_back(mnet_id);
+        }
+    }
 
-    // Free the clustering data.
-    // FIXME: This struct should use standard data structures so it does not
-    //        have to be freed like this.
-    free_clustering_data(packer_opts_, clustering_data);
+    // Since the cluster will no longer be added to beyond this point,
+    // clean the cluster of any data not strictly necessary for
+    // creating the clustered netlist.
+    cluster_legalizer.clean_cluster(legalization_cluster_id);
 
-    return num_used_type_instances;
+    // Cluster has been grown successfully.
+    return legalization_cluster_id;
+}
+
+LegalizationClusterId GreedyClusterer::start_new_cluster(
+            t_pack_molecule* seed_mol,
+            ClusterLegalizer& cluster_legalizer,
+            bool balance_block_type_utilization,
+            std::map<t_logical_block_type_ptr, size_t>& num_used_type_instances,
+            DeviceContext& mutable_device_ctx) {
+    // Start a new cluster seeded with seed_mol: find a logical block type (and
+    // mode) that can legally hold the seed, then grow the device grid if needed.
+    AtomBlockId root_atom = seed_mol->atom_block_ids[seed_mol->root];
+    const std::string& root_atom_name = atom_netlist_.block_name(root_atom);
+    const t_model* root_model = atom_netlist_.block_model(root_atom);
+
+    auto itr = primitive_candidate_block_types_.find(root_model);
+    VTR_ASSERT(itr != primitive_candidate_block_types_.end());
+    std::vector<t_logical_block_type_ptr> candidate_types = itr->second;
+
+    if (balance_block_type_utilization) {
+        //We sort the candidate types in ascending order by their current utilization.
+        //This means that the packer will prefer to use types with lower utilization.
+        //This is a naive approach to try balancing utilization when multiple types can
+        //support the same primitive(s).
+        std::stable_sort(candidate_types.begin(), candidate_types.end(),
+                         [&](t_logical_block_type_ptr lhs, t_logical_block_type_ptr rhs) {
+                             int lhs_num_instances = 0;
+                             int rhs_num_instances = 0;
+                             // Count number of instances for each type
+                             for (auto type : lhs->equivalent_tiles)
+                                 lhs_num_instances += mutable_device_ctx.grid.num_instances(type, -1);
+                             for (auto type : rhs->equivalent_tiles)
+                                 rhs_num_instances += mutable_device_ctx.grid.num_instances(type, -1);
+
+                             float lhs_util = vtr::safe_ratio<float>(num_used_type_instances[lhs], lhs_num_instances);
+                             float rhs_util = vtr::safe_ratio<float>(num_used_type_instances[rhs], rhs_num_instances);
+                             //Lower util first
+                             return lhs_util < rhs_util;
+                         });
+    }
+
+    if (log_verbosity_ > 2) {
+        VTR_LOG("\tSeed: '%s' (%s)", root_atom_name.c_str(), root_model->name);
+        VTR_LOGV(seed_mol->pack_pattern, " molecule_type %s molecule_size %zu",
+                 seed_mol->pack_pattern->name, seed_mol->atom_block_ids.size());
+        VTR_LOG("\n");
+    }
+
+    //Try packing into each candidate type
+    bool success = false;
+    t_logical_block_type_ptr block_type = nullptr;
+    LegalizationClusterId new_cluster_id;
+    for (auto type : candidate_types) {
+        //Try packing into each mode
+        e_block_pack_status pack_result = e_block_pack_status::BLK_STATUS_UNDEFINED;
+        for (int j = 0; j < type->pb_graph_head->pb_type->num_modes && !success; j++) {
+            std::tie(pack_result, new_cluster_id) = cluster_legalizer.start_new_cluster(seed_mol, type, j);
+            success = (pack_result == e_block_pack_status::BLK_PASSED);
+        }
+
+        if (success) {
+            VTR_LOGV(log_verbosity_ > 2, "\tPASSED_SEED: Block Type %s\n", type->name.c_str());
+            // The seed was packed: remember the block type and stop searching.
+            block_type = type;
+            break;
+        } else {
+            VTR_LOGV(log_verbosity_ > 2, "\tFAILED_SEED: Block Type %s\n", type->name.c_str());
+        }
+    }
+
+    if (!success) {
+        //Explored all candidates
+        if (seed_mol->type == MOLECULE_FORCED_PACK) {
+            VPR_FATAL_ERROR(VPR_ERROR_PACK,
+                            "Can not find any logic block that can implement molecule.\n"
+                            "\tPattern %s %s\n",
+                            seed_mol->pack_pattern->name,
+                            root_atom_name.c_str());
+        } else {
+            VPR_FATAL_ERROR(VPR_ERROR_PACK,
+                            "Can not find any logic block that can implement molecule.\n"
+                            "\tAtom %s (%s)\n",
+                            root_atom_name.c_str(), root_model->name);
+        }
+    }
+
+    VTR_ASSERT(success && block_type != nullptr);
+    VTR_ASSERT(new_cluster_id.is_valid());
+
+    VTR_LOGV(log_verbosity_ > 2,
+             "Complex block %zu: '%s' (%s) ", size_t(new_cluster_id),
+             cluster_legalizer.get_cluster_pb(new_cluster_id)->name,
+             cluster_legalizer.get_cluster_type(new_cluster_id)->name.c_str());
+    VTR_LOGV(log_verbosity_ > 2, ".");
+    //Progress dot for seed-block
+    fflush(stdout);
+
+    // Update the clustering progress stats.
+    clustering_stats_.num_molecules_processed++;
+    clustering_stats_.mols_since_last_print++;
+
+    // TODO: Below may make more sense in its own method.
+
+    // Successfully created cluster
+    num_used_type_instances[block_type]++;
+
+    /* Expand FPGA size if needed */
+    // Check used type instances against the possible equivalent physical locations
+    unsigned int num_instances = 0;
+    for (auto equivalent_tile : block_type->equivalent_tiles) {
+        num_instances += mutable_device_ctx.grid.num_instances(equivalent_tile, -1);
+    }
+
+    if (num_used_type_instances[block_type] > num_instances) {
+        mutable_device_ctx.grid = create_device_grid(packer_opts_.device_layout,
+                                                     arch_.grid_layouts,
+                                                     num_used_type_instances,
+                                                     packer_opts_.target_device_utilization);
+    }
+
+    return new_cluster_id;
+}
+
+bool GreedyClusterer::try_add_candidate_mol_to_cluster(t_pack_molecule* candidate_mol,
+                                                       LegalizationClusterId legalization_cluster_id,
+                                                       ClusterLegalizer& cluster_legalizer) {
+    VTR_ASSERT(candidate_mol != nullptr);
+    VTR_ASSERT(!cluster_legalizer.is_mol_clustered(candidate_mol));
+    VTR_ASSERT(legalization_cluster_id.is_valid());
+
+    const e_block_pack_status pack_status = cluster_legalizer.add_mol_to_cluster(candidate_mol,
+                                                                                 legalization_cluster_id);
+
+    // At high verbosity, log the outcome of the attempt and which molecule it was.
+    if (log_verbosity_ > 2) {
+        switch (pack_status) {
+            case e_block_pack_status::BLK_PASSED:
+                VTR_LOG("\tPassed: ");
+                break;
+            case e_block_pack_status::BLK_FAILED_ROUTE:
+                VTR_LOG("\tNO_ROUTE: ");
+                break;
+            case e_block_pack_status::BLK_FAILED_FLOORPLANNING:
+                VTR_LOG("\tFAILED_FLOORPLANNING_CONSTRAINTS_CHECK: ");
+                break;
+            case e_block_pack_status::BLK_FAILED_FEASIBLE:
+                VTR_LOG("\tFAILED_FEASIBILITY_CHECK: ");
+                break;
+            case e_block_pack_status::BLK_FAILED_NOC_GROUP:
+                VTR_LOG("\tFAILED_NOC_GROUP_CHECK: ");
+                break;
+            default:
+                VPR_FATAL_ERROR(VPR_ERROR_PACK, "Unknown pack status thrown.");
+                break;
+        }
+        // Identify the molecule by the name and model of its root atom block.
+        AtomBlockId blk_id = candidate_mol->atom_block_ids[candidate_mol->root];
+        VTR_ASSERT(blk_id.is_valid());
+        const std::string& blk_name = atom_netlist_.block_name(blk_id);
+        const t_model* blk_model = atom_netlist_.block_model(blk_id);
+        VTR_LOG("'%s' (%s)", blk_name.c_str(), blk_model->name);
+        VTR_LOGV(candidate_mol->pack_pattern, " molecule %s molecule_size %zu",
+                 candidate_mol->pack_pattern->name,
+                 candidate_mol->atom_block_ids.size());
+        VTR_LOG("\n");
+        fflush(stdout);
+    }
+
+    // If candidate molecule was successfully added, update the clustering
+    // progress stats.
+    if (pack_status == e_block_pack_status::BLK_PASSED) {
+        clustering_stats_.num_molecules_processed++;
+        clustering_stats_.mols_since_last_print++;
+    }
+
+    return pack_status == e_block_pack_status::BLK_PASSED;
+}
+
+void GreedyClusterer::report_le_physical_block_usage(const ClusterLegalizer& cluster_legalizer) {
+    // find the cluster type that has lut primitives
+    auto logic_block_type = identify_logic_block_type(primitive_candidate_block_types_);
+    // find a LE pb_type within the found logic_block_type
+    auto le_pb_type = identify_le_block_type(logic_block_type);
+
+    // If this architecture does not have an LE physical block, cannot report
+    // its usage.
+    if (le_pb_type == nullptr)
+        return;
+
+    // This data structure tracks the number of Logic Elements (LEs) used. It is
+    // populated only for architectures which have LEs. The architecture is assumed
+    // to have LEs iff it has a logic block that contains LUT primitives and is
+    // the first pb_block to have more than one instance from the top of the hierarchy
+    // (All parent pb_block have one instance only and one mode only). Index 0 holds
+    // the number of LEs that are used for both logic (LUTs/adders) and registers.
+    // Index 1 holds the number of LEs that are used for logic (LUTs/adders) only.
+    // Index 2 holds the number of LEs that are used for registers only.
+    std::array<int, 3> le_count = {0, 0, 0};
+
+    for (LegalizationClusterId cluster_id : cluster_legalizer.clusters()) {
+        // Update the data structure holding the LE counts
+        update_le_count(cluster_legalizer.get_cluster_pb(cluster_id),
+                        logic_block_type,
+                        le_pb_type,
+                        le_count);
+    }
+
+    // le_pb_type is known to be non-null here (the null case returned early
+    // above), so no second check is needed before reporting.
+    // Log the accumulated LE usage counts.
+    print_le_count(le_count, le_pb_type);
 }
 
diff --git a/vpr/src/pack/greedy_clusterer.h b/vpr/src/pack/greedy_clusterer.h
index 816043c91b4..3afbb290f0e 100644
--- a/vpr/src/pack/greedy_clusterer.h
+++ b/vpr/src/pack/greedy_clusterer.h
@@ -10,19 +10,41 @@
 
 #include <map>
 #include <unordered_set>
+#include <vector>
+#include "cluster_legalizer.h"
 #include "physical_types.h"
+#include "vtr_vector.h"
 
 // Forward declarations
 class AtomNetId;
 class AtomNetlist;
 class AttractionInfo;
-class ClusterLegalizer;
+class DeviceContext;
 class Prepacker;
+class SetupTimingInfo;
+class t_pack_high_fanout_thresholds;
+class t_pack_molecule;
 struct t_analysis_opts;
 struct t_clustering_data;
-struct t_pack_high_fanout_thresholds;
 struct t_packer_opts;
 
+/**
+ * @brief Counters tracking how far the clustering has progressed.
+ *
+ * FIXME: These numbers only ever go up! This is a problem since some clusters
+ *        may be reclustered, leading to double counting. This is only a logging
+ *        bug, but should be thought about.
+ */
+struct t_cluster_progress_stats {
+    /// @brief The total number of molecules in the design.
+    int num_molecules{0};
+    /// @brief The number of molecules which have been clustered so far.
+    int num_molecules_processed{0};
+    /// @brief The number of molecules clustered since the last time the
+    ///        status was logged.
+    int mols_since_last_print{0};
+};
+
 /**
  * @brief A clusterer that generates clusters by greedily choosing the clusters
  *        which appear to have the best gain for a given neighbor.
@@ -75,7 +97,7 @@ class GreedyClusterer {
     GreedyClusterer(const t_packer_opts& packer_opts,
                     const t_analysis_opts& analysis_opts,
                     const AtomNetlist& atom_netlist,
-                    const t_arch* arch,
+                    const t_arch& arch,
                     const t_pack_high_fanout_thresholds& high_fanout_thresholds,
                     const std::unordered_set<AtomNetId>& is_clock,
                     const std::unordered_set<AtomNetId>& is_global);
@@ -102,13 +124,16 @@ class GreedyClusterer {
      *              have multiple logical block types to which they can cluster,
      *              e.g. multiple sizes of physical RAMs exist on the chip.
      *  @param attraction_groups
-     *              Information on the attraction groups used during the
      *              clustering process. These are groups of primitives that have
      *              extra attraction to each other; currently they are used to
      *              guide the clusterer when it must cluster some parts of a
      *              design densely due to user placement/floorplanning
      *              constraints. They are created if some floorplan regions are
      *              overfilled after a clustering attempt.
+     *  @param mutable_device_ctx
+     *              The mutable device context. The clusterer will modify the
+     *              device context by potentially increasing the size of the
+     *              device to fit the clustering.
      *
      *  @return num_used_type_instances
      *              The number of used logical blocks of each type by the
@@ -120,9 +145,72 @@ class GreedyClusterer {
                   Prepacker& prepacker,
                   bool allow_unrelated_clustering,
                   bool balance_block_type_utilization,
-                  AttractionInfo& attraction_groups);
+                  AttractionInfo& attraction_groups,
+                  DeviceContext& mutable_device_ctx);
 
 private:
+    /**
+     * @brief Given a seed molecule and a legalization strategy, tries to grow
+     *        a cluster greedily. Will return the ID of the cluster created.
+     *
+     * If the strategy is set to SKIP_INTRA_LB_ROUTE, the cluster will grow
+     * without performing intra-lb route every time a molecule is added to the
+     * cluster. It will perform intra-lb route at the end, after all molecules
+     * have been added. If this final intra-lb route fails, the cluster will be
+     * destroyed and an invalid cluster ID will be returned.
+     *
+     * If the strategy is set to FULL, the cluster will grow using the full
+     * legalizer for each molecule added. This cannot fail (assuming the seed
+     * can exist in a cluster), so it will always return a valid cluster ID.
+     */
+    LegalizationClusterId try_grow_cluster(t_pack_molecule* seed_mol,
+                                           ClusterLegalizationStrategy strategy,
+                                           ClusterLegalizer& cluster_legalizer,
+                                           Prepacker& prepacker,
+                                           bool allow_unrelated_clustering,
+                                           bool balance_block_type_utilization,
+                                           SetupTimingInfo& timing_info,
+                                           vtr::vector<LegalizationClusterId, std::vector<AtomNetId>>& clb_inter_blk_nets,
+                                           t_clustering_data& clustering_data,
+                                           AttractionInfo& attraction_groups,
+                                           std::map<t_logical_block_type_ptr, size_t>& num_used_type_instances,
+                                           DeviceContext& mutable_device_ctx);
+
+    /**
+     * @brief Given a seed molecule, starts a new cluster by trying to find a
+     *        good logical block type and mode to put it in. This method cannot
+     *        fail (only crash if the seed cannot be clustered), so should
+     *        always return a valid ID to the cluster created.
+     *
+     * When balance_block_type_utilization is set to true, this method will try
+     * to select less used logical block types if it has the option to in order
+     * to balance logical block type utilization.
+     *
+     * This method will try to grow the device grid if it finds that more
+     * clusters of specific logical block types have been created than the
+     * device can support.
+     */
+    LegalizationClusterId start_new_cluster(t_pack_molecule* seed_mol,
+                                            ClusterLegalizer& cluster_legalizer,
+                                            bool balance_block_type_utilization,
+                                            std::map<t_logical_block_type_ptr, size_t>& num_used_type_instances,
+                                            DeviceContext& mutable_device_ctx);
+
+    /**
+     * @brief Try to add the given candidate molecule to the given cluster.
+     *        Returns true if the molecule was clustered successfully, false
+     *        otherwise.
+     */
+    bool try_add_candidate_mol_to_cluster(t_pack_molecule* candidate_mol,
+                                          LegalizationClusterId legalization_cluster_id,
+                                          ClusterLegalizer& cluster_legalizer);
+
+    /**
+     * @brief Log the physical block usage of the logic element in the
+     *        architecture (if it has one).
+     */
+    void report_le_physical_block_usage(const ClusterLegalizer& cluster_legalizer);
+
     /*
      * When attraction groups are created, the purpose is to pack more densely by adding more molecules
      * from the cluster's attraction group to the cluster. In a normal flow, (when attraction groups are
@@ -144,7 +232,7 @@ class GreedyClusterer {
     const AtomNetlist& atom_netlist_;
 
     /// @brief The device architecture to cluster onto.
-    const t_arch* arch_ = nullptr;
+    const t_arch& arch_;
 
     /// @brief The high-fanout thresholds per logical block type. Used to ignore
     ///        certain nets when calculating the gain for the next candidate
@@ -158,6 +246,25 @@ class GreedyClusterer {
     const std::unordered_set<AtomNetId>& is_global_;
 
     /// @brief Pre-computed logical block types for each model in the architecture.
-    std::map<const t_model*, std::vector<t_logical_block_type_ptr>> primitive_candidate_block_types_;
+    const std::map<const t_model*, std::vector<t_logical_block_type_ptr>> primitive_candidate_block_types_;
+
+    /// @brief The verbosity of log messages produced by the clusterer.
+    ///
+    /// Numbers larger than 2 will print info on the status of the packing for
+    /// each molecule.
+    const int log_verbosity_;
+
+    /* Does the atom block that drives the output of this atom net also appear as a   *
+     * receiver (input) pin of the atom net?
+     *
+     * This is used in the gain routines to avoid double counting the connections from   *
+     * the current cluster to other blocks (hence yielding better clusterings). *
+     * The only time an atom block should connect to the same atom net *
+     * twice is when one connection is an output and the other is an input, *
+     * so this should take care of all multiple connections.                */
+    const std::unordered_set<AtomNetId> net_output_feeds_driving_block_input_;
+
+    /// @brief The current progress of the clustering. Used for logging.
+    t_cluster_progress_stats clustering_stats_;
 };
 
diff --git a/vpr/src/pack/pack.cpp b/vpr/src/pack/pack.cpp
index 73970032131..cb27a23e831 100644
--- a/vpr/src/pack/pack.cpp
+++ b/vpr/src/pack/pack.cpp
@@ -28,6 +28,9 @@ bool try_pack(t_packer_opts* packer_opts,
               std::vector<t_lb_type_rr_node>* lb_type_rr_graphs) {
     const AtomContext& atom_ctx = g_vpr_ctx.atom();
     const DeviceContext& device_ctx = g_vpr_ctx.device();
+    // The clusterer modifies the device context by increasing the size of the
+    // device if needed.
+    DeviceContext& mutable_device_ctx = g_vpr_ctx.mutable_device();
 
     std::unordered_set<AtomNetId> is_clock, is_global;
     VTR_LOG("Begin packing '%s'.\n", packer_opts->circuit_file_name.c_str());
@@ -113,7 +116,7 @@ bool try_pack(t_packer_opts* packer_opts,
     GreedyClusterer clusterer(*packer_opts,
                               *analysis_opts,
                               atom_ctx.nlist,
-                              arch,
+                              *arch,
                               high_fanout_thresholds,
                               is_clock,
                               is_global);
@@ -127,7 +130,8 @@ bool try_pack(t_packer_opts* packer_opts,
                                                           prepacker,
                                                           allow_unrelated_clustering,
                                                           balance_block_type_util,
-                                                          attraction_groups);
+                                                          attraction_groups,
+                                                          mutable_device_ctx);
 
         //Try to size/find a device
         bool fits_on_device = try_size_device_grid(*arch, num_used_type_instances, packer_opts->target_device_utilization, packer_opts->device_layout);
diff --git a/vpr/src/pack/pack_types.h b/vpr/src/pack/pack_types.h
index 3c587bcb464..95a460751b1 100644
--- a/vpr/src/pack/pack_types.h
+++ b/vpr/src/pack/pack_types.h
@@ -14,6 +14,8 @@
 #include "atom_netlist_fwd.h"
 #include "attraction_groups.h"
 
+struct t_pack_molecule;
+
 /**************************************************************************
  * Packing Algorithm Enumerations
  ***************************************************************************/

From 0f4842dea2becea50b1817d1dd3c3fc1221aa6be Mon Sep 17 00:00:00 2001
From: AlexandreSinger <alex.singer@mail.utoronto.ca>
Date: Mon, 25 Nov 2024 16:40:41 -0500
Subject: [PATCH 2/2] [Packer] Updated Clustering Algorithm Per Vaughn's
 Comments

Changed when the clustering progress stats are updated and printed to
make them more accurate.

Changed how the LE block counts are stored to make the code easier to
read.

Fixed some comments.
---
 vpr/src/pack/cluster_legalizer.h  |  7 +++
 vpr/src/pack/cluster_util.cpp     | 39 ++++++++----
 vpr/src/pack/cluster_util.h       | 12 +++-
 vpr/src/pack/greedy_clusterer.cpp | 98 +++++++++++++++++--------------
 vpr/src/pack/greedy_clusterer.h   | 48 +++++----------
 5 files changed, 111 insertions(+), 93 deletions(-)

diff --git a/vpr/src/pack/cluster_legalizer.h b/vpr/src/pack/cluster_legalizer.h
index 1b0756cce32..61de0587a78 100644
--- a/vpr/src/pack/cluster_legalizer.h
+++ b/vpr/src/pack/cluster_legalizer.h
@@ -421,6 +421,13 @@ class ClusterLegalizer {
         return cluster.pr;
     }
 
+    /// @brief Gets the current number of molecules in the cluster.
+    inline size_t get_num_molecules_in_cluster(LegalizationClusterId cluster_id) const {
+        VTR_ASSERT_SAFE(cluster_id.is_valid() && (size_t)cluster_id < legalization_clusters_.size());
+        const LegalizationCluster& cluster = legalization_clusters_[cluster_id];
+        return cluster.molecules.size();
+    }
+
     /// @brief Gets the ID of the cluster that contains the given atom block.
     inline LegalizationClusterId get_atom_cluster(AtomBlockId blk_id) const {
         VTR_ASSERT_SAFE(blk_id.is_valid() && (size_t)blk_id < atom_cluster_.size());
diff --git a/vpr/src/pack/cluster_util.cpp b/vpr/src/pack/cluster_util.cpp
index c2cc00424be..0978817a0ce 100644
--- a/vpr/src/pack/cluster_util.cpp
+++ b/vpr/src/pack/cluster_util.cpp
@@ -172,7 +172,8 @@ void print_pack_status(int tot_num_molecules,
 
     int num_clusters_created = cluster_legalizer.clusters().size();
 
-    if (mols_since_last_print == int_molecule_increment) {
+    if (mols_since_last_print >= int_molecule_increment ||
+        num_molecules_processed == tot_num_molecules) {
         VTR_LOG(
             "%6d/%-6d  %3d%%   "
             "%26d   "
@@ -1493,7 +1494,12 @@ t_pb_type* identify_le_block_type(t_logical_block_type_ptr logic_block_type) {
     return nullptr;
 }
 
-void update_le_count(const t_pb* pb, const t_logical_block_type_ptr logic_block_type, const t_pb_type* le_pb_type, std::array<int, 3>& le_count) {
+void update_le_count(const t_pb* pb,
+                     const t_logical_block_type_ptr logic_block_type,
+                     const t_pb_type* le_pb_type,
+                     int& num_logic_le,
+                     int& num_reg_le,
+                     int& num_logic_and_reg_le) {
     // if this cluster doesn't contain LEs or there
     // are no les in this architecture, ignore it
     if (!logic_block_type || pb->pb_graph_node != logic_block_type->pb_graph_head || !le_pb_type)
@@ -1519,15 +1525,15 @@ void update_le_count(const t_pb* pb, const t_logical_block_type_ptr logic_block_
         auto has_used_adder = pb_used_for_blif_model(&parent_pb->child_pbs[0][ile], adder);
         auto has_used_ff = pb_used_for_blif_model(&parent_pb->child_pbs[0][ile], ff);
 
-        // First type of LEs: used for logic and registers
         if ((has_used_lut || has_used_adder) && has_used_ff) {
-            le_count[0]++;
-            // Second type of LEs: used for logic only
+            // First type of LEs: used for logic and registers
+            num_logic_and_reg_le++;
         } else if (has_used_lut || has_used_adder) {
-            le_count[1]++;
-            // Third type of LEs: used for registers only
+            // Second type of LEs: used for logic only
+            num_logic_le++;
         } else if (has_used_ff) {
-            le_count[2]++;
+            // Third type of LEs: used for registers only
+            num_reg_le++;
         }
     }
 }
@@ -1559,12 +1565,19 @@ bool pb_used_for_blif_model(const t_pb* pb, const std::string& blif_model_name)
     return false;
 }
 
-void print_le_count(const std::array<int, 3>& le_count, const t_pb_type* le_pb_type) {
+void print_le_count(int num_logic_le,
+                    int num_reg_le,
+                    int num_logic_and_reg_le,
+                    const t_pb_type* le_pb_type) {
+    VTR_ASSERT(le_pb_type != nullptr);
+
+    int num_total_le = num_logic_and_reg_le + num_logic_le + num_reg_le;
+
     VTR_LOG("\nLogic Element (%s) detailed count:\n", le_pb_type->name);
-    VTR_LOG("  Total number of Logic Elements used : %d\n", le_count[0] + le_count[1] + le_count[2]);
-    VTR_LOG("  LEs used for logic and registers    : %d\n", le_count[0]);
-    VTR_LOG("  LEs used for logic only             : %d\n", le_count[1]);
-    VTR_LOG("  LEs used for registers only         : %d\n\n", le_count[2]);
+    VTR_LOG("  Total number of Logic Elements used : %d\n", num_total_le);
+    VTR_LOG("  LEs used for logic and registers    : %d\n", num_logic_and_reg_le);
+    VTR_LOG("  LEs used for logic only             : %d\n", num_logic_le);
+    VTR_LOG("  LEs used for registers only         : %d\n\n", num_reg_le);
 }
 
 t_pb* get_top_level_pb(t_pb* pb) {
diff --git a/vpr/src/pack/cluster_util.h b/vpr/src/pack/cluster_util.h
index b4f4e3a9440..ca54c4046a1 100644
--- a/vpr/src/pack/cluster_util.h
+++ b/vpr/src/pack/cluster_util.h
@@ -396,7 +396,12 @@ size_t update_pb_type_count(const t_pb* pb, std::map<t_pb_type*, int>& pb_type_c
  * @brief This function updates the le_count data structure from the given
  *        packed cluster.
  */
-void update_le_count(const t_pb* pb, const t_logical_block_type_ptr logic_block_type, const t_pb_type* le_pb_type, std::array<int, 3>& le_count);
+void update_le_count(const t_pb* pb,
+                     const t_logical_block_type_ptr logic_block_type,
+                     const t_pb_type* le_pb_type,
+                     int& num_logic_le,
+                     int& num_reg_le,
+                     int& num_logic_and_reg_le);
 
 void print_pb_type_count_recurr(t_pb_type* type, size_t max_name_chars, size_t curr_depth, std::map<t_pb_type*, int>& pb_type_count);
 
@@ -430,7 +435,10 @@ bool pb_used_for_blif_model(const t_pb* pb, const std::string& blif_model_name);
 /*
  * @brief Print the LE count data strurture.
  */
-void print_le_count(const std::array<int, 3>& le_count, const t_pb_type* le_pb_type);
+void print_le_count(int num_logic_le,
+                    int num_reg_le,
+                    int num_logic_and_reg_le,
+                    const t_pb_type* le_pb_type);
 
 /*
  * @brief Given a pointer to a pb in a cluster, this routine returns a pointer
diff --git a/vpr/src/pack/greedy_clusterer.cpp b/vpr/src/pack/greedy_clusterer.cpp
index bd1ef859a48..d8612897069 100644
--- a/vpr/src/pack/greedy_clusterer.cpp
+++ b/vpr/src/pack/greedy_clusterer.cpp
@@ -37,7 +37,6 @@
  */
 
 #include "greedy_clusterer.h"
-#include <array>
 #include <cstdio>
 #include <map>
 #include <string>
@@ -56,6 +55,23 @@
 #include "vtr_math.h"
 #include "vtr_vector.h"
 
+namespace {
+
+/**
+ * @brief Struct to hold statistics on the progress of clustering.
+ */
+struct t_cluster_progress_stats {
+    // The total number of molecules in the design.
+    int num_molecules = 0;
+    // The number of molecules which have been clustered.
+    int num_molecules_processed = 0;
+    // The number of molecules clustered since the last time the status was
+    // logged.
+    int mols_since_last_print = 0;
+};
+
+} // namespace
+
 GreedyClusterer::GreedyClusterer(const t_packer_opts& packer_opts,
                                  const t_analysis_opts& analysis_opts,
                                  const AtomNetlist& atom_netlist,
@@ -93,9 +109,8 @@ GreedyClusterer::do_clustering(ClusterLegalizer& cluster_legalizer,
 
     // The clustering stats holds information used for logging the progress
     // of the clustering to the user.
-    // Reset the clustering stats in case the clusterer is called multiple times.
-    clustering_stats_ = t_cluster_progress_stats();
-    clustering_stats_.num_molecules = prepacker.get_num_molecules();
+    t_cluster_progress_stats clustering_stats;
+    clustering_stats.num_molecules = prepacker.get_num_molecules();
 
     // TODO: Create a ClusteringTimingManager class.
     //       This code relies on the prepacker, once the prepacker is moved to
@@ -124,7 +139,7 @@ GreedyClusterer::do_clustering(ClusterLegalizer& cluster_legalizer,
     alloc_and_init_clustering(max_molecule_stats,
                               prepacker,
                               clustering_data,
-                              clustering_stats_.num_molecules);
+                              clustering_stats.num_molecules);
 
     // Create the greedy seed selector.
     GreedySeedSelector seed_selector(atom_netlist_,
@@ -156,7 +171,8 @@ GreedyClusterer::do_clustering(ClusterLegalizer& cluster_legalizer,
         //    do full legalization for each molecule added to the cluster.
 
         // Try to grow a cluster from the seed molecule without doing intra-lb
-        // route for each molecule.
+        // route for each molecule (i.e. just use faster but not fully
+        // conservative legality checks).
         LegalizationClusterId new_cluster_id = try_grow_cluster(seed_mol,
                                         ClusterLegalizationStrategy::SKIP_INTRA_LB_ROUTE,
                                         cluster_legalizer,
@@ -192,6 +208,21 @@ GreedyClusterer::do_clustering(ClusterLegalizer& cluster_legalizer,
         VTR_ASSERT(new_cluster_id.is_valid());
         VTR_ASSERT(cluster_legalizer.is_mol_clustered(seed_mol));
 
+        // Update the clustering progress stats.
+        size_t num_molecules_in_cluster = cluster_legalizer.get_num_molecules_in_cluster(new_cluster_id);
+        clustering_stats.num_molecules_processed += num_molecules_in_cluster;
+        clustering_stats.mols_since_last_print += num_molecules_in_cluster;
+
+        // Print the current progress of the packing after a cluster has been
+        // successfully created.
+        print_pack_status(clustering_stats.num_molecules,
+                          clustering_stats.num_molecules_processed,
+                          clustering_stats.mols_since_last_print,
+                          mutable_device_ctx.grid.width(),
+                          mutable_device_ctx.grid.height(),
+                          attraction_groups,
+                          cluster_legalizer);
+
         // Pick new seed.
         seed_mol = seed_selector.get_next_seed(prepacker,
                                                cluster_legalizer);
@@ -236,15 +267,6 @@ LegalizationClusterId GreedyClusterer::try_grow_cluster(
                                                                       num_used_type_instances,
                                                                       mutable_device_ctx);
 
-    //initial molecule in cluster has been processed
-    print_pack_status(clustering_stats_.num_molecules,
-                      clustering_stats_.num_molecules_processed,
-                      clustering_stats_.mols_since_last_print,
-                      mutable_device_ctx.grid.width(),
-                      mutable_device_ctx.grid.height(),
-                      attraction_groups,
-                      cluster_legalizer);
-
     int high_fanout_threshold = high_fanout_thresholds_.get_threshold(cluster_legalizer.get_cluster_type(legalization_cluster_id)->name);
     update_cluster_stats(seed_mol,
                          cluster_legalizer,
@@ -302,14 +324,6 @@ LegalizationClusterId GreedyClusterer::try_grow_cluster(
         // If the candidate molecule was clustered successfully, update
         // the cluster stats.
         if (success) {
-            print_pack_status(clustering_stats_.num_molecules,
-                              clustering_stats_.num_molecules_processed,
-                              clustering_stats_.mols_since_last_print,
-                              mutable_device_ctx.grid.width(),
-                              mutable_device_ctx.grid.height(),
-                              attraction_groups,
-                              cluster_legalizer);
-
             update_cluster_stats(candidate_mol,
                                  cluster_legalizer,
                                  is_clock_,  //Set of all clocks
@@ -492,10 +506,6 @@ LegalizationClusterId GreedyClusterer::start_new_cluster(
     //Progress dot for seed-block
     fflush(stdout);
 
-    // Update the clustering progress stats.
-    clustering_stats_.num_molecules_processed++;
-    clustering_stats_.mols_since_last_print++;
-
     // TODO: Below may make more sense in its own method.
 
     // Successfully created cluster
@@ -563,13 +573,6 @@ bool GreedyClusterer::try_add_candidate_mol_to_cluster(t_pack_molecule* candidat
         fflush(stdout);
     }
 
-    // If candidate molecule was successfully added, update the clustering
-    // progress stats.
-    if (pack_status == e_block_pack_status::BLK_PASSED) {
-        clustering_stats_.num_molecules_processed++;
-        clustering_stats_.mols_since_last_print++;
-    }
-
     return pack_status == e_block_pack_status::BLK_PASSED;
 }
 
@@ -584,27 +587,32 @@ void GreedyClusterer::report_le_physical_block_usage(const ClusterLegalizer& clu
     if (le_pb_type == nullptr)
         return;
 
-    // this data structure tracks the number of Logic Elements (LEs) used. It is
-    // populated only for architectures which has LEs. The architecture is assumed
-    // to have LEs only iff it has a logic block that contains LUT primitives and is
-    // the first pb_block to have more than one instance from the top of the hierarchy
-    // (All parent pb_block have one instance only and one mode only). Index 0 holds
-    // the number of LEs that are used for both logic (LUTs/adders) and registers.
-    // Index 1 holds the number of LEs that are used for logic (LUTs/adders) only.
-    // Index 2 holds the number of LEs that are used for registers only.
-    std::array<int, 3> le_count = {0, 0, 0};
+    // Track the number of Logic Elements (LEs) used. This is populated only for
+    // architectures which have LEs. The architecture is assumed to have LEs iff
+    // it has a logic block that contains LUT primitives and is the first
+    // pb_block to have more than one instance from the top of the hierarchy
+    // (All parent pb_block have one instance only and one mode only).
+
+    // The number of LEs that are used for logic (LUTs/adders) only.
+    int num_logic_le = 0;
+    // The number of LEs that are used for registers only.
+    int num_reg_le = 0;
+    // The number of LEs that are used for both logic (LUTs/adders) and registers.
+    int num_logic_and_reg_le = 0;
 
     for (LegalizationClusterId cluster_id : cluster_legalizer.clusters()) {
         // Update the data structure holding the LE counts
         update_le_count(cluster_legalizer.get_cluster_pb(cluster_id),
                         logic_block_type,
                         le_pb_type,
-                        le_count);
+                        num_logic_le,
+                        num_reg_le,
+                        num_logic_and_reg_le);
     }
 
     // if this architecture has LE physical block, report its usage
     if (le_pb_type) {
-        print_le_count(le_count, le_pb_type);
+        print_le_count(num_logic_le, num_reg_le, num_logic_and_reg_le, le_pb_type);
     }
 }
 
diff --git a/vpr/src/pack/greedy_clusterer.h b/vpr/src/pack/greedy_clusterer.h
index 3afbb290f0e..6df695b3336 100644
--- a/vpr/src/pack/greedy_clusterer.h
+++ b/vpr/src/pack/greedy_clusterer.h
@@ -28,23 +28,6 @@ struct t_analysis_opts;
 struct t_clustering_data;
 struct t_packer_opts;
 
-/**
- * @brief Struct to hold statistics on the progress of clustering.
- *
- * FIXME: These numbers only ever go up! This is a problem since some clusters
- *        may be reclustered, leading to double counting. This is only a logging
- *        bug, but should be thought about.
- */
-struct t_cluster_progress_stats {
-    // The total number of molecules in the design.
-    int num_molecules = 0;
-    // The number of molecules which have been clustered.
-    int num_molecules_processed = 0;
-    // The number of molecules clustered since the last time the status was
-    // logged.
-    int mols_since_last_print = 0;
-};
-
 /**
  * @brief A clusterer that generates clusters by greedily choosing the clusters
  *        which appear to have the best gain for a given neighbor.
@@ -151,7 +134,9 @@ class GreedyClusterer {
 private:
     /**
      * @brief Given a seed molecule and a legalization strategy, tries to grow
-     *        a cluster greedily. Will return the ID of the cluster created.
+     *        a cluster greedily, starting with the provided seed and adding
+     *        whatever other molecules seem beneficial and legal. Will return
+     *        the ID of the cluster created.
      *
      * If the strategy is set to SKIP_INTRA_LB_ROUTE, the cluster will grow
      * without performing intra-lb route every time a molecule is added to the
@@ -179,16 +164,16 @@ class GreedyClusterer {
     /**
      * @brief Given a seed molecule, starts a new cluster by trying to find a
      *        good logical block type and mode to put it in. This method cannot
-     *        fail (only crash if the seed cannot be clustered), so should
+     *        fail (only crash if the seed cannot be clustered), so it should
      *        always return a valid ID to the cluster created.
      *
      * When balance_block_type_utilization is set to true, this method will try
      * to select less used logical block types if it has the option to in order
      * to balance logical block type utilization.
      *
-     * This method will try to grow the device grid if it find thats more
-     * clusters of specific logical block types have been created than the
-     * device can support.
+     * If the device is to be auto-sized, this method will try to grow the
+     * device grid if it finds that more clusters of specific logical block
+     * types have been created than the device can support.
      */
     LegalizationClusterId start_new_cluster(t_pack_molecule* seed_mol,
                                             ClusterLegalizer& cluster_legalizer,
@@ -254,17 +239,14 @@ class GreedyClusterer {
     /// each molecule.
     const int log_verbosity_;
 
-    /* Does the atom block that drives the output of this atom net also appear as a   *
-     * receiver (input) pin of the atom net?
-     *
-     * This is used in the gain routines to avoid double counting the connections from   *
-     * the current cluster to other blocks (hence yielding better clusterings). *
-     * The only time an atom block should connect to the same atom net *
-     * twice is when one connection is an output and the other is an input, *
-     * so this should take care of all multiple connections.                */
+    /// @brief Does the atom block that drives the output of this atom net also
+    /// appear as a receiver (input) pin of the atom net?
+    ///
+    /// This is used in the gain routines to avoid double counting the
+    /// connections from the current cluster to other blocks (hence yielding
+    /// better clusterings). The only time an atom block should connect to the
+    /// same atom net twice is when one connection is an output and the other
+    /// is an input, so this should take care of all multiple connections.
     const std::unordered_set<AtomNetId> net_output_feeds_driving_block_input_;
-
-    /// @brief The current progress of the clustering. Used for logging.
-    t_cluster_progress_stats clustering_stats_;
 };