From f0f2ad55d220e16d489746abaf1ab3df1e0ee8b0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fahrican=20Ko=C5=9Far?= <duck2@protonmail.com>
Date: Thu, 5 Oct 2023 21:10:08 -0400
Subject: [PATCH] reorganize router code, add NetlistRouter

also: clip high-fanout BBs by original BBs to avoid
data races in the parallel case
---
 utils/route_diag/src/main.cpp                 |    5 +-
 vpr/src/base/SetupVPR.cpp                     |    1 +
 vpr/src/base/place_and_route.cpp              |   54 +-
 vpr/src/base/read_options.cpp                 |    2 -
 vpr/src/base/vpr_api.cpp                      |   37 +-
 vpr/src/base/vpr_types.h                      |    2 +
 vpr/src/draw/draw.cpp                         |    2 +-
 vpr/src/draw/draw_basic.cpp                   |    2 +-
 vpr/src/draw/draw_rr.h                        |    2 +-
 vpr/src/draw/draw_rr_edges.h                  |    2 +-
 vpr/src/draw/draw_searchbar.h                 |    2 +-
 vpr/src/draw/draw_toggle_functions.h          |    2 +-
 vpr/src/draw/draw_triangle.h                  |    2 +-
 vpr/src/draw/search_bar.cpp                   |    2 +-
 vpr/src/place/place_timing_update.h           |    2 +
 vpr/src/place/timing_place_lookup.cpp         |    2 +-
 vpr/src/route/ParallelNetlistRouter.h         |   93 +
 vpr/src/route/ParallelNetlistRouter.tpp       |  104 +
 vpr/src/route/SerialNetlistRouter.h           |   71 +
 vpr/src/route/SerialNetlistRouter.tpp         |   72 +
 vpr/src/route/channel_stats.cpp               |    2 +-
 vpr/src/route/connection_based_routing.cpp    |    1 -
 vpr/src/route/connection_router.cpp           |  114 +-
 vpr/src/route/connection_router.h             |   10 +-
 vpr/src/route/connection_router_interface.h   |   12 +-
 vpr/src/route/netlist_routers.h               |  168 ++
 vpr/src/route/partition_tree.h                |   18 -
 vpr/src/route/route.cpp                       |  625 +++++
 vpr/src/route/route.h                         |   33 +
 vpr/src/route/route_budgets.cpp               |    1 -
 vpr/src/route/route_common.cpp                |  254 +-
 vpr/src/route/route_common.h                  |   15 +-
 vpr/src/route/route_debug.cpp                 |   30 +
 vpr/src/route/route_debug.h                   |   20 +
 vpr/src/route/route_export.h                  |   41 +-
 vpr/src/route/route_net.cpp                   |  338 +++
 vpr/src/route/route_net.h                     |  119 +
 vpr/src/route/route_net.tpp                   |  478 ++++
 vpr/src/route/route_parallel.cpp              | 1059 --------
 vpr/src/route/route_parallel.h                |   33 -
 vpr/src/route/route_timing.cpp                | 2263 -----------------
 vpr/src/route/route_timing.h                  |  291 ---
 vpr/src/route/route_tree.cpp                  |    4 +-
 .../{route_util.cpp => route_utilization.cpp} |    2 +-
 .../{route_util.h => route_utilization.h}     |    0
 vpr/src/route/route_utils.cpp                 |  536 ++++
 vpr/src/route/route_utils.h                   |  148 ++
 vpr/src/route/router_delay_profiling.cpp      |    5 +-
 vpr/src/route/router_delay_profiling.h        |    1 -
 vpr/src/route/router_lookahead.cpp            |    2 +-
 .../route/router_lookahead_extended_map.cpp   |    3 +-
 vpr/src/route/router_lookahead_map.cpp        |    2 +-
 vpr/src/route/router_lookahead_map_utils.cpp  |    8 +-
 vpr/src/route/router_stats.h                  |   24 +
 vpr/test/test_connection_router.cpp           |    4 +-
 vtr_flow/scripts/python_libs/vtr/task.py      |    8 +-
 vtr_flow/scripts/python_libs/vtr/util.py      |    9 +-
 57 files changed, 3028 insertions(+), 4114 deletions(-)
 create mode 100644 vpr/src/route/ParallelNetlistRouter.h
 create mode 100644 vpr/src/route/ParallelNetlistRouter.tpp
 create mode 100644 vpr/src/route/SerialNetlistRouter.h
 create mode 100644 vpr/src/route/SerialNetlistRouter.tpp
 create mode 100644 vpr/src/route/netlist_routers.h
 create mode 100644 vpr/src/route/route.cpp
 create mode 100644 vpr/src/route/route.h
 create mode 100644 vpr/src/route/route_debug.cpp
 create mode 100644 vpr/src/route/route_debug.h
 create mode 100644 vpr/src/route/route_net.cpp
 create mode 100644 vpr/src/route/route_net.h
 create mode 100644 vpr/src/route/route_net.tpp
 delete mode 100644 vpr/src/route/route_parallel.cpp
 delete mode 100644 vpr/src/route/route_parallel.h
 delete mode 100644 vpr/src/route/route_timing.cpp
 delete mode 100644 vpr/src/route/route_timing.h
 rename vpr/src/route/{route_util.cpp => route_utilization.cpp} (99%)
 rename vpr/src/route/{route_util.h => route_utilization.h} (100%)
 create mode 100644 vpr/src/route/route_utils.cpp
 create mode 100644 vpr/src/route/route_utils.h

diff --git a/utils/route_diag/src/main.cpp b/utils/route_diag/src/main.cpp
index 0cf1c901d23..d322890e8ec 100644
--- a/utils/route_diag/src/main.cpp
+++ b/utils/route_diag/src/main.cpp
@@ -33,7 +33,7 @@
 #include "router_delay_profiling.h"
 #include "route_tree.h"
 #include "route_common.h"
-#include "route_timing.h"
+#include "route_net.h"
 #include "route_export.h"
 #include "rr_graph.h"
 #include "rr_graph2.h"
@@ -124,8 +124,7 @@ static void do_one_route(const Netlist<>& net_list,
                                                                                                     cost_params,
                                                                                                     bounding_box,
                                                                                                     router_stats,
-                                                                                                    conn_params,
-                                                                                                    true);
+                                                                                                    conn_params);
 
     if (found_path) {
         VTR_ASSERT(cheapest.index == sink_node);
diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp
index e596bd51c43..eecec4d39ce 100644
--- a/vpr/src/base/SetupVPR.cpp
+++ b/vpr/src/base/SetupVPR.cpp
@@ -481,6 +481,7 @@ static void SetupRouterOpts(const t_options& Options, t_router_opts* RouterOpts)
     RouterOpts->generate_rr_node_overuse_report = Options.generate_rr_node_overuse_report;
     RouterOpts->flat_routing = Options.flat_routing;
     RouterOpts->has_choking_spot = Options.has_choking_spot;
+    RouterOpts->with_timing_analysis = Options.timing_analysis;
 }
 
 static void SetupAnnealSched(const t_options& Options,
diff --git a/vpr/src/base/place_and_route.cpp b/vpr/src/base/place_and_route.cpp
index c34cd9cfbae..b1916852a34 100644
--- a/vpr/src/base/place_and_route.cpp
+++ b/vpr/src/base/place_and_route.cpp
@@ -20,6 +20,7 @@
 #include "place.h"
 #include "read_place.h"
 #include "read_route.h"
+#include "route.h"
 #include "route_export.h"
 #include "draw.h"
 #include "stats.h"
@@ -191,19 +192,19 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list,
                       arch->num_directs,
                       false);
         }
-        success = try_route(router_net_list,
-                            current,
-                            router_opts,
-                            analysis_opts,
-                            det_routing_arch, segment_inf,
-                            net_delay,
-                            timing_info,
-                            delay_calc,
-                            arch->Chans,
-                            arch->Directs,
-                            arch->num_directs,
-                            (attempt_count == 0) ? ScreenUpdatePriority::MAJOR : ScreenUpdatePriority::MINOR,
-                            is_flat);
+        success = route(router_net_list,
+                        current,
+                        router_opts,
+                        analysis_opts,
+                        det_routing_arch, segment_inf,
+                        net_delay,
+                        timing_info,
+                        delay_calc,
+                        arch->Chans,
+                        arch->Directs,
+                        arch->num_directs,
+                        (attempt_count == 0) ? ScreenUpdatePriority::MAJOR : ScreenUpdatePriority::MINOR,
+                        is_flat);
 
         attempt_count++;
         fflush(stdout);
@@ -331,19 +332,20 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list,
                           false);
             }
 
-            success = try_route(router_net_list,
-                                current,
-                                router_opts,
-                                analysis_opts,
-                                det_routing_arch, segment_inf,
-                                net_delay,
-                                timing_info,
-                                delay_calc,
-                                arch->Chans,
-                                arch->Directs,
-                                arch->num_directs,
-                                ScreenUpdatePriority::MINOR,
-                                is_flat);
+            success = route(router_net_list,
+                            current,
+                            router_opts,
+                            analysis_opts,
+                            det_routing_arch,
+                            segment_inf,
+                            net_delay,
+                            timing_info,
+                            delay_calc,
+                            arch->Chans,
+                            arch->Directs,
+                            arch->num_directs,
+                            ScreenUpdatePriority::MINOR,
+                            is_flat);
 
             if (success && Fc_clipped == false) {
                 final = current;
diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index 32929c4fc9a..0764b76f947 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -2986,8 +2986,6 @@ void set_conditional_defaults(t_options& args) {
      */
     //Base cost type
     if (args.base_cost_type.provenance() != Provenance::SPECIFIED) {
-        VTR_ASSERT(args.RouterAlgorithm == TIMING_DRIVEN || args.RouterAlgorithm == PARALLEL);
-
         if (args.RouteType == DETAILED) {
             if (args.timing_analysis) {
                 args.base_cost_type.set(DELAY_NORMALIZED_LENGTH, Provenance::INFERRED);
diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp
index 65519d5775f..1e4684ae683 100644
--- a/vpr/src/base/vpr_api.cpp
+++ b/vpr/src/base/vpr_api.cpp
@@ -53,6 +53,7 @@
 #include "pb_type_graph.h"
 #include "route_common.h"
 #include "timing_place_lookup.h"
+#include "route.h"
 #include "route_export.h"
 #include "vpr_api.h"
 #include "read_sdc.h"
@@ -61,9 +62,9 @@
 #include "lb_type_rr_graph.h"
 #include "read_activity.h"
 #include "net_delay.h"
-#include "AnalysisDelayCalculator.h"
 #include "concrete_timing_info.h"
 #include "netlist_writer.h"
+#include "AnalysisDelayCalculator.h"
 #include "RoutingDelayCalculator.h"
 #include "check_route.h"
 #include "constant_nets.h"
@@ -367,7 +368,6 @@ bool vpr_flow(t_vpr_setup& vpr_setup, t_arch& arch) {
     }
 
 #ifdef VPR_USE_TBB
-
     /* Set this here, because tbb::global_control doesn't control anything once it's out of scope
      * (contrary to the name). */
     tbb::global_control c(tbb::global_control::max_allowed_parallelism, vpr_setup.num_workers);
@@ -805,10 +805,11 @@ RouteStatus vpr_route_flow(const Netlist<>& net_list,
         std::shared_ptr<RoutingDelayCalculator> routing_delay_calc = nullptr;
         if (vpr_setup.Timing.timing_analysis_enabled) {
             auto& atom_ctx = g_vpr_ctx.atom();
-
             routing_delay_calc = std::make_shared<RoutingDelayCalculator>(atom_ctx.nlist, atom_ctx.lookup, net_delay, is_flat);
-
             timing_info = make_setup_hold_timing_info(routing_delay_calc, router_opts.timing_update_type);
+        } else {
+            /* Timing analysis disabled: routing_delay_calc stays nullptr (any call into it would segfault) and constant timing info makes the routing wirelength driven */
+            timing_info = make_constant_timing_info(0);
         }
 
         if (router_opts.doRouting == STAGE_DO) {
@@ -922,20 +923,20 @@ RouteStatus vpr_route_fixed_W(const Netlist<>& net_list,
         VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Fixed channel width must be specified when routing at fixed channel width (was %d)", fixed_channel_width);
     }
     bool status = false;
-    status = try_route(net_list,
-                       fixed_channel_width,
-                       vpr_setup.RouterOpts,
-                       vpr_setup.AnalysisOpts,
-                       &vpr_setup.RoutingArch,
-                       vpr_setup.Segments,
-                       net_delay,
-                       timing_info,
-                       delay_calc,
-                       arch.Chans,
-                       arch.Directs,
-                       arch.num_directs,
-                       ScreenUpdatePriority::MAJOR,
-                       is_flat);
+    status = route(net_list,
+                   fixed_channel_width,
+                   vpr_setup.RouterOpts,
+                   vpr_setup.AnalysisOpts,
+                   &vpr_setup.RoutingArch,
+                   vpr_setup.Segments,
+                   net_delay,
+                   timing_info,
+                   delay_calc,
+                   arch.Chans,
+                   arch.Directs,
+                   arch.num_directs,
+                   ScreenUpdatePriority::MAJOR,
+                   is_flat);
 
     return RouteStatus(status, fixed_channel_width);
 }
diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index 2784c5e63da..438b81086cc 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -1449,6 +1449,8 @@ struct t_router_opts {
     bool flat_routing;
     bool has_choking_spot;
 
+    bool with_timing_analysis;
+
     // Options related to rr_node reordering, for testing and possible cache optimization
     e_rr_node_reorder_algorithm reorder_rr_graph_nodes_algorithm = DONT_REORDER;
     int reorder_rr_graph_nodes_threshold = 0;
diff --git a/vpr/src/draw/draw.cpp b/vpr/src/draw/draw.cpp
index d4e22cd3c95..7f66de9a951 100644
--- a/vpr/src/draw/draw.cpp
+++ b/vpr/src/draw/draw.cpp
@@ -86,7 +86,7 @@
 #    endif
 
 #    include "rr_graph.h"
-#    include "route_util.h"
+#    include "route_utilization.h"
 #    include "place_macro.h"
 #    include "buttons.h"
 #    include "draw_rr.h"
diff --git a/vpr/src/draw/draw_basic.cpp b/vpr/src/draw/draw_basic.cpp
index e35ebcef0a6..e3f8c2adcab 100644
--- a/vpr/src/draw/draw_basic.cpp
+++ b/vpr/src/draw/draw_basic.cpp
@@ -60,7 +60,7 @@
 #    endif
 
 #    include "rr_graph.h"
-#    include "route_util.h"
+#    include "route_utilization.h"
 #    include "place_macro.h"
 #    include "buttons.h"
 
diff --git a/vpr/src/draw/draw_rr.h b/vpr/src/draw/draw_rr.h
index 49e0949271b..7ed40ffd61b 100644
--- a/vpr/src/draw/draw_rr.h
+++ b/vpr/src/draw/draw_rr.h
@@ -43,7 +43,7 @@
 #    include "manual_moves.h"
 
 #    include "rr_graph.h"
-#    include "route_util.h"
+#    include "route_utilization.h"
 #    include "place_macro.h"
 #    include "buttons.h"
 
diff --git a/vpr/src/draw/draw_rr_edges.h b/vpr/src/draw/draw_rr_edges.h
index 81077e22d01..9068b31b4fe 100644
--- a/vpr/src/draw/draw_rr_edges.h
+++ b/vpr/src/draw/draw_rr_edges.h
@@ -43,7 +43,7 @@
 #    include "manual_moves.h"
 
 #    include "rr_graph.h"
-#    include "route_util.h"
+#    include "route_utilization.h"
 #    include "place_macro.h"
 #    include "buttons.h"
 
diff --git a/vpr/src/draw/draw_searchbar.h b/vpr/src/draw/draw_searchbar.h
index 6fc1092afd3..e4dade9bb58 100644
--- a/vpr/src/draw/draw_searchbar.h
+++ b/vpr/src/draw/draw_searchbar.h
@@ -44,7 +44,7 @@
 #    include "manual_moves.h"
 
 #    include "rr_graph.h"
-#    include "route_util.h"
+#    include "route_utilization.h"
 #    include "place_macro.h"
 #    include "buttons.h"
 
diff --git a/vpr/src/draw/draw_toggle_functions.h b/vpr/src/draw/draw_toggle_functions.h
index 6c256cc2310..7b8330396b7 100644
--- a/vpr/src/draw/draw_toggle_functions.h
+++ b/vpr/src/draw/draw_toggle_functions.h
@@ -49,7 +49,7 @@
 #    include "manual_moves.h"
 
 #    include "rr_graph.h"
-#    include "route_util.h"
+#    include "route_utilization.h"
 #    include "place_macro.h"
 #    include "buttons.h"
 
diff --git a/vpr/src/draw/draw_triangle.h b/vpr/src/draw/draw_triangle.h
index 51eab896244..0ddf12218ec 100644
--- a/vpr/src/draw/draw_triangle.h
+++ b/vpr/src/draw/draw_triangle.h
@@ -44,7 +44,7 @@
 #    include "manual_moves.h"
 
 #    include "rr_graph.h"
-#    include "route_util.h"
+#    include "route_utilization.h"
 #    include "place_macro.h"
 #    include "buttons.h"
 
diff --git a/vpr/src/draw/search_bar.cpp b/vpr/src/draw/search_bar.cpp
index 590e4981d61..bd7e160d4ad 100644
--- a/vpr/src/draw/search_bar.cpp
+++ b/vpr/src/draw/search_bar.cpp
@@ -59,7 +59,7 @@
 #    endif
 
 #    include "rr_graph.h"
-#    include "route_util.h"
+#    include "route_utilization.h"
 #    include "place_macro.h"
 
 extern std::string rr_highlight_message;
diff --git a/vpr/src/place/place_timing_update.h b/vpr/src/place/place_timing_update.h
index 4ff180002bf..67fca81b3ee 100644
--- a/vpr/src/place/place_timing_update.h
+++ b/vpr/src/place/place_timing_update.h
@@ -7,6 +7,8 @@
 #include "timing_place.h"
 #include "place_util.h"
 
+#include "NetPinTimingInvalidator.h"
+
 ///@brief Initialize the timing information and structures in the placer.
 void initialize_timing_info(const PlaceCritParams& crit_params,
                             const PlaceDelayModel* delay_model,
diff --git a/vpr/src/place/timing_place_lookup.cpp b/vpr/src/place/timing_place_lookup.cpp
index fba8b1e9c46..543376201be 100644
--- a/vpr/src/place/timing_place_lookup.cpp
+++ b/vpr/src/place/timing_place_lookup.cpp
@@ -20,7 +20,7 @@
 #include "globals.h"
 #include "place_and_route.h"
 #include "route_common.h"
-#include "route_timing.h"
+#include "route_net.h"
 #include "route_export.h"
 #include "rr_graph.h"
 #include "timing_place_lookup.h"
diff --git a/vpr/src/route/ParallelNetlistRouter.h b/vpr/src/route/ParallelNetlistRouter.h
new file mode 100644
index 00000000000..e562da15627
--- /dev/null
+++ b/vpr/src/route/ParallelNetlistRouter.h
@@ -0,0 +1,93 @@
+#pragma once
+
+/** @file Parallel case for NetlistRouter. Builds a \ref PartitionTree from the
+ * netlist according to net bounding boxes. Tree nodes are then routed in parallel
+ * using tbb::task_group. Each task routes the nets inside a node serially and then adds
+ * its child nodes to the task queue. This approach is serially equivalent & deterministic,
+ * but it can reduce QoR in congested cases [0].
+ *
+ * Note that the parallel router does not support graphical router breakpoints.
+ *
+ * [0]: F. Koşar, "A net-decomposing parallel FPGA router", MS thesis, UofT ECE, 2023 */
+#include "netlist_routers.h"
+
+#include <tbb/task_group.h>
+
+/** Parallel impl for NetlistRouter.
+ * Holds enough context members to glue together ConnectionRouter and net routing functions,
+ * such as \ref route_net. Keeps the members in thread-local storage where needed,
+ * i.e. ConnectionRouters and RouteIterResults-es.
+ * See \ref route_net. */
+template<typename HeapType>
+class ParallelNetlistRouter : public NetlistRouter {
+  public:
+    ParallelNetlistRouter(
+        const Netlist<>& net_list,
+        const RouterLookahead* router_lookahead,
+        const t_router_opts& router_opts,
+        CBRR& connections_inf,
+        NetPinsMatrix<float>& net_delay,
+        const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
+        std::shared_ptr<SetupHoldTimingInfo> timing_info,
+        NetPinTimingInvalidator* pin_timing_invalidator,
+        route_budgets& budgeting_inf,
+        const RoutingPredictor& routing_predictor,
+        const vtr::vector<ParentNetId, std::vector<std::unordered_map<RRNodeId, int>>>& choking_spots,
+        bool is_flat)
+        : _routers_th(_make_router(router_lookahead, is_flat))
+        , _net_list(net_list)
+        , _router_opts(router_opts)
+        , _connections_inf(connections_inf)
+        , _net_delay(net_delay)
+        , _netlist_pin_lookup(netlist_pin_lookup)
+        , _timing_info(timing_info)
+        , _pin_timing_invalidator(pin_timing_invalidator)
+        , _budgeting_inf(budgeting_inf)
+        , _routing_predictor(routing_predictor)
+        , _choking_spots(choking_spots)
+        , _is_flat(is_flat) {}
+    ~ParallelNetlistRouter() {}
+
+    /** Run a single iteration of netlist routing for this->_net_list. This usually means calling
+     * \ref route_net for each net, which will handle other global updates.
+     * \return RouteIterResults for this iteration. */
+    RouteIterResults route_netlist(int itry, float pres_fac, float worst_neg_slack);
+    void set_rcv_enabled(bool x);
+    void set_timing_info(std::shared_ptr<SetupHoldTimingInfo> timing_info);
+
+  private:
+    /** A single task to route nets inside a PartitionTree node and add tasks for its child nodes to task group \p g. */
+    void route_partition_tree_node(tbb::task_group& g, PartitionTreeNode& node, int itry, float pres_fac, float worst_neg_slack);
+
+    ConnectionRouter<HeapType> _make_router(const RouterLookahead* router_lookahead, bool is_flat) {
+        auto& device_ctx = g_vpr_ctx.device();
+        auto& route_ctx = g_vpr_ctx.mutable_routing();
+
+        return ConnectionRouter<HeapType>(
+            device_ctx.grid,
+            *router_lookahead,
+            device_ctx.rr_graph.rr_nodes(),
+            &device_ctx.rr_graph,
+            device_ctx.rr_rc_data,
+            device_ctx.rr_graph.rr_switch(),
+            route_ctx.rr_node_route_inf,
+            is_flat);
+    }
+
+    /* Context fields */
+    tbb::enumerable_thread_specific<ConnectionRouter<HeapType>> _routers_th;
+    const Netlist<>& _net_list;
+    const t_router_opts& _router_opts;
+    CBRR& _connections_inf;
+    tbb::enumerable_thread_specific<RouteIterResults> _results_th;
+    NetPinsMatrix<float>& _net_delay;
+    const ClusteredPinAtomPinsLookup& _netlist_pin_lookup;
+    std::shared_ptr<SetupHoldTimingInfo> _timing_info;
+    NetPinTimingInvalidator* _pin_timing_invalidator;
+    route_budgets& _budgeting_inf;
+    const RoutingPredictor& _routing_predictor;
+    const vtr::vector<ParentNetId, std::vector<std::unordered_map<RRNodeId, int>>>& _choking_spots;
+    bool _is_flat;
+};
+
+#include "ParallelNetlistRouter.tpp"
diff --git a/vpr/src/route/ParallelNetlistRouter.tpp b/vpr/src/route/ParallelNetlistRouter.tpp
new file mode 100644
index 00000000000..3c73b784b6e
--- /dev/null
+++ b/vpr/src/route/ParallelNetlistRouter.tpp
@@ -0,0 +1,104 @@
+#pragma once
+
+/** @file Impls for ParallelNetlistRouter */
+
+#include "netlist_routers.h"
+#include "route_net.h"
+#include "vtr_time.h"
+
+template<typename HeapType>
+inline RouteIterResults ParallelNetlistRouter<HeapType>::route_netlist(int itry, float pres_fac, float worst_neg_slack) {
+    /* Reset the results of every thread-local slot that exists so far */
+    for (auto& results : _results_th) {
+        results = RouteIterResults();
+    }
+
+    /* Organize netlist into a PartitionTree.
+     * Nets in a given level of nodes are guaranteed to not have any overlapping bounding boxes, so they can be routed in parallel. */
+    PartitionTree tree(_net_list);
+
+    /* Route the root node on the calling thread; when it is finished it adds tasks for its child nodes to g. Wait until the entire tree gets routed. */
+    tbb::task_group g;
+    route_partition_tree_node(g, tree.root(), itry, pres_fac, worst_neg_slack);
+    g.wait();
+
+    /* Combine results from threads: stats are merged, rerouted nets are concatenated and is_routable is AND-ed across threads */
+    RouteIterResults out;
+    for (auto& results : _results_th) {
+        out.stats.combine(results.stats);
+        out.rerouted_nets.insert(out.rerouted_nets.end(), results.rerouted_nets.begin(), results.rerouted_nets.end());
+        out.is_routable &= results.is_routable;
+    }
+    return out;
+}
+
+template<typename HeapType>
+void ParallelNetlistRouter<HeapType>::route_partition_tree_node(tbb::task_group& g, PartitionTreeNode& node, int itry, float pres_fac, float worst_neg_slack) {
+    auto& route_ctx = g_vpr_ctx.mutable_routing();
+
+    /* Sort so net with most sinks is routed first. */
+    std::sort(node.nets.begin(), node.nets.end(), [&](ParentNetId id1, ParentNetId id2) -> bool {
+        return _net_list.net_sinks(id1).size() > _net_list.net_sinks(id2).size();
+    });
+
+    vtr::Timer t;
+    for (auto net_id : node.nets) {
+        auto flags = route_net(
+            _routers_th.local(),
+            _net_list,
+            net_id,
+            itry,
+            pres_fac,
+            _router_opts,
+            _connections_inf,
+            _results_th.local().stats,
+            _net_delay,
+            _netlist_pin_lookup,
+            _timing_info.get(),
+            _pin_timing_invalidator,
+            _budgeting_inf,
+            worst_neg_slack,
+            _routing_predictor,
+            _choking_spots[net_id],
+            _is_flat);
+
+        if (!flags.success && !flags.retry_with_full_bb) {
+            /* Disconnected RRG and ConnectionRouter doesn't think growing the BB will work */
+            _results_th.local().is_routable = false;
+            return;
+        }
+        if (flags.retry_with_full_bb) {
+            /* ConnectionRouter thinks we should grow the BB. Do that and leave this net unrouted for now */
+            route_ctx.route_bb[net_id] = full_device_bb();
+            continue;
+        }
+        if (flags.was_rerouted) {
+            _results_th.local().rerouted_nets.push_back(net_id);
+        }
+    }
+    PartitionTreeDebug::log("Node with " + std::to_string(node.nets.size()) + " nets routed in " + std::to_string(t.elapsed_sec()) + " s");
+
+    /* This node is finished: add left & right branches to the task queue. The tasks can run after this call has returned, so the by-value parameters (itry, pres_fac, worst_neg_slack) are copied into the closures instead of being captured by reference; g and node refer to objects that route_netlist keeps alive until g.wait() returns. */
+    if (node.left && node.right) {
+        g.run([this, &g, &node, itry, pres_fac, worst_neg_slack]() {
+            route_partition_tree_node(g, *node.left, itry, pres_fac, worst_neg_slack);
+        });
+        g.run([this, &g, &node, itry, pres_fac, worst_neg_slack]() {
+            route_partition_tree_node(g, *node.right, itry, pres_fac, worst_neg_slack);
+        });
+    } else {
+        VTR_ASSERT(!node.left && !node.right); // there shouldn't be a node with a single branch
+    }
+}
+
+template<typename HeapType>
+void ParallelNetlistRouter<HeapType>::set_rcv_enabled(bool x) {
+    for (auto& router : _routers_th) { /* NOTE(review): visits only the thread-local routers that exist right now; confirm that routers created later by other threads also pick up this setting */
+        router.set_rcv_enabled(x);
+    }
+}
+
+template<typename HeapType>
+void ParallelNetlistRouter<HeapType>::set_timing_info(std::shared_ptr<SetupHoldTimingInfo> timing_info) {
+    _timing_info = timing_info;
+}
diff --git a/vpr/src/route/SerialNetlistRouter.h b/vpr/src/route/SerialNetlistRouter.h
new file mode 100644
index 00000000000..5bb59df1998
--- /dev/null
+++ b/vpr/src/route/SerialNetlistRouter.h
@@ -0,0 +1,71 @@
+#pragma once
+
+/** @file Serial case for \ref NetlistRouter: just loop through nets */
+
+#include "netlist_routers.h"
+
+template<typename HeapType>
+class SerialNetlistRouter : public NetlistRouter {
+  public:
+    SerialNetlistRouter(
+        const Netlist<>& net_list,
+        const RouterLookahead* router_lookahead,
+        const t_router_opts& router_opts,
+        CBRR& connections_inf,
+        NetPinsMatrix<float>& net_delay,
+        const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
+        std::shared_ptr<SetupHoldTimingInfo> timing_info,
+        NetPinTimingInvalidator* pin_timing_invalidator,
+        route_budgets& budgeting_inf,
+        const RoutingPredictor& routing_predictor,
+        const vtr::vector<ParentNetId, std::vector<std::unordered_map<RRNodeId, int>>>& choking_spots,
+        bool is_flat)
+        : _router(_make_router(router_lookahead, is_flat))
+        , _net_list(net_list)
+        , _router_opts(router_opts)
+        , _connections_inf(connections_inf)
+        , _net_delay(net_delay)
+        , _netlist_pin_lookup(netlist_pin_lookup)
+        , _timing_info(timing_info)
+        , _pin_timing_invalidator(pin_timing_invalidator)
+        , _budgeting_inf(budgeting_inf)
+        , _routing_predictor(routing_predictor)
+        , _choking_spots(choking_spots)
+        , _is_flat(is_flat) {}
+    ~SerialNetlistRouter() {}
+
+    RouteIterResults route_netlist(int itry, float pres_fac, float worst_neg_slack);
+    void set_rcv_enabled(bool x);
+    void set_timing_info(std::shared_ptr<SetupHoldTimingInfo> timing_info);
+
+  private:
+    ConnectionRouter<HeapType> _make_router(const RouterLookahead* router_lookahead, bool is_flat) {
+        auto& device_ctx = g_vpr_ctx.device();
+        auto& route_ctx = g_vpr_ctx.mutable_routing();
+
+        return ConnectionRouter<HeapType>(
+            device_ctx.grid,
+            *router_lookahead,
+            device_ctx.rr_graph.rr_nodes(),
+            &device_ctx.rr_graph,
+            device_ctx.rr_rc_data,
+            device_ctx.rr_graph.rr_switch(),
+            route_ctx.rr_node_route_inf,
+            is_flat);
+    }
+    /* Context fields */
+    ConnectionRouter<HeapType> _router;
+    const Netlist<>& _net_list;
+    const t_router_opts& _router_opts;
+    CBRR& _connections_inf;
+    NetPinsMatrix<float>& _net_delay;
+    const ClusteredPinAtomPinsLookup& _netlist_pin_lookup;
+    std::shared_ptr<SetupHoldTimingInfo> _timing_info;
+    NetPinTimingInvalidator* _pin_timing_invalidator;
+    route_budgets& _budgeting_inf;
+    const RoutingPredictor& _routing_predictor;
+    const vtr::vector<ParentNetId, std::vector<std::unordered_map<RRNodeId, int>>>& _choking_spots;
+    bool _is_flat;
+};
+
+#include "SerialNetlistRouter.tpp"
diff --git a/vpr/src/route/SerialNetlistRouter.tpp b/vpr/src/route/SerialNetlistRouter.tpp
new file mode 100644
index 00000000000..714426a1920
--- /dev/null
+++ b/vpr/src/route/SerialNetlistRouter.tpp
@@ -0,0 +1,72 @@
+#pragma once
+
+/** @file Templated implementations for SerialNetlistRouter */
+
+#include "SerialNetlistRouter.h"
+#include "route_net.h"
+
+template<typename HeapType>
+inline RouteIterResults SerialNetlistRouter<HeapType>::route_netlist(int itry, float pres_fac, float worst_neg_slack) {
+    auto& route_ctx = g_vpr_ctx.mutable_routing();
+    RouteIterResults out;
+
+    /* Work on a sorted copy of the net ids so that the net with most sinks is routed first */
+    auto sorted_nets = std::vector<ParentNetId>(_net_list.nets().begin(), _net_list.nets().end());
+    std::sort(sorted_nets.begin(), sorted_nets.end(), [&](ParentNetId id1, ParentNetId id2) -> bool {
+        return _net_list.net_sinks(id1).size() > _net_list.net_sinks(id2).size();
+    });
+
+    for (size_t inet = 0; inet < sorted_nets.size(); inet++) {
+        ParentNetId net_id = sorted_nets[inet];
+        NetResultFlags flags = route_net(
+            _router,
+            _net_list,
+            net_id,
+            itry,
+            pres_fac,
+            _router_opts,
+            _connections_inf,
+            out.stats,
+            _net_delay,
+            _netlist_pin_lookup,
+            _timing_info.get(),
+            _pin_timing_invalidator,
+            _budgeting_inf,
+            worst_neg_slack,
+            _routing_predictor,
+            _choking_spots[net_id],
+            _is_flat);
+
+        if (!flags.success && !flags.retry_with_full_bb) {
+            /* Disconnected RRG and ConnectionRouter doesn't think growing the BB will work: give up on the whole iteration, leaving the remaining nets untouched */
+            out.is_routable = false;
+            return out;
+        }
+
+        if (flags.retry_with_full_bb) {
+            /* Grow the BB and retry this net right away: inet-- cancels the loop's inet++ so the same index is visited again (for inet == 0 the unsigned value wraps around and the increment wraps it back to 0). */
+            route_ctx.route_bb[net_id] = full_device_bb();
+            inet--;
+            continue;
+        }
+
+        if (flags.was_rerouted) {
+            out.rerouted_nets.push_back(net_id);
+#ifndef NO_GRAPHICS
+            update_router_info_and_check_bp(BP_NET_ID, size_t(net_id));
+#endif
+        }
+    }
+
+    return out;
+}
+
+template<typename HeapType>
+void SerialNetlistRouter<HeapType>::set_rcv_enabled(bool x) {
+    _router.set_rcv_enabled(x);
+}
+
+template<typename HeapType>
+void SerialNetlistRouter<HeapType>::set_timing_info(std::shared_ptr<SetupHoldTimingInfo> timing_info) {
+    _timing_info = timing_info;
+}
diff --git a/vpr/src/route/channel_stats.cpp b/vpr/src/route/channel_stats.cpp
index e5a2f1703e2..065a6e5b7a0 100644
--- a/vpr/src/route/channel_stats.cpp
+++ b/vpr/src/route/channel_stats.cpp
@@ -1,5 +1,5 @@
 #include "channel_stats.h"
-#include "route_util.h"
+#include "route_utilization.h"
 #include "histogram.h"
 #include "globals.h"
 
diff --git a/vpr/src/route/connection_based_routing.cpp b/vpr/src/route/connection_based_routing.cpp
index eaa4c85ff26..a8f3d1b0a86 100644
--- a/vpr/src/route/connection_based_routing.cpp
+++ b/vpr/src/route/connection_based_routing.cpp
@@ -1,6 +1,5 @@
 #include "connection_based_routing.h"
 
-#include "route_timing.h"
 #include "route_profiling.h"
 
 // incremental rerouting resources class definitions
diff --git a/vpr/src/route/connection_router.cpp b/vpr/src/route/connection_router.cpp
index b99fb60b650..a883d611493 100644
--- a/vpr/src/route/connection_router.cpp
+++ b/vpr/src/route/connection_router.cpp
@@ -39,14 +39,13 @@ std::tuple<bool, bool, t_heap> ConnectionRouter<Heap>::timing_driven_route_conne
     const t_conn_cost_params cost_params,
     t_bb bounding_box,
     RouterStats& router_stats,
-    const ConnectionParameters& conn_params,
-    bool can_grow_bb) {
+    const ConnectionParameters& conn_params) {
     router_stats_ = &router_stats;
     conn_params_ = &conn_params;
 
     bool retry = false;
     t_heap* cheapest;
-    std::tie(retry, cheapest) = timing_driven_route_connection_common_setup(rt_root, sink_node, cost_params, bounding_box, can_grow_bb);
+    std::tie(retry, cheapest) = timing_driven_route_connection_common_setup(rt_root, sink_node, cost_params, bounding_box);
 
     if (cheapest != nullptr) {
         rcv_path_manager.update_route_tree_set(cheapest->path_data);
@@ -70,8 +69,7 @@ std::tuple<bool, t_heap*> ConnectionRouter<Heap>::timing_driven_route_connection
     const RouteTreeNode& rt_root,
     RRNodeId sink_node,
     const t_conn_cost_params cost_params,
-    t_bb bounding_box,
-    bool can_grow_bb) {
+    t_bb bounding_box) {
     //Re-add route nodes from the existing route tree to the heap.
     //They need to be repushed onto the heap since each node's cost is target specific.
 
@@ -100,60 +98,16 @@ std::tuple<bool, t_heap*> ConnectionRouter<Heap>::timing_driven_route_connection
         if (bounding_box.xmin == 0
             && bounding_box.ymin == 0
             && bounding_box.xmax == (int)(grid_.width() - 1)
-            && bounding_box.ymax == (int)(grid_.height() - 1)) {
+            && bounding_box.ymax == (int)(grid_.height() - 1)
+            && bounding_box.layer_min == 0
+            && bounding_box.layer_max == (int)(grid_.get_num_layers() - 1)) {
             VTR_LOG("%s\n", describe_unrouteable_connection(source_node, sink_node, is_flat_).c_str());
             return std::make_tuple(false, nullptr);
         }
 
-        // If we cannot grow the bounding box, leave unrouted and bubble up a signal
-        // to retry this net with a full-device bounding box. If we are already at full device extents,
-        // just fail
-        if (!can_grow_bb) {
-            VTR_LOG_WARN("No routing path for connection to sink_rr %d, leaving unrouted to retry on next iteration\n", sink_node);
-            return std::make_tuple(true, nullptr);
-        }
-
-        // Otherwise, try again with full-device bounding box.
-        //
-        // Note that the additional run-time overhead of re-trying only occurs
-        // when we were otherwise going to give up -- the typical case (route
-        // found with the bounding box) remains fast and never re-tries .
-        VTR_LOG_WARN("No routing path for connection to sink_rr %d, retrying with full device bounding box\n", sink_node);
-
-        t_bb full_device_bounding_box;
-        full_device_bounding_box.xmin = 0;
-        full_device_bounding_box.ymin = 0;
-        full_device_bounding_box.xmax = grid_.width() - 1;
-        full_device_bounding_box.ymax = grid_.height() - 1;
-        full_device_bounding_box.layer_min = 0;
-        full_device_bounding_box.layer_max = grid_.get_num_layers() - 1;
-
-        //
-        //TODO: potential future optimization
-        //      We have already explored the RR nodes accessible within the regular
-        //      BB (which are stored in modified_rr_node_inf), and so already know
-        //      their cost from the source. Instead of re-starting the path search
-        //      from scratch (i.e. from the previous route tree as we do below), we
-        //      could just re-add all the explored nodes to the heap and continue
-        //      expanding.
-        //
-
-        //Reset any previously recorded node costs so that when we call
-        //add_route_tree_to_heap() the nodes in the route tree actually
-        //make it back into the heap.
-        reset_path_costs();
-        modified_rr_node_inf_.clear();
-        heap_.empty_heap();
-
-        //Re-initialize the heap since it was emptied by the previous call to
-        //timing_driven_route_connection_from_heap()
-        add_route_tree_to_heap(rt_root, sink_node, cost_params, false);
-        heap_.build_heap(); // via sifting down everything
-
-        //Try finding the path again with the relaxed bounding box
-        cheapest = timing_driven_route_connection_from_heap(sink_node,
-                                                            cost_params,
-                                                            full_device_bounding_box);
+        // Otherwise, leave unrouted and bubble up a signal to retry this net with a full-device bounding box
+        VTR_LOG_WARN("No routing path for connection to sink_rr %d, leaving unrouted to retry later\n", sink_node);
+        return std::make_tuple(true, nullptr);
     }
 
     if (cheapest == nullptr) {
@@ -177,8 +131,7 @@ std::tuple<bool, bool, t_heap> ConnectionRouter<Heap>::timing_driven_route_conne
     t_bb net_bounding_box,
     const SpatialRouteTreeLookup& spatial_rt_lookup,
     RouterStats& router_stats,
-    const ConnectionParameters& conn_params,
-    bool can_grow_bb) {
+    const ConnectionParameters& conn_params) {
     router_stats_ = &router_stats;
     conn_params_ = &conn_params;
 
@@ -218,8 +171,7 @@ std::tuple<bool, bool, t_heap> ConnectionRouter<Heap>::timing_driven_route_conne
         std::tie(retry_with_full_bb, cheapest) = timing_driven_route_connection_common_setup(rt_root,
                                                                                              sink_node,
                                                                                              cost_params,
-                                                                                             net_bounding_box,
-                                                                                             can_grow_bb);
+                                                                                             net_bounding_box);
     }
 
     if (cheapest == nullptr) {
@@ -1003,19 +955,26 @@ void ConnectionRouter<Heap>::add_route_tree_node_to_heap(
     }
 }
 
-static t_bb adjust_highfanout_bounding_box(t_bb highfanout_bb) {
-    t_bb bb = highfanout_bb;
+/* Expand bb to cover inode's x/y extents and layer; x/y are clipped against net_bb, layers are not clipped here */
+inline void expand_highfanout_bounding_box(t_bb& bb, const t_bb& net_bb, RRNodeId inode, const RRGraphView* rr_graph) {
+    bb.xmin = std::max<int>(net_bb.xmin, std::min<int>(bb.xmin, rr_graph->node_xlow(inode)));
+    bb.ymin = std::max<int>(net_bb.ymin, std::min<int>(bb.ymin, rr_graph->node_ylow(inode)));
+    bb.xmax = std::min<int>(net_bb.xmax, std::max<int>(bb.xmax, rr_graph->node_xhigh(inode)));
+    bb.ymax = std::min<int>(net_bb.ymax, std::max<int>(bb.ymax, rr_graph->node_yhigh(inode)));
+    bb.layer_min = std::min<int>(bb.layer_min, rr_graph->node_layer(inode));
+    bb.layer_max = std::max<int>(bb.layer_max, rr_graph->node_layer(inode));
+}
 
+/* Pad bb by HIGH_FANOUT_BB_FAC in x/y, then clip it (x, y and layer range) so it never exceeds net_bb */
+inline void adjust_highfanout_bounding_box(t_bb& bb, const t_bb& net_bb) {
     constexpr int HIGH_FANOUT_BB_FAC = 3;
-    bb.xmin -= HIGH_FANOUT_BB_FAC;
-    bb.ymin -= HIGH_FANOUT_BB_FAC;
-    bb.xmax += HIGH_FANOUT_BB_FAC;
-    bb.ymax += HIGH_FANOUT_BB_FAC;
-
-    bb.layer_min = highfanout_bb.layer_min;
-    bb.layer_max = highfanout_bb.layer_max;
 
-    return bb;
+    bb.xmin = std::max<int>(net_bb.xmin, bb.xmin - HIGH_FANOUT_BB_FAC);
+    bb.ymin = std::max<int>(net_bb.ymin, bb.ymin - HIGH_FANOUT_BB_FAC);
+    bb.xmax = std::min<int>(net_bb.xmax, bb.xmax + HIGH_FANOUT_BB_FAC);
+    bb.ymax = std::min<int>(net_bb.ymax, bb.ymax + HIGH_FANOUT_BB_FAC);
+    bb.layer_min = std::max<int>(net_bb.layer_min, bb.layer_min); // layers are not padded, only clipped
+    bb.layer_max = std::min<int>(net_bb.layer_max, bb.layer_max); // upper bound comes from net_bb.layer_max
 }
 
 template<typename Heap>
@@ -1079,13 +1038,9 @@ t_bb ConnectionRouter<Heap>::add_high_fanout_route_tree_to_heap(
                 // Put the node onto the heap
                 add_route_tree_node_to_heap(rt_node, target_node, cost_params, true);
 
-                // Update Bounding Box
-                highfanout_bb.xmin = std::min<int>(highfanout_bb.xmin, rr_graph_->node_xlow(rr_node_to_add));
-                highfanout_bb.ymin = std::min<int>(highfanout_bb.ymin, rr_graph_->node_ylow(rr_node_to_add));
-                highfanout_bb.xmax = std::max<int>(highfanout_bb.xmax, rr_graph_->node_xhigh(rr_node_to_add));
-                highfanout_bb.ymax = std::max<int>(highfanout_bb.ymax, rr_graph_->node_yhigh(rr_node_to_add));
-                highfanout_bb.layer_min = std::min<int>(highfanout_bb.layer_min, rr_graph_->node_layer(rr_node_to_add));
-                highfanout_bb.layer_max = std::max<int>(highfanout_bb.layer_max, rr_graph_->node_layer(rr_node_to_add));
+                // Expand HF BB to include the node (clip by original BB)
+                expand_highfanout_bounding_box(highfanout_bb, net_bounding_box, rr_node_to_add, rr_graph_);
+
                 if (is_flat_) {
                     if (rr_graph_->node_type(rr_node_to_add) == CHANY || rr_graph_->node_type(rr_node_to_add) == CHANX) {
                         chan_nodes_added++;
@@ -1111,15 +1066,14 @@ t_bb ConnectionRouter<Heap>::add_high_fanout_route_tree_to_heap(
         if (done) break;
     }
 
-    t_bb bounding_box = net_bounding_box;
     if (nodes_added == 0) { //If the target bin, and it's surrounding bins were empty, just add the full route tree
         add_route_tree_to_heap(rt_root, target_node, cost_params, true);
+        return net_bounding_box;
     } else {
         //We found nearby routing, replace original bounding box to be localized around that routing
-        bounding_box = adjust_highfanout_bounding_box(highfanout_bb);
+        adjust_highfanout_bounding_box(highfanout_bb, net_bounding_box);
+        return highfanout_bb;
     }
-
-    return bounding_box;
 }
 
 static inline bool has_path_to_sink(const t_rr_graph_view& rr_nodes,
diff --git a/vpr/src/route/connection_router.h b/vpr/src/route/connection_router.h
index 093ab8fed83..b2806f41775 100644
--- a/vpr/src/route/connection_router.h
+++ b/vpr/src/route/connection_router.h
@@ -75,8 +75,7 @@ class ConnectionRouter : public ConnectionRouterInterface {
         const t_conn_cost_params cost_params,
         t_bb bounding_box,
         RouterStats& router_stats,
-        const ConnectionParameters& conn_params,
-        bool can_grow_bb) final;
+        const ConnectionParameters& conn_params) final;
 
     /** Finds a path from the route tree rooted at rt_root to sink_node for a
      * high fanout net.
@@ -95,8 +94,7 @@ class ConnectionRouter : public ConnectionRouterInterface {
         t_bb net_bounding_box,
         const SpatialRouteTreeLookup& spatial_rt_lookup,
         RouterStats& router_stats,
-        const ConnectionParameters& conn_params,
-        bool can_grow_bb) final;
+        const ConnectionParameters& conn_params) final;
 
     // Finds a path from the route tree rooted at rt_root to all sinks
     // available.
@@ -160,15 +158,13 @@ class ConnectionRouter : public ConnectionRouterInterface {
      * @param[in] sink_node Sink node ID to route to
      * @param[in] cost_params
      * @param[in] bounding_box Keep search confined to this bounding box
-     * @param[in] can_grow_bb Can this fn grow the given bounding box? 
      * @return bool Signal to retry this connection with a full-device bounding box,
      * @return t_heap* Heap element describing the path found. */
     std::tuple<bool, t_heap*> timing_driven_route_connection_common_setup(
         const RouteTreeNode& rt_root,
         RRNodeId sink_node,
         const t_conn_cost_params cost_params,
-        t_bb bounding_box,
-        bool can_grow_bb);
+        t_bb bounding_box);
 
     // Finds a path to sink_node, starting from the elements currently in the
     // heap.
diff --git a/vpr/src/route/connection_router_interface.h b/vpr/src/route/connection_router_interface.h
index 2180dbe76f3..d6b0baafab5 100644
--- a/vpr/src/route/connection_router_interface.h
+++ b/vpr/src/route/connection_router_interface.h
@@ -37,7 +37,7 @@ class ConnectionRouterInterface {
   public:
     virtual ~ConnectionRouterInterface() {}
 
-    // Clear's the modified list.  Should be called after reset_path_costs
+    // Clears the modified list.  Should be called after reset_path_costs
     // have been called.
     virtual void clear_modified_rr_node_info() = 0;
 
@@ -50,7 +50,7 @@ class ConnectionRouterInterface {
      *
      * Returns a tuple of:
      * bool: path exists? (hard failure, rr graph disconnected)
-     * bool: should retry with full bounding box? (only used in parallel routing)
+     * bool: should retry with full bounding box?
      * t_heap: heap element of cheapest path */
     virtual std::tuple<bool, bool, t_heap> timing_driven_route_connection_from_route_tree(
         const RouteTreeNode& rt_root,
@@ -58,8 +58,7 @@ class ConnectionRouterInterface {
         const t_conn_cost_params cost_params,
         t_bb bounding_box,
         RouterStats& router_stats,
-        const ConnectionParameters& conn_params,
-        bool can_grow_bb)
+        const ConnectionParameters& conn_params)
         = 0;
 
     /** Finds a path from the route tree rooted at rt_root to sink_node for a
@@ -70,7 +69,7 @@ class ConnectionRouterInterface {
      *
      * Returns a tuple of:
      * bool: path exists? (hard failure, rr graph disconnected)
-     * bool: should retry with full bounding box? (only used in parallel routing)
+     * bool: should retry with full bounding box?
      * t_heap: heap element of cheapest path */
     virtual std::tuple<bool, bool, t_heap> timing_driven_route_connection_from_route_tree_high_fanout(
         const RouteTreeNode& rt_root,
@@ -79,8 +78,7 @@ class ConnectionRouterInterface {
         t_bb bounding_box,
         const SpatialRouteTreeLookup& spatial_rt_lookup,
         RouterStats& router_stats,
-        const ConnectionParameters& conn_params,
-        bool can_grow_bb)
+        const ConnectionParameters& conn_params)
         = 0;
 
     // Finds a path from the route tree rooted at rt_root to all sinks
diff --git a/vpr/src/route/netlist_routers.h b/vpr/src/route/netlist_routers.h
new file mode 100644
index 00000000000..db51ff01b23
--- /dev/null
+++ b/vpr/src/route/netlist_routers.h
@@ -0,0 +1,168 @@
+#pragma once
+
+/** @file
+ * @brief Interface for a netlist router.
+ *
+ * A NetlistRouter manages the required bits of state to complete the netlist routing process,
+ * which requires finding a path for every connection in the netlist using a ConnectionRouter.
+ * This needs to be an interface because there may be different netlist routing schedules,
+ * e.g. parallel or net-decomposing routers.
+ *
+ * Includes derived classes of NetlistRouter and a function to provide the correct NetlistRouter
+ * for the given router options.
+ *
+ * NetlistRouter implementations are typically templated by HeapType, since the single
+ * implementation of ConnectionRouterInterface is currently templated by a heap type. A templated
+ * NetlistRouter-derived class is still a NetlistRouter, so this is transparent to users. */
+
+#include "NetPinTimingInvalidator.h"
+#include "binary_heap.h"
+#include "bucket.h"
+#include "clustered_netlist_utils.h"
+#include "connection_based_routing_fwd.h"
+#include "connection_router.h"
+#include "globals.h"
+#include "heap_type.h"
+#include "netlist_fwd.h"
+#include "partition_tree.h"
+#include "routing_predictor.h"
+#include "route_budgets.h"
+#include "route_utils.h"
+#include "router_stats.h"
+#include "timing_info.h"
+#include "vpr_net_pins_matrix.h"
+#include "vpr_types.h"
+
+/** Results for a single netlist routing run inside a routing iteration. */
+struct RouteIterResults {
+    /** True unless some connection is impossible to route due to a disconnected rr_graph */
+    bool is_routable = true;
+    /** IDs of the nets whose routing changed during this run */
+    std::vector<ParentNetId> rerouted_nets;
+    /** RouterStats for this iteration */
+    RouterStats stats;
+};
+
+/** Abstract interface for routing a netlist. Implementations take a big context and pass it around
+ * to net & sink routing fns. route_netlist only needs to call the functions in route_net.h/tpp:
+ * they handle the global bookkeeping. */
+class NetlistRouter {
+  public:
+    virtual ~NetlistRouter() {}
+
+    /** Run a single iteration of netlist routing for this->_net_list. This usually means calling
+     * route_net for each net, which will handle other global updates.
+     * \return RouteIterResults for this iteration. */
+    virtual RouteIterResults route_netlist(int itry, float pres_fac, float worst_neg_slack) = 0;
+
+    /** Set whether RCV is enabled (\p x) for each of the ConnectionRouters this NetlistRouter manages. */
+    virtual void set_rcv_enabled(bool x) = 0;
+
+    /** Set this NetlistRouter's timing_info ptr. We sometimes change timing_info
+     * throughout iterations, but not frequently enough to make it a public member. */
+    virtual void set_timing_info(std::shared_ptr<SetupHoldTimingInfo> timing_info) = 0;
+};
+
+/* Include the derived classes here to get the HeapType-templated impls */
+#include "SerialNetlistRouter.h"
+#ifdef VPR_USE_TBB
+#    include "ParallelNetlistRouter.h"
+#endif
+
+template<typename HeapType> // HeapType is forwarded to SerialNetlistRouter / ParallelNetlistRouter
+inline std::unique_ptr<NetlistRouter> make_netlist_router_with_heap(
+    const Netlist<>& net_list,
+    const RouterLookahead* router_lookahead,
+    const t_router_opts& router_opts,
+    CBRR& connections_inf,
+    NetPinsMatrix<float>& net_delay,
+    const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
+    std::shared_ptr<SetupHoldTimingInfo> timing_info,
+    NetPinTimingInvalidator* pin_timing_invalidator,
+    route_budgets& budgeting_inf,
+    const RoutingPredictor& routing_predictor,
+    const vtr::vector<ParentNetId, std::vector<std::unordered_map<RRNodeId, int>>>& choking_spots,
+    bool is_flat) {
+    if (router_opts.router_algorithm == e_router_algorithm::TIMING_DRIVEN) { // TIMING_DRIVEN selects the serial router
+        return std::make_unique<SerialNetlistRouter<HeapType>>(
+            net_list,
+            router_lookahead,
+            router_opts,
+            connections_inf,
+            net_delay,
+            netlist_pin_lookup,
+            timing_info,
+            pin_timing_invalidator,
+            budgeting_inf,
+            routing_predictor,
+            choking_spots,
+            is_flat);
+    } else if (router_opts.router_algorithm == e_router_algorithm::PARALLEL) { // PARALLEL needs a build with VPR_USE_TBB
+#ifdef VPR_USE_TBB
+        return std::make_unique<ParallelNetlistRouter<HeapType>>(
+            net_list,
+            router_lookahead,
+            router_opts,
+            connections_inf,
+            net_delay,
+            netlist_pin_lookup,
+            timing_info,
+            pin_timing_invalidator,
+            budgeting_inf,
+            routing_predictor,
+            choking_spots,
+            is_flat);
+#else
+        VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "VPR isn't compiled with TBB support required for parallel routing");
+#endif
+    } else {
+        VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Unknown router algorithm %d", router_opts.router_algorithm);
+    }
+}
+
+/** Make a NetlistRouter depending on router_algorithm and router_heap in \p router_opts. Unknown values raise a fatal error. */
+inline std::unique_ptr<NetlistRouter> make_netlist_router(
+    const Netlist<>& net_list,
+    const RouterLookahead* router_lookahead,
+    const t_router_opts& router_opts,
+    CBRR& connections_inf,
+    NetPinsMatrix<float>& net_delay,
+    const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
+    std::shared_ptr<SetupHoldTimingInfo> timing_info,
+    NetPinTimingInvalidator* pin_timing_invalidator,
+    route_budgets& budgeting_inf,
+    const RoutingPredictor& routing_predictor,
+    const vtr::vector<ParentNetId, std::vector<std::unordered_map<RRNodeId, int>>>& choking_spots,
+    bool is_flat) {
+    if (router_opts.router_heap == e_heap_type::BINARY_HEAP) {
+        return make_netlist_router_with_heap<BinaryHeap>(
+            net_list,
+            router_lookahead,
+            router_opts,
+            connections_inf,
+            net_delay,
+            netlist_pin_lookup,
+            timing_info,
+            pin_timing_invalidator,
+            budgeting_inf,
+            routing_predictor,
+            choking_spots,
+            is_flat);
+    } else if (router_opts.router_heap == e_heap_type::BUCKET_HEAP_APPROXIMATION) {
+        return make_netlist_router_with_heap<Bucket>(
+            net_list,
+            router_lookahead,
+            router_opts,
+            connections_inf,
+            net_delay,
+            netlist_pin_lookup,
+            timing_info,
+            pin_timing_invalidator,
+            budgeting_inf,
+            routing_predictor,
+            choking_spots,
+            is_flat);
+    } else {
+        VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Unknown heap type %d", router_opts.router_heap);
+    }
+}
diff --git a/vpr/src/route/partition_tree.h b/vpr/src/route/partition_tree.h
index 08eb668a88f..aef794f0f08 100644
--- a/vpr/src/route/partition_tree.h
+++ b/vpr/src/route/partition_tree.h
@@ -25,16 +25,6 @@ inline Side operator!(const Side& rhs) {
     return Side(!size_t(rhs));
 }
 
-/** Routing iteration results per thread. (for a subset of the input netlist) */
-struct RouteIterResults {
-    /** Are there any connections impossible to route due to a disconnected rr_graph? */
-    bool is_routable = true;
-    /** Net IDs for which timing_driven_route_net() actually got called */
-    std::vector<ParentNetId> rerouted_nets;
-    /** RouterStats collected from my subset of nets */
-    RouterStats stats;
-};
-
 /** Spatial partition tree for routing.
  *
  * This divides the netlist into a tree of regions, so that nets with non-overlapping
@@ -56,10 +46,6 @@ class PartitionTreeNode {
     std::unique_ptr<PartitionTreeNode> left = nullptr;
     /** Right subtree. */
     std::unique_ptr<PartitionTreeNode> right = nullptr;
-    /** Are there any connections impossible to route due to a disconnected rr_graph? */
-    bool is_routable = false;
-    /** Net IDs for which timing_driven_route_net() actually got called */
-    std::vector<ParentNetId> rerouted_nets;
     /* Axis of the cutline. */
     Axis cutline_axis = Axis::X;
     /* Position of the cutline. It's a float, because cutlines are considered to be "between" integral coordinates. */
@@ -92,11 +78,7 @@ class PartitionTree {
 /** Log PartitionTree-related messages. Can handle multiple threads. */
 class PartitionTreeDebug {
   public:
-#    ifdef VPR_USE_TBB
     static inline tbb::concurrent_vector<std::string> lines;
-#    else
-    static inline std::vector<std::string> lines;
-#    endif
     /** Add msg to the log buffer (with a thread ID header) */
     static inline void log(std::string msg) {
         auto thread_id = std::hash<std::thread::id>()(std::this_thread::get_id());
diff --git a/vpr/src/route/route.cpp b/vpr/src/route/route.cpp
new file mode 100644
index 00000000000..469a0455006
--- /dev/null
+++ b/vpr/src/route/route.cpp
@@ -0,0 +1,625 @@
+#include "concrete_timing_info.h"
+#include "connection_based_routing.h"
+#include "draw.h"
+#include "netlist_routers.h"
+#include "place_and_route.h"
+#include "read_route.h"
+#include "route.h"
+#include "route_common.h"
+#include "route_debug.h"
+#include "route_export.h"
+#include "route_profiling.h"
+#include "route_utils.h"
+#include "vtr_time.h"
+
+bool route(const Netlist<>& net_list,
+           int width_fac,
+           const t_router_opts& router_opts,
+           const t_analysis_opts& analysis_opts,
+           t_det_routing_arch* det_routing_arch,
+           std::vector<t_segment_inf>& segment_inf,
+           NetPinsMatrix<float>& net_delay,
+           std::shared_ptr<SetupHoldTimingInfo> timing_info,
+           std::shared_ptr<RoutingDelayCalculator> delay_calc,
+           t_chan_width_dist chan_width_dist,
+           t_direct_inf* directs,
+           int num_directs,
+           ScreenUpdatePriority first_iteration_priority,
+           bool is_flat) {
+    auto& device_ctx = g_vpr_ctx.mutable_device();
+    auto& cluster_ctx = g_vpr_ctx.clustering();
+    auto& atom_ctx = g_vpr_ctx.atom();
+    auto& route_ctx = g_vpr_ctx.mutable_routing();
+
+    if (net_list.nets().empty()) {
+        VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "No nets to route\n");
+    }
+
+    t_graph_type graph_type;
+    t_graph_type graph_directionality;
+    if (router_opts.route_type == GLOBAL) {
+        graph_type = GRAPH_GLOBAL;
+        graph_directionality = GRAPH_BIDIR;
+    } else {
+        graph_type = (det_routing_arch->directionality == BI_DIRECTIONAL ? GRAPH_BIDIR : GRAPH_UNIDIR);
+        graph_directionality = (det_routing_arch->directionality == BI_DIRECTIONAL ? GRAPH_BIDIR : GRAPH_UNIDIR);
+    }
+
+    /* Set the channel widths */
+    t_chan_width chan_width = init_chan(width_fac, chan_width_dist, graph_directionality);
+
+    /* Set up the routing resource graph defined by this FPGA architecture. */
+    int warning_count;
+
+    create_rr_graph(graph_type,
+                    device_ctx.physical_tile_types,
+                    device_ctx.grid,
+                    chan_width,
+                    det_routing_arch,
+                    segment_inf,
+                    router_opts,
+                    directs,
+                    num_directs,
+                    &warning_count,
+                    is_flat);
+
+    //Initialize drawing, now that we have an RR graph
+    init_draw_coords(width_fac);
+
+    /* Allocate and load additional rr_graph information needed only by the router. */
+    alloc_and_load_rr_node_route_structs();
+
+    init_route_structs(net_list,
+                       router_opts.bb_factor,
+                       router_opts.has_choking_spot,
+                       is_flat);
+
+    IntraLbPbPinLookup intra_lb_pb_pin_lookup(device_ctx.logical_block_types);
+    ClusteredPinAtomPinsLookup netlist_pin_lookup(cluster_ctx.clb_nlist, atom_ctx.nlist, intra_lb_pb_pin_lookup);
+
+    auto choking_spots = set_nets_choking_spots(net_list,
+                                                route_ctx.net_terminal_groups,
+                                                route_ctx.net_terminal_group_num,
+                                                router_opts.has_choking_spot,
+                                                is_flat);
+
+    //Initially, the router runs normally trying to reduce congestion while
+    //balancing other metrics (timing, wirelength, run-time etc.)
+    RouterCongestionMode router_congestion_mode = RouterCongestionMode::NORMAL;
+
+    //Initialize and properly size the lookups for profiling
+    profiling::profiling_initialization(get_max_pins_per_net(net_list));
+
+    /*
+     * Configure the routing predictor
+     */
+    RoutingPredictor routing_predictor;
+    float abort_iteration_threshold = std::numeric_limits<float>::infinity(); //Default no early abort
+    if (router_opts.routing_failure_predictor == SAFE) {
+        abort_iteration_threshold = ROUTING_PREDICTOR_ITERATION_ABORT_FACTOR_SAFE * router_opts.max_router_iterations;
+    } else if (router_opts.routing_failure_predictor == AGGRESSIVE) {
+        abort_iteration_threshold = ROUTING_PREDICTOR_ITERATION_ABORT_FACTOR_AGGRESSIVE * router_opts.max_router_iterations;
+    } else {
+        VTR_ASSERT_MSG(router_opts.routing_failure_predictor == OFF, "Unrecognized routing failure predictor setting");
+    }
+
+    float high_effort_congestion_mode_iteration_threshold = router_opts.congested_routing_iteration_threshold_frac * router_opts.max_router_iterations;
+
+    /* Set delay of ignored signals to zero. Non-ignored net delays are set by
+     * update_net_delays_from_route_tree() inside timing_driven_route_net(),
+     * which is only called for non-ignored nets. */
+    for (auto net_id : net_list.nets()) {
+        if (net_list.net_is_ignored(net_id)) {
+            for (unsigned int ipin = 1; ipin < net_list.net_pins(net_id).size(); ++ipin) {
+                net_delay[net_id][ipin] = 0.;
+            }
+        }
+    }
+
+    CBRR connections_inf{net_list, route_ctx.net_rr_terminals, is_flat};
+
+    route_budgets budgeting_inf(net_list, is_flat);
+
+    // This needs to be called before filling intra-cluster lookahead maps to ensure that the intra-cluster lookahead maps are initialized.
+    const RouterLookahead* router_lookahead = get_cached_router_lookahead(*det_routing_arch,
+                                                                          router_opts.lookahead_type,
+                                                                          router_opts.write_router_lookahead,
+                                                                          router_opts.read_router_lookahead,
+                                                                          segment_inf,
+                                                                          is_flat);
+
+    if (is_flat) {
+        // If is_flat is true, the router lookahead maps related to intra-cluster resources should be initialized since
+        // they haven't been initialized when the map related to global resources was initialized.
+        auto cache_key = route_ctx.router_lookahead_cache_key_;
+        std::unique_ptr<RouterLookahead> mut_router_lookahead(route_ctx.cached_router_lookahead_.release());
+        VTR_ASSERT(mut_router_lookahead);
+        route_ctx.cached_router_lookahead_.clear();
+        if (!router_opts.read_intra_cluster_router_lookahead.empty()) {
+            mut_router_lookahead->read_intra_cluster(router_opts.read_intra_cluster_router_lookahead);
+        } else {
+            mut_router_lookahead->compute_intra_tile();
+        }
+        route_ctx.cached_router_lookahead_.set(cache_key, std::move(mut_router_lookahead));
+        router_lookahead = get_cached_router_lookahead(*det_routing_arch,
+                                                       router_opts.lookahead_type,
+                                                       router_opts.write_router_lookahead,
+                                                       router_opts.read_router_lookahead,
+                                                       segment_inf,
+                                                       is_flat);
+        if (!router_opts.write_intra_cluster_router_lookahead.empty()) {
+            router_lookahead->write_intra_cluster(router_opts.write_intra_cluster_router_lookahead);
+        }
+    }
+
+    VTR_ASSERT(router_lookahead != nullptr);
+
+    /* Routing parameters */
+    float pres_fac = update_draw_pres_fac(router_opts.first_iter_pres_fac); /* Typically 0 -> ignore cong. */
+    int bb_fac = router_opts.bb_factor;
+
+    /* When routing conflicts are detected the bounding boxes are scaled
+     * by BB_SCALE_FACTOR every BB_SCALE_ITER_COUNT iterations */
+    constexpr float BB_SCALE_FACTOR = 2;
+    constexpr int BB_SCALE_ITER_COUNT = 5;
+
+    size_t available_wirelength = calculate_wirelength_available();
+
+    /* Routing status and metrics */
+    bool success = false;
+    WirelengthInfo wirelength_info;
+    OveruseInfo overuse_info(device_ctx.rr_graph.num_nodes());
+    tatum::TimingPathInfo critical_path;
+    int itry; //Routing iteration number
+    int itry_conflicted_mode = 0;
+
+    /* Best result so far */
+    vtr::vector<ParentNetId, vtr::optional<RouteTree>> best_routing;
+    t_clb_opins_used best_clb_opins_used_locally;
+    RoutingMetrics best_routing_metrics;
+    int legal_convergence_count = 0;
+
+    /* Get initial criticalities from the lookahead */
+    if (router_opts.with_timing_analysis && router_opts.initial_timing == e_router_initial_timing::LOOKAHEAD) {
+        vtr::ScopedStartFinishTimer init_timing_timer("Initializing router criticalities");
+        //Estimate initial connection delays from the router lookahead
+        init_net_delay_from_lookahead(*router_lookahead,
+                                      net_list,
+                                      route_ctx.net_rr_terminals,
+                                      net_delay,
+                                      device_ctx.rr_graph,
+                                      is_flat);
+
+        //Run STA to get estimated criticalities
+        timing_info->update();
+        VTR_LOG("Initial Net Connection Criticality Histogram:\n");
+        print_router_criticality_histogram(net_list, *timing_info, netlist_pin_lookup, is_flat);
+    }
+
+    std::unique_ptr<NetPinTimingInvalidator> pin_timing_invalidator;
+    pin_timing_invalidator = make_net_pin_timing_invalidator(
+        router_opts.timing_update_type,
+        net_list,
+        netlist_pin_lookup,
+        atom_ctx.nlist,
+        atom_ctx.lookup,
+        *timing_info->timing_graph(),
+        is_flat);
+
+    std::unique_ptr<NetlistRouter> netlist_router = make_netlist_router(
+        net_list,
+        router_lookahead,
+        router_opts,
+        connections_inf,
+        net_delay,
+        netlist_pin_lookup,
+        timing_info,
+        pin_timing_invalidator.get(),
+        budgeting_inf,
+        routing_predictor,
+        choking_spots,
+        is_flat);
+
+    RouterStats router_stats;
+    float prev_iter_cumm_time = 0;
+    vtr::Timer iteration_timer;
+    int num_net_bounding_boxes_updated = 0;
+    int itry_since_last_convergence = -1;
+
+    // This heap is used for reserve_locally_used_opins.
+    BinaryHeap small_heap;
+    small_heap.init_heap(device_ctx.grid);
+
+    // When RCV is enabled the router will not stop unless negative hold slack is 0
+    // In some cases this isn't doable, due to global nets or intracluster routing issues
+    // In these cases RCV will finish early if it goes RCV_FINISH_EARLY_COUNTDOWN iterations without detecting resolvable negative hold slack
+    // Increasing this will make the router fail occasionally, decreasing will sometimes not let all hold violations be resolved
+    constexpr int RCV_FINISH_EARLY_COUNTDOWN = 15;
+
+    int rcv_finished_count = RCV_FINISH_EARLY_COUNTDOWN;
+
+    print_route_status_header();
+    for (itry = 1; itry <= router_opts.max_router_iterations; ++itry) {
+        /* Reset "is_routed" and "is_fixed" flags to indicate nets not pre-routed (yet) */
+        for (auto net_id : net_list.nets()) {
+            route_ctx.net_status.set_is_routed(net_id, false);
+            route_ctx.net_status.set_is_fixed(net_id, false);
+        }
+
+        if (itry_since_last_convergence >= 0) {
+            ++itry_since_last_convergence;
+        }
+
+        // Calculate this once and pass it into net routing to check if should reroute for hold
+        float worst_negative_slack = 0;
+        if (budgeting_inf.if_set()) {
+            worst_negative_slack = timing_info->hold_total_negative_slack();
+        }
+
+        /* Initial criticalities: set to 1 on the first iter if the user asked for it */
+        if (router_opts.initial_timing == e_router_initial_timing::ALL_CRITICAL && itry == 1)
+            netlist_router->set_timing_info(make_constant_timing_info(1));
+        else
+            netlist_router->set_timing_info(timing_info);
+
+        /* Route each net */
+        RouteIterResults iter_results = netlist_router->route_netlist(itry, pres_fac, worst_negative_slack);
+
+        if (!iter_results.is_routable) { /* Disconnected RRG */
+            return false;
+        }
+
+        // Make sure any CLB OPINs used up by subblocks being hooked directly to them are reserved for that purpose
+        bool rip_up_local_opins = (itry == 1 ? false : true);
+        if (!is_flat) {
+            reserve_locally_used_opins(&small_heap, pres_fac,
+                                       router_opts.acc_fac, rip_up_local_opins, is_flat);
+        }
+
+        /*
+         * Calculate metrics for the current routing
+         */
+        bool routing_is_feasible = feasible_routing();
+        float est_success_iteration = routing_predictor.estimate_success_iteration();
+
+        //Update resource costs and overuse info
+        if (itry == 1) {
+            pathfinder_update_acc_cost_and_overuse_info(0., overuse_info); /* Acc_fac=0 for first iter. */
+        } else {
+            pathfinder_update_acc_cost_and_overuse_info(router_opts.acc_fac, overuse_info);
+        }
+
+        wirelength_info = calculate_wirelength_info(net_list, available_wirelength);
+        routing_predictor.add_iteration_overuse(itry, overuse_info.overused_nodes);
+
+        //Update timing based on the new routing
+        //Note that the net delays have already been updated by timing_driven_route_net
+        timing_info->update();
+        timing_info->set_warn_unconstrained(false); //Don't warn again about unconstrained nodes again during routing
+        pin_timing_invalidator->reset();
+
+        critical_path = timing_info->least_slack_critical_path();
+
+        VTR_ASSERT_SAFE(!router_opts.with_timing_analysis || check_net_delays(net_list, net_delay));
+
+        if (itry == 1 && router_opts.with_timing_analysis) {
+            generate_route_timing_reports(router_opts, analysis_opts, *timing_info, *delay_calc, is_flat);
+        }
+
+        float iter_cumm_time = iteration_timer.elapsed_sec();
+        float iter_elapsed_time = iter_cumm_time - prev_iter_cumm_time;
+
+        //Output progress
+        print_route_status(itry, iter_elapsed_time, pres_fac, num_net_bounding_boxes_updated, iter_results.stats, overuse_info, wirelength_info, timing_info, est_success_iteration);
+
+        prev_iter_cumm_time = iter_cumm_time;
+
+        //Update graphics
+        if (itry == 1) {
+            update_screen(first_iteration_priority, "Routing...", ROUTING, timing_info);
+        } else {
+            update_screen(ScreenUpdatePriority::MINOR, "Routing...", ROUTING, timing_info);
+        }
+
+        if (router_opts.save_routing_per_iteration) {
+            std::string filename = vtr::string_fmt("iteration_%03d.route", itry);
+            print_route(net_list, nullptr, filename.c_str(), is_flat);
+        }
+
+        //Update router stats (total)
+        router_stats.combine(iter_results.stats);
+
+        /*
+         * Are we finished?
+         */
+        if (is_iteration_complete(routing_is_feasible, router_opts, itry, timing_info, rcv_finished_count == 0)) {
+            auto& router_ctx = g_vpr_ctx.routing();
+
+            if (is_better_quality_routing(best_routing, best_routing_metrics, wirelength_info, timing_info)) {
+                //Save routing
+                best_routing = router_ctx.route_trees;
+                best_clb_opins_used_locally = router_ctx.clb_opins_used_locally;
+
+                success = true;
+
+                //Update best metrics
+                if (router_opts.with_timing_analysis) {
+                    check_net_delays(net_list, net_delay);
+                    best_routing_metrics.sTNS = timing_info->setup_total_negative_slack();
+                    best_routing_metrics.sWNS = timing_info->setup_worst_negative_slack();
+                    best_routing_metrics.hTNS = timing_info->hold_total_negative_slack();
+                    best_routing_metrics.hWNS = timing_info->hold_worst_negative_slack();
+                    best_routing_metrics.critical_path = critical_path;
+                }
+                best_routing_metrics.used_wirelength = wirelength_info.used_wirelength();
+            }
+
+            //Decrease pres_fac so that critical connections will take more direct routes
+            //Note that we use first_iter_pres_fac here (typically zero), and switch to
+            //use initial_pres_fac on the next iteration.
+            pres_fac = update_draw_pres_fac(router_opts.first_iter_pres_fac);
+
+            //Reduce timing tolerances to re-route more delay-suboptimal signals
+            connections_inf.set_connection_criticality_tolerance(0.7);
+            connections_inf.set_connection_delay_tolerance(1.01);
+
+            ++legal_convergence_count;
+            itry_since_last_convergence = 0;
+
+            VTR_ASSERT(success);
+        }
+
+        if (itry_since_last_convergence == 1) {
+            //We used first_iter_pres_fac when we started routing again
+            //after the first routing convergence. Since that is often zero,
+            //we want to set pres_fac to a reasonable (i.e. typically non-zero)
+            //value afterwards -- so it grows when multiplied by pres_fac_mult
+            pres_fac = update_draw_pres_fac(router_opts.initial_pres_fac);
+        }
+
+        //Have we converged the maximum number of times, did not make any changes, or does it seem
+        //unlikely additional convergences will improve QoR?
+        if (legal_convergence_count >= router_opts.max_convergence_count
+            || iter_results.stats.connections_routed == 0
+            || early_reconvergence_exit_heuristic(router_opts, itry_since_last_convergence, timing_info, best_routing_metrics)) {
+#ifndef NO_GRAPHICS
+            update_router_info_and_check_bp(BP_ROUTE_ITER, -1);
+#endif
+            break; //Done routing
+        }
+
+        /*
+         * Abort checks: Should we give-up because this routing problem is unlikely to converge to a legal routing?
+         */
+        if (itry == 1 && early_exit_heuristic(router_opts, wirelength_info)) {
+#ifndef NO_GRAPHICS
+            update_router_info_and_check_bp(BP_ROUTE_ITER, -1);
+#endif
+            //Abort
+            break;
+        }
+
+        //Estimate at what iteration we will converge to a legal routing
+        if (overuse_info.overused_nodes > ROUTING_PREDICTOR_MIN_ABSOLUTE_OVERUSE_THRESHOLD) {
+            //Only consider aborting if we have a significant number of overused resources
+
+            if (!std::isnan(est_success_iteration) && est_success_iteration > abort_iteration_threshold && router_opts.routing_budgets_algorithm != YOYO) {
+                VTR_LOG("Routing aborted, the predicted iteration for a successful route (%.1f) is too high.\n", est_success_iteration);
+#ifndef NO_GRAPHICS
+                update_router_info_and_check_bp(BP_ROUTE_ITER, -1);
+#endif
+                break; //Abort
+            }
+        }
+
+        if (itry == 1 && router_opts.exit_after_first_routing_iteration) {
+            VTR_LOG("Exiting after first routing iteration as requested\n");
+#ifndef NO_GRAPHICS
+            update_router_info_and_check_bp(BP_ROUTE_ITER, -1);
+#endif
+            break;
+        }
+
+        /*
+         * Prepare for the next iteration
+         */
+
+        if (router_opts.route_bb_update == e_route_bb_update::DYNAMIC) {
+            num_net_bounding_boxes_updated = dynamic_update_bounding_boxes(iter_results.rerouted_nets);
+        }
+
+        if (itry >= high_effort_congestion_mode_iteration_threshold) {
+            //We are approaching the maximum number of routing iterations,
+            //and still do not have a legal routing. Switch to a mode which
+            //focuses more on attempting to resolve routing conflicts.
+            router_congestion_mode = RouterCongestionMode::CONFLICTED;
+        }
+
+        //Update pres_fac
+        if (itry == 1) {
+            pres_fac = update_draw_pres_fac(router_opts.initial_pres_fac);
+        } else {
+            pres_fac *= router_opts.pres_fac_mult;
+
+            /* Avoid overflow for high iteration counts, even if acc_cost is big */
+            pres_fac = update_draw_pres_fac(std::min(pres_fac, static_cast<float>(HUGE_POSITIVE_FLOAT / 1e5)));
+
+            // Increase short path criticality if it's having a hard time resolving hold violations due to congestion
+            if (budgeting_inf.if_set()) {
+                bool rcv_finished = false;
+
+                /* This constant represents how much extra delay the budget increaser adds to the minimum and maximum delay budgets
+                 * Experimentally this value delivers fast hold slack resolution, while not overwhelming the router
+                 * Increasing this will make it resolve hold faster, but could result in lower circuit quality */
+                constexpr float budget_increase_factor = 300e-12;
+
+                if (itry > 5 && worst_negative_slack != 0)
+                    rcv_finished = budgeting_inf.increase_min_budgets_if_struggling(budget_increase_factor, timing_info, worst_negative_slack, netlist_pin_lookup);
+                if (rcv_finished)
+                    rcv_finished_count--;
+                else
+                    rcv_finished_count = RCV_FINISH_EARLY_COUNTDOWN;
+            }
+        }
+
+        if (router_congestion_mode == RouterCongestionMode::CONFLICTED) {
+            //The design appears to have routing conflicts which are difficult to resolve:
+            //  1) Don't re-route legal connections due to delay. This allows
+            //     the router to focus on the actual conflicts
+            //  2) Increase the net bounding boxes. This potentially allows
+            //     the router to route around otherwise congested regions
+            //     (at the cost of high run-time).
+
+            //Increase the size of the net bounding boxes to give the router more
+            //freedom to find alternate paths.
+            //
+            //In the case of routing conflicts there are multiple connections competing
+            //for the same resources which can not resolve the congestion themselves.
+            //In normal routing mode we try to keep the bounding boxes small to minimize
+            //run-time, but this can limit how far signals can detour (i.e. they can't
+            //route outside the bounding box), which can cause conflicts to oscillate back
+            //and forth without resolving.
+            //
+            //By scaling the bounding boxes here, we slowly increase the router's search
+            //space in hopes of it allowing signals to move further out of the way to
+            //alleviate the conflicts.
+            if (itry_conflicted_mode % BB_SCALE_ITER_COUNT == 0) {
+                //We scale the bounding boxes by BB_SCALE_FACTOR,
+                //every BB_SCALE_ITER_COUNT iterations. This ensures
+                //that we give the router some time (BB_SCALE_ITER_COUNT) to try to
+                //resolve/negotiate congestion at the new BB factor.
+                //
+                //Note that we increase the BB factor slowly to try and minimize
+                //the bounding box size (since larger bounding boxes slow the router down).
+                auto& grid = g_vpr_ctx.device().grid;
+                int max_grid_dim = std::max(grid.width(), grid.height());
+
+                //Scale by BB_SCALE_FACTOR but clip to grid size to avoid overflow
+                bb_fac = std::min<int>(max_grid_dim, bb_fac * BB_SCALE_FACTOR);
+
+                route_ctx.route_bb = load_route_bb(net_list, bb_fac);
+            }
+
+            ++itry_conflicted_mode;
+        }
+
+        if (router_opts.with_timing_analysis) {
+            if (itry == 1) {
+                // first iteration sets up the lower bound connection delays since only timing is optimized for
+                connections_inf.set_stable_critical_path_delay(critical_path.delay());
+                connections_inf.set_lower_bound_connection_delays(net_delay);
+
+                //load budgets using information from uncongested delay information
+                budgeting_inf.load_route_budgets(net_delay, timing_info, netlist_pin_lookup, router_opts);
+
+                if (router_opts.routing_budgets_algorithm == YOYO)
+                    netlist_router->set_rcv_enabled(true);
+            } else {
+                bool stable_routing_configuration = true;
+
+                /*
+                 * Determine if any connection needs to be forcibly re-routed due to timing
+                 */
+
+                //Yes, if explicitly enabled
+                bool should_ripup_for_delay = (router_opts.incr_reroute_delay_ripup == e_incr_reroute_delay_ripup::ON);
+
+                //Or, if things are not too congested
+                should_ripup_for_delay |= (router_opts.incr_reroute_delay_ripup == e_incr_reroute_delay_ripup::AUTO
+                                           && router_congestion_mode == RouterCongestionMode::NORMAL);
+
+                if (should_ripup_for_delay) {
+                    if (connections_inf.critical_path_delay_grew_significantly(critical_path.delay())) {
+                        // only need to forcibly reroute if critical path grew significantly
+                        stable_routing_configuration = connections_inf.forcibly_reroute_connections(router_opts.max_criticality,
+                                                                                                    timing_info,
+                                                                                                    netlist_pin_lookup,
+                                                                                                    net_delay);
+                    }
+                }
+
+                // not stable if any connection needs to be forcibly rerouted
+                if (stable_routing_configuration) {
+                    connections_inf.set_stable_critical_path_delay(critical_path.delay());
+                }
+            }
+        } else {
+            /* If timing analysis is not enabled, make sure that the criticalities and the
+             * net_delays stay as 0 so that wirelength can be optimized. */
+
+            for (auto net_id : net_list.nets()) {
+                for (unsigned int ipin = 1; ipin < net_list.net_pins(net_id).size(); ++ipin) {
+                    net_delay[net_id][ipin] = 0.;
+                }
+            }
+        }
+
+        if (router_opts.congestion_analysis) profiling::congestion_analysis();
+        if (router_opts.fanout_analysis) profiling::time_on_fanout_analysis();
+        // profiling::time_on_criticality_analysis();
+    }
+
+    if (success) {
+        VTR_LOG("Restoring best routing\n");
+
+        auto& router_ctx = g_vpr_ctx.mutable_routing();
+
+        /* Restore congestion from best route */
+        for (auto net_id : net_list.nets()) {
+            if (route_ctx.route_trees[net_id])
+                pathfinder_update_cost_from_route_tree(route_ctx.route_trees[net_id]->root(), -1);
+            if (best_routing[net_id])
+                pathfinder_update_cost_from_route_tree(best_routing[net_id]->root(), 1);
+        }
+        router_ctx.route_trees = best_routing;
+        router_ctx.clb_opins_used_locally = best_clb_opins_used_locally;
+
+        prune_unused_non_configurable_nets(connections_inf, net_list);
+
+        if (router_opts.with_timing_analysis) {
+            VTR_LOG("Critical path: %g ns\n", 1e9 * best_routing_metrics.critical_path.delay());
+        }
+
+        VTR_LOG("Successfully routed after %d routing iterations.\n", itry);
+    } else {
+        VTR_LOG("Routing failed.\n");
+
+        //If the routing fails, print the overused info
+        print_overused_nodes_status(router_opts, overuse_info);
+
+#ifdef VTR_ENABLE_DEBUG_LOGGING
+        if (f_router_debug)
+            print_invalid_routing_info(net_list, is_flat);
+#endif
+    }
+
+    if (router_opts.with_timing_analysis) {
+        VTR_LOG("Final Net Connection Criticality Histogram:\n");
+        print_router_criticality_histogram(net_list, *timing_info, netlist_pin_lookup, is_flat);
+    }
+
+    VTR_ASSERT(router_stats.heap_pushes >= router_stats.intra_cluster_node_pushes);
+    VTR_ASSERT(router_stats.heap_pops >= router_stats.intra_cluster_node_pops);
+    VTR_LOG(
+        "Router Stats: total_nets_routed: %zu total_connections_routed: %zu total_heap_pushes: %zu total_heap_pops: %zu "
+        "total_internal_heap_pushes: %zu total_internal_heap_pops: %zu total_external_heap_pushes: %zu total_external_heap_pops: %zu ",
+        router_stats.nets_routed, router_stats.connections_routed, router_stats.heap_pushes, router_stats.heap_pops,
+        router_stats.intra_cluster_node_pushes, router_stats.intra_cluster_node_pops,
+        router_stats.inter_cluster_node_pushes, router_stats.inter_cluster_node_pops);
+    for (int node_type_idx = 0; node_type_idx < t_rr_type::NUM_RR_TYPES; node_type_idx++) {
+        VTR_LOG("total_external_%s_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.inter_cluster_node_type_cnt_pushes[node_type_idx]);
+        VTR_LOG("total_external_%s_pops: %zu ", rr_node_typename[node_type_idx], router_stats.inter_cluster_node_type_cnt_pops[node_type_idx]);
+        VTR_LOG("total_internal_%s_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.intra_cluster_node_type_cnt_pushes[node_type_idx]);
+        VTR_LOG("total_internal_%s_pops: %zu ", rr_node_typename[node_type_idx], router_stats.intra_cluster_node_type_cnt_pops[node_type_idx]);
+        VTR_LOG("rt_node_%s_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.rt_node_pushes[node_type_idx]);
+        VTR_LOG("rt_node_%s_high_fanout_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.rt_node_high_fanout_pushes[node_type_idx]);
+        VTR_LOG("rt_node_%s_entire_tree_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.rt_node_entire_tree_pushes[node_type_idx]);
+    }
+
+    VTR_LOG("total_number_of_adding_all_rt: %zu ", router_stats.add_all_rt);
+    VTR_LOG("total_number_of_adding_high_fanout_rt: %zu ", router_stats.add_high_fanout_rt);
+    VTR_LOG("total_number_of_adding_all_rt_from_calling_high_fanout_rt: %zu ", router_stats.add_all_rt_from_high_fanout);
+    VTR_LOG("\n");
+
+    return success;
+}
diff --git a/vpr/src/route/route.h b/vpr/src/route/route.h
new file mode 100644
index 00000000000..cf6efb26311
--- /dev/null
+++ b/vpr/src/route/route.h
@@ -0,0 +1,33 @@
+#pragma once
+
+#include "RoutingDelayCalculator.h"
+#include "timing_info.h"
+#include "vpr_net_pins_matrix.h"
+#include "vpr_types.h"
+#include "netlist.h"
+
+/** Attempts a routing via the AIR algorithm [0].
+ *
+ * \p width_fac specifies the relative width of the channels, while the members of
+ * \p router_opts determine the values of the costs assigned to routing
+ * resource nodes, etc. \p det_routing_arch describes the detailed routing
+ * architecture (connection and switch boxes) of the FPGA; it is used
+ * only if a DETAILED routing has been selected.
+ *
+ * [0]: K. E. Murray, S. Zhong, and V. Betz, "AIR: A fast but lazy timing-driven FPGA router", in ASPDAC 2020
+ *
+ * \return true if routing succeeded, false otherwise (e.g. the RR graph is disconnected). */
+bool route(const Netlist<>& net_list,
+           int width_fac,
+           const t_router_opts& router_opts,
+           const t_analysis_opts& analysis_opts,
+           t_det_routing_arch* det_routing_arch,
+           std::vector<t_segment_inf>& segment_inf,
+           NetPinsMatrix<float>& net_delay,
+           std::shared_ptr<SetupHoldTimingInfo> timing_info,
+           std::shared_ptr<RoutingDelayCalculator> delay_calc,
+           t_chan_width_dist chan_width_dist,
+           t_direct_inf* directs,
+           int num_directs,
+           ScreenUpdatePriority first_iteration_priority,
+           bool is_flat);
diff --git a/vpr/src/route/route_budgets.cpp b/vpr/src/route/route_budgets.cpp
index ff14ec752ac..00dd14ae2bc 100644
--- a/vpr/src/route/route_budgets.cpp
+++ b/vpr/src/route/route_budgets.cpp
@@ -44,7 +44,6 @@
 
 #include "vtr_assert.h"
 #include "vtr_log.h"
-#include "route_timing.h"
 #include "tatum/report/TimingPathFwd.hpp"
 #include "tatum/base/TimingType.hpp"
 #include "concrete_timing_info.h"
diff --git a/vpr/src/route/route_common.cpp b/vpr/src/route/route_common.cpp
index 2a2f9cb787e..ccc179320fb 100644
--- a/vpr/src/route/route_common.cpp
+++ b/vpr/src/route/route_common.cpp
@@ -1,44 +1,12 @@
-#include <cstdio>
-#include <ctime>
-#include <cmath>
-#include <algorithm>
-#include <vector>
-#include <iostream>
-
-#include "route_tree.h"
-#include "vtr_assert.h"
-#include "vtr_util.h"
-#include "vtr_log.h"
-#include "vtr_digest.h"
-#include "vtr_memory.h"
-
-#include "vpr_types.h"
-#include "vpr_error.h"
-#include "vpr_utils.h"
-
-#include "stats.h"
-#include "globals.h"
-#include "route_export.h"
-#include "route_common.h"
-#include "route_parallel.h"
-#include "route_timing.h"
-#include "place_and_route.h"
-#include "rr_graph.h"
-#include "rr_graph2.h"
-#include "read_xml_arch_file.h"
-#include "draw.h"
-#include "echo_files.h"
-#include "atom_netlist_utils.h"
-
-#include "route_profiling.h"
+/** @file Impls for more router utils */
 
-#include "timing_util.h"
-#include "RoutingDelayCalculator.h"
-#include "timing_info.h"
-#include "tatum/echo_writer.hpp"
-#include "binary_heap.h"
-#include "bucket.h"
+#include "atom_netlist_utils.h"
+#include "connection_router_interface.h"
 #include "draw_global.h"
+#include "place_and_route.h"
+#include "route_common.h"
+#include "route_export.h"
+#include "rr_graph.h"
 
 /*  The numbering relation between the channels and clbs is:				*
  *																	        *
@@ -69,7 +37,7 @@
  *            chan_width_y[0]        chan_width_y[1]                        *
  *                                                                          */
 
-/******************** Subroutines local to route_common.c *******************/
+/******************** Subroutines local to route_common.cpp *******************/
 static vtr::vector<ParentNetId, std::vector<RRNodeId>> load_net_rr_terminals(const RRGraphView& rr_graph,
                                                                              const Netlist<>& net_list,
                                                                              bool is_flat);
@@ -107,7 +75,7 @@ void save_routing(vtr::vector<ParentNetId, vtr::optional<RouteTree>>& best_routi
     saved_clb_opins_used_locally = clb_opins_used_locally;
 }
 
-/* Empties route_ctx.current_rt and copies over best_routing onto it.
+/* Empties route_ctx.route_trees and copies over best_routing onto it.
  * Also restores the locally used opin data. */
 void restore_routing(vtr::vector<ParentNetId, vtr::optional<RouteTree>>& best_routing,
                      t_clb_opins_used& clb_opins_used_locally,
@@ -150,170 +118,7 @@ void get_serial_num(const Netlist<>& net_list) {
     VTR_LOG("Serial number (magic cookie) for the routing is: %d\n", serial_num);
 }
 
-void try_graph(int width_fac,
-               const t_router_opts& router_opts,
-               t_det_routing_arch* det_routing_arch,
-               std::vector<t_segment_inf>& segment_inf,
-               t_chan_width_dist chan_width_dist,
-               t_direct_inf* directs,
-               int num_directs,
-               bool is_flat) {
-    auto& device_ctx = g_vpr_ctx.mutable_device();
-
-    t_graph_type graph_type;
-    t_graph_type graph_directionality;
-    if (router_opts.route_type == GLOBAL) {
-        graph_type = GRAPH_GLOBAL;
-        graph_directionality = GRAPH_BIDIR;
-    } else {
-        graph_type = (det_routing_arch->directionality == BI_DIRECTIONAL ? GRAPH_BIDIR : GRAPH_UNIDIR);
-        graph_directionality = (det_routing_arch->directionality == BI_DIRECTIONAL ? GRAPH_BIDIR : GRAPH_UNIDIR);
-    }
-
-    /* Set the channel widths */
-    t_chan_width chan_width = init_chan(width_fac, chan_width_dist, graph_directionality);
-
-    /* Free any old routing graph, if one exists. */
-    free_rr_graph();
-
-    /* Set up the routing resource graph defined by this FPGA architecture. */
-    int warning_count;
-    create_rr_graph(graph_type,
-                    device_ctx.physical_tile_types,
-                    device_ctx.grid,
-                    chan_width,
-                    det_routing_arch,
-                    segment_inf,
-                    router_opts,
-                    directs, num_directs,
-                    &warning_count,
-                    is_flat);
-}
-
-bool try_route(const Netlist<>& net_list,
-               int width_fac,
-               const t_router_opts& router_opts,
-               const t_analysis_opts& analysis_opts,
-               t_det_routing_arch* det_routing_arch,
-               std::vector<t_segment_inf>& segment_inf,
-               NetPinsMatrix<float>& net_delay,
-               std::shared_ptr<SetupHoldTimingInfo> timing_info,
-               std::shared_ptr<RoutingDelayCalculator> delay_calc,
-               t_chan_width_dist chan_width_dist,
-               t_direct_inf* directs,
-               int num_directs,
-               ScreenUpdatePriority first_iteration_priority,
-               bool is_flat) {
-    /* Attempts a routing via an iterated maze router algorithm.  Width_fac *
-     * specifies the relative width of the channels, while the members of   *
-     * router_opts determine the value of the costs assigned to routing     *
-     * resource node, etc.  det_routing_arch describes the detailed routing *
-     * architecture (connection and switch boxes) of the FPGA; it is used   *
-     * only if a DETAILED routing has been selected.                        */
-
-    auto& device_ctx = g_vpr_ctx.mutable_device();
-    auto& cluster_ctx = g_vpr_ctx.clustering();
-
-    t_graph_type graph_type;
-    t_graph_type graph_directionality;
-    if (router_opts.route_type == GLOBAL) {
-        graph_type = GRAPH_GLOBAL;
-        graph_directionality = GRAPH_BIDIR;
-    } else {
-        graph_type = (det_routing_arch->directionality == BI_DIRECTIONAL ? GRAPH_BIDIR : GRAPH_UNIDIR);
-        graph_directionality = (det_routing_arch->directionality == BI_DIRECTIONAL ? GRAPH_BIDIR : GRAPH_UNIDIR);
-    }
-
-    /* Set the channel widths */
-    t_chan_width chan_width = init_chan(width_fac, chan_width_dist, graph_directionality);
-
-    /* Set up the routing resource graph defined by this FPGA architecture. */
-    int warning_count;
-
-    create_rr_graph(graph_type,
-                    device_ctx.physical_tile_types,
-                    device_ctx.grid,
-                    chan_width,
-                    det_routing_arch,
-                    segment_inf,
-                    router_opts,
-                    directs,
-                    num_directs,
-                    &warning_count,
-                    is_flat);
-
-    //Initialize drawing, now that we have an RR graph
-    init_draw_coords(width_fac);
-
-    bool success = true;
-
-    /* Allocate and load additional rr_graph information needed only by the router. */
-    alloc_and_load_rr_node_route_structs();
-
-    init_route_structs(net_list,
-                       router_opts.bb_factor,
-                       router_opts.has_choking_spot,
-                       is_flat);
-
-    if (net_list.nets().empty()) {
-        VTR_LOG_WARN("No nets to route\n");
-    }
-
-    if (router_opts.router_algorithm == PARALLEL) {
-        VTR_LOG("Confirming router algorithm: PARALLEL.\n");
-
-#ifdef VPR_USE_TBB
-        auto& atom_ctx = g_vpr_ctx.atom();
-
-        IntraLbPbPinLookup intra_lb_pb_pin_lookup(device_ctx.logical_block_types);
-        ClusteredPinAtomPinsLookup netlist_pin_lookup(cluster_ctx.clb_nlist, atom_ctx.nlist, intra_lb_pb_pin_lookup);
-
-        success = try_parallel_route(net_list,
-                                     *det_routing_arch,
-                                     router_opts,
-                                     analysis_opts,
-                                     segment_inf,
-                                     net_delay,
-                                     netlist_pin_lookup,
-                                     timing_info,
-                                     delay_calc,
-                                     first_iteration_priority,
-                                     is_flat);
-
-        profiling::time_on_fanout_analysis();
-#else
-        VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "VPR was not compiled with TBB support required for parallel routing\n");
-#endif
-
-    } else { /* TIMING_DRIVEN route */
-        VTR_LOG("Confirming router algorithm: TIMING_DRIVEN.\n");
-        auto& atom_ctx = g_vpr_ctx.atom();
-
-        IntraLbPbPinLookup intra_lb_pb_pin_lookup(device_ctx.logical_block_types);
-        ClusteredPinAtomPinsLookup netlist_pin_lookup(cluster_ctx.clb_nlist, atom_ctx.nlist, intra_lb_pb_pin_lookup);
-        success = try_timing_driven_route(net_list,
-                                          *det_routing_arch,
-                                          router_opts,
-                                          analysis_opts,
-                                          segment_inf,
-                                          net_delay,
-                                          netlist_pin_lookup,
-                                          timing_info,
-                                          delay_calc,
-                                          first_iteration_priority,
-                                          is_flat);
-
-        profiling::time_on_fanout_analysis();
-    }
-
-    return (success);
-}
-
 bool feasible_routing() {
-    /* This routine checks to see if this is a resource-feasible routing.      *
-     * That is, are all rr_node capacity limitations respected?  It assumes    *
-     * that the occupancy arrays are up to date when it is called.             */
-
     auto& device_ctx = g_vpr_ctx.device();
     const auto& rr_graph = device_ctx.rr_graph;
     auto& route_ctx = g_vpr_ctx.routing();
@@ -327,7 +132,7 @@ bool feasible_routing() {
     return (true);
 }
 
-//Returns all RR nodes in the current routing which are congested
+/** Returns all RR nodes in the current routing which are congested */
 std::vector<RRNodeId> collect_congested_rr_nodes() {
     auto& device_ctx = g_vpr_ctx.device();
     const auto& rr_graph = device_ctx.rr_graph;
@@ -364,10 +169,9 @@ vtr::vector<RRNodeId, std::set<ClusterNetId>> collect_rr_node_nets() {
     return rr_node_nets;
 }
 
+/** Updates pathfinder's occupancy by either adding or removing the
+ * usage of a resource node. */
 void pathfinder_update_single_node_occupancy(RRNodeId inode, int add_or_sub) {
-    /* Updates pathfinder's occupancy by either adding or removing the
-     * usage of a resource node. */
-
     auto& route_ctx = g_vpr_ctx.mutable_routing();
 
     int occ = route_ctx.rr_node_route_inf[inode].occ() + add_or_sub;
@@ -376,14 +180,13 @@ void pathfinder_update_single_node_occupancy(RRNodeId inode, int add_or_sub) {
     VTR_ASSERT(occ >= 0);
 }
 
+/** This routine recomputes the acc_cost (accumulated congestion cost) of each
+ * routing resource for the pathfinder algorithm after all nets have been routed.
+ * It updates the accumulated cost by adding in the number of extra signals
+ * sharing a resource right now (i.e. after each complete iteration) times acc_fac.
+ * THIS ROUTINE ASSUMES THE OCCUPANCY VALUES IN RR_NODE ARE UP TO DATE.
+ * This routine also creates a new overuse info for the current routing iteration. */
 void pathfinder_update_acc_cost_and_overuse_info(float acc_fac, OveruseInfo& overuse_info) {
-    /* This routine recomputes the acc_cost (accumulated congestion cost) of each       *
-     * routing resource for the pathfinder algorithm after all nets have been routed.   *
-     * It updates the accumulated cost to by adding in the number of extra signals      *
-     * sharing a resource right now (i.e. after each complete iteration) times acc_fac. *
-     * THIS ROUTINE ASSUMES THE OCCUPANCY VALUES IN RR_NODE ARE UP TO DATE.             *
-     * This routine also creates a new overuse info for the current routing iteration.  */
-
     auto& device_ctx = g_vpr_ctx.device();
     const auto& rr_graph = device_ctx.rr_graph;
     auto& route_ctx = g_vpr_ctx.mutable_routing();
@@ -417,20 +220,6 @@ void pathfinder_update_cost_from_route_tree(const RouteTreeNode& root, int add_o
     }
 }
 
-float update_pres_fac(float new_pres_fac) {
-    /* This routine should take the new value of the present congestion factor *
-     * and propagate it to all the relevant data fields in the vpr flow.       *
-     * Currently, it only updates the pres_fac used by the drawing functions   */
-#ifndef NO_GRAPHICS
-
-    // Only updates the drawing pres_fac if graphics is enabled
-    get_draw_state_vars()->pres_fac = new_pres_fac;
-
-#endif // NO_GRAPHICS
-
-    return new_pres_fac;
-}
-
 /* Call this before you route any nets. It frees any old route trees and
  * sets the list of rr_nodes touched to empty. */
 void init_route_structs(const Netlist<>& net_list,
@@ -595,12 +384,9 @@ static t_clb_opins_used alloc_and_load_clb_opins_used_locally() {
     return (clb_opins_used_locally);
 }
 
-/*the trace lists are only freed after use by the timing-driven placer */
-/*Do not  free them after use by the router, since stats, and draw  */
-/*routines use the trace values */
+/* Frees the temporary storage needed only during the routing. The
+ * final routing result is not freed. */
 void free_route_structs() {
-    /* Frees the temporary storage needed only during the routing.  The  *
-     * final routing result is not freed.                                */
     auto& route_ctx = g_vpr_ctx.mutable_routing();
 
     if (route_ctx.route_bb.size() != 0) {
diff --git a/vpr/src/route/route_common.h b/vpr/src/route/route_common.h
index 68e525e10b0..203e2880059 100644
--- a/vpr/src/route/route_common.h
+++ b/vpr/src/route/route_common.h
@@ -1,14 +1,19 @@
-/************ Defines and types shared by all route files ********************/
 #pragma once
+
+/** @file Misc. router utils: some used by the connection router, some by other
+ * router files and some used globally. */
+
 #include <vector>
 #include "clustered_netlist.h"
-#include "vtr_vector.h"
-#include "heap_type.h"
 #include "rr_node_fwd.h"
 #include "router_stats.h"
 #include "globals.h"
 
-/******* Subroutines in route_common used only by other router modules ******/
+/** This routine checks to see if this is a resource-feasible routing.
+ * That is, are all rr_node capacity limitations respected?  It assumes
+ * that the occupancy arrays are up to date when it is called. */
+bool feasible_routing();
+
 vtr::vector<ParentNetId, t_bb> load_route_bb(const Netlist<>& net_list,
                                              int bb_factor);
 
@@ -23,8 +28,6 @@ void pathfinder_update_acc_cost_and_overuse_info(float acc_fac, OveruseInfo& ove
 /** Update pathfinder cost of all nodes under root (including root) */
 void pathfinder_update_cost_from_route_tree(const RouteTreeNode& root, int add_or_sub);
 
-float update_pres_fac(float new_pres_fac);
-
 void reset_path_costs(const std::vector<RRNodeId>& visited_rr_nodes);
 
 float get_rr_cong_cost(RRNodeId inode, float pres_fac);
diff --git a/vpr/src/route/route_debug.cpp b/vpr/src/route/route_debug.cpp
new file mode 100644
index 00000000000..c0c8d3dd24e
--- /dev/null
+++ b/vpr/src/route/route_debug.cpp
@@ -0,0 +1,30 @@
+#include "route_debug.h"
+
+std::atomic_bool f_router_debug = false;
+
+void enable_router_debug(
+    const t_router_opts& router_opts,
+    ParentNetId net,
+    RRNodeId sink_rr,
+    int router_iteration,
+    ConnectionRouterInterface* router) {
+    bool active_net_debug = (router_opts.router_debug_net >= -1);
+    bool active_sink_debug = (router_opts.router_debug_sink_rr >= 0);
+    bool active_iteration_debug = (router_opts.router_debug_iteration >= 0);
+
+    bool match_net = (ParentNetId(router_opts.router_debug_net) == net || router_opts.router_debug_net == -1);
+    bool match_sink = (router_opts.router_debug_sink_rr == int(size_t((sink_rr))) || router_opts.router_debug_sink_rr < 0);
+    bool match_iteration = (router_opts.router_debug_iteration == router_iteration || router_opts.router_debug_iteration < 0);
+
+    f_router_debug = active_net_debug || active_sink_debug || active_iteration_debug;
+
+    if (active_net_debug) f_router_debug = f_router_debug && match_net;
+    if (active_sink_debug) f_router_debug = f_router_debug && match_sink;
+    if (active_iteration_debug) f_router_debug = f_router_debug && match_iteration;
+
+    router->set_router_debug(f_router_debug);
+
+#ifndef VTR_ENABLE_DEBUG_LOGGING
+    VTR_LOGV_WARN(f_router_debug, "Limited router debug output provided since compiled without VTR_ENABLE_DEBUG_LOGGING defined\n");
+#endif
+}
diff --git a/vpr/src/route/route_debug.h b/vpr/src/route/route_debug.h
new file mode 100644
index 00000000000..94c874da706
--- /dev/null
+++ b/vpr/src/route/route_debug.h
@@ -0,0 +1,20 @@
+#pragma once
+
+/** @file Utils for debugging the router */
+
+#include <atomic>
+#include "connection_router_interface.h"
+#include "vpr_types.h"
+
+/** @brief Run-time flag to control when router debug information is printed.
+ * Note: only enables debug output if compiled with VTR_ENABLE_DEBUG_LOGGING defined.
+ * f_router_debug is used to stop the router when a breakpoint is reached. When a breakpoint is reached, this flag is set to true.
+ *
+ * In addition f_router_debug is used to print additional debug information during routing, for instance lookahead expected costs
+ * information.
+ *
+ * d2: Made atomic as an attempt to make it work with parallel routing, but don't expect reliable results. */
+extern std::atomic_bool f_router_debug;
+
+/** Enable f_router_debug if specific sink/net debugging is set in \p router_opts */
+void enable_router_debug(const t_router_opts& router_opts, ParentNetId net, RRNodeId sink_rr, int router_iteration, ConnectionRouterInterface* router);
diff --git a/vpr/src/route/route_export.h b/vpr/src/route/route_export.h
index 3aa1703647e..971aeba966b 100644
--- a/vpr/src/route/route_export.h
+++ b/vpr/src/route/route_export.h
@@ -1,43 +1,20 @@
-/******** Function prototypes for functions in route_common.cpp that ***********
- ******** are used outside the router modules.                     ***********/
-#include "vpr_types.h"
+#pragma once
+
+/** @file Function prototypes for functions in route_common.cpp that
+ * are used outside the router modules. */
+
 #include <memory>
-#include "timing_info_fwd.h"
+
 #include "route_common.h"
-#include "RoutingDelayCalculator.h"
+#include "timing_info_fwd.h"
+#include "vpr_types.h"
 
-void try_graph(int width_fac,
-               const t_router_opts& router_opts,
-               t_det_routing_arch* det_routing_arch,
-               std::vector<t_segment_inf>& segment_inf,
-               t_chan_width_dist chan_width_dist,
-               t_direct_inf* directs,
-               int num_directs,
-               bool is_flat);
-
-bool try_route(const Netlist<>& net_list,
-               int width_fac,
-               const t_router_opts& router_opts,
-               const t_analysis_opts& analysis_opts,
-               t_det_routing_arch* det_routing_arch,
-               std::vector<t_segment_inf>& segment_inf,
-               NetPinsMatrix<float>& net_delay,
-               std::shared_ptr<SetupHoldTimingInfo> timing_info,
-               std::shared_ptr<RoutingDelayCalculator> delay_calc,
-               t_chan_width_dist chan_width_dist,
-               t_direct_inf* directs,
-               int num_directs,
-               ScreenUpdatePriority first_iteration_priority,
-               bool is_flat);
-
-bool feasible_routing();
+#include "RoutingDelayCalculator.h"
 
 std::vector<RRNodeId> collect_congested_rr_nodes();
 
 vtr::vector<RRNodeId, std::set<ClusterNetId>> collect_rr_node_nets();
 
-t_clb_opins_used alloc_route_structs();
-
 void free_route_structs();
 
 void save_routing(vtr::vector<ParentNetId, vtr::optional<RouteTree>>& best_routing,
diff --git a/vpr/src/route/route_net.cpp b/vpr/src/route/route_net.cpp
new file mode 100644
index 00000000000..8f36e68836b
--- /dev/null
+++ b/vpr/src/route/route_net.cpp
@@ -0,0 +1,338 @@
+/** @file Impls for non-templated net routing fns & utils */
+
+#include "route_net.h"
+#include "stats.h"
+
+bool check_hold(const t_router_opts& router_opts, float worst_neg_slack) {
+    if (router_opts.routing_budgets_algorithm != YOYO) {
+        return false;
+    } else if (worst_neg_slack != 0) {
+        return true;
+    }
+    return false;
+}
+
+void setup_routing_resources(int itry,
+                             ParentNetId net_id,
+                             const Netlist<>& net_list,
+                             unsigned num_sinks,
+                             int min_incremental_reroute_fanout,
+                             CBRR& connections_inf,
+                             const t_router_opts& router_opts,
+                             bool ripup_high_fanout_nets) {
+    auto& route_ctx = g_vpr_ctx.mutable_routing();
+
+    /* "tree" points to this net's spot in the global context here, so re-initializing it etc. changes the global state */
+    vtr::optional<RouteTree>& tree = route_ctx.route_trees[net_id];
+
+    // for nets below a certain size (min_incremental_reroute_fanout), rip up any old routing
+    // otherwise, we incrementally reroute by reusing legal parts of the previous iteration
+    if ((int)num_sinks < min_incremental_reroute_fanout || itry == 1 || ripup_high_fanout_nets) {
+        profiling::net_rerouted();
+
+        /* rip up the whole net */
+        if (tree)
+            pathfinder_update_cost_from_route_tree(tree.value().root(), -1);
+        tree = vtr::nullopt;
+
+        /* re-initialize net */
+        tree = RouteTree(net_id);
+        pathfinder_update_cost_from_route_tree(tree.value().root(), 1);
+
+        // since all connections will be rerouted for this net, clear all of net's forced reroute flags
+        connections_inf.clear_force_reroute_for_net(net_id);
+
+        // when we don't prune the tree, we also don't know the sink node indices
+        // thus we'll use functions that act on pin indices like mark_ends instead
+        // of their versions that act on node indices directly like mark_remaining_ends
+        mark_ends(net_list, net_id);
+    } else {
+        profiling::net_rebuild_start();
+
+        if (!tree) {
+            tree = RouteTree(net_id);
+            pathfinder_update_cost_from_route_tree(tree.value().root(), 1);
+        }
+
+        /* copy the existing routing
+         * prune() depends on global occ, so we can't subtract before pruning
+         * OPT: to skip this copy, return a "diff" from RouteTree::prune */
+        RouteTree tree2 = tree.value();
+
+        // Skip this check if RCV is enabled, as RCV can use another method to cause reroutes
+        VTR_ASSERT_SAFE(should_route_net(net_id, connections_inf, true) || router_opts.routing_budgets_algorithm == YOYO);
+
+        // Prune the copy (using congestion data before subtraction)
+        vtr::optional<RouteTree&> pruned_tree2 = tree2.prune(connections_inf);
+
+        // Subtract congestion using the non-pruned original
+        pathfinder_update_cost_from_route_tree(tree->root(), -1);
+
+        if (pruned_tree2) { //Partially pruned
+            profiling::route_tree_preserved();
+
+            // Add back congestion for the pruned route tree
+            pathfinder_update_cost_from_route_tree(pruned_tree2->root(), 1);
+            // pruned_tree2 is no longer required -> we can move rather than copy
+            tree = std::move(pruned_tree2.value());
+        } else { // Fully destroyed
+            profiling::route_tree_pruned();
+
+            // Initialize only to source
+            tree = RouteTree(net_id);
+            pathfinder_update_cost_from_route_tree(tree->root(), 1);
+        }
+
+        profiling::net_rebuild_end(num_sinks, tree->get_remaining_isinks().size());
+
+        // still need to calculate the tree's time delay
+        tree->reload_timing();
+
+        // check for R_upstream C_downstream and edge correctness
+        VTR_ASSERT_SAFE(tree->is_valid());
+
+        // congestion should've been pruned away
+        VTR_ASSERT_SAFE(tree->is_uncongested());
+
+        // mark remaining ends
+        mark_remaining_ends(net_id);
+
+        // reset prev_node/prev_edge in the lookup (rr_node_route_inf) for existing tree elements to INVALID so add_to_path stops when it reaches one of them
+        update_rr_route_inf_from_tree(tree->root());
+    }
+
+    // completed constructing the partial route tree and updated all other data structures to match
+}
+
+void update_rr_base_costs(int fanout) {
+    auto& device_ctx = g_vpr_ctx.mutable_device();
+
+    float factor;
+    size_t index;
+
+    /* Other reasonable values for factor include fanout and 1 */
+    factor = sqrt(fanout);
+
+    for (index = CHANX_COST_INDEX_START; index < device_ctx.rr_indexed_data.size(); index++) {
+        if (device_ctx.rr_indexed_data[RRIndexedDataId(index)].T_quadratic > 0.) { /* pass transistor */
+            device_ctx.rr_indexed_data[RRIndexedDataId(index)].base_cost = device_ctx.rr_indexed_data[RRIndexedDataId(index)].saved_base_cost * factor;
+        } else {
+            device_ctx.rr_indexed_data[RRIndexedDataId(index)].base_cost = device_ctx.rr_indexed_data[RRIndexedDataId(index)].saved_base_cost;
+        }
+    }
+}
+
+void update_rr_route_inf_from_tree(const RouteTreeNode& rt_node) {
+    auto& route_ctx = g_vpr_ctx.mutable_routing();
+
+    for (auto& node : rt_node.all_nodes()) {
+        RRNodeId inode = node.inode;
+        route_ctx.rr_node_route_inf[inode].prev_node = RRNodeId::INVALID();
+        route_ctx.rr_node_route_inf[inode].prev_edge = RREdgeId::INVALID();
+
+        // path cost should be unset
+        VTR_ASSERT(std::isinf(route_ctx.rr_node_route_inf[inode].path_cost));
+        VTR_ASSERT(std::isinf(route_ctx.rr_node_route_inf[inode].backward_path_cost));
+    }
+}
+
+bool should_route_net(ParentNetId net_id,
+                      CBRR& connections_inf,
+                      bool if_force_reroute) {
+    auto& route_ctx = g_vpr_ctx.routing();
+    auto& device_ctx = g_vpr_ctx.device();
+    const auto& rr_graph = device_ctx.rr_graph;
+
+    if (!route_ctx.route_trees[net_id]) {
+        /* No routing yet. */
+        return true;
+    }
+
+    const RouteTree& tree = route_ctx.route_trees[net_id].value();
+
+    /* Walk over all rt_nodes in the net */
+    for (auto& rt_node : tree.all_nodes()) {
+        RRNodeId inode = rt_node.inode;
+        int occ = route_ctx.rr_node_route_inf[inode].occ();
+        int capacity = rr_graph.node_capacity(inode);
+
+        if (occ > capacity) {
+            return true; /* overuse detected */
+        }
+
+        if (rt_node.is_leaf()) { //End of a branch
+            // even if net is fully routed, not complete if parts of it should get ripped up (EXPERIMENTAL)
+            if (if_force_reroute) {
+                if (connections_inf.should_force_reroute_connection(net_id, inode)) {
+                    return true;
+                }
+            }
+        }
+    }
+
+    /* If all sinks have been routed to without overuse, no need to route this */
+    if (tree.get_remaining_isinks().empty())
+        return false;
+
+    return true;
+}
+
+bool early_exit_heuristic(const t_router_opts& router_opts, const WirelengthInfo& wirelength_info) {
+    if (wirelength_info.used_wirelength_ratio() > router_opts.init_wirelength_abort_threshold) {
+        VTR_LOG("Wire length usage ratio %g exceeds limit of %g, fail routing.\n",
+                wirelength_info.used_wirelength_ratio(),
+                router_opts.init_wirelength_abort_threshold);
+        return true;
+    }
+    return false;
+}
+
+float get_net_pin_criticality(const SetupHoldTimingInfo* timing_info,
+                              const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
+                              float max_criticality,
+                              float criticality_exp,
+                              ParentNetId net_id,
+                              ParentPinId pin_id,
+                              bool is_flat) {
+    float pin_criticality = 0.0;
+    const auto& route_ctx = g_vpr_ctx.routing();
+
+    if (route_ctx.is_clock_net[net_id]) {
+        pin_criticality = max_criticality;
+    } else {
+        pin_criticality = calculate_clb_net_pin_criticality(*timing_info,
+                                                            netlist_pin_lookup,
+                                                            pin_id,
+                                                            is_flat);
+    }
+
+    /* Pin criticality is between 0 and 1.
+     * Shift it downwards by 1 - max_criticality (max_criticality is 0.99 by default,
+     * so shift down by 0.01) and cut off at 0.  This means that all pins with small
+     * criticalities (<0.01) get criticality 0 and are ignored entirely, and everything
+     * else becomes a bit less critical. This effect becomes more pronounced if
+     * max_criticality is set lower. */
+    // VTR_ASSERT(pin_criticality[ipin] > -0.01 && pin_criticality[ipin] < 1.01);
+    pin_criticality = std::max(pin_criticality - (1.0 - max_criticality), 0.0);
+
+    /* Take pin criticality to some power (1 by default). */
+    pin_criticality = std::pow(pin_criticality, criticality_exp);
+
+    /* Cut off pin criticality at max_criticality. */
+    pin_criticality = std::min(pin_criticality, max_criticality);
+
+    return pin_criticality;
+}
+
+size_t calculate_wirelength_available() {
+    auto& device_ctx = g_vpr_ctx.device();
+    const auto& rr_graph = device_ctx.rr_graph;
+
+    size_t available_wirelength = 0;
+    // Iterate over every RR node and accumulate capacity * length of each CHANX/CHANY node as the available wirelength
+    for (const RRNodeId& rr_id : device_ctx.rr_graph.nodes()) {
+        const t_rr_type channel_type = rr_graph.node_type(rr_id);
+        if (channel_type == CHANX || channel_type == CHANY) {
+            available_wirelength += rr_graph.node_capacity(rr_id) * rr_graph.node_length(rr_id);
+        }
+    }
+    return available_wirelength;
+}
+
+WirelengthInfo calculate_wirelength_info(const Netlist<>& net_list, size_t available_wirelength) {
+    size_t used_wirelength = 0;
+    VTR_ASSERT(available_wirelength > 0);
+
+    auto& route_ctx = g_vpr_ctx.routing();
+
+    for (auto net_id : net_list.nets()) {
+        if (!net_list.net_is_ignored(net_id)
+            && net_list.net_sinks(net_id).size() != 0 /* Globals don't count. */
+            && route_ctx.route_trees[net_id]) {
+            int bends = 0, wirelength = 0, segments = 0;
+            bool is_absorbed;
+            get_num_bends_and_length(net_id, &bends, &wirelength, &segments, &is_absorbed);
+
+            used_wirelength += wirelength;
+        }
+    }
+
+    return WirelengthInfo(available_wirelength, used_wirelength);
+}
+
+t_bb calc_current_bb(const RouteTree& tree) {
+    auto& device_ctx = g_vpr_ctx.device();
+    const auto& rr_graph = device_ctx.rr_graph;
+    auto& grid = device_ctx.grid;
+
+    t_bb bb;
+    bb.xmin = grid.width() - 1;
+    bb.ymin = grid.height() - 1;
+    bb.layer_min = grid.get_num_layers() - 1;
+    bb.xmax = 0;
+    bb.ymax = 0;
+    bb.layer_max = 0;
+
+    for (auto& rt_node : tree.all_nodes()) {
+        //The router interprets RR nodes which cross the boundary as being
+        //'within' the BB. Only those which are *strictly* outside the
+        //box are excluded, hence we use the node's xhigh/yhigh for xmin/ymin,
+        //and xlow/ylow for xmax/ymax calculations
+        bb.xmin = std::min<int>(bb.xmin, rr_graph.node_xhigh(rt_node.inode));
+        bb.ymin = std::min<int>(bb.ymin, rr_graph.node_yhigh(rt_node.inode));
+        bb.layer_min = std::min<int>(bb.layer_min, rr_graph.node_layer(rt_node.inode));
+        bb.xmax = std::max<int>(bb.xmax, rr_graph.node_xlow(rt_node.inode));
+        bb.ymax = std::max<int>(bb.ymax, rr_graph.node_ylow(rt_node.inode));
+        bb.layer_max = std::max<int>(bb.layer_max, rr_graph.node_layer(rt_node.inode));
+    }
+
+    VTR_ASSERT(bb.xmin <= bb.xmax);
+    VTR_ASSERT(bb.ymin <= bb.ymax);
+
+    return bb;
+}
+
+// Initializes net_delay based on best-case delay estimates from the router lookahead
+void init_net_delay_from_lookahead(const RouterLookahead& router_lookahead,
+                                   const Netlist<>& net_list,
+                                   const vtr::vector<ParentNetId, std::vector<RRNodeId>>& net_rr_terminals,
+                                   NetPinsMatrix<float>& net_delay,
+                                   const RRGraphView& rr_graph,
+                                   bool is_flat) {
+    t_conn_cost_params cost_params;
+    cost_params.criticality = 1.; // Ensures lookahead returns delay value
+
+    for (auto net_id : net_list.nets()) {
+        if (net_list.net_is_ignored(net_id)) continue;
+
+        RRNodeId source_rr = net_rr_terminals[net_id][0];
+
+        for (size_t ipin = 1; ipin < net_list.net_pins(net_id).size(); ++ipin) {
+            RRNodeId sink_rr = net_rr_terminals[net_id][ipin];
+
+            float est_delay = get_cost_from_lookahead(router_lookahead,
+                                                      rr_graph,
+                                                      source_rr,
+                                                      sink_rr,
+                                                      0.,
+                                                      cost_params,
+                                                      is_flat);
+            VTR_ASSERT(std::isfinite(est_delay) && est_delay < std::numeric_limits<float>::max());
+
+            net_delay[net_id][ipin] = est_delay;
+        }
+    }
+}
+
+void update_net_delays_from_route_tree(float* net_delay,
+                                       const Netlist<>& net_list,
+                                       ParentNetId inet,
+                                       TimingInfo* timing_info,
+                                       NetPinTimingInvalidator* pin_timing_invalidator) {
+    auto& route_ctx = g_vpr_ctx.routing();
+    const RouteTree& tree = route_ctx.route_trees[inet].value();
+
+    for (unsigned int isink = 1; isink < net_list.net_pins(inet).size(); isink++) {
+        update_net_delay_from_isink(net_delay, tree, isink, net_list, inet, timing_info, pin_timing_invalidator);
+    }
+}
diff --git a/vpr/src/route/route_net.h b/vpr/src/route/route_net.h
new file mode 100644
index 00000000000..fc08a0ddb19
--- /dev/null
+++ b/vpr/src/route/route_net.h
@@ -0,0 +1,119 @@
+#pragma once
+
+/** @file Net and sink routing functions, and other utils used by them. */
+
+#include <unordered_map>
+#include <vector>
+
+#include "connection_based_routing.h"
+#include "connection_router_interface.h"
+#include "heap_type.h"
+#include "netlist.h"
+#include "route_budgets.h"
+#include "route_utils.h"
+#include "router_stats.h"
+#include "router_lookahead.h"
+#include "routing_predictor.h"
+#include "rr_graph_type.h"
+#include "spatial_route_tree_lookup.h"
+#include "timing_info_fwd.h"
+#include "vpr_types.h"
+#include "vpr_utils.h"
+
+#include "NetPinTimingInvalidator.h"
+
+/** Results from attempting to route a net.
+ * success: Could we route it?
+ * was_rerouted: Is the routing different from the last one? (set by try_* functions)
+ * retry_with_full_bb: Should we retry this net with a full-device bounding box? (used in the parallel router)
+ *
+ * I'm fine with returning 3 bytes from a fn: consider an enum class if this becomes too big */
+struct NetResultFlags {
+    bool success = false;
+    bool was_rerouted = false;
+    bool retry_with_full_bb = false;
+};
+
+/** When RCV is enabled, it's necessary to be able to completely rip up high fanout nets
+ * if there is still negative hold slack. Normally the router will prune the illegal branches
+ * of high fanout nets; this will bypass that. */
+bool check_hold(const t_router_opts& router_opts, float worst_neg_slack);
+
+/** Return a full-device bounding box */
+inline t_bb full_device_bb(void) {
+    const auto& grid = g_vpr_ctx.device().grid;
+    return {0, (int)grid.width() - 1, 0, (int)grid.height() - 1, 0, (int)grid.get_num_layers() - 1};
+}
+
+/** Get criticality of \p pin_id in net \p net_id from 0 to 1 */
+float get_net_pin_criticality(const SetupHoldTimingInfo* timing_info,
+                              const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
+                              float max_criticality,
+                              float criticality_exp,
+                              ParentNetId net_id,
+                              ParentPinId pin_id,
+                              bool is_flat);
+
+/** Returns true if the specified net fanout is classified as high fanout */
+constexpr bool is_high_fanout(int fanout, int fanout_threshold) {
+    if (fanout_threshold < 0 || fanout < fanout_threshold)
+        return false;
+    return true;
+}
+
+/** Build a partial route tree in global context for \p net_id from the legal
+ * connections from last iteration.
+ * Along the way do:
+ * - update pathfinder costs to be accurate to the partial route tree
+ * - mark the rr_node sinks as targets to be reached. */
+void setup_routing_resources(int itry,
+                             ParentNetId net_id,
+                             const Netlist<>& net_list,
+                             unsigned num_sinks,
+                             int min_incremental_reroute_fanout,
+                             CBRR& connections_inf,
+                             const t_router_opts& router_opts,
+                             bool ripup_high_fanout_nets);
+
+/** Detect if net should be routed or not */
+bool should_route_net(ParentNetId net_id,
+                      CBRR& connections_inf,
+                      bool if_force_reroute);
+
+/** Update net_delay value for a single sink in a RouteTree. */
+inline void update_net_delay_from_isink(float* net_delay,
+                                        const RouteTree& tree,
+                                        int isink,
+                                        const Netlist<>& net_list,
+                                        ParentNetId inet,
+                                        TimingInfo* timing_info,
+                                        NetPinTimingInvalidator* pin_timing_invalidator) {
+    float new_delay = tree.find_by_isink(isink)->Tdel;
+
+    if (pin_timing_invalidator && new_delay != net_delay[isink]) {
+        //Delay changed, invalidate for incremental timing update
+        VTR_ASSERT_SAFE(timing_info);
+        ParentPinId pin = net_list.net_pin(inet, isink);
+        pin_timing_invalidator->invalidate_connection(pin, timing_info);
+    }
+
+    net_delay[isink] = new_delay;
+}
+
+/** Goes through all the sinks of this net and copies their delay values from
+ * the route_tree to the net_delay array. */
+void update_net_delays_from_route_tree(float* net_delay,
+                                       const Netlist<>& net_list,
+                                       ParentNetId inet,
+                                       TimingInfo* timing_info,
+                                       NetPinTimingInvalidator* pin_timing_invalidator);
+
+/** Change the base costs of rr_nodes globally according to # of fanouts
+ * TODO: is this even thread safe? */
+void update_rr_base_costs(int fanout);
+
+/** Traverses down a route tree and updates rr_node_inf for all nodes
+ * to reflect that these nodes have already been routed to */
+void update_rr_route_inf_from_tree(const RouteTreeNode& rt_node);
+
+#include "route_net.tpp"
diff --git a/vpr/src/route/route_net.tpp b/vpr/src/route/route_net.tpp
new file mode 100644
index 00000000000..8542d8f306d
--- /dev/null
+++ b/vpr/src/route/route_net.tpp
@@ -0,0 +1,478 @@
+#pragma once
+
+/** @file Header implementations for templated net routing fns. */
+
+#include <tuple>
+#include "route_net.h"
+
+#include "connection_router_interface.h"
+#include "describe_rr_node.h"
+#include "draw.h"
+#include "route_common.h"
+#include "route_debug.h"
+#include "route_profiling.h"
+#include "rr_graph_fwd.h"
+
+/** Attempt to route a single net.
+ *
+ * @param router The ConnectionRouter instance
+ * @param net_list Input netlist
+ * @param net_id ID of the net to route
+ * @param itry # of iteration
+ * @param pres_fac Copied into the cost parameters of every connection (cost_params.pres_fac)
+ * @param router_opts Command line options for the router
+ * @param connections_inf Consulted to decide whether the net needs routing; receives lower bound delays
+ * @param router_stats Counters; connections_routed and nets_routed are incremented here
+ * @param net_delays Per-net pin delays; the entry of \p net_id is refreshed from the route tree
+ * @param netlist_pin_lookup Passed on to get_net_pin_criticality
+ * @param timing_info Passed on to get_net_pin_criticality and update_net_delays_from_route_tree
+ * @param pin_timing_invalidator Passed on to update_net_delays_from_route_tree
+ * @param budgeting_inf Delay budgets; only consulted if budgeting_inf.if_set()
+ * @param worst_negative_slack Rerouting for hold is only considered when this is nonzero
+ * @param routing_predictor Passed on to route_sink
+ * @param choking_spots Choking spots of each sink of this net, passed on to route_sink
+ * @param is_flat Passed on to the criticality, clock pre-route and sink routing helpers
+ * @return NetResultFlags for this net */
+template<typename ConnectionRouter>
+inline NetResultFlags route_net(ConnectionRouter& router,
+                                const Netlist<>& net_list,
+                                const ParentNetId& net_id,
+                                int itry,
+                                float pres_fac,
+                                const t_router_opts& router_opts,
+                                CBRR& connections_inf,
+                                RouterStats& router_stats,
+                                NetPinsMatrix<float>& net_delays,
+                                const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
+                                SetupHoldTimingInfo* timing_info,
+                                NetPinTimingInvalidator* pin_timing_invalidator,
+                                route_budgets& budgeting_inf,
+                                float worst_negative_slack,
+                                const RoutingPredictor& routing_predictor,
+                                const std::vector<std::unordered_map<RRNodeId, int>>& choking_spots,
+                                bool is_flat) {
+    auto& route_ctx = g_vpr_ctx.mutable_routing();
+
+    NetResultFlags flags;
+    flags.success = true;
+
+    bool reroute_for_hold = false;
+    if (budgeting_inf.if_set()) {
+        reroute_for_hold = (budgeting_inf.get_should_reroute(net_id));
+        reroute_for_hold &= worst_negative_slack != 0;
+    }
+
+    if (route_ctx.net_status.is_fixed(net_id)) { /* Skip pre-routed nets. */
+        return flags;
+    } else if (net_list.net_is_ignored(net_id)) { /* Skip ignored nets. */
+        return flags;
+    } else if (!(reroute_for_hold) && !should_route_net(net_id, connections_inf, true)) {
+        return flags;
+    }
+
+    // track time spent vs fanout
+    profiling::net_fanout_start();
+
+    flags.was_rerouted = true; // Flag to record whether routing was actually changed
+
+    auto& device_ctx = g_vpr_ctx.device();
+    const auto& rr_graph = device_ctx.rr_graph;
+
+    unsigned int num_sinks = net_list.net_sinks(net_id).size();
+
+    VTR_LOGV_DEBUG(f_router_debug, "Routing Net %zu (%u sinks)\n", size_t(net_id), num_sinks);
+
+    setup_routing_resources(
+        itry,
+        net_id,
+        net_list,
+        num_sinks,
+        router_opts.min_incremental_reroute_fanout,
+        connections_inf,
+        router_opts,
+        check_hold(router_opts, worst_negative_slack));
+
+    VTR_ASSERT(route_ctx.route_trees[net_id]);
+    RouteTree& tree = route_ctx.route_trees[net_id].value();
+
+    bool high_fanout = is_high_fanout(num_sinks, router_opts.high_fanout_threshold);
+
+    SpatialRouteTreeLookup spatial_route_tree_lookup;
+    if (high_fanout) {
+        spatial_route_tree_lookup = build_route_tree_spatial_lookup(net_list,
+                                                                    route_ctx.route_bb,
+                                                                    net_id,
+                                                                    tree.root());
+    }
+
+    /* 1-indexed! */
+    std::vector<float> pin_criticality(tree.num_sinks() + 1, 0);
+
+    // after this point the route tree is correct
+    // remaining_targets from this point on are the **pin indices** that have yet to be routed
+    std::vector<int> remaining_targets(tree.get_remaining_isinks().begin(), tree.get_remaining_isinks().end());
+
+    // calculate criticality of remaining target pins
+    for (int ipin : remaining_targets) {
+        auto pin = net_list.net_pin(net_id, ipin);
+        pin_criticality[ipin] = get_net_pin_criticality(timing_info,
+                                                        netlist_pin_lookup,
+                                                        router_opts.max_criticality,
+                                                        router_opts.criticality_exp,
+                                                        net_id,
+                                                        pin,
+                                                        is_flat);
+    }
+
+    // compare the criticality of different sink nodes
+    std::sort(remaining_targets.begin(), remaining_targets.end(), [&](int a, int b) {
+        return pin_criticality[a] > pin_criticality[b];
+    });
+
+    /* Update base costs according to fanout and criticality rules */
+    update_rr_base_costs(num_sinks);
+
+    t_conn_delay_budget conn_delay_budget;
+    t_conn_cost_params cost_params;
+    cost_params.astar_fac = router_opts.astar_fac;
+    cost_params.bend_cost = router_opts.bend_cost;
+    cost_params.pres_fac = pres_fac;
+    cost_params.delay_budget = ((budgeting_inf.if_set()) ? &conn_delay_budget : nullptr);
+
+    // Pre-route to clock source for clock nets (marked as global nets)
+    if (net_list.net_is_global(net_id) && router_opts.two_stage_clock_routing) {
+        //VTR_ASSERT(router_opts.clock_modeling == DEDICATED_NETWORK);
+        RRNodeId sink_node(device_ctx.virtual_clock_network_root_idx);
+
+        enable_router_debug(router_opts, net_id, sink_node, itry, &router);
+
+        VTR_LOGV_DEBUG(f_router_debug, "Pre-routing global net %zu\n", size_t(net_id));
+
+        // Set to the max timing criticality which should in turn minimize clock insertion
+        // delay by selecting a direct route from the clock source to the virtual sink
+        cost_params.criticality = router_opts.max_criticality;
+
+        // Give up only if the pre-route failed: on success the sinks of the net still get routed below
+        NetResultFlags clock_flags = pre_route_to_clock_root(router,
+                                                             net_id,
+                                                             net_list,
+                                                             sink_node,
+                                                             cost_params,
+                                                             router_opts.high_fanout_threshold,
+                                                             tree,
+                                                             spatial_route_tree_lookup,
+                                                             router_stats,
+                                                             is_flat);
+        if (!clock_flags.success)
+            return clock_flags;
+        flags.retry_with_full_bb |= clock_flags.retry_with_full_bb;
+    }
+
+    if (budgeting_inf.if_set()) {
+        budgeting_inf.set_should_reroute(net_id, false);
+    }
+
+    // explore in order of decreasing criticality (no longer need sink_order array)
+    for (unsigned itarget = 0; itarget < remaining_targets.size(); ++itarget) {
+        int target_pin = remaining_targets[itarget];
+
+        RRNodeId sink_rr = route_ctx.net_rr_terminals[net_id][target_pin];
+
+        enable_router_debug(router_opts, net_id, sink_rr, itry, &router);
+
+        cost_params.criticality = pin_criticality[target_pin];
+
+        if (budgeting_inf.if_set()) {
+            conn_delay_budget.max_delay = budgeting_inf.get_max_delay_budget(net_id, target_pin);
+            conn_delay_budget.target_delay = budgeting_inf.get_delay_target(net_id, target_pin);
+            conn_delay_budget.min_delay = budgeting_inf.get_min_delay_budget(net_id, target_pin);
+            conn_delay_budget.short_path_criticality = budgeting_inf.get_crit_short_path(net_id, target_pin);
+            conn_delay_budget.routing_budgets_algorithm = router_opts.routing_budgets_algorithm;
+        }
+
+        profiling::conn_start();
+
+        // build a branch in the route tree to the target
+        auto sink_flags = route_sink(router,
+                                     net_list,
+                                     net_id,
+                                     itarget,
+                                     target_pin,
+                                     cost_params,
+                                     router_opts,
+                                     tree,
+                                     spatial_route_tree_lookup,
+                                     router_stats,
+                                     budgeting_inf,
+                                     routing_predictor,
+                                     choking_spots,
+                                     is_flat);
+
+        flags.retry_with_full_bb |= sink_flags.retry_with_full_bb;
+
+        if (!sink_flags.success) {
+            flags.success = false;
+            VTR_LOG("Routing failed for net %zu\n", size_t(net_id));
+            return flags;
+        }
+
+        profiling::conn_finish(size_t(route_ctx.net_rr_terminals[net_id][0]),
+                               size_t(sink_rr),
+                               pin_criticality[target_pin]);
+
+        ++router_stats.connections_routed;
+    } // finished all sinks
+
+    ++router_stats.nets_routed;
+    profiling::net_finish();
+
+    /* For later timing analysis. */
+    float* net_delay = net_delays[net_id].data();
+
+    // may have to update timing delay of the previously legally reached sinks since downstream capacitance could be changed
+    update_net_delays_from_route_tree(net_delay,
+                                      net_list,
+                                      net_id,
+                                      timing_info,
+                                      pin_timing_invalidator);
+
+    if (router_opts.update_lower_bound_delays) {
+        for (int ipin : remaining_targets) {
+            connections_inf.update_lower_bound_connection_delay(net_id, ipin, net_delay[ipin]);
+        }
+    }
+
+    VTR_ASSERT_MSG(g_vpr_ctx.routing().rr_node_route_inf[tree.root().inode].occ() <= rr_graph.node_capacity(tree.root().inode), "SOURCE should never be congested");
+    VTR_LOGV_DEBUG(f_router_debug, "Routed Net %zu (%u sinks)\n", size_t(net_id), num_sinks);
+
+    router.empty_rcv_route_tree_set(); // ?
+
+    profiling::net_fanout_end(net_list.net_sinks(net_id).size());
+
+    route_ctx.net_status.set_is_routed(net_id, true);
+    return flags;
+}
+
+/** Route to a "virtual sink" in the netlist which corresponds to the start point
+ * of the global clock network.
+ *
+ * On success the new branch is added to \p tree and to the global occupancy, \p tree is frozen and
+ * the occupancy of \p sink_node is reset to 0. The router's path costs are reset in either case.
+ * @return NetResultFlags: success, plus retry_with_full_bb when no path was found */
+template<typename ConnectionRouter>
+inline NetResultFlags pre_route_to_clock_root(ConnectionRouter& router,
+                                              ParentNetId net_id,
+                                              const Netlist<>& net_list,
+                                              RRNodeId sink_node,
+                                              const t_conn_cost_params cost_params,
+                                              int high_fanout_threshold,
+                                              RouteTree& tree,
+                                              SpatialRouteTreeLookup& spatial_rt_lookup,
+                                              RouterStats& router_stats,
+                                              bool is_flat) {
+    const auto& device_ctx = g_vpr_ctx.device();
+    auto& route_ctx = g_vpr_ctx.mutable_routing();
+
+    NetResultFlags out;
+
+    bool high_fanout = is_high_fanout(net_list.net_sinks(net_id).size(), high_fanout_threshold);
+
+    VTR_LOGV_DEBUG(f_router_debug, "Net %zu pre-route to (%s)\n", size_t(net_id), describe_rr_node(device_ctx.rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, sink_node, is_flat).c_str());
+    profiling::sink_criticality_start();
+
+    t_bb bounding_box = route_ctx.route_bb[net_id];
+
+    router.clear_modified_rr_node_info();
+
+    bool found_path, retry_with_full_bb;
+    t_heap cheapest;
+    ConnectionParameters conn_params(net_id, -1, false, std::unordered_map<RRNodeId, int>());
+
+    std::tie(found_path, retry_with_full_bb, cheapest) = router.timing_driven_route_connection_from_route_tree(
+        tree.root(),
+        sink_node,
+        cost_params,
+        bounding_box,
+        router_stats,
+        conn_params);
+
+    // TODO: Parts of the rest of this function are repetitive to code in route_sink. Should refactor.
+    if (!found_path) {
+        ParentBlockId src_block = net_list.net_driver_block(net_id);
+        VTR_LOG("Failed to route connection from '%s' to '%s' for net '%s' (#%zu)\n",
+                net_list.block_name(src_block).c_str(),
+                describe_rr_node(device_ctx.rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, sink_node, is_flat).c_str(),
+                net_list.net_name(net_id).c_str(),
+                size_t(net_id));
+        if (f_router_debug) {
+            update_screen(ScreenUpdatePriority::MAJOR, "Unable to route connection.", ROUTING, nullptr);
+        }
+        router.reset_path_costs();
+        out.success = false;
+        out.retry_with_full_bb = retry_with_full_bb;
+        return out;
+    }
+
+    profiling::sink_criticality_end(cost_params.criticality);
+
+    /* This is a special pre-route to a sink that does not correspond to any    *
+     * netlist pin, but which can be reached from the global clock root drive   *
+     * points. Therefore, we can set the net pin index of the sink node to      *
+     * OPEN (meaning illegal) as it is not meaningful for this sink.            */
+    vtr::optional<const RouteTreeNode&> new_branch, new_sink;
+    std::tie(new_branch, new_sink) = tree.update_from_heap(&cheapest, OPEN, ((high_fanout) ? &spatial_rt_lookup : nullptr), is_flat);
+
+    VTR_ASSERT_DEBUG(!high_fanout || validate_route_tree_spatial_lookup(tree.root(), spatial_rt_lookup));
+
+    if (f_router_debug) {
+        std::string msg = vtr::string_fmt("Routed Net %zu connection to RR node %zu successfully", size_t(net_id), size_t(sink_node));
+        update_screen(ScreenUpdatePriority::MAJOR, msg.c_str(), ROUTING, nullptr);
+    }
+
+    if (new_branch)
+        pathfinder_update_cost_from_route_tree(new_branch.value(), 1);
+
+    // need to guarantee ALL nodes' path costs are HUGE_POSITIVE_FLOAT at the start of routing to a sink
+    // do this by resetting all the path_costs that have been touched while routing to the current sink
+    router.reset_path_costs();
+
+    // Post route cleanup:
+    // - remove sink from route tree and fix routing for all nodes leading to the sink ("freeze")
+    // - free up virtual sink occupancy
+    tree.freeze();
+    route_ctx.rr_node_route_inf[sink_node].set_occ(0);
+
+    // routed to a sink successfully
+    out.success = true;
+    return out;
+}
+
+/** Attempt to route a single sink (target_pin) in a net.
+ * In the process, update global pathfinder costs, rr_node_route_inf and extend the global RouteTree
+ * for this net.
+ *
+ * @param router The ConnectionRouter instance
+ * @param net_list Input netlist
+ * @param net_id ID of the net being routed
+ * @param itarget # of this connection in the net (only used for debug output)
+ * @param target_pin Pin index of this sink in the net: indexes net_rr_terminals[net_id] and choking_spots
+ * @param cost_params Cost parameters for this connection (criticality, delay budget, ...)
+ * @param router_opts Command line options for the router
+ * @param[in, out] tree RouteTree describing the current routing state
+ * @param spatial_rt_lookup Spatial lookup of \p tree, used and updated only for high fanout nets
+ * @param router_stats Passed on to the connection router
+ * @param budgeting_inf If set, the net is flagged for rerouting when the path's backward delay is below min_delay
+ * @param routing_predictor Its slope decides whether the high fanout routing path may be taken
+ * @param choking_spots Choking spots of each sink of this net, indexed by target_pin
+ * @param is_flat Passed on to describe_rr_node and RouteTree::update_from_heap
+ * @return NetResultFlags for this sink to be bubbled up through route_net;
+ *         on failure success is false and the router's path costs have been reset */
+template<typename ConnectionRouter>
+inline NetResultFlags route_sink(ConnectionRouter& router,
+                                 const Netlist<>& net_list,
+                                 ParentNetId net_id,
+                                 unsigned itarget,
+                                 int target_pin,
+                                 const t_conn_cost_params cost_params,
+                                 const t_router_opts& router_opts,
+                                 RouteTree& tree,
+                                 SpatialRouteTreeLookup& spatial_rt_lookup,
+                                 RouterStats& router_stats,
+                                 route_budgets& budgeting_inf,
+                                 const RoutingPredictor& routing_predictor,
+                                 const std::vector<std::unordered_map<RRNodeId, int>>& choking_spots,
+                                 bool is_flat) {
+    const auto& device_ctx = g_vpr_ctx.device();
+    auto& route_ctx = g_vpr_ctx.mutable_routing();
+
+    NetResultFlags flags;
+
+    profiling::sink_criticality_start();
+
+    RRNodeId sink_node = route_ctx.net_rr_terminals[net_id][target_pin];
+    VTR_LOGV_DEBUG(f_router_debug, "Net %zu Target %u (%s)\n", size_t(net_id), itarget, describe_rr_node(device_ctx.rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, sink_node, is_flat).c_str());
+
+    router.clear_modified_rr_node_info();
+
+    bool found_path;
+    t_heap cheapest;
+    t_bb bounding_box = route_ctx.route_bb[net_id];
+
+    bool net_is_global = net_list.net_is_global(net_id);
+    bool high_fanout = is_high_fanout(net_list.net_sinks(net_id).size(), router_opts.high_fanout_threshold);
+    constexpr float HIGH_FANOUT_CRITICALITY_THRESHOLD = 0.9;
+    bool sink_critical = (cost_params.criticality > HIGH_FANOUT_CRITICALITY_THRESHOLD);
+    bool net_is_clock = route_ctx.is_clock_net[net_id] != 0;
+
+    bool has_choking_spot = !choking_spots[target_pin].empty() && router_opts.has_choking_spot;
+    ConnectionParameters conn_params(net_id, target_pin, has_choking_spot, choking_spots[target_pin]);
+
+    //We normally route high fanout nets by only adding spatially close-by routing to the heap (reduces run-time).
+    //However, if the current sink is 'critical' from a timing perspective, we put the entire route tree back onto
+    //the heap to ensure it has more flexibility to find the best path.
+    if (high_fanout && !sink_critical && !net_is_global && !net_is_clock && -routing_predictor.get_slope() > router_opts.high_fanout_max_slope) {
+        std::tie(found_path, flags.retry_with_full_bb, cheapest) = router.timing_driven_route_connection_from_route_tree_high_fanout(tree.root(),
+                                                                                                                                     sink_node,
+                                                                                                                                     cost_params,
+                                                                                                                                     bounding_box,
+                                                                                                                                     spatial_rt_lookup,
+                                                                                                                                     router_stats,
+                                                                                                                                     conn_params);
+    } else {
+        std::tie(found_path, flags.retry_with_full_bb, cheapest) = router.timing_driven_route_connection_from_route_tree(tree.root(),
+                                                                                                                         sink_node,
+                                                                                                                         cost_params,
+                                                                                                                         bounding_box,
+                                                                                                                         router_stats,
+                                                                                                                         conn_params);
+    }
+
+    if (!found_path) {
+        ParentBlockId src_block = net_list.net_driver_block(net_id);
+        ParentBlockId sink_block = net_list.pin_block(*(net_list.net_pins(net_id).begin() + target_pin));
+        VTR_LOG("Failed to route connection from '%s' to '%s' for net '%s' (#%zu)\n",
+                net_list.block_name(src_block).c_str(),
+                net_list.block_name(sink_block).c_str(),
+                net_list.net_name(net_id).c_str(),
+                size_t(net_id));
+        if (f_router_debug) {
+            update_screen(ScreenUpdatePriority::MAJOR, "Unable to route connection.", ROUTING, nullptr);
+        }
+        flags.success = false;
+        router.reset_path_costs();
+        return flags;
+    }
+
+    profiling::sink_criticality_end(cost_params.criticality);
+
+    RRNodeId inode(cheapest.index);
+    route_ctx.rr_node_route_inf[inode].target_flag--; /* Connected to this SINK. */
+
+    vtr::optional<const RouteTreeNode&> new_branch, new_sink;
+    std::tie(new_branch, new_sink) = tree.update_from_heap(&cheapest, target_pin, ((high_fanout) ? &spatial_rt_lookup : nullptr), is_flat);
+
+    VTR_ASSERT_DEBUG(!high_fanout || validate_route_tree_spatial_lookup(tree.root(), spatial_rt_lookup));
+
+    if (f_router_debug) {
+        std::string msg = vtr::string_fmt("Routed Net %zu connection %u to RR node %zu successfully", size_t(net_id), itarget, size_t(sink_node));
+        update_screen(ScreenUpdatePriority::MAJOR, msg.c_str(), ROUTING, nullptr);
+    }
+
+    if (budgeting_inf.if_set() && cheapest.path_data != nullptr && cost_params.delay_budget) {
+        if (cheapest.path_data->backward_delay < cost_params.delay_budget->min_delay) {
+            budgeting_inf.set_should_reroute(net_id, true);
+        }
+    }
+
+    /* update global occupancy from the new branch */
+    if (new_branch)
+        pathfinder_update_cost_from_route_tree(new_branch.value(), 1);
+
+    // need to guarantee ALL nodes' path costs are HUGE_POSITIVE_FLOAT at the start of routing to a sink
+    // do this by resetting all the path_costs that have been touched while routing to the current sink
+    router.reset_path_costs();
+
+    // routed to a sink successfully
+    flags.success = true;
+    return flags;
+}
diff --git a/vpr/src/route/route_parallel.cpp b/vpr/src/route/route_parallel.cpp
deleted file mode 100644
index b3a6dda3b72..00000000000
--- a/vpr/src/route/route_parallel.cpp
+++ /dev/null
@@ -1,1059 +0,0 @@
-/** @file Functions specific to parallel routing.
- * Reuse code from route_timing.cpp where possible. */
-
-#include <memory>
-#include <thread>
-#include <vector>
-#include <unordered_map>
-#include <algorithm>
-#include <iostream>
-#include <tuple>
-
-#include "binary_heap.h"
-#include "bucket.h"
-#include "concrete_timing_info.h"
-#include "connection_router.h"
-#include "draw.h"
-#include "globals.h"
-#include "netlist_fwd.h"
-#include "partition_tree.h"
-#include "read_route.h"
-#include "route_export.h"
-#include "route_common.h"
-#include "route_timing.h"
-#include "route_parallel.h"
-// all functions in profiling:: namespace, which are only activated if PROFILE is defined
-#include "route_profiling.h"
-#include "timing_util.h"
-#include "vtr_time.h"
-
-#include "NetPinTimingInvalidator.h"
-
-#ifdef VPR_USE_TBB
-
-#    include "tbb/enumerable_thread_specific.h"
-#    include "tbb/task_group.h"
-
-/** route_net and similar functions need many bits of state collected from various
- * parts of VPR, collect them here for ease of use */
-template<typename ConnectionRouter>
-class RouteIterCtx {
-  public:
-    tbb::enumerable_thread_specific<ConnectionRouter> routers;
-    const Netlist<>& net_list;
-    int itry;
-    float pres_fac;
-    const t_router_opts& router_opts;
-    CBRR& connections_inf;
-    tbb::enumerable_thread_specific<RouterStats> router_stats;
-    tbb::enumerable_thread_specific<timing_driven_route_structs> route_structs;
-    NetPinsMatrix<float>& net_delay;
-    const ClusteredPinAtomPinsLookup& netlist_pin_lookup;
-    std::shared_ptr<SetupHoldTimingInfo> timing_info;
-    NetPinTimingInvalidator* pin_timing_invalidator;
-    route_budgets& budgeting_inf;
-    float worst_negative_slack;
-    const RoutingPredictor& routing_predictor;
-    const vtr::vector<ParentNetId, std::vector<std::unordered_map<RRNodeId, int>>>& choking_spots;
-    bool is_flat;
-};
-
-/** Helper for reduce_partition_tree. Traverse \p node's subtree and collect results into \p results */
-static void reduce_partition_tree_helper(const PartitionTreeNode& node, RouteIterResults& results);
-
-/**
- * Try to route in parallel with the given ConnectionRouter.
- * ConnectionRouter is typically templated with a heap type, so this lets us
- * route with different heap implementations.
- *
- * This fn is very similar to try_timing_driven_route_tmpl, but it has enough small changes to
- * warrant a copy. (TODO: refactor this to reuse more of the serial code)
- * 
- * @param netlist Input netlist
- * @param det_routing_arch Routing architecture. See definition of t_det_routing_arch for more details.
- * @param router_opts Command line options for the router.
- * @param analysis_opts Command line options for timing analysis (used in generate_route_timing_reports())
- * @param segment_inf
- * @param[in, out] net_delay
- * @param netlist_pin_lookup
- * @param[in, out] timing_info Interface to the timing analyzer
- * @param delay_calc
- * @param first_iteration_priority
- * @param is_flat
- * @return Success status
- *
- * The reason that try_parallel_route_tmpl (and descendents) are being
- * templated over is because using a virtual interface instead fully templating
- * the router results in a 5% runtime increase.
- *
- * The reason to template over the router in general is to enable runtime
- * selection of core router algorithm's, specifically the router heap. */
-template<typename ConnectionRouter>
-static bool try_parallel_route_tmpl(const Netlist<>& netlist,
-                                    const t_det_routing_arch& det_routing_arch,
-                                    const t_router_opts& router_opts,
-                                    const t_analysis_opts& analysis_opts,
-                                    const std::vector<t_segment_inf>& segment_inf,
-                                    NetPinsMatrix<float>& net_delay,
-                                    const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
-                                    std::shared_ptr<SetupHoldTimingInfo> timing_info,
-                                    std::shared_ptr<RoutingDelayCalculator> delay_calc,
-                                    ScreenUpdatePriority first_iteration_priority,
-                                    bool is_flat);
-
-template<typename ConnectionRouter>
-static RouteIterResults route_with_partition_tree(tbb::task_group& g, RouteIterCtx<ConnectionRouter>& ctx);
-
-template<typename ConnectionRouter>
-static RouteIterResults route_without_partition_tree(std::vector<ParentNetId>& nets_to_route, RouteIterCtx<ConnectionRouter>& ctx);
-
-/************************ Subroutine definitions *****************************/
-
-bool try_parallel_route(const Netlist<>& net_list,
-                        const t_det_routing_arch& det_routing_arch,
-                        const t_router_opts& router_opts,
-                        const t_analysis_opts& analysis_opts,
-                        const std::vector<t_segment_inf>& segment_inf,
-                        NetPinsMatrix<float>& net_delay,
-                        const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
-                        std::shared_ptr<SetupHoldTimingInfo> timing_info,
-                        std::shared_ptr<RoutingDelayCalculator> delay_calc,
-                        ScreenUpdatePriority first_iteration_priority,
-                        bool is_flat) {
-    switch (router_opts.router_heap) {
-        case e_heap_type::BINARY_HEAP:
-            return try_parallel_route_tmpl<ConnectionRouter<BinaryHeap>>(net_list,
-                                                                         det_routing_arch,
-                                                                         router_opts,
-                                                                         analysis_opts,
-                                                                         segment_inf,
-                                                                         net_delay,
-                                                                         netlist_pin_lookup,
-                                                                         timing_info,
-                                                                         delay_calc,
-                                                                         first_iteration_priority,
-                                                                         is_flat);
-            break;
-        case e_heap_type::BUCKET_HEAP_APPROXIMATION:
-            return try_parallel_route_tmpl<ConnectionRouter<Bucket>>(net_list,
-                                                                     det_routing_arch,
-                                                                     router_opts,
-                                                                     analysis_opts,
-                                                                     segment_inf,
-                                                                     net_delay,
-                                                                     netlist_pin_lookup,
-                                                                     timing_info,
-                                                                     delay_calc,
-                                                                     first_iteration_priority,
-                                                                     is_flat);
-        default:
-            VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Unknown heap type %d", router_opts.router_heap);
-    }
-}
-
-template<typename ConnectionRouter>
-bool try_parallel_route_tmpl(const Netlist<>& net_list,
-                             const t_det_routing_arch& det_routing_arch,
-                             const t_router_opts& router_opts,
-                             const t_analysis_opts& analysis_opts,
-                             const std::vector<t_segment_inf>& segment_inf,
-                             NetPinsMatrix<float>& net_delay,
-                             const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
-                             std::shared_ptr<SetupHoldTimingInfo> timing_info,
-                             std::shared_ptr<RoutingDelayCalculator> delay_calc,
-                             ScreenUpdatePriority first_iteration_priority,
-                             bool is_flat) {
-    // Make sure template type ConnectionRouter is a ConnectionRouterInterface.
-    /// TODO: Template on "NetRouter" instead of ConnectionRouter to avoid copying top level routing logic?
-    static_assert(std::is_base_of<ConnectionRouterInterface, ConnectionRouter>::value, "ConnectionRouter must implement the ConnectionRouterInterface");
-
-    const auto& device_ctx = g_vpr_ctx.device();
-    const auto& atom_ctx = g_vpr_ctx.atom();
-    auto& route_ctx = g_vpr_ctx.mutable_routing();
-
-    auto choking_spots = set_nets_choking_spots(net_list,
-                                                route_ctx.net_terminal_groups,
-                                                route_ctx.net_terminal_group_num,
-                                                router_opts.has_choking_spot,
-                                                is_flat);
-
-    //Initially, the router runs normally trying to reduce congestion while
-    //balancing other metrics (timing, wirelength, run-time etc.)
-    RouterCongestionMode router_congestion_mode = RouterCongestionMode::NORMAL;
-
-    //Initialize and properly size the lookups for profiling
-    profiling::profiling_initialization(get_max_pins_per_net(net_list));
-
-    /*
-     * Configure the routing predictor
-     */
-    RoutingPredictor routing_predictor;
-    float abort_iteration_threshold = std::numeric_limits<float>::infinity(); //Default no early abort
-    if (router_opts.routing_failure_predictor == SAFE) {
-        abort_iteration_threshold = ROUTING_PREDICTOR_ITERATION_ABORT_FACTOR_SAFE * router_opts.max_router_iterations;
-    } else if (router_opts.routing_failure_predictor == AGGRESSIVE) {
-        abort_iteration_threshold = ROUTING_PREDICTOR_ITERATION_ABORT_FACTOR_AGGRESSIVE * router_opts.max_router_iterations;
-    } else {
-        VTR_ASSERT_MSG(router_opts.routing_failure_predictor == OFF, "Unrecognized routing failure predictor setting");
-    }
-
-    float high_effort_congestion_mode_iteration_threshold = router_opts.congested_routing_iteration_threshold_frac * router_opts.max_router_iterations;
-
-    /* Set delay of ignored signals to zero. Non-ignored net delays are set by
-     * update_net_delays_from_route_tree() inside parallel_route_net(),
-     * which is only called for non-ignored nets. */
-    for (auto net_id : net_list.nets()) {
-        if (net_list.net_is_ignored(net_id)) {
-            for (unsigned int ipin = 1; ipin < net_list.net_pins(net_id).size(); ++ipin) {
-                net_delay[net_id][ipin] = 0.;
-            }
-        }
-    }
-
-    CBRR connections_inf{net_list, route_ctx.net_rr_terminals, is_flat};
-
-    route_budgets budgeting_inf(net_list, is_flat);
-
-    // This needs to be called before filling intra-cluster lookahead maps to ensure that the intra-cluster lookahead maps are initialized.
-    const RouterLookahead* router_lookahead = get_cached_router_lookahead(det_routing_arch,
-                                                                          router_opts.lookahead_type,
-                                                                          router_opts.write_router_lookahead,
-                                                                          router_opts.read_router_lookahead,
-                                                                          segment_inf,
-                                                                          is_flat);
-
-    if (is_flat) {
-        // If is_flat is true, the router lookahead maps related to intra-cluster resources should be initialized since
-        // they haven't been initialized when the map related to global resources was initialized.
-        auto cache_key = route_ctx.router_lookahead_cache_key_;
-        std::unique_ptr<RouterLookahead> mut_router_lookahead(route_ctx.cached_router_lookahead_.release());
-        VTR_ASSERT(mut_router_lookahead);
-        route_ctx.cached_router_lookahead_.clear();
-        if (!router_opts.read_intra_cluster_router_lookahead.empty()) {
-            mut_router_lookahead->read_intra_cluster(router_opts.read_intra_cluster_router_lookahead);
-        } else {
-            mut_router_lookahead->compute_intra_tile();
-        }
-        route_ctx.cached_router_lookahead_.set(cache_key, std::move(mut_router_lookahead));
-        router_lookahead = get_cached_router_lookahead(det_routing_arch,
-                                                       router_opts.lookahead_type,
-                                                       router_opts.write_router_lookahead,
-                                                       router_opts.read_router_lookahead,
-                                                       segment_inf,
-                                                       is_flat);
-        if (!router_opts.write_intra_cluster_router_lookahead.empty()) {
-            router_lookahead->write_intra_cluster(router_opts.write_intra_cluster_router_lookahead);
-        }
-    }
-
-    VTR_ASSERT(router_lookahead != nullptr);
-
-    /*
-     * Routing parameters
-     */
-    float pres_fac = update_pres_fac(router_opts.first_iter_pres_fac); /* Typically 0 -> ignore cong. */
-    int bb_fac = router_opts.bb_factor;
-
-    //When routing conflicts are detected the bounding boxes are scaled
-    //by BB_SCALE_FACTOR every BB_SCALE_ITER_COUNT iterations
-    constexpr float BB_SCALE_FACTOR = 2;
-    constexpr int BB_SCALE_ITER_COUNT = 5;
-
-    size_t available_wirelength = calculate_wirelength_available();
-
-    /*
-     * Routing status and metrics
-     */
-    bool routing_is_successful = false;
-    WirelengthInfo wirelength_info;
-    OveruseInfo overuse_info(device_ctx.rr_graph.num_nodes());
-    tatum::TimingPathInfo critical_path;
-    int itry; //Routing iteration number
-    int itry_conflicted_mode = 0;
-
-    /*
-     * Best result so far
-     */
-    vtr::vector<ParentNetId, vtr::optional<RouteTree>> best_routing;
-    t_clb_opins_used best_clb_opins_used_locally;
-    RoutingMetrics best_routing_metrics;
-    int legal_convergence_count = 0;
-    std::vector<int> scratch;
-
-    /*
-     * On the first routing iteration ignore congestion to get reasonable net
-     * delay estimates. Set criticalities to 1 when timing analysis is on to
-     * optimize timing, and to 0 when timing analysis is off to optimize routability.
-     *
-     * Subsequent iterations use the net delays from the previous iteration.
-     */
-    std::shared_ptr<SetupHoldTimingInfo> route_timing_info;
-    {
-        vtr::ScopedStartFinishTimer init_timing_timer("Initializing router criticalities");
-        if (timing_info) {
-            if (router_opts.initial_timing == e_router_initial_timing::ALL_CRITICAL) {
-                //First routing iteration, make all nets critical for a min-delay routing
-                route_timing_info = make_constant_timing_info(1.);
-            } else {
-                VTR_ASSERT(router_opts.initial_timing == e_router_initial_timing::LOOKAHEAD);
-
-                {
-                    //Estimate initial connection delays from the router lookahead
-                    init_net_delay_from_lookahead(*router_lookahead,
-                                                  net_list,
-                                                  route_ctx.net_rr_terminals,
-                                                  net_delay,
-                                                  device_ctx.rr_graph,
-                                                  is_flat);
-
-                    //Run STA to get estimated criticalities
-                    timing_info->update();
-                }
-                route_timing_info = timing_info;
-            }
-        } else {
-            //Not timing driven, force criticality to zero for a routability-driven routing
-            route_timing_info = make_constant_timing_info(0.);
-        }
-        VTR_LOG("Initial Net Connection Criticality Histogram:\n");
-        print_router_criticality_histogram(net_list, *route_timing_info, netlist_pin_lookup, is_flat);
-    }
-
-    std::unique_ptr<NetPinTimingInvalidator> pin_timing_invalidator;
-    if (timing_info) {
-        pin_timing_invalidator = make_net_pin_timing_invalidator(
-            router_opts.timing_update_type,
-            net_list,
-            netlist_pin_lookup,
-            atom_ctx.nlist,
-            atom_ctx.lookup,
-            *timing_info->timing_graph(),
-            is_flat);
-    }
-
-    tbb::task_group tbb_task_group;
-
-    /* Set up thread local storage.
-     * tbb::enumerable_thread_specific will construct the elements as needed.
-     * see https://spec.oneapi.io/versions/1.0-rev-3/elements/oneTBB/source/thread_local_storage/enumerable_thread_specific_cls/construct_destroy_copy.html */
-    auto routers = tbb::enumerable_thread_specific<ConnectionRouter>(ConnectionRouter(
-        device_ctx.grid,
-        *router_lookahead,
-        device_ctx.rr_graph.rr_nodes(),
-        &device_ctx.rr_graph,
-        device_ctx.rr_rc_data,
-        device_ctx.rr_graph.rr_switch(),
-        route_ctx.rr_node_route_inf,
-        is_flat)); /* Here we provide an "exemplar" to copy for each thread */
-    auto router_stats_thread = tbb::enumerable_thread_specific<RouterStats>();
-    auto route_structs = tbb::enumerable_thread_specific<timing_driven_route_structs>(net_list);
-
-    RouterStats router_stats;
-    float prev_iter_cumm_time = 0;
-    vtr::Timer iteration_timer;
-    int num_net_bounding_boxes_updated = 0;
-    int itry_since_last_convergence = -1;
-
-    // This heap is used for reserve_locally_used_opins.
-    BinaryHeap small_heap;
-    small_heap.init_heap(device_ctx.grid);
-
-    // When RCV is enabled the router will not stop unless negative hold slack is 0
-    // In some cases this isn't doable, due to global nets or intracluster routing issues
-    // In these cases RCV will finish early if it goes RCV_FINISH_EARLY_COUNTDOWN iterations without detecting resolvable negative hold slack
-    // Increasing this will make the router fail occasionally, decreasing will sometimes not let all hold violations be resolved
-    constexpr int RCV_FINISH_EARLY_COUNTDOWN = 15;
-
-    int rcv_finished_count = RCV_FINISH_EARLY_COUNTDOWN;
-
-    print_route_status_header();
-    for (itry = 1; itry <= router_opts.max_router_iterations; ++itry) {
-        for (auto& stats : router_stats_thread) {
-            init_router_stats(stats);
-        }
-
-        /* Reset "is_routed" and "is_fixed" flags to indicate nets not pre-routed (yet) */
-        for (auto net_id : net_list.nets()) {
-            route_ctx.net_status.set_is_routed(net_id, false);
-            route_ctx.net_status.set_is_fixed(net_id, false);
-        }
-
-        if (itry_since_last_convergence >= 0) {
-            ++itry_since_last_convergence;
-        }
-
-        // Calculate this once and pass it into net routing to check if should reroute for hold
-        float worst_negative_slack = 0;
-        if (budgeting_inf.if_set()) {
-            worst_negative_slack = timing_info->hold_total_negative_slack();
-        }
-
-        /**
-         * Route nets in parallel using the partition tree. Need to pass on
-         * some context to each task.
-         * TODO: Move pin_criticality into timing_driven_route_net().
-         * TODO: Move rt_node_of_sink lookup into RouteTree. 
-         */
-        RouteIterCtx<ConnectionRouter> iter_ctx = {
-            routers,
-            net_list,
-            itry,
-            pres_fac,
-            router_opts,
-            connections_inf,
-            router_stats_thread,
-            route_structs,
-            net_delay,
-            netlist_pin_lookup,
-            route_timing_info,
-            pin_timing_invalidator.get(),
-            budgeting_inf,
-            worst_negative_slack,
-            routing_predictor,
-            choking_spots,
-            is_flat};
-
-        vtr::Timer net_routing_timer;
-        RouteIterResults iter_results = route_with_partition_tree(tbb_task_group, iter_ctx);
-        PartitionTreeDebug::log("Routing all nets took " + std::to_string(net_routing_timer.elapsed_sec()) + " s");
-
-        if (!iter_results.is_routable) {
-            return false; // Impossible to route
-        }
-
-        /* Note that breakpoints won't work properly with parallel routing.
-         * (how to do that? stop all threads when a thread hits a breakpoint? too complicated)
-         * However we still make an attempt to update graphics */
-#    ifndef NO_GRAPHICS
-        for (auto net_id : net_list.nets()) {
-            update_router_info_and_check_bp(BP_NET_ID, size_t(net_id));
-        }
-#    endif
-
-        // Make sure any CLB OPINs used up by subblocks being hooked directly to them are reserved for that purpose
-        bool rip_up_local_opins = (itry == 1 ? false : true);
-        if (!is_flat) {
-            reserve_locally_used_opins(&small_heap, pres_fac,
-                                       router_opts.acc_fac, rip_up_local_opins, is_flat);
-        }
-
-        /*
-         * Calculate metrics for the current routing
-         */
-        bool routing_is_feasible = feasible_routing();
-        float est_success_iteration = routing_predictor.estimate_success_iteration();
-
-        //Update resource costs and overuse info
-        if (itry == 1) {
-            pathfinder_update_acc_cost_and_overuse_info(0., overuse_info); /* Acc_fac=0 for first iter. */
-        } else {
-            pathfinder_update_acc_cost_and_overuse_info(router_opts.acc_fac, overuse_info);
-        }
-
-        wirelength_info = calculate_wirelength_info(net_list, available_wirelength);
-        routing_predictor.add_iteration_overuse(itry, overuse_info.overused_nodes);
-
-        if (timing_info) {
-            //Update timing based on the new routing
-            //Note that the net delays have already been updated by parallel_route_net
-            timing_info->update();
-            timing_info->set_warn_unconstrained(false); //Don't warn again about unconstrained nodes again during routing
-            pin_timing_invalidator->reset();
-
-            //Use the real timing analysis criticalities for subsequent routing iterations
-            //  'route_timing_info' is what is actually passed into the net/connection routers,
-            //  and for the 1st iteration may not be the actual STA results (e.g. all criticalities set to 1)
-            route_timing_info = timing_info;
-
-            critical_path = timing_info->least_slack_critical_path();
-
-            VTR_ASSERT_SAFE(timing_driven_check_net_delays(net_list, net_delay));
-
-            if (itry == 1) {
-                generate_route_timing_reports(router_opts, analysis_opts, *timing_info, *delay_calc, is_flat);
-            }
-        }
-
-        float iter_cumm_time = iteration_timer.elapsed_sec();
-        float iter_elapsed_time = iter_cumm_time - prev_iter_cumm_time;
-
-        //Output progress
-        print_route_status(itry, iter_elapsed_time, pres_fac, num_net_bounding_boxes_updated, iter_results.stats, overuse_info, wirelength_info, timing_info, est_success_iteration);
-        PartitionTreeDebug::log("Iteration " + std::to_string(itry) + " took " + std::to_string(iter_elapsed_time) + " s");
-
-        prev_iter_cumm_time = iter_cumm_time;
-
-        //Update graphics
-        if (itry == 1) {
-            update_screen(first_iteration_priority, "Routing...", ROUTING, timing_info);
-        } else {
-            update_screen(ScreenUpdatePriority::MINOR, "Routing...", ROUTING, timing_info);
-        }
-
-        if (router_opts.save_routing_per_iteration) {
-            std::string filename = vtr::string_fmt("iteration_%03d.route", itry);
-            print_route(net_list, nullptr, filename.c_str(), is_flat);
-        }
-
-        // Update router stats
-        update_router_stats(router_stats, iter_results.stats);
-
-        /*
-         * Are we finished?
-         */
-        if (is_iteration_complete(routing_is_feasible, router_opts, itry, timing_info, rcv_finished_count == 0)) {
-            auto& router_ctx = g_vpr_ctx.routing();
-
-            if (is_better_quality_routing(best_routing, best_routing_metrics, wirelength_info, timing_info)) {
-                //Save routing
-                best_routing = router_ctx.route_trees;
-                best_clb_opins_used_locally = router_ctx.clb_opins_used_locally;
-
-                routing_is_successful = true;
-
-                //Update best metrics
-                if (timing_info) {
-                    timing_driven_check_net_delays(net_list, net_delay);
-
-                    best_routing_metrics.sTNS = timing_info->setup_total_negative_slack();
-                    best_routing_metrics.sWNS = timing_info->setup_worst_negative_slack();
-                    best_routing_metrics.hTNS = timing_info->hold_total_negative_slack();
-                    best_routing_metrics.hWNS = timing_info->hold_worst_negative_slack();
-                    best_routing_metrics.critical_path = critical_path;
-                }
-                best_routing_metrics.used_wirelength = wirelength_info.used_wirelength();
-            }
-
-            //Decrease pres_fac so that critical connections will take more direct routes
-            //Note that we use first_iter_pres_fac here (typically zero), and switch to
-            //use initial_pres_fac on the next iteration.
-            pres_fac = update_pres_fac(router_opts.first_iter_pres_fac);
-
-            //Reduce timing tolerances to re-route more delay-suboptimal signals
-            connections_inf.set_connection_criticality_tolerance(0.7);
-            connections_inf.set_connection_delay_tolerance(1.01);
-
-            ++legal_convergence_count;
-            itry_since_last_convergence = 0;
-
-            VTR_ASSERT(routing_is_successful);
-        }
-
-        if (itry_since_last_convergence == 1) {
-            //We used first_iter_pres_fac when we started routing again
-            //after the first routing convergence. Since that is often zero,
-            //we want to set pres_fac to a reasonable (i.e. typically non-zero)
-            //value afterwards -- so it grows when multiplied by pres_fac_mult
-            pres_fac = update_pres_fac(router_opts.initial_pres_fac);
-        }
-
-        //Have we converged the maximum number of times, did not make any changes, or does it seem
-        //unlikely additional convergences will improve QoR?
-        if (legal_convergence_count >= router_opts.max_convergence_count
-            || iter_results.stats.connections_routed == 0
-            || early_reconvergence_exit_heuristic(router_opts, itry_since_last_convergence, timing_info, best_routing_metrics)) {
-#    ifndef NO_GRAPHICS
-            update_router_info_and_check_bp(BP_ROUTE_ITER, -1);
-#    endif
-            break; //Done routing
-        }
-
-        /*
-         * Abort checks: Should we give-up because this routing problem is unlikely to converge to a legal routing?
-         */
-        if (itry == 1 && early_exit_heuristic(router_opts, wirelength_info)) {
-#    ifndef NO_GRAPHICS
-            update_router_info_and_check_bp(BP_ROUTE_ITER, -1);
-#    endif
-            //Abort
-            break;
-        }
-
-        //Estimate at what iteration we will converge to a legal routing
-        if (overuse_info.overused_nodes > ROUTING_PREDICTOR_MIN_ABSOLUTE_OVERUSE_THRESHOLD) {
-            //Only consider aborting if we have a significant number of overused resources
-
-            if (!std::isnan(est_success_iteration) && est_success_iteration > abort_iteration_threshold && router_opts.routing_budgets_algorithm != YOYO) {
-                VTR_LOG("Routing aborted, the predicted iteration for a successful route (%.1f) is too high.\n", est_success_iteration);
-#    ifndef NO_GRAPHICS
-                update_router_info_and_check_bp(BP_ROUTE_ITER, -1);
-#    endif
-                break; //Abort
-            }
-        }
-
-        if (itry == 1 && router_opts.exit_after_first_routing_iteration) {
-            VTR_LOG("Exiting after first routing iteration as requested\n");
-#    ifndef NO_GRAPHICS
-            update_router_info_and_check_bp(BP_ROUTE_ITER, -1);
-#    endif
-            break;
-        }
-
-        /*
-         * Prepare for the next iteration
-         */
-
-        if (router_opts.route_bb_update == e_route_bb_update::DYNAMIC) {
-            num_net_bounding_boxes_updated = dynamic_update_bounding_boxes(iter_results.rerouted_nets, net_list, router_opts.high_fanout_threshold);
-        }
-
-        if (itry >= high_effort_congestion_mode_iteration_threshold) {
-            //We are approaching the maximum number of routing iterations,
-            //and still do not have a legal routing. Switch to a mode which
-            //focuses more on attempting to resolve routing conflicts.
-            router_congestion_mode = RouterCongestionMode::CONFLICTED;
-        }
-
-        //Update pres_fac
-        if (itry == 1) {
-            pres_fac = update_pres_fac(router_opts.initial_pres_fac);
-        } else {
-            pres_fac *= router_opts.pres_fac_mult;
-
-            /* Avoid overflow for high iteration counts, even if acc_cost is big */
-            pres_fac = update_pres_fac(std::min(pres_fac, static_cast<float>(HUGE_POSITIVE_FLOAT / 1e5)));
-
-            // Increase short path criticality if it's having a hard time resolving hold violations due to congestion
-            if (budgeting_inf.if_set()) {
-                bool rcv_finished = false;
-
-                /* This constant represents how much extra delay the budget increaser adds to the minimum and maximum delay budgets
-                 * Experimentally this value delivers fast hold slack resolution, while not overwhelming the router
-                 * Increasing this will make it resolve hold faster, but could result in lower circuit quality */
-                constexpr float budget_increase_factor = 300e-12;
-
-                if (itry > 5 && worst_negative_slack != 0) rcv_finished = budgeting_inf.increase_min_budgets_if_struggling(budget_increase_factor, timing_info, worst_negative_slack, netlist_pin_lookup);
-                if (rcv_finished)
-                    rcv_finished_count--;
-                else
-                    rcv_finished_count = RCV_FINISH_EARLY_COUNTDOWN;
-            }
-        }
-
-        if (router_congestion_mode == RouterCongestionMode::CONFLICTED) {
-            //The design appears to have routing conflicts which are difficult to resolve:
-            //  1) Don't re-route legal connections due to delay. This allows
-            //     the router to focus on the actual conflicts
-            //  2) Increase the net bounding boxes. This potentially allows
-            //     the router to route around otherwise congested regions
-            //     (at the cost of high run-time).
-
-            //Increase the size of the net bounding boxes to give the router more
-            //freedom to find alternate paths.
-            //
-            //In the case of routing conflicts there are multiple connections competing
-            //for the same resources which can not resolve the congestion themselves.
-            //In normal routing mode we try to keep the bounding boxes small to minimize
-            //run-time, but this can limits how far signals can detour (i.e. they can't
-            //route outside the bounding box), which can cause conflicts to oscillate back
-            //and forth without resolving.
-            //
-            //By scaling the bounding boxes here, we slowly increase the router's search
-            //space in hopes of it allowing signals to move further out of the way to
-            //alleviate the conflicts.
-            if (itry_conflicted_mode % BB_SCALE_ITER_COUNT == 0) {
-                //We scale the bounding boxes by BB_SCALE_FACTOR,
-                //every BB_SCALE_ITER_COUNT iterations. This ensures
-                //that we give the router some time (BB_SCALE_ITER_COUNT) to try
-                //resolve/negotiate congestion at the new BB factor.
-                //
-                //Note that we increase the BB factor slowly to try and minimize
-                //the bounding box size (since larger bounding boxes slow the router down).
-                auto& grid = g_vpr_ctx.device().grid;
-                int max_grid_dim = std::max(grid.width(), grid.height());
-
-                //Scale by BB_SCALE_FACTOR but clip to grid size to avoid overflow
-                bb_fac = std::min<int>(max_grid_dim, bb_fac * BB_SCALE_FACTOR);
-
-                route_ctx.route_bb = load_route_bb(net_list, bb_fac);
-            }
-
-            ++itry_conflicted_mode;
-        }
-
-        if (timing_info) {
-            if (should_setup_lower_bound_connection_delays(itry, router_opts)) {
-                // first iteration sets up the lower bound connection delays since only timing is optimized for
-                connections_inf.set_stable_critical_path_delay(critical_path.delay());
-                connections_inf.set_lower_bound_connection_delays(net_delay);
-
-                //load budgets using information from uncongested delay information
-                budgeting_inf.load_route_budgets(net_delay, timing_info, netlist_pin_lookup, router_opts);
-                /*for debugging purposes*/
-                // if (budgeting_inf.if_set()) {
-                //     budgeting_inf.print_route_budget(std::string("route_budgets_") + std::to_string(itry) + ".txt", net_delay);
-                // }
-
-                if (router_opts.routing_budgets_algorithm == YOYO) {
-                    for (auto& router : routers) {
-                        router.set_rcv_enabled(true);
-                    }
-                }
-
-            } else {
-                bool stable_routing_configuration = true;
-
-                /*
-                 * Determine if any connection need to be forcibly re-routed due to timing
-                 */
-
-                //Yes, if explicitly enabled
-                bool should_ripup_for_delay = (router_opts.incr_reroute_delay_ripup == e_incr_reroute_delay_ripup::ON);
-
-                //Or, if things are not too congested
-                should_ripup_for_delay |= (router_opts.incr_reroute_delay_ripup == e_incr_reroute_delay_ripup::AUTO
-                                           && router_congestion_mode == RouterCongestionMode::NORMAL);
-
-                if (should_ripup_for_delay) {
-                    if (connections_inf.critical_path_delay_grew_significantly(critical_path.delay())) {
-                        // only need to forcibly reroute if critical path grew significantly
-                        stable_routing_configuration = connections_inf.forcibly_reroute_connections(router_opts.max_criticality,
-                                                                                                    timing_info,
-                                                                                                    netlist_pin_lookup,
-                                                                                                    net_delay);
-                    }
-                }
-
-                // not stable if any connection needs to be forcibly rerouted
-                if (stable_routing_configuration) {
-                    connections_inf.set_stable_critical_path_delay(critical_path.delay());
-                }
-            }
-        } else {
-            /* If timing analysis is not enabled, make sure that the criticalities and the
-             * net_delays stay as 0 so that wirelength can be optimized. */
-
-            for (auto net_id : net_list.nets()) {
-                for (unsigned int ipin = 1; ipin < net_list.net_pins(net_id).size(); ++ipin) {
-                    net_delay[net_id][ipin] = 0.;
-                }
-            }
-        }
-
-        if (router_opts.congestion_analysis) profiling::congestion_analysis();
-        if (router_opts.fanout_analysis) profiling::time_on_fanout_analysis();
-        // profiling::time_on_criticality_analysis();
-    }
-
-    if (routing_is_successful) {
-        VTR_LOG("Restoring best routing\n");
-
-        auto& router_ctx = g_vpr_ctx.mutable_routing();
-
-        /* Restore congestion from best route */
-        for (auto net_id : net_list.nets()) {
-            if (route_ctx.route_trees[net_id])
-                pathfinder_update_cost_from_route_tree(route_ctx.route_trees[net_id]->root(), -1);
-            if (best_routing[net_id])
-                pathfinder_update_cost_from_route_tree(best_routing[net_id]->root(), 1);
-        }
-        router_ctx.route_trees = best_routing;
-        router_ctx.clb_opins_used_locally = best_clb_opins_used_locally;
-
-        prune_unused_non_configurable_nets(connections_inf, net_list);
-
-        if (timing_info) {
-            VTR_LOG("Critical path: %g ns\n", 1e9 * best_routing_metrics.critical_path.delay());
-        }
-
-        VTR_LOG("Successfully routed after %d routing iterations.\n", itry);
-    } else {
-        VTR_LOG("Routing failed.\n");
-
-        //If the routing fails, print the overused info
-        print_overused_nodes_status(router_opts, overuse_info);
-
-#    ifdef VTR_ENABLE_DEBUG_LOGGING
-        if (f_router_debug) print_invalid_routing_info(net_list, is_flat);
-#    endif
-    }
-
-    VTR_LOG("Final Net Connection Criticality Histogram:\n");
-    print_router_criticality_histogram(net_list, *route_timing_info, netlist_pin_lookup, is_flat);
-
-    VTR_ASSERT(router_stats.heap_pushes >= router_stats.intra_cluster_node_pushes);
-    VTR_ASSERT(router_stats.heap_pops >= router_stats.intra_cluster_node_pops);
-    VTR_LOG(
-        "Router Stats: total_nets_routed: %zu total_connections_routed: %zu total_heap_pushes: %zu total_heap_pops: %zu "
-        "total_internal_heap_pushes: %zu total_internal_heap_pops: %zu total_external_heap_pushes: %zu total_external_heap_pops: %zu ",
-        router_stats.nets_routed, router_stats.connections_routed, router_stats.heap_pushes, router_stats.heap_pops,
-        router_stats.intra_cluster_node_pushes, router_stats.intra_cluster_node_pops,
-        router_stats.inter_cluster_node_pushes, router_stats.inter_cluster_node_pops);
-    for (int node_type_idx = 0; node_type_idx < t_rr_type::NUM_RR_TYPES; node_type_idx++) {
-        VTR_LOG("total_external_%s_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.inter_cluster_node_type_cnt_pushes[node_type_idx]);
-        VTR_LOG("total_external_%s_pops: %zu ", rr_node_typename[node_type_idx], router_stats.inter_cluster_node_type_cnt_pops[node_type_idx]);
-        VTR_LOG("total_internal_%s_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.intra_cluster_node_type_cnt_pushes[node_type_idx]);
-        VTR_LOG("total_internal_%s_pops: %zu ", rr_node_typename[node_type_idx], router_stats.intra_cluster_node_type_cnt_pops[node_type_idx]);
-        VTR_LOG("rt_node_%s_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.rt_node_pushes[node_type_idx]);
-        VTR_LOG("rt_node_%s_high_fanout_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.rt_node_high_fanout_pushes[node_type_idx]);
-        VTR_LOG("rt_node_%s_entire_tree_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.rt_node_entire_tree_pushes[node_type_idx]);
-    }
-
-    VTR_LOG("total_number_of_adding_all_rt: %zu ", router_stats.add_all_rt);
-    VTR_LOG("total_number_of_adding_high_fanout_rt: %zu ", router_stats.add_high_fanout_rt);
-    VTR_LOG("total_number_of_adding_all_rt_from_calling_high_fanout_rt: %zu ", router_stats.add_all_rt_from_high_fanout);
-    VTR_LOG("\n");
-
-    PartitionTreeDebug::write("partition_tree.log");
-    return routing_is_successful;
-}
-
-/** Try routing a net. This calls timing_driven_route_net.
- * The only difference is that it returns a "retry_net" flag, which means that the net
- * couldn't be routed with the default bounding box and needs a full-device BB.
- * This is required when routing in parallel, because the threads ensure data separation based on BB size.
- * The single-thread router just retries with a full-device BB and does not need to notify the caller.
- * TODO: make the serial router follow this execution path to decrease code duplication */
-template<typename ConnectionRouter>
-NetResultFlags try_parallel_route_net(ConnectionRouter& router,
-                                      const Netlist<>& net_list,
-                                      const ParentNetId& net_id,
-                                      int itry,
-                                      float pres_fac,
-                                      const t_router_opts& router_opts,
-                                      CBRR& connections_inf,
-                                      RouterStats& router_stats,
-                                      std::vector<float>& pin_criticality,
-                                      NetPinsMatrix<float>& net_delay,
-                                      const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
-                                      std::shared_ptr<SetupHoldTimingInfo> timing_info,
-                                      NetPinTimingInvalidator* pin_timing_invalidator,
-                                      route_budgets& budgeting_inf,
-                                      float worst_negative_slack,
-                                      const RoutingPredictor& routing_predictor,
-                                      const std::vector<std::unordered_map<RRNodeId, int>>& choking_spots,
-                                      bool is_flat) {
-    auto& route_ctx = g_vpr_ctx.mutable_routing();
-
-    NetResultFlags flags;
-
-    bool reroute_for_hold = false;
-    if (budgeting_inf.if_set()) {
-        reroute_for_hold = (budgeting_inf.get_should_reroute(net_id));
-        reroute_for_hold &= worst_negative_slack != 0;
-    }
-    if (route_ctx.net_status.is_fixed(net_id)) { /* Skip pre-routed nets. */
-        flags.success = true;
-    } else if (net_list.net_is_ignored(net_id)) { /* Skip ignored nets. */
-        flags.success = true;
-    } else if (!(reroute_for_hold) && !should_route_net(net_id, connections_inf, true)) {
-        flags.success = true;
-    } else {
-        // track time spent vs fanout
-        profiling::net_fanout_start();
-
-        vtr::Timer routing_timer;
-        flags = timing_driven_route_net(router,
-                                        net_list,
-                                        net_id,
-                                        itry,
-                                        pres_fac,
-                                        router_opts,
-                                        connections_inf,
-                                        router_stats,
-                                        pin_criticality,
-                                        net_delay[net_id].data(),
-                                        netlist_pin_lookup,
-                                        timing_info,
-                                        pin_timing_invalidator,
-                                        budgeting_inf,
-                                        worst_negative_slack,
-                                        routing_predictor,
-                                        choking_spots,
-                                        is_flat);
-
-        profiling::net_fanout_end(net_list.net_sinks(net_id).size());
-
-        /* Impossible to route? (disconnected rr_graph) */
-        if (flags.success) {
-            route_ctx.net_status.set_is_routed(net_id, true);
-        } else {
-            VTR_LOG("Routing failed for net %d\n", net_id);
-        }
-
-        flags.was_rerouted = true; //Flag to record whether routing was actually changed
-    }
-
-    return flags;
-}
-
-/* Helper for route_partition_tree(). */
-template<typename ConnectionRouter>
-void route_partition_tree_helper(tbb::task_group& g,
-                                 PartitionTreeNode& node,
-                                 RouteIterCtx<ConnectionRouter>& ctx,
-                                 vtr::linear_map<ParentNetId, int>& nets_to_retry) {
-    /* Sort so net with most sinks is routed first. */
-    std::sort(node.nets.begin(), node.nets.end(), [&](const ParentNetId id1, const ParentNetId id2) -> bool {
-        return ctx.net_list.net_sinks(id1).size() > ctx.net_list.net_sinks(id2).size();
-    });
-
-    node.is_routable = true;
-    node.rerouted_nets.clear();
-
-    vtr::Timer t;
-    for (auto net_id : node.nets) {
-        auto flags = try_parallel_route_net(
-            ctx.routers.local(),
-            ctx.net_list,
-            net_id,
-            ctx.itry,
-            ctx.pres_fac,
-            ctx.router_opts,
-            ctx.connections_inf,
-            ctx.router_stats.local(),
-            ctx.route_structs.local().pin_criticality,
-            ctx.net_delay,
-            ctx.netlist_pin_lookup,
-            ctx.timing_info,
-            ctx.pin_timing_invalidator,
-            ctx.budgeting_inf,
-            ctx.worst_negative_slack,
-            ctx.routing_predictor,
-            ctx.choking_spots[net_id],
-            ctx.is_flat);
-
-        if (!flags.success && !flags.retry_with_full_bb) {
-            node.is_routable = false;
-        }
-        if (flags.was_rerouted) {
-            node.rerouted_nets.push_back(net_id);
-        }
-        /* If we need to retry this net with full-device BB, it will go up to the top
-         * of the tree, so remove it from this node and keep track of it */
-        if (flags.retry_with_full_bb) {
-            node.nets.erase(std::remove(node.nets.begin(), node.nets.end(), net_id), node.nets.end());
-            nets_to_retry[net_id] = true;
-        }
-    }
-
-    PartitionTreeDebug::log("Node with " + std::to_string(node.nets.size()) + " nets routed in " + std::to_string(t.elapsed_sec()) + " s");
-
-    /* add left and right trees to task queue */
-    if (node.left && node.right) {
-        g.run([&]() {
-            route_partition_tree_helper(g, *node.left, ctx, nets_to_retry);
-        });
-        g.run([&]() {
-            route_partition_tree_helper(g, *node.right, ctx, nets_to_retry);
-        });
-    } else {
-        VTR_ASSERT(!node.left && !node.right); // there shouldn't be a node with a single branch
-    }
-}
-
-/** Reduce results from partition tree into a single RouteIterResults */
-static void reduce_partition_tree_helper(const PartitionTreeNode& node, RouteIterResults& results) {
-    results.is_routable &= node.is_routable;
-    const std::vector<ParentNetId>& rerouted = node.rerouted_nets;
-    results.rerouted_nets.insert(results.rerouted_nets.end(), rerouted.begin(), rerouted.end());
-
-    if (node.left)
-        reduce_partition_tree_helper(*node.left, results);
-    if (node.right)
-        reduce_partition_tree_helper(*node.right, results);
-}
-
-/** Route all nets in parallel using the partitioning information in the PartitionTree.
- *
- * @param[in, out] g TBB task group to dispatch tasks.
- * @param[in, out] tree The partition tree. Non-const reference because iteration results get written on the nodes.
- * @param[in, out] ctx RouteIterCtx containing all the necessary bits of state for routing.
- * @return RouteIterResults combined from all threads.
- *
- * See comments in PartitionTreeNode for how parallel routing works. */
-template<typename ConnectionRouter>
-RouteIterResults route_partition_tree(tbb::task_group& g,
-                                      PartitionTree& tree,
-                                      RouteIterCtx<ConnectionRouter>& ctx) {
-    auto& device_ctx = g_vpr_ctx.device();
-    auto& route_ctx = g_vpr_ctx.mutable_routing();
-
-    /* a net id -> retry? vector
-     * not a bool vector or a set because multiple threads may be writing on it */
-    vtr::linear_map<ParentNetId, int> nets_to_retry;
-
-    route_partition_tree_helper(g, tree.root(), ctx, nets_to_retry);
-    g.wait();
-
-    /* grow bounding box and add to top level if there is any net to retry */
-    for (const auto& kv : nets_to_retry) {
-        if (kv.second) {
-            ParentNetId net_id = kv.first;
-            route_ctx.route_bb[net_id] = {
-                0,
-                (int)(device_ctx.grid.width() - 1),
-                0,
-                (int)(device_ctx.grid.height() - 1),
-                0,
-                (int)(device_ctx.grid.get_num_layers() - 1)};
-            tree.root().nets.push_back(net_id);
-        }
-    }
-
-    RouteIterResults out;
-    reduce_partition_tree_helper(tree.root(), out);
-    for (auto& thread_stats : ctx.router_stats) {
-        update_router_stats(out.stats, thread_stats);
-    }
-    return out;
-}
-
-/* Build a partition tree and route with it */
-template<typename ConnectionRouter>
-static RouteIterResults route_with_partition_tree(tbb::task_group& g, RouteIterCtx<ConnectionRouter>& ctx) {
-    vtr::Timer t2;
-    PartitionTree partition_tree(ctx.net_list);
-    float total_prep_time = t2.elapsed_sec();
-    VTR_LOG("# Built partition tree in %f seconds\n", total_prep_time);
-
-    return route_partition_tree(g, partition_tree, ctx);
-}
-
-/* Route serially */
-template<typename ConnectionRouter>
-static RouteIterResults route_without_partition_tree(std::vector<ParentNetId>& nets_to_route, RouteIterCtx<ConnectionRouter>& ctx) {
-    RouteIterResults out;
-
-    /* Sort so net with most sinks is routed first. */
-    std::sort(nets_to_route.begin(), nets_to_route.end(), [&](const ParentNetId id1, const ParentNetId id2) -> bool {
-        return ctx.net_list.net_sinks(id1).size() > ctx.net_list.net_sinks(id2).size();
-    });
-
-    for (auto net_id : nets_to_route) {
-        auto flags = try_timing_driven_route_net(
-            ctx.routers.local(),
-            ctx.net_list,
-            net_id,
-            ctx.itry,
-            ctx.pres_fac,
-            ctx.router_opts,
-            ctx.connections_inf,
-            ctx.router_stats.local(),
-            ctx.route_structs.local().pin_criticality,
-            ctx.route_structs.local().rt_node_of_sink,
-            ctx.net_delay,
-            ctx.netlist_pin_lookup,
-            ctx.timing_info,
-            ctx.pin_timing_invalidator,
-            ctx.budgeting_inf,
-            ctx.worst_negative_slack,
-            ctx.routing_predictor,
-            ctx.choking_spots[net_id],
-            ctx.is_flat);
-
-        if (!flags.success) {
-            out.is_routable = false;
-        }
-        if (flags.was_rerouted) {
-            out.rerouted_nets.push_back(net_id);
-        }
-    }
-
-    update_router_stats(out.stats, ctx.router_stats.local());
-
-    return out;
-}
-
-#endif // VPR_USE_TBB
diff --git a/vpr/src/route/route_parallel.h b/vpr/src/route/route_parallel.h
deleted file mode 100644
index b6b4766469f..00000000000
--- a/vpr/src/route/route_parallel.h
+++ /dev/null
@@ -1,33 +0,0 @@
-#pragma once
-
-#include <unordered_map>
-#include <vector>
-#include "connection_based_routing.h"
-#include "netlist.h"
-#include "vpr_types.h"
-
-#include "vpr_utils.h"
-#include "timing_info_fwd.h"
-#include "route_budgets.h"
-#include "router_stats.h"
-#include "router_lookahead.h"
-#include "spatial_route_tree_lookup.h"
-#include "connection_router_interface.h"
-#include "heap_type.h"
-#include "routing_predictor.h"
-
-#ifdef VPR_USE_TBB
-/** Route in parallel. The number of threads is set by the global -j option to VPR.
- * Return success status. */
-bool try_parallel_route(const Netlist<>& net_list,
-                        const t_det_routing_arch& det_routing_arch,
-                        const t_router_opts& router_opts,
-                        const t_analysis_opts& analysis_opts,
-                        const std::vector<t_segment_inf>& segment_inf,
-                        NetPinsMatrix<float>& net_delay,
-                        const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
-                        std::shared_ptr<SetupHoldTimingInfo> timing_info,
-                        std::shared_ptr<RoutingDelayCalculator> delay_calc,
-                        ScreenUpdatePriority first_iteration_priority,
-                        bool is_flat);
-#endif
diff --git a/vpr/src/route/route_timing.cpp b/vpr/src/route/route_timing.cpp
deleted file mode 100644
index 2b497066e32..00000000000
--- a/vpr/src/route/route_timing.cpp
+++ /dev/null
@@ -1,2263 +0,0 @@
-#include <cstdio>
-#include <ctime>
-#include <cmath>
-#include <vector>
-#include <unordered_map>
-#include <algorithm>
-#include <iostream>
-#include <tuple>
-
-#include "NetPinTimingInvalidator.h"
-#include "netlist_fwd.h"
-#include "rr_graph_fwd.h"
-#include "vtr_assert.h"
-#include "vtr_log.h"
-#include "vtr_time.h"
-
-#include "vpr_utils.h"
-#include "vpr_types.h"
-#include "vpr_error.h"
-
-#include "globals.h"
-#include "read_route.h"
-#include "route_export.h"
-#include "route_common.h"
-#include "route_timing.h"
-#include "net_delay.h"
-#include "stats.h"
-#include "echo_files.h"
-#include "draw.h"
-#include "breakpoint.h"
-#include "move_utils.h"
-#include "rr_graph.h"
-#include "routing_predictor.h"
-#include "VprTimingGraphResolver.h"
-
-// all functions in profiling:: namespace, which are only activated if PROFILE is defined
-#include "route_profiling.h"
-
-#include "concrete_timing_info.h"
-#include "timing_util.h"
-#include "route_budgets.h"
-#include "binary_heap.h"
-#include "bucket.h"
-#include "connection_router.h"
-
-#include "tatum/TimingReporter.hpp"
-#include "overuse_report.h"
-
-/*
- * File-scope variables
- */
-
-/**
- * @brief Run-time flag to control when router debug information is printed
- * Note only enables debug output if compiled with VTR_ENABLE_DEBUG_LOGGING defined
- * f_router_debug is used to stop the router when a breakpoint is reached. When a breakpoint is reached, this flag is set to true.
- *
- * In addition f_router_debug is used to print additional debug information during routing, for instance lookahead expected costs
- * information.
- */
-bool f_router_debug = false;
-
-//Count the number of times the router has failed
-static int num_routing_failed = 0;
-
-/******************** Subroutines local to route_timing.cpp ********************/
-
-/** Attempt to route a single sink (target_pin) in a net.
- * In the process, update global pathfinder costs, rr_node_route_inf and extend the global RouteTree
- * for this net.
- *
- * @param router The ConnectionRouter instance 
- * @param net_list Input netlist
- * @param net_id
- * @param itarget # of this connection in the net (only used for debug output)
- * @param target_pin # of this sink in the net (TODO: is it the same thing as itarget?)
- * @param cost_params
- * @param router_opts
- * @param[in, out] tree RouteTree describing the current routing state
- * @param rt_node_of_sink Lookup from target_pin-like indices (indicating SINK nodes) to RouteTreeNodes
- * @param spatial_rt_lookup
- * @param router_stats
- * @param budgeting_inf
- * @param routing_predictor
- * @param choking_spots
- * @param is_flat
- * @return NetResultFlags for this sink to be bubbled up through timing_driven_route_net */
-template<typename ConnectionRouter>
-static NetResultFlags timing_driven_route_sink(ConnectionRouter& router,
-                                               const Netlist<>& net_list,
-                                               ParentNetId net_id,
-                                               unsigned itarget,
-                                               int target_pin,
-                                               const t_conn_cost_params cost_params,
-                                               const t_router_opts& router_opts,
-                                               RouteTree& tree,
-                                               SpatialRouteTreeLookup& spatial_rt_lookup,
-                                               RouterStats& router_stats,
-                                               route_budgets& budgeting_inf,
-                                               const RoutingPredictor& routing_predictor,
-                                               const std::vector<std::unordered_map<RRNodeId, int>>& choking_spots,
-                                               bool is_flat);
-
-/** Return tuple of:
- * bool: Did we find a path for each sink in this net? 
- * bool: Should the caller retry with a full-device bounding box? */
-template<typename ConnectionRouter>
-static std::tuple<bool, bool> timing_driven_pre_route_to_clock_root(ConnectionRouter& router,
-                                                                    ParentNetId net_id,
-                                                                    const Netlist<>& net_list,
-                                                                    RRNodeId sink_node,
-                                                                    const t_conn_cost_params cost_params,
-                                                                    int high_fanout_threshold,
-                                                                    RouteTree& tree,
-                                                                    SpatialRouteTreeLookup& spatial_rt_lookup,
-                                                                    RouterStats& router_stats,
-                                                                    bool is_flat,
-                                                                    bool can_grow_bb);
-
-static void setup_routing_resources(int itry,
-                                    ParentNetId net_id,
-                                    const Netlist<>& net_list,
-                                    unsigned num_sinks,
-                                    int min_incremental_reroute_fanout,
-                                    CBRR& connections_inf,
-                                    const t_router_opts& router_opts,
-                                    bool ripup_high_fanout_nets);
-
-static void update_net_delays_from_route_tree(float* net_delay,
-                                              const Netlist<>& net_list,
-                                              ParentNetId inet,
-                                              TimingInfo* timing_info,
-                                              NetPinTimingInvalidator* pin_timing_invalidator);
-
-static bool check_hold(const t_router_opts& router_opts, float worst_neg_slack);
-
-static float get_net_pin_criticality(const std::shared_ptr<SetupHoldTimingInfo> timing_info,
-                                     const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
-                                     float max_criticality,
-                                     float criticality_exp,
-                                     ParentNetId net_id,
-                                     ParentPinId pin_id,
-                                     bool is_flat);
-
-struct more_sinks_than {
-    const Netlist<>& net_list_;
-    more_sinks_than(const Netlist<>& net_list)
-        : net_list_(net_list) {}
-    inline bool operator()(const ParentNetId& net_index1, const ParentNetId& net_index2) {
-        return net_list_.net_sinks(net_index1).size() > net_list_.net_sinks(net_index2).size();
-    }
-};
-
-static bool is_high_fanout(int fanout, int fanout_threshold);
-
-// The reason that try_timing_driven_route_tmpl (and descendents) are being
-// templated over is because using a virtual interface instead fully templating
-// the router results in a 5% runtime increase.
-//
-// The reason to template over the router in general is to enable runtime
-// selection of core router algorithm's, specifically the router heap.
-template<typename ConnectionRouter>
-static bool try_timing_driven_route_tmpl(const Netlist<>& netlist,
-                                         const t_det_routing_arch& det_routing_arch,
-                                         const t_router_opts& router_opts,
-                                         const t_analysis_opts& analysis_opts,
-                                         const std::vector<t_segment_inf>& segment_inf,
-                                         NetPinsMatrix<float>& net_delay,
-                                         const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
-                                         std::shared_ptr<SetupHoldTimingInfo> timing_info,
-                                         std::shared_ptr<RoutingDelayCalculator> delay_calc,
-                                         ScreenUpdatePriority first_iteration_priority,
-                                         bool is_flat);
-
-/************************ Subroutine definitions *****************************/
-bool try_timing_driven_route(const Netlist<>& net_list,
-                             const t_det_routing_arch& det_routing_arch,
-                             const t_router_opts& router_opts,
-                             const t_analysis_opts& analysis_opts,
-                             const std::vector<t_segment_inf>& segment_inf,
-                             NetPinsMatrix<float>& net_delay,
-                             const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
-                             std::shared_ptr<SetupHoldTimingInfo> timing_info,
-                             std::shared_ptr<RoutingDelayCalculator> delay_calc,
-                             ScreenUpdatePriority first_iteration_priority,
-                             bool is_flat) {
-    switch (router_opts.router_heap) {
-        case e_heap_type::BINARY_HEAP:
-            return try_timing_driven_route_tmpl<ConnectionRouter<BinaryHeap>>(net_list,
-                                                                              det_routing_arch,
-                                                                              router_opts,
-                                                                              analysis_opts,
-                                                                              segment_inf,
-                                                                              net_delay,
-                                                                              netlist_pin_lookup,
-                                                                              timing_info,
-                                                                              delay_calc,
-                                                                              first_iteration_priority,
-                                                                              is_flat);
-            break;
-        case e_heap_type::BUCKET_HEAP_APPROXIMATION:
-            return try_timing_driven_route_tmpl<ConnectionRouter<Bucket>>(net_list,
-                                                                          det_routing_arch,
-                                                                          router_opts,
-                                                                          analysis_opts,
-                                                                          segment_inf,
-                                                                          net_delay,
-                                                                          netlist_pin_lookup,
-                                                                          timing_info,
-                                                                          delay_calc,
-                                                                          first_iteration_priority,
-                                                                          is_flat);
-        default:
-            VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Unknown heap type %d", router_opts.router_heap);
-    }
-}
-
-template<typename ConnectionRouter>
-bool try_timing_driven_route_tmpl(const Netlist<>& net_list,
-                                  const t_det_routing_arch& det_routing_arch,
-                                  const t_router_opts& router_opts,
-                                  const t_analysis_opts& analysis_opts,
-                                  const std::vector<t_segment_inf>& segment_inf,
-                                  NetPinsMatrix<float>& net_delay,
-                                  const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
-                                  std::shared_ptr<SetupHoldTimingInfo> timing_info,
-                                  std::shared_ptr<RoutingDelayCalculator> delay_calc,
-                                  ScreenUpdatePriority first_iteration_priority,
-                                  bool is_flat) {
-    /* Timing-driven routing algorithm.  The timing graph (includes slack)   *
-     * must have already been allocated, and net_delay must have been allocated. *
-     * Returns true if the routing succeeds, false otherwise.                    */
-
-    // Make sure template type ConnectionRouter is a ConnectionRouterInterface.
-    static_assert(std::is_base_of<ConnectionRouterInterface, ConnectionRouter>::value, "ConnectionRouter must implement the ConnectionRouterInterface");
-
-    const auto& device_ctx = g_vpr_ctx.device();
-    const auto& atom_ctx = g_vpr_ctx.atom();
-    auto& route_ctx = g_vpr_ctx.mutable_routing();
-
-    auto choking_spots = set_nets_choking_spots(net_list,
-                                                route_ctx.net_terminal_groups,
-                                                route_ctx.net_terminal_group_num,
-                                                router_opts.has_choking_spot,
-                                                is_flat);
-
-    //Initially, the router runs normally trying to reduce congestion while
-    //balancing other metrics (timing, wirelength, run-time etc.)
-    RouterCongestionMode router_congestion_mode = RouterCongestionMode::NORMAL;
-
-    //Initialize and properly size the lookups for profiling
-    profiling::profiling_initialization(get_max_pins_per_net(net_list));
-
-    //sort so net with most sinks is routed first.
-    auto sorted_nets = std::vector<ParentNetId>(net_list.nets().begin(), net_list.nets().end());
-    std::sort(sorted_nets.begin(), sorted_nets.end(), more_sinks_than(net_list));
-
-    /*
-     * Configure the routing predictor
-     */
-    RoutingPredictor routing_predictor;
-    float abort_iteration_threshold = std::numeric_limits<float>::infinity(); //Default no early abort
-    if (router_opts.routing_failure_predictor == SAFE) {
-        abort_iteration_threshold = ROUTING_PREDICTOR_ITERATION_ABORT_FACTOR_SAFE * router_opts.max_router_iterations;
-    } else if (router_opts.routing_failure_predictor == AGGRESSIVE) {
-        abort_iteration_threshold = ROUTING_PREDICTOR_ITERATION_ABORT_FACTOR_AGGRESSIVE * router_opts.max_router_iterations;
-    } else {
-        VTR_ASSERT_MSG(router_opts.routing_failure_predictor == OFF, "Unrecognized routing failure predictor setting");
-    }
-
-    float high_effort_congestion_mode_iteration_threshold = router_opts.congested_routing_iteration_threshold_frac * router_opts.max_router_iterations;
-
-    /* Set delay of ignored signals to zero. Non-ignored net delays are set by
-     * update_net_delays_from_route_tree() inside timing_driven_route_net(),
-     * which is only called for non-ignored nets. */
-    for (auto net_id : net_list.nets()) {
-        if (net_list.net_is_ignored(net_id)) {
-            for (unsigned int ipin = 1; ipin < net_list.net_pins(net_id).size(); ++ipin) {
-                net_delay[net_id][ipin] = 0.;
-            }
-        }
-    }
-
-    CBRR connections_inf{net_list, route_ctx.net_rr_terminals, is_flat};
-
-    route_budgets budgeting_inf(net_list, is_flat);
-
-    // This needs to be called before filling intra-cluster lookahead maps to ensure that the intra-cluster lookahead maps are initialized.
-    const RouterLookahead* router_lookahead = get_cached_router_lookahead(det_routing_arch,
-                                                                          router_opts.lookahead_type,
-                                                                          router_opts.write_router_lookahead,
-                                                                          router_opts.read_router_lookahead,
-                                                                          segment_inf,
-                                                                          is_flat);
-
-    if (is_flat) {
-        // If is_flat is true, the router lookahead maps related to intra-cluster resources should be initialized since
-        // they haven't been initialized when the map related to global resources was initialized.
-        auto cache_key = route_ctx.router_lookahead_cache_key_;
-        std::unique_ptr<RouterLookahead> mut_router_lookahead(route_ctx.cached_router_lookahead_.release());
-        VTR_ASSERT(mut_router_lookahead);
-        route_ctx.cached_router_lookahead_.clear();
-        if (!router_opts.read_intra_cluster_router_lookahead.empty()) {
-            mut_router_lookahead->read_intra_cluster(router_opts.read_intra_cluster_router_lookahead);
-        } else {
-            mut_router_lookahead->compute_intra_tile();
-        }
-        route_ctx.cached_router_lookahead_.set(cache_key, std::move(mut_router_lookahead));
-        router_lookahead = get_cached_router_lookahead(det_routing_arch,
-                                                       router_opts.lookahead_type,
-                                                       router_opts.write_router_lookahead,
-                                                       router_opts.read_router_lookahead,
-                                                       segment_inf,
-                                                       is_flat);
-        if (!router_opts.write_intra_cluster_router_lookahead.empty()) {
-            router_lookahead->write_intra_cluster(router_opts.write_intra_cluster_router_lookahead);
-        }
-    }
-
-    VTR_ASSERT(router_lookahead != nullptr);
-
-    /*
-     * Routing parameters
-     */
-    float pres_fac = update_pres_fac(router_opts.first_iter_pres_fac); /* Typically 0 -> ignore cong. */
-    int bb_fac = router_opts.bb_factor;
-
-    //When routing conflicts are detected the bounding boxes are scaled
-    //by BB_SCALE_FACTOR every BB_SCALE_ITER_COUNT iterations
-    constexpr float BB_SCALE_FACTOR = 2;
-    constexpr int BB_SCALE_ITER_COUNT = 5;
-
-    size_t available_wirelength = calculate_wirelength_available();
-
-    /*
-     * Routing status and metrics
-     */
-    bool routing_is_successful = false;
-    WirelengthInfo wirelength_info;
-    OveruseInfo overuse_info(device_ctx.rr_graph.num_nodes());
-    tatum::TimingPathInfo critical_path;
-    int itry; //Routing iteration number
-    int itry_conflicted_mode = 0;
-
-    /*
-     * Best result so far
-     */
-    vtr::vector<ParentNetId, vtr::optional<RouteTree>> best_routing;
-    t_clb_opins_used best_clb_opins_used_locally;
-    RoutingMetrics best_routing_metrics;
-    int legal_convergence_count = 0;
-
-    ConnectionRouter router(
-        device_ctx.grid,
-        *router_lookahead,
-        device_ctx.rr_graph.rr_nodes(),
-        &device_ctx.rr_graph,
-        device_ctx.rr_rc_data,
-        device_ctx.rr_graph.rr_switch(),
-        route_ctx.rr_node_route_inf,
-        is_flat);
-
-    /*
-     * On the first routing iteration ignore congestion to get reasonable net
-     * delay estimates. Set criticalities to 1 when timing analysis is on to
-     * optimize timing, and to 0 when timing analysis is off to optimize routability.
-     *
-     * Subsequent iterations use the net delays from the previous iteration.
-     */
-    std::shared_ptr<SetupHoldTimingInfo> route_timing_info;
-    {
-        vtr::ScopedStartFinishTimer init_timing_timer("Initializing router criticalities");
-        if (timing_info) {
-            if (router_opts.initial_timing == e_router_initial_timing::ALL_CRITICAL) {
-                //First routing iteration, make all nets critical for a min-delay routing
-                route_timing_info = make_constant_timing_info(1.);
-            } else {
-                VTR_ASSERT(router_opts.initial_timing == e_router_initial_timing::LOOKAHEAD);
-
-                {
-                    //Estimate initial connection delays from the router lookahead
-                    init_net_delay_from_lookahead(*router_lookahead,
-                                                  net_list,
-                                                  route_ctx.net_rr_terminals,
-                                                  net_delay,
-                                                  device_ctx.rr_graph,
-                                                  is_flat);
-
-                    //Run STA to get estimated criticalities
-                    timing_info->update();
-                }
-                route_timing_info = timing_info;
-            }
-        } else {
-            //Not timing driven, force criticality to zero for a routability-driven routing
-            route_timing_info = make_constant_timing_info(0.);
-        }
-        VTR_LOG("Initial Net Connection Criticality Histogram:\n");
-        print_router_criticality_histogram(net_list, *route_timing_info, netlist_pin_lookup, is_flat);
-    }
-
-    std::unique_ptr<NetPinTimingInvalidator> pin_timing_invalidator;
-    if (timing_info) {
-        pin_timing_invalidator = make_net_pin_timing_invalidator(
-            router_opts.timing_update_type,
-            net_list,
-            netlist_pin_lookup,
-            atom_ctx.nlist,
-            atom_ctx.lookup,
-            *timing_info->timing_graph(),
-            is_flat);
-    }
-
-    RouterStats router_stats;
-    init_router_stats(router_stats);
-    timing_driven_route_structs route_structs(net_list);
-    float prev_iter_cumm_time = 0;
-    vtr::Timer iteration_timer;
-    int num_net_bounding_boxes_updated = 0;
-    int itry_since_last_convergence = -1;
-
-    // This heap is used for reserve_locally_used_opins.
-    BinaryHeap small_heap;
-    small_heap.init_heap(device_ctx.grid);
-
-    // When RCV is enabled the router will not stop unless negative hold slack is 0
-    // In some cases this isn't doable, due to global nets or intracluster routing issues
-    // In these cases RCV will finish early if it goes RCV_FINISH_EARLY_COUNTDOWN iterations without detecting resolvable negative hold slack
-    // Increasing this will make the router fail occasionally, decreasing will sometimes not let all hold violations be resolved
-    constexpr int RCV_FINISH_EARLY_COUNTDOWN = 15;
-
-    int rcv_finished_count = RCV_FINISH_EARLY_COUNTDOWN;
-
-    print_route_status_header();
-    for (itry = 1; itry <= router_opts.max_router_iterations; ++itry) {
-        RouterStats router_iteration_stats;
-        init_router_stats(router_iteration_stats);
-        std::vector<ParentNetId> rerouted_nets;
-
-        /* Reset "is_routed" and "is_fixed" flags to indicate nets not pre-routed (yet) */
-        for (auto net_id : net_list.nets()) {
-            route_ctx.net_status.set_is_routed(net_id, false);
-            route_ctx.net_status.set_is_fixed(net_id, false);
-        }
-
-        if (itry_since_last_convergence >= 0) {
-            ++itry_since_last_convergence;
-        }
-
-        // Calculate this once and pass it into net routing to check if should reroute for hold
-        float worst_negative_slack = 0;
-        if (budgeting_inf.if_set()) {
-            worst_negative_slack = timing_info->hold_total_negative_slack();
-        }
-
-        /*
-         * Route each net
-         */
-        for (auto net_id : sorted_nets) {
-            NetResultFlags flags = try_timing_driven_route_net(router,
-                                                               net_list,
-                                                               net_id,
-                                                               itry,
-                                                               pres_fac,
-                                                               router_opts,
-                                                               connections_inf,
-                                                               router_iteration_stats,
-                                                               route_structs.pin_criticality,
-                                                               net_delay,
-                                                               netlist_pin_lookup,
-                                                               route_timing_info,
-                                                               pin_timing_invalidator.get(),
-                                                               budgeting_inf,
-                                                               worst_negative_slack,
-                                                               routing_predictor,
-                                                               choking_spots[net_id],
-                                                               is_flat);
-
-            if (!flags.success) {
-                return false; //Impossible to route
-            }
-
-            if (flags.was_rerouted) {
-                rerouted_nets.push_back(net_id);
-#ifndef NO_GRAPHICS
-                update_router_info_and_check_bp(BP_NET_ID, size_t(net_id));
-#endif
-            }
-        }
-
-        // Make sure any CLB OPINs used up by subblocks being hooked directly to them are reserved for that purpose
-        bool rip_up_local_opins = (itry == 1 ? false : true);
-        if (!is_flat) {
-            reserve_locally_used_opins(&small_heap, pres_fac,
-                                       router_opts.acc_fac, rip_up_local_opins, is_flat);
-        }
-
-        /*
-         * Calculate metrics for the current routing
-         */
-        bool routing_is_feasible = feasible_routing();
-        float est_success_iteration = routing_predictor.estimate_success_iteration();
-
-        //Update resource costs and overuse info
-        if (itry == 1) {
-            pathfinder_update_acc_cost_and_overuse_info(0., overuse_info); /* Acc_fac=0 for first iter. */
-        } else {
-            pathfinder_update_acc_cost_and_overuse_info(router_opts.acc_fac, overuse_info);
-        }
-
-        wirelength_info = calculate_wirelength_info(net_list, available_wirelength);
-        routing_predictor.add_iteration_overuse(itry, overuse_info.overused_nodes);
-
-        if (timing_info) {
-            //Update timing based on the new routing
-            //Note that the net delays have already been updated by timing_driven_route_net
-            timing_info->update();
-            timing_info->set_warn_unconstrained(false); //Don't warn again about unconstrained nodes again during routing
-            pin_timing_invalidator->reset();
-
-            //Use the real timing analysis criticalities for subsequent routing iterations
-            //  'route_timing_info' is what is actually passed into the net/connection routers,
-            //  and for the 1st iteration may not be the actual STA results (e.g. all criticalities set to 1)
-            route_timing_info = timing_info;
-
-            critical_path = timing_info->least_slack_critical_path();
-
-            VTR_ASSERT_SAFE(timing_driven_check_net_delays(net_list, net_delay));
-
-            if (itry == 1) {
-                generate_route_timing_reports(router_opts, analysis_opts, *timing_info, *delay_calc, is_flat);
-            }
-        }
-
-        float iter_cumm_time = iteration_timer.elapsed_sec();
-        float iter_elapsed_time = iter_cumm_time - prev_iter_cumm_time;
-
-        //Output progress
-        print_route_status(itry, iter_elapsed_time, pres_fac, num_net_bounding_boxes_updated, router_iteration_stats, overuse_info, wirelength_info, timing_info, est_success_iteration);
-
-        prev_iter_cumm_time = iter_cumm_time;
-
-        //Update graphics
-        if (itry == 1) {
-            update_screen(first_iteration_priority, "Routing...", ROUTING, timing_info);
-        } else {
-            update_screen(ScreenUpdatePriority::MINOR, "Routing...", ROUTING, timing_info);
-        }
-
-        if (router_opts.save_routing_per_iteration) {
-            std::string filename = vtr::string_fmt("iteration_%03d.route", itry);
-            print_route(net_list, nullptr, filename.c_str(), is_flat);
-        }
-
-        //Update router stats (total)
-        update_router_stats(router_stats, router_iteration_stats);
-
-        /*
-         * Are we finished?
-         */
-        if (is_iteration_complete(routing_is_feasible, router_opts, itry, timing_info, rcv_finished_count == 0)) {
-            auto& router_ctx = g_vpr_ctx.routing();
-
-            if (is_better_quality_routing(best_routing, best_routing_metrics, wirelength_info, timing_info)) {
-                //Save routing
-                best_routing = router_ctx.route_trees;
-                best_clb_opins_used_locally = router_ctx.clb_opins_used_locally;
-
-                routing_is_successful = true;
-
-                //Update best metrics
-                if (timing_info) {
-                    timing_driven_check_net_delays(net_list, net_delay);
-
-                    best_routing_metrics.sTNS = timing_info->setup_total_negative_slack();
-                    best_routing_metrics.sWNS = timing_info->setup_worst_negative_slack();
-                    best_routing_metrics.hTNS = timing_info->hold_total_negative_slack();
-                    best_routing_metrics.hWNS = timing_info->hold_worst_negative_slack();
-                    best_routing_metrics.critical_path = critical_path;
-                }
-                best_routing_metrics.used_wirelength = wirelength_info.used_wirelength();
-            }
-
-            //Decrease pres_fac so that critical connections will take more direct routes
-            //Note that we use first_iter_pres_fac here (typically zero), and switch to
-            //use initial_pres_fac on the next iteration.
-            pres_fac = update_pres_fac(router_opts.first_iter_pres_fac);
-
-            //Reduce timing tolerances to re-route more delay-suboptimal signals
-            connections_inf.set_connection_criticality_tolerance(0.7);
-            connections_inf.set_connection_delay_tolerance(1.01);
-
-            ++legal_convergence_count;
-            itry_since_last_convergence = 0;
-
-            VTR_ASSERT(routing_is_successful);
-        }
-
-        if (itry_since_last_convergence == 1) {
-            //We used first_iter_pres_fac when we started routing again
-            //after the first routing convergence. Since that is often zero,
-            //we want to set pres_fac to a reasonable (i.e. typically non-zero)
-            //value afterwards -- so it grows when multiplied by pres_fac_mult
-            pres_fac = update_pres_fac(router_opts.initial_pres_fac);
-        }
-
-        //Have we converged the maximum number of times, did not make any changes, or does it seem
-        //unlikely additional convergences will improve QoR?
-        if (legal_convergence_count >= router_opts.max_convergence_count
-            || router_iteration_stats.connections_routed == 0
-            || early_reconvergence_exit_heuristic(router_opts, itry_since_last_convergence, timing_info, best_routing_metrics)) {
-#ifndef NO_GRAPHICS
-            update_router_info_and_check_bp(BP_ROUTE_ITER, -1);
-#endif
-            break; //Done routing
-        }
-
-        /*
-         * Abort checks: Should we give-up because this routing problem is unlikely to converge to a legal routing?
-         */
-        if (itry == 1 && early_exit_heuristic(router_opts, wirelength_info)) {
-#ifndef NO_GRAPHICS
-            update_router_info_and_check_bp(BP_ROUTE_ITER, -1);
-#endif
-            //Abort
-            break;
-        }
-
-        //Estimate at what iteration we will converge to a legal routing
-        if (overuse_info.overused_nodes > ROUTING_PREDICTOR_MIN_ABSOLUTE_OVERUSE_THRESHOLD) {
-            //Only consider aborting if we have a significant number of overused resources
-
-            if (!std::isnan(est_success_iteration) && est_success_iteration > abort_iteration_threshold && router_opts.routing_budgets_algorithm != YOYO) {
-                VTR_LOG("Routing aborted, the predicted iteration for a successful route (%.1f) is too high.\n", est_success_iteration);
-#ifndef NO_GRAPHICS
-                update_router_info_and_check_bp(BP_ROUTE_ITER, -1);
-#endif
-                break; //Abort
-            }
-        }
-
-        if (itry == 1 && router_opts.exit_after_first_routing_iteration) {
-            VTR_LOG("Exiting after first routing iteration as requested\n");
-#ifndef NO_GRAPHICS
-            update_router_info_and_check_bp(BP_ROUTE_ITER, -1);
-#endif
-            break;
-        }
-
-        /*
-         * Prepare for the next iteration
-         */
-
-        if (router_opts.route_bb_update == e_route_bb_update::DYNAMIC) {
-            num_net_bounding_boxes_updated = dynamic_update_bounding_boxes(rerouted_nets, net_list, router_opts.high_fanout_threshold);
-        }
-
-        if (itry >= high_effort_congestion_mode_iteration_threshold) {
-            //We are approaching the maximum number of routing iterations,
-            //and still do not have a legal routing. Switch to a mode which
-            //focuses more on attempting to resolve routing conflicts.
-            router_congestion_mode = RouterCongestionMode::CONFLICTED;
-        }
-
-        //Update pres_fac
-        if (itry == 1) {
-            pres_fac = update_pres_fac(router_opts.initial_pres_fac);
-        } else {
-            pres_fac *= router_opts.pres_fac_mult;
-
-            /* Avoid overflow for high iteration counts, even if acc_cost is big */
-            pres_fac = update_pres_fac(std::min(pres_fac, static_cast<float>(HUGE_POSITIVE_FLOAT / 1e5)));
-
-            // Increase short path criticality if it's having a hard time resolving hold violations due to congestion
-            if (budgeting_inf.if_set()) {
-                bool rcv_finished = false;
-
-                /* This constant represents how much extra delay the budget increaser adds to the minimum and maximum delay budgets
-                 * Experimentally this value delivers fast hold slack resolution, while not overwhelming the router
-                 * Increasing this will make it resolve hold faster, but could result in lower circuit quality */
-                constexpr float budget_increase_factor = 300e-12;
-
-                if (itry > 5 && worst_negative_slack != 0) rcv_finished = budgeting_inf.increase_min_budgets_if_struggling(budget_increase_factor, timing_info, worst_negative_slack, netlist_pin_lookup);
-                if (rcv_finished)
-                    rcv_finished_count--;
-                else
-                    rcv_finished_count = RCV_FINISH_EARLY_COUNTDOWN;
-            }
-        }
-
-        if (router_congestion_mode == RouterCongestionMode::CONFLICTED) {
-            //The design appears to have routing conflicts which are difficult to resolve:
-            //  1) Don't re-route legal connections due to delay. This allows
-            //     the router to focus on the actual conflicts
-            //  2) Increase the net bounding boxes. This potentially allows
-            //     the router to route around otherwise congested regions
-            //     (at the cost of high run-time).
-
-            //Increase the size of the net bounding boxes to give the router more
-            //freedom to find alternate paths.
-            //
-            //In the case of routing conflicts there are multiple connections competing
-            //for the same resources which can not resolve the congestion themselves.
-            //In normal routing mode we try to keep the bounding boxes small to minimize
-            //run-time, but this can limits how far signals can detour (i.e. they can't
-            //route outside the bounding box), which can cause conflicts to oscillate back
-            //and forth without resolving.
-            //
-            //By scaling the bounding boxes here, we slowly increase the router's search
-            //space in hopes of it allowing signals to move further out of the way to
-            //alleviate the conflicts.
-            if (itry_conflicted_mode % BB_SCALE_ITER_COUNT == 0) {
-                //We scale the bounding boxes by BB_SCALE_FACTOR,
-                //every BB_SCALE_ITER_COUNT iterations. This ensures
-                //that we give the router some time (BB_SCALE_ITER_COUNT) to try
-                //resolve/negotiate congestion at the new BB factor.
-                //
-                //Note that we increase the BB factor slowly to try and minimize
-                //the bounding box size (since larger bounding boxes slow the router down).
-                auto& grid = g_vpr_ctx.device().grid;
-                int max_grid_dim = std::max(grid.width(), grid.height());
-
-                //Scale by BB_SCALE_FACTOR but clip to grid size to avoid overflow
-                bb_fac = std::min<int>(max_grid_dim, bb_fac * BB_SCALE_FACTOR);
-
-                route_ctx.route_bb = load_route_bb(net_list, bb_fac);
-            }
-
-            ++itry_conflicted_mode;
-        }
-
-        if (timing_info) {
-            if (should_setup_lower_bound_connection_delays(itry, router_opts)) {
-                // first iteration sets up the lower bound connection delays since only timing is optimized for
-                connections_inf.set_stable_critical_path_delay(critical_path.delay());
-                connections_inf.set_lower_bound_connection_delays(net_delay);
-
-                //load budgets using information from uncongested delay information
-                budgeting_inf.load_route_budgets(net_delay, timing_info, netlist_pin_lookup, router_opts);
-                /*for debugging purposes*/
-                // if (budgeting_inf.if_set()) {
-                //     budgeting_inf.print_route_budget(std::string("route_budgets_") + std::to_string(itry) + ".txt", net_delay);
-                // }
-
-                if (router_opts.routing_budgets_algorithm == YOYO)
-                    router.set_rcv_enabled(true);
-
-            } else {
-                bool stable_routing_configuration = true;
-
-                /*
-                 * Determine if any connection need to be forcibly re-routed due to timing
-                 */
-
-                //Yes, if explicitly enabled
-                bool should_ripup_for_delay = (router_opts.incr_reroute_delay_ripup == e_incr_reroute_delay_ripup::ON);
-
-                //Or, if things are not too congested
-                should_ripup_for_delay |= (router_opts.incr_reroute_delay_ripup == e_incr_reroute_delay_ripup::AUTO
-                                           && router_congestion_mode == RouterCongestionMode::NORMAL);
-
-                if (should_ripup_for_delay) {
-                    if (connections_inf.critical_path_delay_grew_significantly(critical_path.delay())) {
-                        // only need to forcibly reroute if critical path grew significantly
-                        stable_routing_configuration = connections_inf.forcibly_reroute_connections(router_opts.max_criticality,
-                                                                                                    timing_info,
-                                                                                                    netlist_pin_lookup,
-                                                                                                    net_delay);
-                    }
-                }
-
-                // not stable if any connection needs to be forcibly rerouted
-                if (stable_routing_configuration) {
-                    connections_inf.set_stable_critical_path_delay(critical_path.delay());
-                }
-            }
-        } else {
-            /* If timing analysis is not enabled, make sure that the criticalities and the
-             * net_delays stay as 0 so that wirelength can be optimized. */
-
-            for (auto net_id : net_list.nets()) {
-                for (unsigned int ipin = 1; ipin < net_list.net_pins(net_id).size(); ++ipin) {
-                    net_delay[net_id][ipin] = 0.;
-                }
-            }
-        }
-
-        if (router_opts.congestion_analysis) profiling::congestion_analysis();
-        if (router_opts.fanout_analysis) profiling::time_on_fanout_analysis();
-        // profiling::time_on_criticality_analysis();
-    }
-
-    if (routing_is_successful) {
-        VTR_LOG("Restoring best routing\n");
-
-        auto& router_ctx = g_vpr_ctx.mutable_routing();
-
-        /* Restore congestion from best route */
-        for (auto net_id : net_list.nets()) {
-            if (route_ctx.route_trees[net_id])
-                pathfinder_update_cost_from_route_tree(route_ctx.route_trees[net_id]->root(), -1);
-            if (best_routing[net_id])
-                pathfinder_update_cost_from_route_tree(best_routing[net_id]->root(), 1);
-        }
-        router_ctx.route_trees = best_routing;
-        router_ctx.clb_opins_used_locally = best_clb_opins_used_locally;
-
-        prune_unused_non_configurable_nets(connections_inf, net_list);
-
-        if (timing_info) {
-            VTR_LOG("Critical path: %g ns\n", 1e9 * best_routing_metrics.critical_path.delay());
-        }
-
-        VTR_LOG("Successfully routed after %d routing iterations.\n", itry);
-    } else {
-        VTR_LOG("Routing failed.\n");
-
-        //If the routing fails, print the overused info
-        print_overused_nodes_status(router_opts, overuse_info);
-
-        ++num_routing_failed;
-
-#ifdef VTR_ENABLE_DEBUG_LOGGING
-        if (f_router_debug) print_invalid_routing_info(net_list, is_flat);
-#endif
-    }
-
-    VTR_LOG("Final Net Connection Criticality Histogram:\n");
-    print_router_criticality_histogram(net_list, *route_timing_info, netlist_pin_lookup, is_flat);
-
-    VTR_ASSERT(router_stats.heap_pushes >= router_stats.intra_cluster_node_pushes);
-    VTR_ASSERT(router_stats.heap_pops >= router_stats.intra_cluster_node_pops);
-    VTR_LOG(
-        "Router Stats: total_nets_routed: %zu total_connections_routed: %zu total_heap_pushes: %zu total_heap_pops: %zu "
-        "total_internal_heap_pushes: %zu total_internal_heap_pops: %zu total_external_heap_pushes: %zu total_external_heap_pops: %zu ",
-        router_stats.nets_routed, router_stats.connections_routed, router_stats.heap_pushes, router_stats.heap_pops,
-        router_stats.intra_cluster_node_pushes, router_stats.intra_cluster_node_pops,
-        router_stats.inter_cluster_node_pushes, router_stats.inter_cluster_node_pops);
-    for (int node_type_idx = 0; node_type_idx < t_rr_type::NUM_RR_TYPES; node_type_idx++) {
-        VTR_LOG("total_external_%s_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.inter_cluster_node_type_cnt_pushes[node_type_idx]);
-        VTR_LOG("total_external_%s_pops: %zu ", rr_node_typename[node_type_idx], router_stats.inter_cluster_node_type_cnt_pops[node_type_idx]);
-        VTR_LOG("total_internal_%s_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.intra_cluster_node_type_cnt_pushes[node_type_idx]);
-        VTR_LOG("total_internal_%s_pops: %zu ", rr_node_typename[node_type_idx], router_stats.intra_cluster_node_type_cnt_pops[node_type_idx]);
-        VTR_LOG("rt_node_%s_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.rt_node_pushes[node_type_idx]);
-        VTR_LOG("rt_node_%s_high_fanout_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.rt_node_high_fanout_pushes[node_type_idx]);
-        VTR_LOG("rt_node_%s_entire_tree_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.rt_node_entire_tree_pushes[node_type_idx]);
-    }
-
-    VTR_LOG("total_number_of_adding_all_rt: %zu ", router_stats.add_all_rt);
-    VTR_LOG("total_number_of_adding_high_fanout_rt: %zu ", router_stats.add_high_fanout_rt);
-    VTR_LOG("total_number_of_adding_all_rt_from_calling_high_fanout_rt: %zu ", router_stats.add_all_rt_from_high_fanout);
-    VTR_LOG("\n");
-
-    return routing_is_successful;
-}
-
-template<typename ConnectionRouter>
-NetResultFlags try_timing_driven_route_net(ConnectionRouter& router,
-                                           const Netlist<>& net_list,
-                                           const ParentNetId& net_id,
-                                           int itry,
-                                           float pres_fac,
-                                           const t_router_opts& router_opts,
-                                           CBRR& connections_inf,
-                                           RouterStats& router_stats,
-                                           std::vector<float>& pin_criticality,
-                                           NetPinsMatrix<float>& net_delay,
-                                           const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
-                                           std::shared_ptr<SetupHoldTimingInfo> timing_info,
-                                           NetPinTimingInvalidator* pin_timing_invalidator,
-                                           route_budgets& budgeting_inf,
-                                           float worst_negative_slack,
-                                           const RoutingPredictor& routing_predictor,
-                                           const std::vector<std::unordered_map<RRNodeId, int>>& choking_spots,
-                                           bool is_flat) {
-    auto& route_ctx = g_vpr_ctx.mutable_routing();
-
-    NetResultFlags flags;
-
-    bool reroute_for_hold = false;
-    if (budgeting_inf.if_set()) {
-        reroute_for_hold = (budgeting_inf.get_should_reroute(net_id));
-        reroute_for_hold &= worst_negative_slack != 0;
-    }
-
-    if (route_ctx.net_status.is_fixed(net_id)) { /* Skip pre-routed nets. */
-        flags.success = true;
-    } else if (net_list.net_is_ignored(net_id)) { /* Skip ignored nets. */
-        flags.success = true;
-    } else if (!(reroute_for_hold) && !should_route_net(net_id, connections_inf, true)) {
-        flags.success = true;
-    } else {
-        // track time spent vs fanout
-        profiling::net_fanout_start();
-
-        flags = timing_driven_route_net(router,
-                                        net_list,
-                                        net_id,
-                                        itry,
-                                        pres_fac,
-                                        router_opts,
-                                        connections_inf,
-                                        router_stats,
-                                        pin_criticality,
-                                        net_delay[net_id].data(),
-                                        netlist_pin_lookup,
-                                        timing_info,
-                                        pin_timing_invalidator,
-                                        budgeting_inf,
-                                        worst_negative_slack,
-                                        routing_predictor,
-                                        choking_spots,
-                                        is_flat);
-
-        profiling::net_fanout_end(net_list.net_sinks(net_id).size());
-
-        /* Impossible to route? (disconnected rr_graph) */
-        if (flags.success) {
-            route_ctx.net_status.set_is_routed(net_id, true);
-        } else {
-            VTR_LOG("Routing failed for net %d\n", net_id);
-        }
-
-        flags.was_rerouted = true; // Flag to record whether routing was actually changed
-    }
-
-    return flags;
-}
-
-int get_max_pins_per_net(const Netlist<>& net_list) {
-    int max_pins_per_net = 0;
-    for (auto net_id : net_list.nets()) {
-        if (!net_list.net_is_ignored(net_id))
-            max_pins_per_net = std::max(max_pins_per_net, (int)net_list.net_pins(net_id).size());
-    }
-
-    return (max_pins_per_net);
-}
-
-template<typename ConnectionRouter>
-NetResultFlags timing_driven_route_net(ConnectionRouter& router,
-                                       const Netlist<>& net_list,
-                                       ParentNetId net_id,
-                                       int itry,
-                                       float pres_fac,
-                                       const t_router_opts& router_opts,
-                                       CBRR& connections_inf,
-                                       RouterStats& router_stats,
-                                       std::vector<float>& pin_criticality,
-                                       float* net_delay,
-                                       const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
-                                       std::shared_ptr<SetupHoldTimingInfo> timing_info,
-                                       NetPinTimingInvalidator* pin_timing_invalidator,
-                                       route_budgets& budgeting_inf,
-                                       float worst_neg_slack,
-                                       const RoutingPredictor& routing_predictor,
-                                       const std::vector<std::unordered_map<RRNodeId, int>>& choking_spots,
-                                       bool is_flat) {
-    auto& device_ctx = g_vpr_ctx.device();
-    const auto& rr_graph = device_ctx.rr_graph;
-    auto& route_ctx = g_vpr_ctx.mutable_routing();
-
-    unsigned int num_sinks = net_list.net_sinks(net_id).size();
-
-    VTR_LOGV_DEBUG(f_router_debug, "Routing Net %zu (%zu sinks)\n", size_t(net_id), num_sinks);
-
-    NetResultFlags flags;
-
-    setup_routing_resources(
-        itry,
-        net_id,
-        net_list,
-        num_sinks,
-        router_opts.min_incremental_reroute_fanout,
-        connections_inf,
-        router_opts,
-        check_hold(router_opts, worst_neg_slack));
-
-    VTR_ASSERT(route_ctx.route_trees[net_id]);
-    RouteTree& tree = route_ctx.route_trees[net_id].value();
-
-    bool high_fanout = is_high_fanout(num_sinks, router_opts.high_fanout_threshold);
-
-    SpatialRouteTreeLookup spatial_route_tree_lookup;
-    if (high_fanout) {
-        spatial_route_tree_lookup = build_route_tree_spatial_lookup(net_list,
-                                                                    route_ctx.route_bb,
-                                                                    net_id,
-                                                                    tree.root());
-    }
-
-    // after this point the route tree is correct
-    // remaining_targets from this point on are the **pin indices** that have yet to be routed
-    std::vector<int> remaining_targets(tree.get_remaining_isinks().begin(), tree.get_remaining_isinks().end());
-
-    // calculate criticality of remaining target pins
-    for (int ipin : remaining_targets) {
-        if (timing_info) {
-            auto pin = net_list.net_pin(net_id, ipin);
-            pin_criticality[ipin] = get_net_pin_criticality(timing_info,
-                                                            netlist_pin_lookup,
-                                                            router_opts.max_criticality,
-                                                            router_opts.criticality_exp,
-                                                            net_id,
-                                                            pin,
-                                                            is_flat);
-
-        } else {
-            //No timing info, implies we want a min delay routing, so use criticality of 1.
-            pin_criticality[ipin] = 1.;
-        }
-    }
-
-    // compare the criticality of different sink nodes
-    sort(begin(remaining_targets), end(remaining_targets), [&](int a, int b) {
-        return pin_criticality[a] > pin_criticality[b];
-    });
-
-    /* Update base costs according to fanout and criticality rules */
-    update_rr_base_costs(num_sinks);
-
-    t_conn_delay_budget conn_delay_budget;
-    t_conn_cost_params cost_params;
-    cost_params.astar_fac = router_opts.astar_fac;
-    cost_params.bend_cost = router_opts.bend_cost;
-    cost_params.pres_fac = pres_fac;
-    cost_params.delay_budget = ((budgeting_inf.if_set()) ? &conn_delay_budget : nullptr);
-
-    // Pre-route to clock source for clock nets (marked as global nets)
-    if (net_list.net_is_global(net_id) && router_opts.two_stage_clock_routing) {
-        //VTR_ASSERT(router_opts.clock_modeling == DEDICATED_NETWORK);
-        RRNodeId sink_node(device_ctx.virtual_clock_network_root_idx);
-
-        enable_router_debug(router_opts, net_id, sink_node, itry, &router);
-
-        VTR_LOGV_DEBUG(f_router_debug, "Pre-routing global net %zu\n", size_t(net_id));
-
-        // Set to the max timing criticality which should intern minimize clock insertion
-        // delay by selecting a direct route from the clock source to the virtual sink
-        cost_params.criticality = router_opts.max_criticality;
-
-        /* Is the connection router allowed to grow the bounding box? That's not the case
-         * when routing in parallel, so disallow it. TODO: Have both timing_driven and parallel
-         * routers handle this in the same way */
-        bool can_grow_bb = (router_opts.router_algorithm != PARALLEL);
-
-        std::tie(flags.success, flags.retry_with_full_bb) = timing_driven_pre_route_to_clock_root(router,
-                                                                                                  net_id,
-                                                                                                  net_list,
-                                                                                                  sink_node,
-                                                                                                  cost_params,
-                                                                                                  router_opts.high_fanout_threshold,
-                                                                                                  tree,
-                                                                                                  spatial_route_tree_lookup,
-                                                                                                  router_stats,
-                                                                                                  is_flat,
-                                                                                                  can_grow_bb);
-
-        return flags;
-    }
-
-    if (budgeting_inf.if_set()) {
-        budgeting_inf.set_should_reroute(net_id, false);
-    }
-
-    // explore in order of decreasing criticality (no longer need sink_order array)
-    for (unsigned itarget = 0; itarget < remaining_targets.size(); ++itarget) {
-        int target_pin = remaining_targets[itarget];
-
-        RRNodeId sink_rr = route_ctx.net_rr_terminals[net_id][target_pin];
-
-        enable_router_debug(router_opts, net_id, sink_rr, itry, &router);
-
-        cost_params.criticality = pin_criticality[target_pin];
-
-        if (budgeting_inf.if_set()) {
-            conn_delay_budget.max_delay = budgeting_inf.get_max_delay_budget(net_id, target_pin);
-            conn_delay_budget.target_delay = budgeting_inf.get_delay_target(net_id, target_pin);
-            conn_delay_budget.min_delay = budgeting_inf.get_min_delay_budget(net_id, target_pin);
-            conn_delay_budget.short_path_criticality = budgeting_inf.get_crit_short_path(net_id, target_pin);
-            conn_delay_budget.routing_budgets_algorithm = router_opts.routing_budgets_algorithm;
-        }
-
-        profiling::conn_start();
-
-        // build a branch in the route tree to the target
-        auto sink_flags = timing_driven_route_sink(router,
-                                                   net_list,
-                                                   net_id,
-                                                   itarget,
-                                                   target_pin,
-                                                   cost_params,
-                                                   router_opts,
-                                                   tree,
-                                                   spatial_route_tree_lookup,
-                                                   router_stats,
-                                                   budgeting_inf,
-                                                   routing_predictor,
-                                                   choking_spots,
-                                                   is_flat);
-
-        flags.retry_with_full_bb |= sink_flags.retry_with_full_bb;
-
-        if (!sink_flags.success) {
-            flags.success = false;
-            return flags;
-        }
-
-        profiling::conn_finish(size_t(route_ctx.net_rr_terminals[net_id][0]),
-                               size_t(sink_rr),
-                               pin_criticality[target_pin]);
-
-        ++router_stats.connections_routed;
-    } // finished all sinks
-
-    ++router_stats.nets_routed;
-    profiling::net_finish();
-
-    /* For later timing analysis. */
-
-    // may have to update timing delay of the previously legally reached sinks since downstream capacitance could be changed
-    update_net_delays_from_route_tree(net_delay,
-                                      net_list,
-                                      net_id,
-                                      timing_info.get(),
-                                      pin_timing_invalidator);
-
-    if (router_opts.update_lower_bound_delays) {
-        for (int ipin : remaining_targets) {
-            connections_inf.update_lower_bound_connection_delay(net_id, ipin, net_delay[ipin]);
-        }
-    }
-
-    VTR_ASSERT_MSG(g_vpr_ctx.routing().rr_node_route_inf[tree.root().inode].occ() <= rr_graph.node_capacity(tree.root().inode), "SOURCE should never be congested");
-
-    VTR_LOGV_DEBUG(f_router_debug, "Routed Net %zu (%zu sinks)\n", size_t(net_id), num_sinks);
-    router.empty_rcv_route_tree_set(); // ?
-
-    flags.success = true;
-    return flags;
-}
-
-template<typename ConnectionRouter>
-static std::tuple<bool, bool> timing_driven_pre_route_to_clock_root(ConnectionRouter& router,
-                                                                    ParentNetId net_id,
-                                                                    const Netlist<>& net_list,
-                                                                    RRNodeId sink_node,
-                                                                    const t_conn_cost_params cost_params,
-                                                                    int high_fanout_threshold,
-                                                                    RouteTree& tree,
-                                                                    SpatialRouteTreeLookup& spatial_rt_lookup,
-                                                                    RouterStats& router_stats,
-                                                                    bool is_flat,
-                                                                    bool can_grow_bb) {
-    const auto& device_ctx = g_vpr_ctx.device();
-    auto& route_ctx = g_vpr_ctx.mutable_routing();
-    auto& m_route_ctx = g_vpr_ctx.mutable_routing();
-
-    bool high_fanout = is_high_fanout(net_list.net_sinks(net_id).size(), high_fanout_threshold);
-
-    VTR_LOGV_DEBUG(f_router_debug, "Net %zu pre-route to (%s)\n", size_t(net_id), describe_rr_node(device_ctx.rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, sink_node, is_flat).c_str());
-    profiling::sink_criticality_start();
-
-    t_bb bounding_box = route_ctx.route_bb[net_id];
-
-    router.clear_modified_rr_node_info();
-
-    bool found_path, retry_with_full_bb;
-    t_heap cheapest;
-    ConnectionParameters conn_params(net_id,
-                                     -1,
-                                     false,
-                                     std::unordered_map<RRNodeId, int>());
-
-    std::tie(found_path, retry_with_full_bb, cheapest) = router.timing_driven_route_connection_from_route_tree(
-        tree.root(),
-        sink_node,
-        cost_params,
-        bounding_box,
-        router_stats,
-        conn_params,
-        can_grow_bb);
-
-    // TODO: Parts of the rest of this function are repetitive to code in timing_driven_route_sink. Should refactor.
-    if (!found_path) {
-        ParentBlockId src_block = net_list.net_driver_block(net_id);
-        VTR_LOG("Failed to route connection from '%s' to '%s' for net '%s' (#%zu)\n",
-                net_list.block_name(src_block).c_str(),
-                describe_rr_node(device_ctx.rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, sink_node, is_flat).c_str(),
-                net_list.net_name(net_id).c_str(),
-                size_t(net_id));
-        if (f_router_debug) {
-            update_screen(ScreenUpdatePriority::MAJOR, "Unable to route connection.", ROUTING, nullptr);
-        }
-        return std::make_tuple(found_path, retry_with_full_bb);
-    }
-
-    profiling::sink_criticality_end(cost_params.criticality);
-
-    /* This is a special pre-route to a sink that does not correspond to any    *
-     * netlist pin, but which can be reached from the global clock root drive   *
-     * points. Therefore, we can set the net pin index of the sink node to      *
-     * OPEN (meaning illegal) as it is not meaningful for this sink.            */
-    vtr::optional<const RouteTreeNode&> new_branch, new_sink;
-    std::tie(new_branch, new_sink) = tree.update_from_heap(&cheapest, OPEN, ((high_fanout) ? &spatial_rt_lookup : nullptr), is_flat);
-
-    VTR_ASSERT_DEBUG(!high_fanout || validate_route_tree_spatial_lookup(tree.root(), spatial_rt_lookup));
-
-    if (f_router_debug) {
-        std::string msg = vtr::string_fmt("Routed Net %zu connection to RR node %d successfully", size_t(net_id), sink_node);
-        update_screen(ScreenUpdatePriority::MAJOR, msg.c_str(), ROUTING, nullptr);
-    }
-
-    if (new_branch)
-        pathfinder_update_cost_from_route_tree(new_branch.value(), 1);
-
-    // need to guarantee ALL nodes' path costs are HUGE_POSITIVE_FLOAT at the start of routing to a sink
-    // do this by resetting all the path_costs that have been touched while routing to the current sink
-    router.reset_path_costs();
-
-    // Post route cleanup:
-    // - remove sink from route tree and fix routing for all nodes leading to the sink ("freeze")
-    // - free up virtual sink occupancy
-    tree.freeze();
-    m_route_ctx.rr_node_route_inf[sink_node].set_occ(0);
-
-    // routed to a sink successfully
-    return std::make_tuple(true, false);
-}
-
-template<typename ConnectionRouter>
-static NetResultFlags timing_driven_route_sink(ConnectionRouter& router,
-                                               const Netlist<>& net_list,
-                                               ParentNetId net_id,
-                                               unsigned itarget,
-                                               int target_pin,
-                                               const t_conn_cost_params cost_params,
-                                               const t_router_opts& router_opts,
-                                               RouteTree& tree,
-                                               SpatialRouteTreeLookup& spatial_rt_lookup,
-                                               RouterStats& router_stats,
-                                               route_budgets& budgeting_inf,
-                                               const RoutingPredictor& routing_predictor,
-                                               const std::vector<std::unordered_map<RRNodeId, int>>& choking_spots,
-                                               bool is_flat) {
-    const auto& device_ctx = g_vpr_ctx.device();
-    auto& route_ctx = g_vpr_ctx.mutable_routing();
-
-    NetResultFlags flags;
-
-    profiling::sink_criticality_start();
-
-    RRNodeId sink_node = route_ctx.net_rr_terminals[net_id][target_pin];
-    VTR_LOGV_DEBUG(f_router_debug, "Net %zu Target %d (%s)\n", size_t(net_id), itarget, describe_rr_node(device_ctx.rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, sink_node, is_flat).c_str());
-
-    router.clear_modified_rr_node_info();
-
-    bool found_path;
-    t_heap cheapest;
-    t_bb bounding_box = route_ctx.route_bb[net_id];
-
-    /* Is the connection router allowed to grow the bounding box? That's not the case
-     * when routing in parallel, so disallow it. */
-    bool can_grow_bb = (router_opts.router_algorithm != PARALLEL);
-
-    bool net_is_global = net_list.net_is_global(net_id);
-    bool high_fanout = is_high_fanout(net_list.net_sinks(net_id).size(), router_opts.high_fanout_threshold);
-    constexpr float HIGH_FANOUT_CRITICALITY_THRESHOLD = 0.9;
-    bool sink_critical = (cost_params.criticality > HIGH_FANOUT_CRITICALITY_THRESHOLD);
-    bool net_is_clock = route_ctx.is_clock_net[net_id] != 0;
-
-    bool has_choking_spot = ((int)choking_spots[target_pin].size() != 0) && router_opts.has_choking_spot;
-    ConnectionParameters conn_params(net_id, target_pin, has_choking_spot, choking_spots[target_pin]);
-
-    //We normally route high fanout nets by only adding spatially close-by routing to the heap (reduces run-time).
-    //However, if the current sink is 'critical' from a timing perspective, we put the entire route tree back onto
-    //the heap to ensure it has more flexibility to find the best path.
-    if (high_fanout && !sink_critical && !net_is_global && !net_is_clock && -routing_predictor.get_slope() > router_opts.high_fanout_max_slope) {
-        std::tie(found_path, flags.retry_with_full_bb, cheapest) = router.timing_driven_route_connection_from_route_tree_high_fanout(tree.root(),
-                                                                                                                                     sink_node,
-                                                                                                                                     cost_params,
-                                                                                                                                     bounding_box,
-                                                                                                                                     spatial_rt_lookup,
-                                                                                                                                     router_stats,
-                                                                                                                                     conn_params,
-                                                                                                                                     can_grow_bb);
-    } else {
-        std::tie(found_path, flags.retry_with_full_bb, cheapest) = router.timing_driven_route_connection_from_route_tree(tree.root(),
-                                                                                                                         sink_node,
-                                                                                                                         cost_params,
-                                                                                                                         bounding_box,
-                                                                                                                         router_stats,
-                                                                                                                         conn_params,
-                                                                                                                         can_grow_bb);
-    }
-
-    if (!found_path) {
-        ParentBlockId src_block = net_list.net_driver_block(net_id);
-        ParentBlockId sink_block = net_list.pin_block(*(net_list.net_pins(net_id).begin() + target_pin));
-        VTR_LOG("Failed to route connection from '%s' to '%s' for net '%s' (#%zu)\n",
-                net_list.block_name(src_block).c_str(),
-                net_list.block_name(sink_block).c_str(),
-                net_list.net_name(net_id).c_str(),
-                size_t(net_id));
-        if (f_router_debug) {
-            update_screen(ScreenUpdatePriority::MAJOR, "Unable to route connection.", ROUTING, nullptr);
-        }
-        flags.success = false;
-        return flags;
-    }
-
-    profiling::sink_criticality_end(cost_params.criticality);
-
-    RRNodeId inode(cheapest.index);
-    route_ctx.rr_node_route_inf[inode].target_flag--; /* Connected to this SINK. */
-
-    vtr::optional<const RouteTreeNode&> new_branch, new_sink;
-    std::tie(new_branch, new_sink) = tree.update_from_heap(&cheapest, target_pin, ((high_fanout) ? &spatial_rt_lookup : nullptr), is_flat);
-
-    VTR_ASSERT_DEBUG(!high_fanout || validate_route_tree_spatial_lookup(tree.root(), spatial_rt_lookup));
-
-    if (f_router_debug) {
-        std::string msg = vtr::string_fmt("Routed Net %zu connection %d to RR node %d successfully", size_t(net_id), itarget, sink_node);
-        update_screen(ScreenUpdatePriority::MAJOR, msg.c_str(), ROUTING, nullptr);
-    }
-
-    if (budgeting_inf.if_set() && cheapest.path_data != nullptr && cost_params.delay_budget) {
-        if (cheapest.path_data->backward_delay < cost_params.delay_budget->min_delay) {
-            budgeting_inf.set_should_reroute(net_id, true);
-        }
-    }
-
-    /* update global occupancy from the new branch */
-    if (new_branch)
-        pathfinder_update_cost_from_route_tree(new_branch.value(), 1);
-
-    // need to guarantee ALL nodes' path costs are HUGE_POSITIVE_FLOAT at the start of routing to a sink
-    // do this by resetting all the path_costs that have been touched while routing to the current sink
-    router.reset_path_costs();
-
-    // routed to a sink successfully
-    flags.success = true;
-    return flags;
-}
-
-static void setup_routing_resources(int itry,
-                                    ParentNetId net_id,
-                                    const Netlist<>& net_list,
-                                    unsigned num_sinks,
-                                    int min_incremental_reroute_fanout,
-                                    CBRR& connections_inf,
-                                    const t_router_opts& router_opts,
-                                    bool ripup_high_fanout_nets) {
-    /* Build and return a partial route tree from the legal connections from last iteration.
-     * along the way do:
-     * 	update pathfinder costs to be accurate to the partial route tree
-     *	mark the rr_node sinks as targets to be reached. */
-    auto& route_ctx = g_vpr_ctx.mutable_routing();
-
-    /* "tree" points to this net's spot in the global context here, so re-initializing it etc. changes the global state */
-    vtr::optional<RouteTree>& tree = route_ctx.route_trees[net_id];
-
-    // for nets below a certain size (min_incremental_reroute_fanout), rip up any old routing
-    // otherwise, we incrementally reroute by reusing legal parts of the previous iteration
-    if ((int)num_sinks < min_incremental_reroute_fanout || itry == 1 || ripup_high_fanout_nets) {
-        profiling::net_rerouted();
-
-        /* rip up the whole net */
-        if (tree)
-            pathfinder_update_cost_from_route_tree(tree.value().root(), -1);
-        tree = vtr::nullopt;
-
-        /* re-initialize net */
-        tree = RouteTree(net_id);
-        pathfinder_update_cost_from_route_tree(tree.value().root(), 1);
-
-        // since all connections will be rerouted for this net, clear all of net's forced reroute flags
-        connections_inf.clear_force_reroute_for_net(net_id);
-
-        // when we don't prune the tree, we also don't know the sink node indices
-        // thus we'll use functions that act on pin indices like mark_ends instead
-        // of their versions that act on node indices directly like mark_remaining_ends
-        mark_ends(net_list, net_id);
-    } else {
-        profiling::net_rebuild_start();
-
-        if (!tree) {
-            tree = RouteTree(net_id);
-            pathfinder_update_cost_from_route_tree(tree.value().root(), 1);
-        }
-
-        /* copy the existing routing
-         * prune() depends on global occ, so we can't subtract before pruning
-         * OPT: to skip this copy, return a "diff" from RouteTree::prune */
-        RouteTree tree2 = tree.value();
-
-        // Skip this check if RCV is enabled, as RCV can use another method to cause reroutes
-        VTR_ASSERT_SAFE(should_route_net(net_id, connections_inf, true) || router_opts.routing_budgets_algorithm == YOYO);
-
-        // Prune the copy (using congestion data before subtraction)
-        vtr::optional<RouteTree&> pruned_tree2 = tree2.prune(connections_inf);
-
-        // Subtract congestion using the non-pruned original
-        pathfinder_update_cost_from_route_tree(tree.value().root(), -1);
-
-        if (pruned_tree2) { //Partially pruned
-            profiling::route_tree_preserved();
-
-            // Add back congestion for the pruned route tree
-            pathfinder_update_cost_from_route_tree(pruned_tree2.value().root(), 1);
-            // pruned_tree2 is no longer required -> we can move rather than copy
-            tree = std::move(pruned_tree2.value());
-        } else { // Fully destroyed
-            profiling::route_tree_pruned();
-
-            // Initialize only to source
-            tree = RouteTree(net_id);
-            pathfinder_update_cost_from_route_tree(tree.value().root(), 1);
-        }
-
-        profiling::net_rebuild_end(num_sinks, tree->get_remaining_isinks().size());
-
-        // still need to calculate the tree's time delay
-        tree.value().reload_timing();
-
-        // check for R_upstream C_downstream and edge correctness
-        VTR_ASSERT_SAFE(tree.value().is_valid());
-
-        // congestion should've been pruned away
-        VTR_ASSERT_SAFE(tree.value().is_uncongested());
-
-        // mark remaining ends
-        mark_remaining_ends(net_id);
-
-        // mark the lookup (rr_node_route_inf) for existing tree elements as NO_PREVIOUS so add_to_path stops when it reaches one of them
-        update_rr_route_inf_from_tree(tree.value().root());
-    }
-
-    // completed constructing the partial route tree and updated all other data structures to match
-}
-
-/** Change the base costs of rr_nodes according to # of fanouts */
-void update_rr_base_costs(int fanout) {
-    auto& device_ctx = g_vpr_ctx.mutable_device();
-
-    float factor;
-    size_t index;
-
-    /* Other reasonable values for factor include fanout and 1 */
-    factor = sqrt(fanout);
-
-    for (index = CHANX_COST_INDEX_START; index < device_ctx.rr_indexed_data.size(); index++) {
-        if (device_ctx.rr_indexed_data[RRIndexedDataId(index)].T_quadratic > 0.) { /* pass transistor */
-            device_ctx.rr_indexed_data[RRIndexedDataId(index)].base_cost = device_ctx.rr_indexed_data[RRIndexedDataId(index)].saved_base_cost * factor;
-        } else {
-            device_ctx.rr_indexed_data[RRIndexedDataId(index)].base_cost = device_ctx.rr_indexed_data[RRIndexedDataId(index)].saved_base_cost;
-        }
-    }
-}
-
-void update_rr_route_inf_from_tree(const RouteTreeNode& rt_node) {
-    auto& route_ctx = g_vpr_ctx.mutable_routing();
-
-    for (auto& child : rt_node.child_nodes()) {
-        RRNodeId inode = child.inode;
-        route_ctx.rr_node_route_inf[inode].prev_node = RRNodeId::INVALID();
-        route_ctx.rr_node_route_inf[inode].prev_edge = RREdgeId::INVALID();
-
-        // path cost should be unset
-        VTR_ASSERT(std::isinf(route_ctx.rr_node_route_inf[inode].path_cost));
-        VTR_ASSERT(std::isinf(route_ctx.rr_node_route_inf[inode].backward_path_cost));
-
-        update_rr_route_inf_from_tree(child);
-    }
-}
-
-bool timing_driven_check_net_delays(const Netlist<>& net_list, NetPinsMatrix<float>& net_delay) {
-    constexpr float ERROR_TOL = 0.0001;
-
-    /* Checks that the net delays computed incrementally during timing driven    *
-     * routing match those computed from scratch by the net_delay.c module.      */
-
-    unsigned int ipin;
-    auto net_delay_check = make_net_pins_matrix<float>(net_list);
-
-    load_net_delay_from_routing(net_list, net_delay_check);
-
-    for (auto net_id : net_list.nets()) {
-        for (ipin = 1; ipin < net_list.net_pins(net_id).size(); ipin++) {
-            if (net_delay_check[net_id][ipin] == 0.) { /* Should be only GLOBAL nets */
-                if (fabs(net_delay[net_id][ipin]) > ERROR_TOL) {
-                    VPR_ERROR(VPR_ERROR_ROUTE,
-                              "in timing_driven_check_net_delays: net %lu pin %d.\n"
-                              "\tIncremental calc. net_delay is %g, but from scratch net delay is %g.\n",
-                              size_t(net_id), ipin, net_delay[net_id][ipin], net_delay_check[net_id][ipin]);
-                }
-            } else {
-                float error = fabs(1.0 - net_delay[net_id][ipin] / net_delay_check[net_id][ipin]);
-                if (error > ERROR_TOL) {
-                    VPR_ERROR(VPR_ERROR_ROUTE,
-                              "in timing_driven_check_net_delays: net %d pin %lu.\n"
-                              "\tIncremental calc. net_delay is %g, but from scratch net delay is %g.\n",
-                              size_t(net_id), ipin, net_delay[net_id][ipin], net_delay_check[net_id][ipin]);
-                }
-            }
-        }
-    }
-
-    return true;
-}
-
-/* Goes through all the sinks of this net and copies their delay values from
- * the route_tree to the net_delay array. */
-static void update_net_delays_from_route_tree(float* net_delay,
-                                              const Netlist<>& net_list,
-                                              ParentNetId inet,
-                                              TimingInfo* timing_info,
-                                              NetPinTimingInvalidator* pin_timing_invalidator) {
-    auto& route_ctx = g_vpr_ctx.routing();
-    const RouteTree& tree = route_ctx.route_trees[inet].value();
-
-    for (unsigned int isink = 1; isink < net_list.net_pins(inet).size(); isink++) {
-        update_net_delay_from_isink(net_delay, tree, isink, net_list, inet, timing_info, pin_timing_invalidator);
-    }
-}
-
-/* Detect if net should be routed or not */
-bool should_route_net(ParentNetId net_id,
-                      CBRR& connections_inf,
-                      bool if_force_reroute) {
-    auto& route_ctx = g_vpr_ctx.routing();
-    auto& device_ctx = g_vpr_ctx.device();
-    const auto& rr_graph = device_ctx.rr_graph;
-
-    if (!route_ctx.route_trees[net_id]) {
-        /* No routing yet. */
-        return true;
-    }
-
-    const RouteTree& tree = route_ctx.route_trees[net_id].value();
-
-    /* Walk over all rt_nodes in the net */
-    for (auto& rt_node : tree.all_nodes()) {
-        RRNodeId inode = rt_node.inode;
-        int occ = route_ctx.rr_node_route_inf[inode].occ();
-        int capacity = rr_graph.node_capacity(inode);
-
-        if (occ > capacity) {
-            return true; /* overuse detected */
-        }
-
-        if (rt_node.is_leaf()) { //End of a branch
-            // even if net is fully routed, not complete if parts of it should get ripped up (EXPERIMENTAL)
-            if (if_force_reroute) {
-                if (connections_inf.should_force_reroute_connection(net_id, inode)) {
-                    return true;
-                }
-            }
-        }
-    }
-
-    /* If all sinks have been routed to without overuse, no need to route this */
-    if (tree.get_remaining_isinks().empty())
-        return false;
-
-    return true;
-}
-
-bool early_exit_heuristic(const t_router_opts& router_opts, const WirelengthInfo& wirelength_info) {
-    if (wirelength_info.used_wirelength_ratio() > router_opts.init_wirelength_abort_threshold) {
-        VTR_LOG("Wire length usage ratio %g exceeds limit of %g, fail routing.\n",
-                wirelength_info.used_wirelength_ratio(),
-                router_opts.init_wirelength_abort_threshold);
-        return true;
-    }
-    return false;
-}
-
-static bool check_hold(const t_router_opts& router_opts, float worst_neg_slack) {
-    /* When RCV is enabled, it's necessary to be able to completely ripup high fanout nets if there is still negative hold slack
-     * Normally the router will prune the illegal branches of high fanout nets, this will bypass this */
-
-    if (router_opts.routing_budgets_algorithm != YOYO) {
-        return false;
-    } else if (worst_neg_slack != 0) {
-        return true;
-    }
-    return false;
-}
-
-static float get_net_pin_criticality(const std::shared_ptr<SetupHoldTimingInfo> timing_info,
-                                     const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
-                                     float max_criticality,
-                                     float criticality_exp,
-                                     ParentNetId net_id,
-                                     ParentPinId pin_id,
-                                     bool is_flat) {
-    float pin_criticality = 0.0;
-    const auto& route_ctx = g_vpr_ctx.routing();
-
-    if (route_ctx.is_clock_net[net_id]) {
-        pin_criticality = max_criticality;
-    } else {
-        pin_criticality = calculate_clb_net_pin_criticality(*timing_info,
-                                                            netlist_pin_lookup,
-                                                            pin_id,
-                                                            is_flat);
-    }
-
-    /* Pin criticality is between 0 and 1.
-     * Shift it downwards by 1 - max_criticality (max_criticality is 0.99 by default,
-     * so shift down by 0.01) and cut off at 0.  This means that all pins with small
-     * criticalities (<0.01) get criticality 0 and are ignored entirely, and everything
-     * else becomes a bit less critical. This effect becomes more pronounced if
-     * max_criticality is set lower. */
-    // VTR_ASSERT(pin_criticality[ipin] > -0.01 && pin_criticality[ipin] < 1.01);
-    pin_criticality = std::max(pin_criticality - (1.0 - max_criticality), 0.0);
-
-    /* Take pin criticality to some power (1 by default). */
-    pin_criticality = std::pow(pin_criticality, criticality_exp);
-
-    /* Cut off pin criticality at max_criticality. */
-    pin_criticality = std::min(pin_criticality, max_criticality);
-
-    return pin_criticality;
-}
-
-size_t calculate_wirelength_available() {
-    auto& device_ctx = g_vpr_ctx.device();
-    const auto& rr_graph = device_ctx.rr_graph;
-
-    size_t available_wirelength = 0;
-    // But really what's happening is that this for loop iterates over every node and determines the available wirelength
-    for (const RRNodeId& rr_id : device_ctx.rr_graph.nodes()) {
-        const t_rr_type channel_type = rr_graph.node_type(rr_id);
-        if (channel_type == CHANX || channel_type == CHANY) {
-            available_wirelength += rr_graph.node_capacity(rr_id) * rr_graph.node_length(rr_id);
-        }
-    }
-    return available_wirelength;
-}
-
-WirelengthInfo calculate_wirelength_info(const Netlist<>& net_list, size_t available_wirelength) {
-    size_t used_wirelength = 0;
-    VTR_ASSERT(available_wirelength > 0);
-
-    auto& route_ctx = g_vpr_ctx.routing();
-
-    for (auto net_id : net_list.nets()) {
-        if (!net_list.net_is_ignored(net_id)
-            && net_list.net_sinks(net_id).size() != 0 /* Globals don't count. */
-            && route_ctx.route_trees[net_id]) {
-            int bends, wirelength, segments;
-            bool is_absorbed;
-            get_num_bends_and_length(net_id, &bends, &wirelength, &segments, &is_absorbed);
-
-            used_wirelength += wirelength;
-        }
-    }
-
-    return WirelengthInfo(available_wirelength, used_wirelength);
-}
-
-void print_route_status_header() {
-    VTR_LOG("---- ------ ------- ---- ------- ------- ------- ----------------- --------------- -------- ---------- ---------- ---------- ---------- --------\n");
-    VTR_LOG("Iter   Time    pres  BBs    Heap  Re-Rtd  Re-Rtd Overused RR Nodes      Wirelength      CPD       sTNS       sWNS       hTNS       hWNS Est Succ\n");
-    VTR_LOG("      (sec)     fac Updt    push    Nets   Conns                                       (ns)       (ns)       (ns)       (ns)       (ns)     Iter\n");
-    VTR_LOG("---- ------ ------- ---- ------- ------- ------- ----------------- --------------- -------- ---------- ---------- ---------- ---------- --------\n");
-}
-
-void print_route_status(int itry, double elapsed_sec, float pres_fac, int num_bb_updated, const RouterStats& router_stats, const OveruseInfo& overuse_info, const WirelengthInfo& wirelength_info, std::shared_ptr<const SetupHoldTimingInfo> timing_info, float est_success_iteration) {
-    //Iteration
-    VTR_LOG("%4d", itry);
-
-    //Elapsed Time
-    VTR_LOG(" %6.1f", elapsed_sec);
-
-    //pres_fac
-    constexpr int PRES_FAC_DIGITS = 7;
-    constexpr int PRES_FAC_SCI_PRECISION = 1;
-    pretty_print_float(" ", pres_fac, PRES_FAC_DIGITS, PRES_FAC_SCI_PRECISION);
-    //VTR_LOG(" %5.1f", pres_fac);
-
-    //Number of bounding boxes updated
-    VTR_LOG(" %4d", num_bb_updated);
-
-    //Heap push/pop
-    constexpr int HEAP_OP_DIGITS = 7;
-    constexpr int HEAP_OP_SCI_PRECISION = 2;
-    pretty_print_uint(" ", router_stats.heap_pushes, HEAP_OP_DIGITS, HEAP_OP_SCI_PRECISION);
-    VTR_ASSERT(router_stats.heap_pops <= router_stats.heap_pushes);
-
-    //Rerouted nets
-    constexpr int NET_ROUTED_DIGITS = 7;
-    constexpr int NET_ROUTED_SCI_PRECISION = 2;
-    pretty_print_uint(" ", router_stats.nets_routed, NET_ROUTED_DIGITS, NET_ROUTED_SCI_PRECISION);
-
-    //Rerouted connections
-    constexpr int CONN_ROUTED_DIGITS = 7;
-    constexpr int CONN_ROUTED_SCI_PRECISION = 2;
-    pretty_print_uint(" ", router_stats.connections_routed, CONN_ROUTED_DIGITS, CONN_ROUTED_SCI_PRECISION);
-
-    //Overused RR nodes
-    constexpr int OVERUSE_DIGITS = 7;
-    constexpr int OVERUSE_SCI_PRECISION = 2;
-    pretty_print_uint(" ", overuse_info.overused_nodes, OVERUSE_DIGITS, OVERUSE_SCI_PRECISION);
-    VTR_LOG(" (%6.3f%%)", overuse_info.overused_node_ratio() * 100);
-
-    //Wirelength
-    constexpr int WL_DIGITS = 7;
-    constexpr int WL_SCI_PRECISION = 2;
-    pretty_print_uint(" ", wirelength_info.used_wirelength(), WL_DIGITS, WL_SCI_PRECISION);
-    VTR_LOG(" (%4.1f%%)", wirelength_info.used_wirelength_ratio() * 100);
-
-    //CPD
-    if (timing_info) {
-        float cpd = timing_info->least_slack_critical_path().delay();
-        VTR_LOG(" %#8.3f", 1e9 * cpd);
-    } else {
-        VTR_LOG(" %8s", "N/A");
-    }
-
-    //sTNS
-    if (timing_info) {
-        float sTNS = timing_info->setup_total_negative_slack();
-        VTR_LOG(" % #10.4g", 1e9 * sTNS);
-    } else {
-        VTR_LOG(" %10s", "N/A");
-    }
-
-    //sWNS
-    if (timing_info) {
-        float sWNS = timing_info->setup_worst_negative_slack();
-        VTR_LOG(" % #10.3f", 1e9 * sWNS);
-    } else {
-        VTR_LOG(" %10s", "N/A");
-    }
-
-    //hTNS
-    if (timing_info) {
-        float hTNS = timing_info->hold_total_negative_slack();
-        VTR_LOG(" % #10.4g", 1e9 * hTNS);
-    } else {
-        VTR_LOG(" %10s", "N/A");
-    }
-
-    //hWNS
-    if (timing_info) {
-        float hWNS = timing_info->hold_worst_negative_slack();
-        VTR_LOG(" % #10.3f", 1e9 * hWNS);
-    } else {
-        VTR_LOG(" %10s", "N/A");
-    }
-
-    //Estimated success iteration
-    if (std::isnan(est_success_iteration)) {
-        VTR_LOG(" %8s", "N/A");
-    } else {
-        VTR_LOG(" %8.0f", est_success_iteration);
-    }
-
-    VTR_LOG("\n");
-
-    fflush(stdout);
-}
-
-void print_router_criticality_histogram(const Netlist<>& net_list,
-                                        const SetupTimingInfo& timing_info,
-                                        const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
-                                        bool is_flat) {
-    print_histogram(create_criticality_histogram(net_list, timing_info, netlist_pin_lookup, is_flat, 10));
-}
-
-void print_overused_nodes_status(const t_router_opts& router_opts, const OveruseInfo& overuse_info) {
-    //Print the index of this routing failure
-    VTR_LOG("\nFailed routing attempt #%d\n", num_routing_failed);
-
-    size_t num_overused = overuse_info.overused_nodes;
-    size_t max_logged_overused_rr_nodes = router_opts.max_logged_overused_rr_nodes;
-
-    //Overused nodes info logging upper limit
-    VTR_LOG("Total number of overused nodes: %d\n", num_overused);
-    if (num_overused > max_logged_overused_rr_nodes) {
-        VTR_LOG("Total number of overused nodes is larger than the logging limit (%d).\n", max_logged_overused_rr_nodes);
-        VTR_LOG("Displaying the first %d entries.\n", max_logged_overused_rr_nodes);
-    }
-
-    log_overused_nodes_status(max_logged_overused_rr_nodes);
-    VTR_LOG("\n");
-}
-
-//Returns true if the specified net fanout is classified as high fanout
-static bool is_high_fanout(int fanout, int fanout_threshold) {
-    if (fanout_threshold < 0 || fanout < fanout_threshold) return false;
-    return true;
-}
-
-// In heavily congested designs a static bounding box (BB) can
-// become problematic for routability (it effectively enforces a
-// hard blockage restricting where a net can route).
-//
-// For instance, the router will try to route non-critical connections
-// away from congested regions, but may end up hitting the edge of the
-// bounding box. Limiting how far out-of-the-way it can be routed, and
-// preventing congestion from resolving.
-//
-// To alleviate this, we dynamically expand net bounding boxes if the net's
-// *current* routing uses RR nodes 'close' to the edge of it's bounding box.
-//
-// The result is that connections trying to move out of the way and hitting
-// their BB will have their bounding boxes will expand slowly in that direction.
-// This helps spread out regions of heavy congestion (over several routing
-// iterations).
-//
-// By growing the BBs slowly and only as needed we minimize the size of the BBs.
-// This helps keep the router's graph search fast.
-//
-// Typically, only a small minority of nets (typically > 10%) have their BBs updated
-// each routing iteration.
-size_t dynamic_update_bounding_boxes(const std::vector<ParentNetId>& updated_nets,
-                                     const Netlist<>& net_list,
-                                     int high_fanout_threshold) {
-    auto& device_ctx = g_vpr_ctx.device();
-    auto& route_ctx = g_vpr_ctx.mutable_routing();
-
-    auto& grid = device_ctx.grid;
-
-    //Controls how close a net's routing needs to be to it's bounding box
-    //before the bounding box is expanded.
-    //
-    //A value of zero indicates that the routing needs to be at the bounding box
-    //edge
-    constexpr int DYNAMIC_BB_DELTA_THRESHOLD = 0;
-
-    //Walk through each net, calculating the bounding box of its current routing,
-    //and then increase the router's bounding box if the two are close together
-
-    int grid_xmax = grid.width() - 1;
-    int grid_ymax = grid.height() - 1;
-
-    size_t num_bb_updated = 0;
-
-    for (ParentNetId net : updated_nets) {
-        if (!route_ctx.route_trees[net])
-            continue; // Skip if no routing
-        if (!route_ctx.net_status.is_routed(net))
-            continue;
-
-        //We do not adjust the bounding boxes of high fanout nets, since they
-        //use different bounding boxes based on the target location.
-        //
-        //This ensures that the delta values calculated below are always non-negative
-        if (is_high_fanout(net_list.net_sinks(net).size(), high_fanout_threshold)) continue;
-
-        t_bb curr_bb = calc_current_bb(route_ctx.route_trees[net].value());
-        t_bb& router_bb = route_ctx.route_bb[net];
-
-        //Calculate the distances between the net's used RR nodes and
-        //the router's bounding box
-        int delta_xmin = curr_bb.xmin - router_bb.xmin;
-        int delta_xmax = router_bb.xmax - curr_bb.xmax;
-        int delta_ymin = curr_bb.ymin - router_bb.ymin;
-        int delta_ymax = router_bb.ymax - curr_bb.ymax;
-
-        //Note that if the net uses non-configurable switches it's routing
-        //may end-up outside the bounding boxes, so the delta values may be
-        //negative. The code below will expand the bounding box in those
-        //cases.
-
-        //Expand each dimension by one if within DYNAMIC_BB_DELTA_THRESHOLD threshold
-        bool updated_bb = false;
-        if (delta_xmin <= DYNAMIC_BB_DELTA_THRESHOLD && router_bb.xmin > 0) {
-            --router_bb.xmin;
-            updated_bb = true;
-        }
-
-        if (delta_ymin <= DYNAMIC_BB_DELTA_THRESHOLD && router_bb.ymin > 0) {
-            --router_bb.ymin;
-            updated_bb = true;
-        }
-
-        if (delta_xmax <= DYNAMIC_BB_DELTA_THRESHOLD && router_bb.xmax < grid_xmax) {
-            ++router_bb.xmax;
-            updated_bb = true;
-        }
-
-        if (delta_ymax <= DYNAMIC_BB_DELTA_THRESHOLD && router_bb.ymax < grid_ymax) {
-            ++router_bb.ymax;
-            updated_bb = true;
-        }
-
-        if (updated_bb) {
-            ++num_bb_updated;
-            //VTR_LOG("Expanded net %6zu router BB to (%d,%d)x(%d,%d) based on net RR node BB (%d,%d)x(%d,%d)\n", size_t(net),
-            //router_bb.xmin, router_bb.ymin, router_bb.xmax, router_bb.ymax,
-            //curr_bb.xmin, curr_bb.ymin, curr_bb.xmax, curr_bb.ymax);
-        }
-    }
-    return num_bb_updated;
-}
-
-//Returns the bounding box of a net's used routing resources
-t_bb calc_current_bb(const RouteTree& tree) {
-    auto& device_ctx = g_vpr_ctx.device();
-    const auto& rr_graph = device_ctx.rr_graph;
-    auto& grid = device_ctx.grid;
-
-    t_bb bb;
-    bb.xmin = grid.width() - 1;
-    bb.ymin = grid.height() - 1;
-    bb.layer_min = grid.get_num_layers() - 1;
-    bb.xmax = 0;
-    bb.ymax = 0;
-    bb.layer_max = 0;
-
-    for (auto& rt_node : tree.all_nodes()) {
-        //The router interprets RR nodes which cross the boundary as being
-        //'within' of the BB. Only those which are *strictly* out side the
-        //box are excluded, hence we use the nodes xhigh/yhigh for xmin/xmax,
-        //and xlow/ylow for xmax/ymax calculations
-        bb.xmin = std::min<int>(bb.xmin, rr_graph.node_xhigh(rt_node.inode));
-        bb.ymin = std::min<int>(bb.ymin, rr_graph.node_yhigh(rt_node.inode));
-        bb.layer_min = std::min<int>(bb.layer_min, rr_graph.node_layer(rt_node.inode));
-        bb.xmax = std::max<int>(bb.xmax, rr_graph.node_xlow(rt_node.inode));
-        bb.ymax = std::max<int>(bb.ymax, rr_graph.node_ylow(rt_node.inode));
-        bb.layer_max = std::max<int>(bb.layer_max, rr_graph.node_layer(rt_node.inode));
-    }
-
-    VTR_ASSERT(bb.xmin <= bb.xmax);
-    VTR_ASSERT(bb.ymin <= bb.ymax);
-
-    return bb;
-}
-
-void enable_router_debug(
-    const t_router_opts& router_opts,
-    ParentNetId net,
-    RRNodeId sink_rr,
-    int router_iteration,
-    ConnectionRouterInterface* router) {
-    bool active_net_debug = (router_opts.router_debug_net >= -1);
-    bool active_sink_debug = (router_opts.router_debug_sink_rr >= 0);
-    bool active_iteration_debug = (router_opts.router_debug_iteration >= 0);
-
-    bool match_net = (ParentNetId(router_opts.router_debug_net) == net || router_opts.router_debug_net == -1);
-    bool match_sink = (router_opts.router_debug_sink_rr == int(size_t((sink_rr))) || router_opts.router_debug_sink_rr < 0);
-    bool match_iteration = (router_opts.router_debug_iteration == router_iteration || router_opts.router_debug_iteration < 0);
-
-    f_router_debug = active_net_debug || active_sink_debug || active_iteration_debug;
-
-    if (active_net_debug) f_router_debug &= match_net;
-    if (active_sink_debug) f_router_debug &= match_sink;
-    if (active_iteration_debug) f_router_debug &= match_iteration;
-
-    router->set_router_debug(f_router_debug);
-
-#ifndef VTR_ENABLE_DEBUG_LOGGING
-    VTR_LOGV_WARN(f_router_debug, "Limited router debug output provided since compiled without VTR_ENABLE_DEBUG_LOGGING defined\n");
-#endif
-}
-
-bool is_iteration_complete(bool routing_is_feasible, const t_router_opts& router_opts, int itry, std::shared_ptr<const SetupHoldTimingInfo> timing_info, bool rcv_finished) {
-    //This function checks if a routing iteration has completed.
-    //When VPR is run normally, we check if routing_budgets_algorithm is disabled, and if the routing is legal
-    //With the introduction of yoyo budgeting algorithm, we must check if there are no hold violations
-    //in addition to routing being legal and the correct budgeting algorithm being set.
-
-    if (routing_is_feasible) {
-        if (router_opts.routing_budgets_algorithm != YOYO) {
-            return true;
-        } else if (router_opts.routing_budgets_algorithm == YOYO && (timing_info->hold_worst_negative_slack() == 0 || rcv_finished) && itry != 1) {
-            return true;
-        }
-    }
-    return false;
-}
-
-bool should_setup_lower_bound_connection_delays(int itry, const t_router_opts& /*router_opts*/) {
-    /* Checks to see if router should (re)calculate route budgets
-     * It's currently set to only calculate after the first routing iteration */
-
-    if (itry == 1) return true;
-    return false;
-}
-
-bool is_better_quality_routing(const vtr::vector<ParentNetId, vtr::optional<RouteTree>>& best_routing,
-                               const RoutingMetrics& best_routing_metrics,
-                               const WirelengthInfo& wirelength_info,
-                               std::shared_ptr<const SetupHoldTimingInfo> timing_info) {
-    if (best_routing.empty()) {
-        return true; // First legal routing
-    }
-
-    // Rank first based on sWNS, followed by other timing metrics
-    if (timing_info) {
-        if (timing_info->setup_worst_negative_slack() > best_routing_metrics.sWNS) {
-            return true;
-        } else if (timing_info->setup_worst_negative_slack() < best_routing_metrics.sWNS) {
-            return false;
-        }
-
-        if (timing_info->setup_total_negative_slack() > best_routing_metrics.sTNS) {
-            return true;
-        } else if (timing_info->setup_total_negative_slack() < best_routing_metrics.sTNS) {
-            return false;
-        }
-
-        if (timing_info->hold_worst_negative_slack() > best_routing_metrics.hWNS) {
-            return true;
-        } else if (timing_info->hold_worst_negative_slack() > best_routing_metrics.hWNS) {
-            return false;
-        }
-
-        if (timing_info->hold_total_negative_slack() > best_routing_metrics.hTNS) {
-            return true;
-        } else if (timing_info->hold_total_negative_slack() > best_routing_metrics.hTNS) {
-            return false;
-        }
-    }
-
-    // Finally, wirelength tie breaker
-    return wirelength_info.used_wirelength() < best_routing_metrics.used_wirelength;
-}
-
-bool early_reconvergence_exit_heuristic(const t_router_opts& router_opts,
-                                        int itry_since_last_convergence,
-                                        std::shared_ptr<const SetupHoldTimingInfo> timing_info,
-                                        const RoutingMetrics& best_routing_metrics) {
-    // Give-up on reconvergent routing if the CPD improvement after the
-    // first iteration since convergence is small, compared to the best
-    // CPD seen so far
-    if (itry_since_last_convergence == 1) {
-        float cpd_ratio = timing_info->setup_worst_negative_slack() / best_routing_metrics.sWNS;
-
-        // Give up if we see less than a 1% CPD improvement,
-        // after reducing pres_fac. Typically larger initial
-        // improvements are needed to see an actual improvement
-        // in final legal routing quality.
-        if (cpd_ratio >= router_opts.reconvergence_cpd_threshold) {
-            VTR_LOG("Giving up routing since additional routing convergences seem unlikely to improve quality (CPD ratio: %g)\n", cpd_ratio);
-            return true; // Potential CPD improvement is small, don't spend run-time trying to improve it
-        }
-    }
-
-    return false; // Don't give up
-}
-
-void generate_route_timing_reports(const t_router_opts& router_opts,
-                                   const t_analysis_opts& analysis_opts,
-                                   const SetupTimingInfo& timing_info,
-                                   const RoutingDelayCalculator& delay_calc,
-                                   bool is_flat) {
-    auto& timing_ctx = g_vpr_ctx.timing();
-    auto& atom_ctx = g_vpr_ctx.atom();
-
-    VprTimingGraphResolver resolver(atom_ctx.nlist, atom_ctx.lookup, *timing_ctx.graph, delay_calc, is_flat);
-    resolver.set_detail_level(analysis_opts.timing_report_detail);
-
-    tatum::TimingReporter timing_reporter(resolver, *timing_ctx.graph, *timing_ctx.constraints);
-
-    timing_reporter.report_timing_setup(router_opts.first_iteration_timing_report_file, *timing_info.setup_analyzer(), analysis_opts.timing_report_npaths);
-}
-
-// If a route is ripped up during routing, non-configurable sets are left
-// behind. As a result, the final routing may have stubs at
-// non-configurable sets. This function tracks non-configurable set usage,
-// and if the sets are unused, prunes them.
-void prune_unused_non_configurable_nets(CBRR& connections_inf,
-                                        const Netlist<>& net_list) {
-    auto& device_ctx = g_vpr_ctx.device();
-    auto& route_ctx = g_vpr_ctx.mutable_routing();
-
-    std::vector<int> non_config_node_set_usage(device_ctx.rr_non_config_node_sets.size(), 0);
-    for (auto net_id : net_list.nets()) {
-        if (!route_ctx.route_trees[net_id])
-            continue;
-        RouteTree& tree = route_ctx.route_trees[net_id].value();
-
-        connections_inf.clear_force_reroute_for_net(net_id);
-
-        std::vector<int> usage = tree.get_non_config_node_set_usage();
-
-        // Prune the branches of the tree that don't legally lead to sinks
-        tree.prune(connections_inf, &usage);
-    }
-}
-
-// Initializes net_delay based on best-case delay estimates from the router lookahead
-void init_net_delay_from_lookahead(const RouterLookahead& router_lookahead,
-                                   const Netlist<>& net_list,
-                                   const vtr::vector<ParentNetId, std::vector<RRNodeId>>& net_rr_terminals,
-                                   NetPinsMatrix<float>& net_delay,
-                                   const RRGraphView& rr_graph,
-                                   bool is_flat) {
-    t_conn_cost_params cost_params;
-    cost_params.criticality = 1.; // Ensures lookahead returns delay value
-
-    for (auto net_id : net_list.nets()) {
-        if (net_list.net_is_ignored(net_id)) continue;
-
-        RRNodeId source_rr = net_rr_terminals[net_id][0];
-
-        for (size_t ipin = 1; ipin < net_list.net_pins(net_id).size(); ++ipin) {
-            RRNodeId sink_rr = net_rr_terminals[net_id][ipin];
-
-            float est_delay = get_cost_from_lookahead(router_lookahead,
-                                                      rr_graph,
-                                                      source_rr,
-                                                      sink_rr,
-                                                      0.,
-                                                      cost_params,
-                                                      is_flat);
-            VTR_ASSERT(std::isfinite(est_delay) && est_delay < std::numeric_limits<float>::max());
-
-            net_delay[net_id][ipin] = est_delay;
-        }
-    }
-}
-
-void update_router_stats(RouterStats& router_stats, RouterStats& router_iteration_stats) {
-    router_stats.connections_routed += router_iteration_stats.connections_routed;
-    router_stats.nets_routed += router_iteration_stats.nets_routed;
-    router_stats.heap_pushes += router_iteration_stats.heap_pushes;
-    router_stats.inter_cluster_node_pushes += router_iteration_stats.inter_cluster_node_pushes;
-    router_stats.intra_cluster_node_pushes += router_iteration_stats.intra_cluster_node_pushes;
-    router_stats.heap_pops += router_iteration_stats.heap_pops;
-    router_stats.inter_cluster_node_pops += router_iteration_stats.inter_cluster_node_pops;
-    router_stats.intra_cluster_node_pops += router_iteration_stats.intra_cluster_node_pops;
-    for (int node_type_idx = 0; node_type_idx < t_rr_type::NUM_RR_TYPES; node_type_idx++) {
-        router_stats.inter_cluster_node_type_cnt_pushes[node_type_idx] += router_iteration_stats.inter_cluster_node_type_cnt_pushes[node_type_idx];
-        router_stats.inter_cluster_node_type_cnt_pops[node_type_idx] += router_iteration_stats.inter_cluster_node_type_cnt_pops[node_type_idx];
-        router_stats.intra_cluster_node_type_cnt_pushes[node_type_idx] += router_iteration_stats.intra_cluster_node_type_cnt_pushes[node_type_idx];
-        router_stats.intra_cluster_node_type_cnt_pops[node_type_idx] += router_iteration_stats.intra_cluster_node_type_cnt_pops[node_type_idx];
-        router_stats.rt_node_pushes[node_type_idx] += router_iteration_stats.rt_node_pushes[node_type_idx];
-        router_stats.rt_node_high_fanout_pushes[node_type_idx] += router_iteration_stats.rt_node_high_fanout_pushes[node_type_idx];
-        router_stats.rt_node_entire_tree_pushes[node_type_idx] += router_iteration_stats.rt_node_entire_tree_pushes[node_type_idx];
-    }
-    router_stats.add_all_rt += router_iteration_stats.add_all_rt;
-    router_stats.add_all_rt_from_high_fanout += router_iteration_stats.add_all_rt_from_high_fanout;
-    router_stats.add_high_fanout_rt += router_iteration_stats.add_high_fanout_rt;
-}
-
-void init_router_stats(RouterStats& router_stats) {
-    router_stats.connections_routed = 0;
-    router_stats.nets_routed = 0;
-    router_stats.heap_pushes = 0;
-    router_stats.heap_pops = 0;
-    router_stats.inter_cluster_node_pushes = 0;
-    router_stats.inter_cluster_node_pops = 0;
-    router_stats.intra_cluster_node_pushes = 0;
-    router_stats.intra_cluster_node_pops = 0;
-    for (int node_type_idx = 0; node_type_idx < t_rr_type::NUM_RR_TYPES; node_type_idx++) {
-        router_stats.inter_cluster_node_type_cnt_pushes[node_type_idx] = 0;
-        router_stats.inter_cluster_node_type_cnt_pops[node_type_idx] = 0;
-        router_stats.intra_cluster_node_type_cnt_pushes[node_type_idx] = 0;
-        router_stats.intra_cluster_node_type_cnt_pops[node_type_idx] = 0;
-        router_stats.rt_node_pushes[node_type_idx] = 0;
-        router_stats.rt_node_entire_tree_pushes[node_type_idx] = 0;
-        router_stats.rt_node_high_fanout_pushes[node_type_idx] = 0;
-    }
-    router_stats.add_all_rt = 0;
-    router_stats.add_high_fanout_rt = 0;
-    router_stats.add_all_rt_from_high_fanout = 0;
-}
-
-vtr::vector<ParentNetId, std::vector<std::unordered_map<RRNodeId, int>>> set_nets_choking_spots(const Netlist<>& net_list,
-                                                                                                const vtr::vector<ParentNetId,
-                                                                                                                  std::vector<std::vector<int>>>& net_terminal_groups,
-                                                                                                const vtr::vector<ParentNetId,
-                                                                                                                  std::vector<int>>& net_terminal_group_num,
-                                                                                                bool has_choking_spot,
-                                                                                                bool is_flat) {
-    vtr::vector<ParentNetId, std::vector<std::unordered_map<RRNodeId, int>>> choking_spots(net_list.nets().size());
-    for (const auto& net_id : net_list.nets()) {
-        choking_spots[net_id].resize(net_list.net_pins(net_id).size());
-    }
-
-    // Return if the architecture doesn't have any potential choke points
-    if (!has_choking_spot) {
-        return choking_spots;
-    }
-
-    // We only identify choke points if flat_routing is enabled.
-    VTR_ASSERT(is_flat);
-
-    const auto& device_ctx = g_vpr_ctx.device();
-    const auto& rr_graph = device_ctx.rr_graph;
-    const auto& route_ctx = g_vpr_ctx.routing();
-    const auto& net_rr_terminal = route_ctx.net_rr_terminals;
-
-    for (const auto& net_id : net_list.nets()) {
-        int pin_count = 0;
-        // Global nets are not routed, thus we don't consider them.
-        if (net_list.net_is_global(net_id)) {
-            continue;
-        }
-        for (auto pin_id : net_list.net_pins(net_id)) {
-            // pin_count == 0 corresponds to the net's source pin
-            if (pin_count == 0) {
-                pin_count++;
-                continue;
-            }
-            auto block_id = net_list.pin_block(pin_id);
-            auto blk_loc = get_block_loc(block_id, is_flat);
-            int group_num = net_terminal_group_num[net_id][pin_count];
-            // This is a group of sinks, including the current pin_id, which share a specific number of parent blocks.
-            // To determine the choke points of the current sink, pin_id, we only consider the sinks in this group for the
-            // run-time purpose
-            std::vector<int> sink_grp = net_terminal_groups[net_id][group_num];
-            VTR_ASSERT((int)sink_grp.size() >= 1);
-            if (sink_grp.size() == 1) {
-                pin_count++;
-                continue;
-            } else {
-                // get the ptc_number of the sinks in the group
-                std::for_each(sink_grp.begin(), sink_grp.end(), [&rr_graph](int& sink_rr_num) {
-                    sink_rr_num = rr_graph.node_ptc_num(RRNodeId(sink_rr_num));
-                });
-                auto physical_type = device_ctx.grid.get_physical_type({blk_loc.loc.x, blk_loc.loc.y, blk_loc.loc.layer});
-                // Get the choke points of the sink corresponds to pin_count given the sink group
-                auto sink_choking_spots = get_sink_choking_points(physical_type,
-                                                                  rr_graph.node_ptc_num(RRNodeId(net_rr_terminal[net_id][pin_count])),
-                                                                  sink_grp);
-                // Store choke points rr_node_id and the number reachable sinks
-                for (const auto& choking_spot : sink_choking_spots) {
-                    int pin_physical_num = choking_spot.first;
-                    int num_reachable_sinks = choking_spot.second;
-                    auto pin_rr_node_id = get_pin_rr_node_id(rr_graph.node_lookup(),
-                                                             physical_type,
-                                                             blk_loc.loc.layer,
-                                                             blk_loc.loc.x,
-                                                             blk_loc.loc.y,
-                                                             pin_physical_num);
-                    if (pin_rr_node_id != RRNodeId::INVALID()) {
-                        choking_spots[net_id][pin_count].insert(std::make_pair(pin_rr_node_id, num_reachable_sinks));
-                    }
-                }
-            }
-            pin_count++;
-        }
-    }
-
-    return choking_spots;
-}
-
-#ifndef NO_GRAPHICS
-// updates router iteration information and checks for router iteration and net id breakpoints
-// stops after the specified router iteration or net id is encountered
-void update_router_info_and_check_bp(bp_router_type type, int net_id) {
-    t_draw_state* draw_state = get_draw_state_vars();
-    if (draw_state->list_of_breakpoints.size() != 0) {
-        if (type == BP_ROUTE_ITER)
-            get_bp_state_globals()->get_glob_breakpoint_state()->router_iter++;
-        else if (type == BP_NET_ID)
-            get_bp_state_globals()->get_glob_breakpoint_state()->route_net_id = net_id;
-        f_router_debug = check_for_breakpoints(false);
-        if (f_router_debug) {
-            breakpoint_info_window(get_bp_state_globals()->get_glob_breakpoint_state()->bp_description, *get_bp_state_globals()->get_glob_breakpoint_state(), false);
-            update_screen(ScreenUpdatePriority::MAJOR, "Breakpoint Encountered", ROUTING, nullptr);
-        }
-    }
-}
-#endif
diff --git a/vpr/src/route/route_timing.h b/vpr/src/route/route_timing.h
deleted file mode 100644
index 38495bb806b..00000000000
--- a/vpr/src/route/route_timing.h
+++ /dev/null
@@ -1,291 +0,0 @@
-#pragma once
-
-#include <unordered_map>
-#include <vector>
-
-#include "connection_based_routing.h"
-#include "connection_router_interface.h"
-#include "heap_type.h"
-#include "netlist.h"
-#include "route_budgets.h"
-#include "router_stats.h"
-#include "router_lookahead.h"
-#include "routing_predictor.h"
-#include "rr_graph_type.h"
-#include "spatial_route_tree_lookup.h"
-#include "timing_info_fwd.h"
-#include "vpr_types.h"
-#include "vpr_utils.h"
-
-#include "NetPinTimingInvalidator.h"
-
-extern bool f_router_debug;
-
-/** TODO: remove timing_driven_route_structs together with this fn */
-int get_max_pins_per_net(const Netlist<>& net_list);
-
-/** Types and defines common to timing_driven and parallel routers */
-
-#define CONGESTED_SLOPE_VAL -0.04
-
-/** Per-iteration congestion mode for the router: focus more on routability after a certain threshold */
-enum class RouterCongestionMode {
-    NORMAL,
-    CONFLICTED
-};
-
-/** Identifies the two breakpoint types in routing */
-typedef enum router_breakpoint_type {
-    BP_ROUTE_ITER,
-    BP_NET_ID
-} bp_router_type;
-
-/** Results from attempting to route a net.
- * success: Could we route it?
- * was_rerouted: Is the routing different from the last one? (set by try_* functions)
- * retry_with_full_bb: Should we retry this net with a full-device bounding box? (used in the parallel router)
- *
- * I'm fine with returning 3 bytes from a fn: consider an enum class if this becomes too big */
-struct NetResultFlags {
-    bool success = false;
-    bool was_rerouted = false;
-    bool retry_with_full_bb = false;
-};
-
-struct RoutingMetrics {
-    size_t used_wirelength = 0;
-
-    float sWNS = std::numeric_limits<float>::quiet_NaN();
-    float sTNS = std::numeric_limits<float>::quiet_NaN();
-    float hWNS = std::numeric_limits<float>::quiet_NaN();
-    float hTNS = std::numeric_limits<float>::quiet_NaN();
-    tatum::TimingPathInfo critical_path;
-};
-
-/* Data while timing driven route is active */
-class timing_driven_route_structs {
-  public:
-    std::vector<float> pin_criticality; /* [1..max_pins_per_net-1] */
-
-    timing_driven_route_structs(const Netlist<>& net_list) {
-        int max_sinks = std::max(get_max_pins_per_net(net_list) - 1, 0);
-        pin_criticality.resize(max_sinks + 1);
-
-        /* Set element 0 to invalid values */
-        pin_criticality[0] = std::numeric_limits<float>::quiet_NaN();
-    }
-};
-
-/** Returns the bounding box of a net's used routing resources */
-t_bb calc_current_bb(const RouteTree& tree);
-
-/** Get available wirelength for the current RR graph */
-size_t calculate_wirelength_available();
-
-/** Calculate wirelength for the current routing and populate a WirelengthInfo */
-WirelengthInfo calculate_wirelength_info(const Netlist<>& net_list, size_t available_wirelength);
-
-size_t dynamic_update_bounding_boxes(const std::vector<ParentNetId>& updated_nets,
-                                     const Netlist<>& net_list,
-                                     int high_fanout_threshold);
-
-/** Early exit code for cases where it is obvious that a successful route will not be found
- * Heuristic: If total wirelength used in first routing iteration is X% of total available wirelength, exit */
-bool early_exit_heuristic(const t_router_opts& router_opts, const WirelengthInfo& wirelength_info);
-
-/** Give-up on reconvergent routing if the CPD improvement after the
- * first iteration since convergence is small, compared to the best
- * CPD seen so far */
-bool early_reconvergence_exit_heuristic(const t_router_opts& router_opts,
-                                        int itry_since_last_convergence,
-                                        std::shared_ptr<const SetupHoldTimingInfo> timing_info,
-                                        const RoutingMetrics& best_routing_metrics);
-
-void enable_router_debug(const t_router_opts& router_opts, ParentNetId net, RRNodeId sink_rr, int router_iteration, ConnectionRouterInterface* router);
-
-void generate_route_timing_reports(const t_router_opts& router_opts,
-                                   const t_analysis_opts& analysis_opts,
-                                   const SetupTimingInfo& timing_info,
-                                   const RoutingDelayCalculator& delay_calc,
-                                   bool is_flat);
-
-/** Initialize net_delay based on best-case delay estimates from the router lookahead. */
-void init_net_delay_from_lookahead(const RouterLookahead& router_lookahead,
-                                   const Netlist<>& net_list,
-                                   const vtr::vector<ParentNetId, std::vector<RRNodeId>>& net_rr_terminals,
-                                   NetPinsMatrix<float>& net_delay,
-                                   const RRGraphView& rr_graph,
-                                   bool is_flat);
-
-void init_router_stats(RouterStats& router_stats);
-
-bool is_better_quality_routing(const vtr::vector<ParentNetId, vtr::optional<RouteTree>>& best_routing,
-                               const RoutingMetrics& best_routing_metrics,
-                               const WirelengthInfo& wirelength_info,
-                               std::shared_ptr<const SetupHoldTimingInfo> timing_info);
-
-bool is_iteration_complete(bool routing_is_feasible, const t_router_opts& router_opts, int itry, std::shared_ptr<const SetupHoldTimingInfo> timing_info, bool rcv_finished);
-
-/** Print the index of this routing failure */
-void print_overused_nodes_status(const t_router_opts& router_opts, const OveruseInfo& overuse_info);
-
-void print_route_status_header();
-
-void print_route_status(int itry,
-                        double elapsed_sec,
-                        float pres_fac,
-                        int num_bb_updated,
-                        const RouterStats& router_stats,
-                        const OveruseInfo& overuse_info,
-                        const WirelengthInfo& wirelength_info,
-                        std::shared_ptr<const SetupHoldTimingInfo> timing_info,
-                        float est_success_iteration);
-
-void print_router_criticality_histogram(const Netlist<>& net_list,
-                                        const SetupTimingInfo& timing_info,
-                                        const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
-                                        bool is_flat);
-
-/** If a route is ripped up during routing, non-configurable sets are left
- * behind. As a result, the final routing may have stubs at
- * non-configurable sets. This function tracks non-configurable set usage,
- * and if the sets are unused, prunes them. */
-void prune_unused_non_configurable_nets(CBRR& connections_inf,
-                                        const Netlist<>& net_list);
-
-/**
- * If flat_routing and has_choking_spot are true, there are some choke points inside the cluster which would increase the convergence time of routing.
- * To address this issue, the congestion cost of those choke points needs to decrease. This function identify those choke points for each net,
- * and since the amount of congestion reduction is dependant on the number sinks reachable from that choke point, it also store the number of reachable sinks
- * for each choke point.
- * @param net_list
- * @param net_terminal_groups [Net_id][group_id] -> rr_node_id of the pins in the group
- * @param net_terminal_group_num [Net_id][pin_id] -> group_id
- * @param has_choking_spot is true if the given architecture has choking spots inside the cluster
- * @param is_flat is true if flat_routing is enabled
- * @return [Net_id][pin_id] -> [choke_point_rr_node_id, number of sinks reachable by this choke point]
- */
-
-vtr::vector<ParentNetId, std::vector<std::unordered_map<RRNodeId, int>>> set_nets_choking_spots(const Netlist<>& net_list,
-                                                                                                const vtr::vector<ParentNetId,
-                                                                                                                  std::vector<std::vector<int>>>& net_terminal_groups,
-                                                                                                const vtr::vector<ParentNetId,
-                                                                                                                  std::vector<int>>& net_terminal_group_num,
-                                                                                                bool has_choking_spot,
-                                                                                                bool is_flat);
-
-/** Detect if net should be routed or not */
-bool should_route_net(ParentNetId net_id,
-                      CBRR& connections_inf,
-                      bool if_force_reroute);
-
-bool should_setup_lower_bound_connection_delays(int itry, const t_router_opts& router_opts);
-
-bool timing_driven_check_net_delays(const Netlist<>& net_list,
-                                    NetPinsMatrix<float>& net_delay);
-
-bool try_timing_driven_route(const Netlist<>& net_list,
-                             const t_det_routing_arch& det_routing_arch,
-                             const t_router_opts& router_opts,
-                             const t_analysis_opts& analysis_opts,
-                             const std::vector<t_segment_inf>& segment_inf,
-                             NetPinsMatrix<float>& net_delay,
-                             const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
-                             std::shared_ptr<SetupHoldTimingInfo> timing_info,
-                             std::shared_ptr<RoutingDelayCalculator> delay_calc,
-                             ScreenUpdatePriority first_iteration_priority,
-                             bool is_flat);
-
-/** Attempt to route a single net.
- *
- * @param router The ConnectionRouter instance 
- * @param net_list Input netlist
- * @param net_id
- * @param itry # of iteration
- * @param pres_fac
- * @param router_opts
- * @param connections_inf
- * @param router_stats
- * @param pin_criticality
- * @param rt_node_of_sink Lookup from target_pin-like indices (indicating SINK nodes) to RouteTreeNodes
- * @param net_delay
- * @param netlist_pin_lookup
- * @param timing_info
- * @param pin_timing_invalidator
- * @param budgeting_inf
- * @param worst_neg_slack
- * @param routing_predictor
- * @param choking_spots
- * @param is_flat
- * @return NetResultFlags for this net. success = false means the RR graph is disconnected and the caller can give up */
-template<typename ConnectionRouter>
-NetResultFlags timing_driven_route_net(ConnectionRouter& router,
-                                       const Netlist<>& net_list,
-                                       ParentNetId net_id,
-                                       int itry,
-                                       float pres_fac,
-                                       const t_router_opts& router_opts,
-                                       CBRR& connections_inf,
-                                       RouterStats& router_stats,
-                                       std::vector<float>& pin_criticality,
-                                       float* net_delay,
-                                       const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
-                                       std::shared_ptr<SetupHoldTimingInfo> timing_info,
-                                       NetPinTimingInvalidator* pin_timing_invalidator,
-                                       route_budgets& budgeting_inf,
-                                       float worst_neg_slack,
-                                       const RoutingPredictor& routing_predictor,
-                                       const std::vector<std::unordered_map<RRNodeId, int>>& choking_spots,
-                                       bool is_flat);
-
-template<typename ConnectionRouter>
-NetResultFlags try_timing_driven_route_net(ConnectionRouter& router,
-                                           const Netlist<>& net_list,
-                                           const ParentNetId& net_id,
-                                           int itry,
-                                           float pres_fac,
-                                           const t_router_opts& router_opts,
-                                           CBRR& connections_inf,
-                                           RouterStats& router_stats,
-                                           std::vector<float>& pin_criticality,
-                                           NetPinsMatrix<float>& net_delay,
-                                           const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
-                                           std::shared_ptr<SetupHoldTimingInfo> timing_info,
-                                           NetPinTimingInvalidator* pin_timing_invalidator,
-                                           route_budgets& budgeting_inf,
-                                           float worst_negative_slack,
-                                           const RoutingPredictor& routing_predictor,
-                                           const std::vector<std::unordered_map<RRNodeId, int>>& choking_spots,
-                                           bool is_flat);
-
-/** Update net_delay value for a single sink in a RouteTree. */
-inline void update_net_delay_from_isink(float* net_delay,
-                                        const RouteTree& tree,
-                                        int isink,
-                                        const Netlist<>& net_list,
-                                        ParentNetId inet,
-                                        TimingInfo* timing_info,
-                                        NetPinTimingInvalidator* pin_timing_invalidator) {
-    float new_delay = tree.find_by_isink(isink)->Tdel;
-
-    if (pin_timing_invalidator && new_delay != net_delay[isink]) {
-        //Delay changed, invalidate for incremental timing update
-        VTR_ASSERT_SAFE(timing_info);
-        ParentPinId pin = net_list.net_pin(inet, isink);
-        pin_timing_invalidator->invalidate_connection(pin, timing_info);
-    }
-
-    net_delay[isink] = new_delay;
-}
-
-void update_router_stats(RouterStats& router_stats, RouterStats& router_iteration_stats);
-
-#ifndef NO_GRAPHICS
-void update_router_info_and_check_bp(bp_router_type type, int net_id);
-#endif
-
-void update_rr_base_costs(int fanout);
-
-/** Traverses down a route tree and updates rr_node_inf for all nodes
- * to reflect that these nodes have already been routed to */
-void update_rr_route_inf_from_tree(const RouteTreeNode& rt_node);
diff --git a/vpr/src/route/route_tree.cpp b/vpr/src/route/route_tree.cpp
index 36f37461527..4c4e37c3052 100644
--- a/vpr/src/route/route_tree.cpp
+++ b/vpr/src/route/route_tree.cpp
@@ -1,7 +1,9 @@
 #include "route_tree.h"
+
+#include "connection_based_routing.h"
 #include "globals.h"
 #include "netlist_fwd.h"
-#include "route_timing.h"
+#include "route_debug.h"
 #include "rr_graph_fwd.h"
 #include "vtr_math.h"
 
diff --git a/vpr/src/route/route_util.cpp b/vpr/src/route/route_utilization.cpp
similarity index 99%
rename from vpr/src/route/route_util.cpp
rename to vpr/src/route/route_utilization.cpp
index 8ca7f657124..ec6da92cf1d 100644
--- a/vpr/src/route/route_util.cpp
+++ b/vpr/src/route/route_utilization.cpp
@@ -1,4 +1,4 @@
-#include "route_util.h"
+#include "route_utilization.h"
 #include "globals.h"
 #include "draw_types.h"
 #include "draw_global.h"
diff --git a/vpr/src/route/route_util.h b/vpr/src/route/route_utilization.h
similarity index 100%
rename from vpr/src/route/route_util.h
rename to vpr/src/route/route_utilization.h
diff --git a/vpr/src/route/route_utils.cpp b/vpr/src/route/route_utils.cpp
new file mode 100644
index 00000000000..f90789e5250
--- /dev/null
+++ b/vpr/src/route/route_utils.cpp
@@ -0,0 +1,536 @@
+/** @file Utility fns for top-level router. */
+
+#include "route_utils.h"
+
+#include "connection_based_routing.h"
+#include "draw.h"
+#include "draw_debug.h"
+#include "draw_global.h"
+#include "draw_types.h"
+#include "net_delay.h"
+#include "overuse_report.h"
+#include "place_and_route.h"
+#include "route_debug.h"
+
+#include "VprTimingGraphResolver.h"
+#include "tatum/TimingReporter.hpp"
+
+/** Cross-checks the incrementally maintained net_delay against delays reloaded from the current routing.
+ * Any sink pin differing by more than ERROR_TOL is reported through VPR_ERROR; on normal return the result is true. */
+bool check_net_delays(const Netlist<>& net_list, NetPinsMatrix<float>& net_delay) {
+    constexpr float ERROR_TOL = 0.0001;
+
+    auto net_delay_check = make_net_pins_matrix<float>(net_list);
+    load_net_delay_from_routing(net_list, net_delay_check);
+
+    for (auto net_id : net_list.nets()) {
+        for (unsigned int ipin = 1; ipin < net_list.net_pins(net_id).size(); ipin++) { /* pin 0 is not checked */
+            if (net_delay_check[net_id][ipin] == 0.) { /* Should be only GLOBAL nets */
+                if (fabs(net_delay[net_id][ipin]) > ERROR_TOL) { /* absolute test: the ratio below would divide by zero */
+                    VPR_ERROR(VPR_ERROR_ROUTE,
+                              "in check_net_delays: net %zu pin %u.\n"
+                              "\tIncremental calc. net_delay is %g, but from scratch net delay is %g.\n",
+                              size_t(net_id), ipin, net_delay[net_id][ipin], net_delay_check[net_id][ipin]);
+                }
+            } else {
+                float error = fabs(1.0 - net_delay[net_id][ipin] / net_delay_check[net_id][ipin]); /* relative error */
+                if (error > ERROR_TOL) {
+                    VPR_ERROR(VPR_ERROR_ROUTE,
+                              "in check_net_delays: net %zu pin %u.\n"
+                              "\tIncremental calc. net_delay is %g, but from scratch net delay is %g.\n",
+                              size_t(net_id), ipin, net_delay[net_id][ipin], net_delay_check[net_id][ipin]);
+                }
+            }
+        }
+    }
+
+    return true;
+}
+
+// In heavily congested designs a static bounding box (BB) can
+// become problematic for routability (it effectively enforces a
+// hard blockage restricting where a net can route).
+//
+// For instance, the router will try to route non-critical connections
+// away from congested regions, but may end up hitting the edge of the
+// bounding box, which limits how far out-of-the-way it can be routed and
+// prevents congestion from resolving.
+//
+// To alleviate this, we dynamically expand net bounding boxes if the net's
+// *current* routing uses RR nodes 'close' to the edge of its bounding box.
+//
+// The result is that connections trying to move out of the way and hitting
+// their BB will have their bounding boxes expand slowly in that direction.
+// This helps spread out regions of heavy congestion (over several routing
+// iterations).
+//
+// By growing the BBs slowly and only as needed we minimize the size of the BBs.
+// This helps keep the router's graph search fast.
+//
+// Only a small minority of nets (typically < 10%) have their BBs updated
+// each routing iteration.
+size_t dynamic_update_bounding_boxes(const std::vector<ParentNetId>& updated_nets) {
+    //How near (in grid units) a net's routing must come to a side of its
+    //router bounding box before that side is pushed outwards.
+    //
+    //Zero means the routing has to sit on (or beyond) the bounding box edge.
+    constexpr int DYNAMIC_BB_DELTA_THRESHOLD = 0;
+
+    auto& route_ctx = g_vpr_ctx.mutable_routing();
+    const auto& grid = g_vpr_ctx.device().grid;
+
+    //The router BB is never grown past the last grid column/row
+    const int last_x = grid.width() - 1;
+    const int last_y = grid.height() - 1;
+
+    size_t expanded_nets = 0;
+
+    //For every updated net: measure the BB of its current routing, then grow
+    //the router's BB by one unit on each side the routing is close to.
+    for (ParentNetId net : updated_nets) {
+        //Nothing to measure unless the net has a route tree and is marked as routed
+        if (!route_ctx.route_trees[net] || !route_ctx.net_status.is_routed(net))
+            continue;
+
+        const t_bb used_bb = calc_current_bb(route_ctx.route_trees[net].value());
+        t_bb& limit_bb = route_ctx.route_bb[net];
+
+        //Per-side slack between the RR nodes in use and the router's BB.
+        //
+        //A net using non-configurable switches may be routed outside its BB,
+        //which makes the slack negative; those sides get expanded as well.
+        //
+        //All four slacks are taken before any side of the BB is moved.
+        const int slack_xmin = used_bb.xmin - limit_bb.xmin;
+        const int slack_xmax = limit_bb.xmax - used_bb.xmax;
+        const int slack_ymin = used_bb.ymin - limit_bb.ymin;
+        const int slack_ymax = limit_bb.ymax - used_bb.ymax;
+
+        bool grew = false;
+
+        //Lower sides: move towards zero, stopping at coordinate 0
+        if (slack_xmin <= DYNAMIC_BB_DELTA_THRESHOLD && limit_bb.xmin > 0) {
+            limit_bb.xmin -= 1;
+            grew = true;
+        }
+
+        if (slack_ymin <= DYNAMIC_BB_DELTA_THRESHOLD && limit_bb.ymin > 0) {
+            limit_bb.ymin -= 1;
+            grew = true;
+        }
+
+        //Upper sides: move away from zero, stopping at the last grid column/row
+        if (slack_xmax <= DYNAMIC_BB_DELTA_THRESHOLD && limit_bb.xmax < last_x) {
+            limit_bb.xmax += 1;
+            grew = true;
+        }
+
+        if (slack_ymax <= DYNAMIC_BB_DELTA_THRESHOLD && limit_bb.ymax < last_y) {
+            limit_bb.ymax += 1;
+            grew = true;
+        }
+
+        //A net is counted once, however many of its sides moved
+        if (grew) {
+            expanded_nets += 1;
+            //Debug aid:
+            //VTR_LOG("Expanded net %6zu router BB to (%d,%d)x(%d,%d) based on net RR node BB (%d,%d)x(%d,%d)\n", size_t(net),
+            //limit_bb.xmin, limit_bb.ymin, limit_bb.xmax, limit_bb.ymax,
+            //used_bb.xmin, used_bb.ymin, used_bb.xmax, used_bb.ymax);
+        }
+    }
+
+    return expanded_nets;
+}
+
+/** Decides whether to give up on further routing convergences.
+ * Only the first iteration after a convergence (itry_since_last_convergence == 1) can trigger the exit. */
+bool early_reconvergence_exit_heuristic(const t_router_opts& router_opts,
+                                        int itry_since_last_convergence,
+                                        std::shared_ptr<const SetupHoldTimingInfo> timing_info,
+                                        const RoutingMetrics& best_routing_metrics) {
+    if (itry_since_last_convergence != 1)
+        return false; // Don't give up
+
+    // Ratio of the current sWNS to the sWNS recorded for the best routing so far
+    const float cpd_ratio = timing_info->setup_worst_negative_slack() / best_routing_metrics.sWNS;
+
+    // At or above reconvergence_cpd_threshold the potential CPD improvement is small: don't spend run-time on it
+    if (cpd_ratio >= router_opts.reconvergence_cpd_threshold) {
+        VTR_LOG("Giving up routing since additional routing convergences seem unlikely to improve quality (CPD ratio: %g)\n", cpd_ratio);
+        return true;
+    }
+    return false; // Improvement still looks worthwhile (a NaN ratio also ends up here)
+}
+
+/** Returns true if the current routing should replace the best routing saved so far (always true if none is saved).
+ * With timing_info: sWNS, sTNS, hWNS, hTNS are compared in that order (larger wins); exact ties fall back to lower wirelength. */
+bool is_better_quality_routing(const vtr::vector<ParentNetId, vtr::optional<RouteTree>>& best_routing,
+                               const RoutingMetrics& best_routing_metrics,
+                               const WirelengthInfo& wirelength_info,
+                               std::shared_ptr<const SetupHoldTimingInfo> timing_info) {
+    if (best_routing.empty())
+        return true; // First legal routing
+
+    // Rank first based on sWNS, followed by other timing metrics
+    if (timing_info) {
+        if (timing_info->setup_worst_negative_slack() > best_routing_metrics.sWNS) {
+            return true;
+        } else if (timing_info->setup_worst_negative_slack() < best_routing_metrics.sWNS) {
+            return false;
+        }
+
+        if (timing_info->setup_total_negative_slack() > best_routing_metrics.sTNS) {
+            return true;
+        } else if (timing_info->setup_total_negative_slack() < best_routing_metrics.sTNS) {
+            return false;
+        }
+
+        if (timing_info->hold_worst_negative_slack() > best_routing_metrics.hWNS) {
+            return true;
+        } else if (timing_info->hold_worst_negative_slack() < best_routing_metrics.hWNS) { // worse hold WNS rejects
+            return false;
+        }
+
+        if (timing_info->hold_total_negative_slack() > best_routing_metrics.hTNS) {
+            return true;
+        } else if (timing_info->hold_total_negative_slack() < best_routing_metrics.hTNS) { // worse hold TNS rejects
+            return false;
+        }
+    }
+    // Finally, wirelength tie breaker
+    return wirelength_info.used_wirelength() < best_routing_metrics.used_wirelength;
+}
+
+/** True once routing is feasible; under YOYO budgeting it additionally needs (hWNS == 0 or rcv_finished) and itry != 1. */
+bool is_iteration_complete(bool routing_is_feasible, const t_router_opts& router_opts, int itry, std::shared_ptr<const SetupHoldTimingInfo> timing_info, bool rcv_finished) {
+    if (!routing_is_feasible)
+        return false;
+    if (router_opts.routing_budgets_algorithm != YOYO)
+        return true;
+
+    // Only the YOYO budgeting algorithm reaches this point
+    return (timing_info->hold_worst_negative_slack() == 0 || rcv_finished) && itry != 1;
+}
+
+/** Runs tatum::TimingReporter::report_timing_setup on router_opts.first_iteration_timing_report_file. */
+void generate_route_timing_reports(const t_router_opts& router_opts,
+                                   const t_analysis_opts& analysis_opts,
+                                   const SetupTimingInfo& timing_info,
+                                   const RoutingDelayCalculator& delay_calc,
+                                   bool is_flat) {
+    auto& atom_ctx = g_vpr_ctx.atom();
+    auto& timing_graph = *g_vpr_ctx.timing().graph;
+    auto& timing_constraints = *g_vpr_ctx.timing().constraints;
+
+    VprTimingGraphResolver name_resolver(atom_ctx.nlist, atom_ctx.lookup, timing_graph, delay_calc, is_flat);
+    name_resolver.set_detail_level(analysis_opts.timing_report_detail);
+    tatum::TimingReporter reporter(name_resolver, timing_graph, timing_constraints);
+    reporter.report_timing_setup(router_opts.first_iteration_timing_report_file, *timing_info.setup_analyzer(), analysis_opts.timing_report_npaths);
+}
+
+/** Largest pin count among the nets that net_is_ignored() does not flag; 0 when there is no such net. */
+int get_max_pins_per_net(const Netlist<>& net_list) {
+    int widest = 0;
+    for (auto net_id : net_list.nets()) {
+        if (net_list.net_is_ignored(net_id)) continue;
+        widest = std::max(widest, static_cast<int>(net_list.net_pins(net_id).size()));
+    }
+    return widest;
+}
+
+/** Logs a failed-routing banner and the overused RR node count, then calls log_overused_nodes_status with the logging limit. */
+void print_overused_nodes_status(const t_router_opts& router_opts, const OveruseInfo& overuse_info) {
+    VTR_LOG("\nFailed routing attempt\n");
+
+    size_t num_overused = overuse_info.overused_nodes;
+    size_t max_logged_overused_rr_nodes = router_opts.max_logged_overused_rr_nodes;
+
+    //Overused nodes info logging upper limit (both locals are size_t, so they are printed with %zu)
+    VTR_LOG("Total number of overused nodes: %zu\n", num_overused);
+    if (num_overused > max_logged_overused_rr_nodes) {
+        VTR_LOG("Total number of overused nodes is larger than the logging limit (%zu).\n", max_logged_overused_rr_nodes);
+        VTR_LOG("Displaying the first %zu entries.\n", max_logged_overused_rr_nodes);
+    }
+    log_overused_nodes_status(max_logged_overused_rr_nodes);
+    VTR_LOG("\n");
+}
+
+/** Logs one row of the routing status table (same column order as print_route_status_header); timing columns show "N/A" when timing_info is null. */
+void print_route_status(int itry, double elapsed_sec, float pres_fac, int num_bb_updated, const RouterStats& router_stats, const OveruseInfo& overuse_info, const WirelengthInfo& wirelength_info, std::shared_ptr<const SetupHoldTimingInfo> timing_info, float est_success_iteration) {
+    //Iteration number
+    VTR_LOG("%4d", itry);
+
+    //Elapsed time
+    VTR_LOG(" %6.1f", elapsed_sec);
+
+    //pres_fac, printed through pretty_print_float with the width/precision below
+    constexpr int PRES_FAC_DIGITS = 7;
+    constexpr int PRES_FAC_SCI_PRECISION = 1;
+    pretty_print_float(" ", pres_fac, PRES_FAC_DIGITS, PRES_FAC_SCI_PRECISION);
+
+    //Number of bounding boxes updated
+    VTR_LOG(" %4d", num_bb_updated);
+
+    //Heap pushes (pops are not printed, only sanity-checked against the pushes)
+    constexpr int HEAP_OP_DIGITS = 7;
+    constexpr int HEAP_OP_SCI_PRECISION = 2;
+    pretty_print_uint(" ", router_stats.heap_pushes, HEAP_OP_DIGITS, HEAP_OP_SCI_PRECISION);
+    VTR_ASSERT(router_stats.heap_pops <= router_stats.heap_pushes);
+
+    //Rerouted nets
+    constexpr int NET_ROUTED_DIGITS = 7;
+    constexpr int NET_ROUTED_SCI_PRECISION = 2;
+    pretty_print_uint(" ", router_stats.nets_routed, NET_ROUTED_DIGITS, NET_ROUTED_SCI_PRECISION);
+
+    //Rerouted connections
+    constexpr int CONN_ROUTED_DIGITS = 7;
+    constexpr int CONN_ROUTED_SCI_PRECISION = 2;
+    pretty_print_uint(" ", router_stats.connections_routed, CONN_ROUTED_DIGITS, CONN_ROUTED_SCI_PRECISION);
+
+    //Overused RR nodes: count, then the overused node ratio as a percentage
+    constexpr int OVERUSE_DIGITS = 7;
+    constexpr int OVERUSE_SCI_PRECISION = 2;
+    pretty_print_uint(" ", overuse_info.overused_nodes, OVERUSE_DIGITS, OVERUSE_SCI_PRECISION);
+    VTR_LOG(" (%6.3f%%)", overuse_info.overused_node_ratio() * 100);
+
+    //Wirelength: used amount, then the used wirelength ratio as a percentage
+    constexpr int WL_DIGITS = 7;
+    constexpr int WL_SCI_PRECISION = 2;
+    pretty_print_uint(" ", wirelength_info.used_wirelength(), WL_DIGITS, WL_SCI_PRECISION);
+    VTR_LOG(" (%4.1f%%)", wirelength_info.used_wirelength_ratio() * 100);
+
+    //CPD: delay of the least-slack critical path, scaled by 1e9 for the (ns) column
+    if (timing_info) {
+        float cpd = timing_info->least_slack_critical_path().delay();
+        VTR_LOG(" %#8.3f", 1e9 * cpd);
+    } else {
+        VTR_LOG(" %8s", "N/A");
+    }
+
+    //sTNS (setup total negative slack), scaled by 1e9
+    if (timing_info) {
+        float sTNS = timing_info->setup_total_negative_slack();
+        VTR_LOG(" % #10.4g", 1e9 * sTNS);
+    } else {
+        VTR_LOG(" %10s", "N/A");
+    }
+
+    //sWNS (setup worst negative slack), scaled by 1e9
+    if (timing_info) {
+        float sWNS = timing_info->setup_worst_negative_slack();
+        VTR_LOG(" % #10.3f", 1e9 * sWNS);
+    } else {
+        VTR_LOG(" %10s", "N/A");
+    }
+
+    //hTNS (hold total negative slack), scaled by 1e9
+    if (timing_info) {
+        float hTNS = timing_info->hold_total_negative_slack();
+        VTR_LOG(" % #10.4g", 1e9 * hTNS);
+    } else {
+        VTR_LOG(" %10s", "N/A");
+    }
+
+    //hWNS (hold worst negative slack), scaled by 1e9
+    if (timing_info) {
+        float hWNS = timing_info->hold_worst_negative_slack();
+        VTR_LOG(" % #10.3f", 1e9 * hWNS);
+    } else {
+        VTR_LOG(" %10s", "N/A");
+    }
+
+    //Estimated success iteration ("N/A" when the estimate is NaN)
+    if (std::isnan(est_success_iteration)) {
+        VTR_LOG(" %8s", "N/A");
+    } else {
+        VTR_LOG(" %8.0f", est_success_iteration);
+    }
+
+    VTR_LOG("\n");
+
+    fflush(stdout);
+}
+
+void print_route_status_header() { //Logs the column titles for the rows written by print_route_status()
+    VTR_LOG("---- ------ ------- ---- ------- ------- ------- ----------------- --------------- -------- ---------- ---------- ---------- ---------- --------\n");
+    VTR_LOG("Iter   Time    pres  BBs    Heap  Re-Rtd  Re-Rtd Overused RR Nodes      Wirelength      CPD       sTNS       sWNS       hTNS       hWNS Est Succ\n");
+    VTR_LOG("      (sec)     fac Updt    push    Nets   Conns                                       (ns)       (ns)       (ns)       (ns)       (ns)     Iter\n");
+    VTR_LOG("---- ------ ------- ---- ------- ------- ------- ----------------- --------------- -------- ---------- ---------- ---------- ---------- --------\n");
+}
+
+/** Builds the criticality histogram for net_list via create_criticality_histogram and hands it to print_histogram. */
+void print_router_criticality_histogram(const Netlist<>& net_list, const SetupTimingInfo& timing_info, const ClusteredPinAtomPinsLookup& netlist_pin_lookup, bool is_flat) {
+    // The trailing 10 is forwarded unchanged as the last argument of create_criticality_histogram
+    auto histogram = create_criticality_histogram(net_list, timing_info, netlist_pin_lookup, is_flat, 10);
+    print_histogram(std::move(histogram));
+}
+
+/** For each net with a route tree: calls connections_inf.clear_force_reroute_for_net(), then prunes the
+ * tree, passing the tree's own non-configurable node set usage to RouteTree::prune. */
+void prune_unused_non_configurable_nets(CBRR& connections_inf,
+                                        const Netlist<>& net_list) {
+    auto& route_ctx = g_vpr_ctx.mutable_routing();
+
+    for (auto net_id : net_list.nets()) {
+        if (!route_ctx.route_trees[net_id])
+            continue; // No routing to prune for this net
+        RouteTree& tree = route_ctx.route_trees[net_id].value();
+
+        connections_inf.clear_force_reroute_for_net(net_id);
+
+        std::vector<int> usage = tree.get_non_config_node_set_usage();
+
+        // Prune the branches of the tree that don't legally lead to sinks
+        tree.prune(connections_inf, &usage);
+    }
+}
+
+vtr::vector<ParentNetId, std::vector<std::unordered_map<RRNodeId, int>>> set_nets_choking_spots(const Netlist<>& net_list,
+                                                                                                const vtr::vector<ParentNetId,
+                                                                                                                  std::vector<std::vector<int>>>& net_terminal_groups,
+                                                                                                const vtr::vector<ParentNetId,
+                                                                                                                  std::vector<int>>& net_terminal_group_num,
+                                                                                                bool has_choking_spot,
+                                                                                                bool is_flat) {
+    // One (initially empty) map per pin of every net; returned as-is when nothing gets identified
+    vtr::vector<ParentNetId, std::vector<std::unordered_map<RRNodeId, int>>> result(net_list.nets().size());
+    for (const auto& net_id : net_list.nets()) {
+        result[net_id].resize(net_list.net_pins(net_id).size());
+    }
+
+    // Architectures without any potential choke points get the empty result
+    if (!has_choking_spot) {
+        return result;
+    }
+
+    // Choke points are only identified when flat routing is enabled
+    VTR_ASSERT(is_flat);
+
+    const auto& device_ctx = g_vpr_ctx.device();
+    const auto& rr_graph = device_ctx.rr_graph;
+    const auto& route_ctx = g_vpr_ctx.routing();
+    const auto& net_rr_terminal = route_ctx.net_rr_terminals;
+
+    for (const auto& net_id : net_list.nets()) {
+        // Global nets are not routed, thus we don't consider them.
+        if (net_list.net_is_global(net_id)) {
+            continue;
+        }
+
+        // ipin is the position of pin_id among the net's pins; position 0 is the net's source pin
+        int ipin = -1;
+        for (auto pin_id : net_list.net_pins(net_id)) {
+            ++ipin;
+            if (ipin == 0) {
+                continue;
+            }
+
+            auto block_id = net_list.pin_block(pin_id);
+            auto blk_loc = get_block_loc(block_id, is_flat);
+            int group_num = net_terminal_group_num[net_id][ipin];
+            // The sinks grouped with pin_id (they share a specific number of parent blocks). Only this group is
+            // considered when looking for the sink's choke points, to keep the run-time down. It is copied on
+            // purpose: its entries are overwritten with ptc numbers below.
+            std::vector<int> sink_grp = net_terminal_groups[net_id][group_num];
+            VTR_ASSERT((int)sink_grp.size() >= 1);
+            if (sink_grp.size() == 1) {
+                continue;
+            }
+
+            // Replace every group member (an RR node index) by the ptc number of that node
+            for (int& sink_rr_num : sink_grp) {
+                sink_rr_num = rr_graph.node_ptc_num(RRNodeId(sink_rr_num));
+            }
+            const auto& loc = blk_loc.loc;
+            auto physical_type = device_ctx.grid.get_physical_type({loc.x, loc.y, loc.layer});
+            // Choke points of the sink at position ipin, given its sink group
+            auto sink_ptc_num = rr_graph.node_ptc_num(RRNodeId(net_rr_terminal[net_id][ipin]));
+            auto sink_choking_spots = get_sink_choking_points(physical_type, sink_ptc_num, sink_grp);
+
+            // Record the RR node of each choke point together with the number of sinks reachable from it
+            for (const auto& choking_spot : sink_choking_spots) {
+                int pin_physical_num = choking_spot.first;
+                int num_reachable_sinks = choking_spot.second;
+                auto pin_rr_node_id = get_pin_rr_node_id(rr_graph.node_lookup(), physical_type,
+                                                         loc.layer, loc.x, loc.y,
+                                                         pin_physical_num);
+                // Choke points without a valid RR node are left out
+                if (pin_rr_node_id != RRNodeId::INVALID()) {
+                    result[net_id][ipin].emplace(pin_rr_node_id, num_reachable_sinks);
+                }
+            }
+        }
+    }
+
+    return result;
+}
+
+/** Wrapper for create_rr_graph(): picks the graph type and channel widths, frees the old RR graph, builds the new one */
+void try_graph(int width_fac,
+               const t_router_opts& router_opts,
+               t_det_routing_arch* det_routing_arch,
+               std::vector<t_segment_inf>& segment_inf,
+               t_chan_width_dist chan_width_dist,
+               t_direct_inf* directs,
+               int num_directs,
+               bool is_flat) {
+    auto& device_ctx = g_vpr_ctx.mutable_device();
+
+    t_graph_type graph_type;
+    t_graph_type graph_directionality;
+    if (router_opts.route_type == GLOBAL) {
+        graph_type = GRAPH_GLOBAL;
+        graph_directionality = GRAPH_BIDIR;
+    } else {
+        graph_directionality = (det_routing_arch->directionality == BI_DIRECTIONAL ? GRAPH_BIDIR : GRAPH_UNIDIR);
+        graph_type = graph_directionality; // non-global graphs use their directionality as the graph type
+    }
+
+    /* Set the channel widths */
+    t_chan_width chan_width = init_chan(width_fac, chan_width_dist, graph_directionality);
+
+    /* Free any old routing graph, if one exists. */
+    free_rr_graph();
+
+    /* Set up the routing resource graph defined by this FPGA architecture. */
+    int warning_count = 0; // initialized because its address is handed to create_rr_graph(); not inspected afterwards
+    create_rr_graph(graph_type,
+                    device_ctx.physical_tile_types,
+                    device_ctx.grid,
+                    chan_width,
+                    det_routing_arch,
+                    segment_inf,
+                    router_opts,
+                    directs, num_directs,
+                    &warning_count,
+                    is_flat);
+}
+
+float update_draw_pres_fac(float new_pres_fac) {
+#ifndef NO_GRAPHICS
+    // Graphics builds only: copy the new present congestion factor into the
+    // draw state; with NO_GRAPHICS defined nothing is written here.
+    get_draw_state_vars()->pres_fac = new_pres_fac;
+
+#endif // NO_GRAPHICS
+    // The input is returned unchanged in either build.
+    return new_pres_fac;
+}
+
+#ifndef NO_GRAPHICS
+void update_router_info_and_check_bp(bp_router_type type, int net_id) {
+    if (get_draw_state_vars()->list_of_breakpoints.size() == 0)
+        return; // no breakpoints registered: nothing to record or check
+    auto* bp_state = get_bp_state_globals()->get_glob_breakpoint_state();
+    if (type == BP_ROUTE_ITER)
+        bp_state->router_iter++;
+    else if (type == BP_NET_ID)
+        bp_state->route_net_id = net_id;
+    f_router_debug = check_for_breakpoints(false);
+    if (!f_router_debug)
+        return; // no breakpoint was hit
+    breakpoint_info_window(bp_state->bp_description, *bp_state, false);
+    update_screen(ScreenUpdatePriority::MAJOR, "Breakpoint Encountered", ROUTING, nullptr);
+}
+#endif
diff --git a/vpr/src/route/route_utils.h b/vpr/src/route/route_utils.h
new file mode 100644
index 00000000000..8b86f230290
--- /dev/null
+++ b/vpr/src/route/route_utils.h
@@ -0,0 +1,148 @@
+#pragma once
+
+/** @file Utility functions used in the top-level router (route.cpp). */
+
+#include "router_stats.h"
+#include "timing_info.h"
+#include "vpr_net_pins_matrix.h"
+#include "vpr_types.h"
+
+#include "RoutingDelayCalculator.h"
+
+constexpr float CONGESTED_SLOPE_VAL = -0.04;
+
+/** Identifies the two breakpoint types in routing */
+typedef enum router_breakpoint_type {
+    BP_ROUTE_ITER, // breakpoint on a router iteration
+    BP_NET_ID      // breakpoint on a net id
+} bp_router_type;
+
+/** Per-iteration congestion mode for the router: focus more on routability after a certain threshold */
+enum class RouterCongestionMode {
+    NORMAL,
+    CONFLICTED // NOTE(review): presumably the routability-focused mode - confirm where the mode is switched
+};
+
+struct RoutingMetrics {
+    size_t used_wirelength = 0;
+    // The four slack figures start out as NaN. NOTE(review): s*/h* presumably mean setup/hold - confirm
+    float sWNS = std::numeric_limits<float>::quiet_NaN();
+    float sTNS = std::numeric_limits<float>::quiet_NaN();
+    float hWNS = std::numeric_limits<float>::quiet_NaN();
+    float hTNS = std::numeric_limits<float>::quiet_NaN();
+    tatum::TimingPathInfo critical_path;
+};
+
+/** Returns the bounding box of a net's used routing resources */
+t_bb calc_current_bb(const RouteTree& tree);
+
+/** Get available wirelength for the current RR graph */
+size_t calculate_wirelength_available();
+
+/** Calculate wirelength for the current routing and populate a WirelengthInfo */
+WirelengthInfo calculate_wirelength_info(const Netlist<>& net_list, size_t available_wirelength);
+
+/** Checks that the net delays computed incrementally during timing driven
+ * routing match those computed from scratch by the net_delay.cpp module. */
+bool check_net_delays(const Netlist<>& net_list, NetPinsMatrix<float>& net_delay);
+
+/** Update bounding box for net if existing routing is close to boundary */
+size_t dynamic_update_bounding_boxes(const std::vector<ParentNetId>& updated_nets);
+
+/** Early exit code for cases where it is obvious that a successful route will not be found
+ * Heuristic: If total wirelength used in first routing iteration is X% of total available wirelength, exit */
+bool early_exit_heuristic(const t_router_opts& router_opts, const WirelengthInfo& wirelength_info);
+
+/** Give up on reconvergent routing if the CPD improvement after the
+ * first iteration since convergence is small, compared to the best
+ * CPD seen so far */
+bool early_reconvergence_exit_heuristic(const t_router_opts& router_opts,
+                                        int itry_since_last_convergence,
+                                        std::shared_ptr<const SetupHoldTimingInfo> timing_info,
+                                        const RoutingMetrics& best_routing_metrics);
+
+void generate_route_timing_reports(const t_router_opts& router_opts,
+                                   const t_analysis_opts& analysis_opts,
+                                   const SetupTimingInfo& timing_info,
+                                   const RoutingDelayCalculator& delay_calc,
+                                   bool is_flat);
+
+/** Get the maximum number of pins used in the netlist (used to allocate things) */
+int get_max_pins_per_net(const Netlist<>& net_list);
+
+/** Initialize net_delay based on best-case delay estimates from the router lookahead. */
+void init_net_delay_from_lookahead(const RouterLookahead& router_lookahead,
+                                   const Netlist<>& net_list,
+                                   const vtr::vector<ParentNetId, std::vector<RRNodeId>>& net_rr_terminals,
+                                   NetPinsMatrix<float>& net_delay,
+                                   const RRGraphView& rr_graph,
+                                   bool is_flat);
+
+bool is_better_quality_routing(const vtr::vector<ParentNetId, vtr::optional<RouteTree>>& best_routing,
+                               const RoutingMetrics& best_routing_metrics,
+                               const WirelengthInfo& wirelength_info,
+                               std::shared_ptr<const SetupHoldTimingInfo> timing_info);
+
+/** This function checks if a routing iteration has completed.
+ * When VPR is run normally, we check if routing_budgets_algorithm is disabled, and if the routing is legal
+ * With the introduction of yoyo budgeting algorithm, we must check if there are no hold violations
+ * in addition to routing being legal and the correct budgeting algorithm being set. */
+bool is_iteration_complete(bool routing_is_feasible, const t_router_opts& router_opts, int itry, std::shared_ptr<const SetupHoldTimingInfo> timing_info, bool rcv_finished);
+
+void print_overused_nodes_status(const t_router_opts& router_opts, const OveruseInfo& overuse_info);
+
+void print_route_status(int itry, double elapsed_sec, float pres_fac, int num_bb_updated, const RouterStats& router_stats, const OveruseInfo& overuse_info, const WirelengthInfo& wirelength_info, std::shared_ptr<const SetupHoldTimingInfo> timing_info, float est_success_iteration);
+
+void print_route_status_header();
+
+void print_router_criticality_histogram(const Netlist<>& net_list,
+                                        const SetupTimingInfo& timing_info,
+                                        const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
+                                        bool is_flat);
+
+/** Prune stubs of non-config nodes from route_ctx.route_trees.
+ * If a route is ripped up during routing, non-configurable sets are left
+ * behind. As a result, the final routing may have stubs at
+ * non-configurable sets. This function tracks non-configurable set usage,
+ * and if the sets are unused, prunes them. */
+void prune_unused_non_configurable_nets(CBRR& connections_inf,
+                                        const Netlist<>& net_list);
+
+/** If flat_routing and has_choking_spot are true, there are some choke points inside the cluster which would increase the convergence time of routing.
+ * To address this issue, the congestion cost of those choke points needs to decrease. This function identifies those choke points for each net,
+ * and since the amount of congestion reduction is dependent on the number of sinks reachable from that choke point, it also stores the number of reachable sinks
+ * for each choke point.
+ * @param net_list
+ * @param net_terminal_groups [Net_id][group_id] -> rr_node_id of the pins in the group
+ * @param net_terminal_group_num [Net_id][pin_id] -> group_id
+ * @param has_choking_spot is true if the given architecture has choking spots inside the cluster
+ * @param is_flat is true if flat_routing is enabled
+ * @return [Net_id][pin_id] -> [choke_point_rr_node_id, number of sinks reachable by this choke point] */
+vtr::vector<ParentNetId, std::vector<std::unordered_map<RRNodeId, int>>> set_nets_choking_spots(const Netlist<>& net_list,
+                                                                                                const vtr::vector<ParentNetId,
+                                                                                                                  std::vector<std::vector<int>>>& net_terminal_groups,
+                                                                                                const vtr::vector<ParentNetId,
+                                                                                                                  std::vector<int>>& net_terminal_group_num,
+                                                                                                bool has_choking_spot,
+                                                                                                bool is_flat);
+
+/** Wrapper for create_rr_graph(): picks the graph type and channel widths, frees the old RR graph, builds the new one */
+void try_graph(int width_fac,
+               const t_router_opts& router_opts,
+               t_det_routing_arch* det_routing_arch,
+               std::vector<t_segment_inf>& segment_inf,
+               t_chan_width_dist chan_width_dist,
+               t_direct_inf* directs,
+               int num_directs,
+               bool is_flat);
+
+/* This routine should take the new value of the present congestion factor
+ * and propagate it to all the relevant data fields in the vpr flow.
+ * Currently, it only updates the pres_fac used by the drawing functions */
+float update_draw_pres_fac(float new_pres_fac);
+
+#ifndef NO_GRAPHICS
+/** Updates router iteration information and checks for router iteration and net id breakpoints
+ * Stops after the specified router iteration or net id is encountered */
+void update_router_info_and_check_bp(bp_router_type type, int net_id);
+#endif
diff --git a/vpr/src/route/router_delay_profiling.cpp b/vpr/src/route/router_delay_profiling.cpp
index eac8fdf28c4..d0b95283641 100644
--- a/vpr/src/route/router_delay_profiling.cpp
+++ b/vpr/src/route/router_delay_profiling.cpp
@@ -1,7 +1,7 @@
 #include "router_delay_profiling.h"
 #include "globals.h"
 #include "route_common.h"
-#include "route_timing.h"
+#include "route_net.h"
 #include "route_export.h"
 #include "route_tree.h"
 #include "rr_graph.h"
@@ -89,8 +89,7 @@ bool RouterDelayProfiler::calculate_delay(RRNodeId source_node,
         cost_params,
         bounding_box,
         router_stats,
-        conn_params,
-        true);
+        conn_params);
 
     if (found_path) {
         VTR_ASSERT(cheapest.index == sink_node);
diff --git a/vpr/src/route/router_delay_profiling.h b/vpr/src/route/router_delay_profiling.h
index 11d8eb25f1d..13bae0d0301 100644
--- a/vpr/src/route/router_delay_profiling.h
+++ b/vpr/src/route/router_delay_profiling.h
@@ -2,7 +2,6 @@
 #define ROUTER_DELAY_PROFILING_H_
 
 #include "vpr_types.h"
-#include "route_timing.h"
 #include "binary_heap.h"
 #include "connection_router.h"
 
diff --git a/vpr/src/route/router_lookahead.cpp b/vpr/src/route/router_lookahead.cpp
index 14b6aaa1959..545704e7d06 100644
--- a/vpr/src/route/router_lookahead.cpp
+++ b/vpr/src/route/router_lookahead.cpp
@@ -1,10 +1,10 @@
 #include "router_lookahead.h"
 
+#include "connection_router_interface.h"
 #include "router_lookahead_map.h"
 #include "router_lookahead_extended_map.h"
 #include "vpr_error.h"
 #include "globals.h"
-#include "route_timing.h"
 
 static int get_expected_segs_to_target(RRNodeId inode, RRNodeId target_node, int* num_segs_ortho_dir_ptr);
 static int round_up(float x);
diff --git a/vpr/src/route/router_lookahead_extended_map.cpp b/vpr/src/route/router_lookahead_extended_map.cpp
index b176372e686..102a176f92e 100644
--- a/vpr/src/route/router_lookahead_extended_map.cpp
+++ b/vpr/src/route/router_lookahead_extended_map.cpp
@@ -4,6 +4,7 @@
 #include <queue>
 #include <mutex>
 
+#include "connection_router_interface.h"
 #include "rr_node.h"
 #include "router_lookahead_map_utils.h"
 #include "router_lookahead_sampling.h"
@@ -14,8 +15,8 @@
 #include "echo_files.h"
 #include "rr_graph.h"
 
-#include "route_timing.h"
 #include "route_common.h"
+#include "route_debug.h"
 
 #ifdef VTR_ENABLE_CAPNPROTO
 #    include "capnp/serialize.h"
diff --git a/vpr/src/route/router_lookahead_map.cpp b/vpr/src/route/router_lookahead_map.cpp
index 5f5f8e886f2..0eea1a6c913 100644
--- a/vpr/src/route/router_lookahead_map.cpp
+++ b/vpr/src/route/router_lookahead_map.cpp
@@ -25,6 +25,7 @@
 #include <vector>
 #include <queue>
 #include <ctime>
+#include "connection_router_interface.h"
 #include "vpr_types.h"
 #include "vpr_error.h"
 #include "vpr_utils.h"
@@ -39,7 +40,6 @@
 #include "rr_graph2.h"
 #include "rr_graph.h"
 #include "route_common.h"
-#include "route_timing.h"
 
 #ifdef VTR_ENABLE_CAPNPROTO
 #    include "capnp/serialize.h"
diff --git a/vpr/src/route/router_lookahead_map_utils.cpp b/vpr/src/route/router_lookahead_map_utils.cpp
index 01c0e79d16c..c9c7017f83c 100644
--- a/vpr/src/route/router_lookahead_map_utils.cpp
+++ b/vpr/src/route/router_lookahead_map_utils.cpp
@@ -1,7 +1,6 @@
 #include "router_lookahead_map_utils.h"
 
-/*
- * This file contains utility functions that can be shared among different
+/** @file This file contains utility functions that can be shared among different
  * lookahead computation strategies.
  *
  * In general, this utility library contains:
@@ -9,15 +8,14 @@
  * - Different dijkstra expansion algorithms used to perform specific tasks, such as computing the SOURCE/OPIN --> CHAN lookup tables
  * - Cost Entries definitions used when generating and querying the lookahead
  *
- * To access the utility functions, the util namespace needs to be used.
- */
+ * To access the utility functions, the util namespace needs to be used. */
 
 #include "globals.h"
 #include "vpr_context.h"
 #include "vtr_math.h"
 #include "vtr_time.h"
 #include "route_common.h"
-#include "route_timing.h"
+#include "route_debug.h"
 
 static void dijkstra_flood_to_wires(int itile, RRNodeId inode, util::t_src_opin_delays& src_opin_delays, util::t_src_opin_inter_layer_delays& src_opin_inter_layer_delays, bool is_multi_layer);
 
diff --git a/vpr/src/route/router_stats.h b/vpr/src/route/router_stats.h
index 47e91731179..4f999a722d1 100644
--- a/vpr/src/route/router_stats.h
+++ b/vpr/src/route/router_stats.h
@@ -51,6 +51,30 @@ struct RouterStats {
     size_t add_all_rt_from_high_fanout = 0;
     size_t add_high_fanout_rt = 0;
     size_t add_all_rt = 0;
+
+    /** Add rhs's stats to mine; rhs is only read, so it is taken by const reference */
+    void combine(const RouterStats& rhs) {
+        connections_routed += rhs.connections_routed;
+        nets_routed += rhs.nets_routed;
+        heap_pushes += rhs.heap_pushes;
+        inter_cluster_node_pushes += rhs.inter_cluster_node_pushes;
+        intra_cluster_node_pushes += rhs.intra_cluster_node_pushes;
+        heap_pops += rhs.heap_pops;
+        inter_cluster_node_pops += rhs.inter_cluster_node_pops;
+        intra_cluster_node_pops += rhs.intra_cluster_node_pops;
+        for (int node_type_idx = 0; node_type_idx < t_rr_type::NUM_RR_TYPES; node_type_idx++) {
+            inter_cluster_node_type_cnt_pushes[node_type_idx] += rhs.inter_cluster_node_type_cnt_pushes[node_type_idx];
+            inter_cluster_node_type_cnt_pops[node_type_idx] += rhs.inter_cluster_node_type_cnt_pops[node_type_idx];
+            intra_cluster_node_type_cnt_pushes[node_type_idx] += rhs.intra_cluster_node_type_cnt_pushes[node_type_idx];
+            intra_cluster_node_type_cnt_pops[node_type_idx] += rhs.intra_cluster_node_type_cnt_pops[node_type_idx];
+            rt_node_pushes[node_type_idx] += rhs.rt_node_pushes[node_type_idx];
+            rt_node_high_fanout_pushes[node_type_idx] += rhs.rt_node_high_fanout_pushes[node_type_idx];
+            rt_node_entire_tree_pushes[node_type_idx] += rhs.rt_node_entire_tree_pushes[node_type_idx];
+        }
+        add_all_rt += rhs.add_all_rt;
+        add_all_rt_from_high_fanout += rhs.add_all_rt_from_high_fanout;
+        add_high_fanout_rt += rhs.add_high_fanout_rt;
+    }
 };
 
 class WirelengthInfo {
diff --git a/vpr/test/test_connection_router.cpp b/vpr/test/test_connection_router.cpp
index 0fef4f22a84..1b0c236a29a 100644
--- a/vpr/test/test_connection_router.cpp
+++ b/vpr/test/test_connection_router.cpp
@@ -1,6 +1,7 @@
 #include <tuple>
 #include "catch2/catch_test_macros.hpp"
 
+#include "route_net.h"
 #include "rr_graph_fwd.h"
 #include "vpr_api.h"
 #include "vpr_signal_handler.h"
@@ -75,8 +76,7 @@ static float do_one_route(RRNodeId source_node,
                                                                                                         cost_params,
                                                                                                         bounding_box,
                                                                                                         router_stats,
-                                                                                                        conn_params,
-                                                                                                        true);
+                                                                                                        conn_params);
 
     // Default delay is infinity, which indicates that a route was not found.
     float delay = std::numeric_limits<float>::infinity();
diff --git a/vtr_flow/scripts/python_libs/vtr/task.py b/vtr_flow/scripts/python_libs/vtr/task.py
index 6bf898a5d22..0cfb6f3ebbe 100644
--- a/vtr_flow/scripts/python_libs/vtr/task.py
+++ b/vtr_flow/scripts/python_libs/vtr/task.py
@@ -633,9 +633,11 @@ def create_job(
             prev_run_dir = get_existing_run_dir(find_task_dir(config, args.alt_tasks_dir), prev_run)
             prev_work_path = Path(prev_run_dir) / work_dir / param_string
             prev_file = prev_work_path / "{}.{}".format(Path(circuit).stem, extension)
-            if not prev_file.exists():
-                raise FileNotFoundError("use_previous: file %s not found" % str(prev_file))
-            current_cmd += [option, str(prev_file)]
+            if option == "REPLACE_BLIF":
+                current_cmd[0] = str(prev_file)
+                current_cmd += ["-start", "vpr"]
+            else:
+                current_cmd += [option, str(prev_file)]
 
     if param_string != "common":
         current_cmd += param.split(" ")
diff --git a/vtr_flow/scripts/python_libs/vtr/util.py b/vtr_flow/scripts/python_libs/vtr/util.py
index 8eec41661ba..e19935b4b21 100644
--- a/vtr_flow/scripts/python_libs/vtr/util.py
+++ b/vtr_flow/scripts/python_libs/vtr/util.py
@@ -442,23 +442,24 @@ def format_elapsed_time(time_delta):
     "route": ["route", "--route_file"],
     "rr_graph": ["rr_graph.xml", "--read_rr_graph"],
     "lookahead": ["lookahead.bin", "--read_router_lookahead"],
+    "blif": ["pre-vpr.blif", "REPLACE_BLIF"],
 }
 
 
-def argparse_use_previous(inp: str) -> List[Tuple[str, List]]:
+def argparse_use_previous(x: str) -> List[Tuple[str, List]]:
     """
     Parse a -use_previous parameter. Throw if not valid.
     Returns a list with (run dir name, [extension, cmdline option]) elements.
     """
-    tokens = [w.strip() for w in inp.split(",")]
+    tokens = [w.strip() for w in x.split(",")]
     tokens = [w for w in tokens if len(w)]
     out = []
     for w in tokens:
         r = re.fullmatch(r"(\w+):(\w+)", w)
         if not r:
-            raise argparse.ArgumentTypeError("Invalid input to -use_previous: %s" % w)
+            raise argparse.ArgumentError("Invalid input to -use_previous: %s" % w)
         if not REUSABLE_FILES.get(r.group(2)):
-            raise argparse.ArgumentTypeError(
+            raise argparse.ArgumentError(
                 "Unknown file type to use_previous: %s, available types: %s"
                 % (r.group(2), ",".join(REUSABLE_FILES.keys()))
             )